In [None]:
#!pip install wandb
#import wandb
#wandb.init()

In [None]:
import os         #Import required modules
import cv2
import math
import random
import numpy as np
import datetime as dt
import tensorflow as tf

import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import *
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import plot_model



In [None]:
# One fixed seed is handed to every random number generator the notebook
# relies on (Python's `random`, NumPy and TensorFlow) so runs are repeatable.
seed_constant = 23
random.seed(seed_constant)
np.random.seed(seed_constant)
tf.random.set_seed(seed_constant)

In [None]:
# Mount Google Drive at /gdrive, then make that mount point the working directory.
from google.colab import drive    
drive.mount('/gdrive')
%cd /gdrive                      

In [None]:
# Step into the project folder on Drive. The dataset folders used by later
# cells ("hmdb51", "UCF50") are given as relative paths, so they are resolved
# from this working directory.
%cd 'My Drive/'                   

%cd 'Action Recognition'


In [None]:
# List the contents of the current working directory.
%ls

In [None]:
#Constants

# Name of the chosen dataset.
# NOTE(review): no other cell shown here reads `datasetName`; the loaders use
# `dataset_directory` instead - confirm whether this constant is still needed.
datasetName= 'hmdb51'              #Chosen Dataset

In [None]:
# Frame geometry and sampling settings shared by the loaders and the model.
image_height = 64                      #Frame height in pixels after resizing
image_width = 64                       #Frame width in pixels after resizing
images_per_class = 8000                #Number of frames sampled for every class
dataset_directory = "hmdb51"           #Folder holding one sub-folder of videos per class
classes_list = [                       #Classes chosen from the dataset
    "pullup",
    "punch",
    "dive",
    "fencing",
    "ride_bike",
    "golf",
]
model_output_size = len(classes_list)  #One model output per chosen class


In [None]:
def frames_extraction(video_path):  #helper function to extract frames from videos
    """Read every frame of a video, resize it and divide it by 255.

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.

    Returns:
        A list with one array per decoded frame, each resized to
        ``image_height`` rows by ``image_width`` columns and divided by 255.
        The list is empty when no frame can be read.
    """
    frames_list = []
    video_reader = cv2.VideoCapture(video_path)
    try:
        while True:
            success, frame = video_reader.read()
            if not success:     #No further frame could be read
                break
            #cv2.resize takes dsize as (width, height); the result then has
            #image_height rows and image_width columns, as the model expects
            resized_frame = cv2.resize(frame, (image_width, image_height))
            frames_list.append(resized_frame / 255)
    finally:
        video_reader.release()  #Free the capture handle even if a frame fails to process
    return frames_list

In [None]:
def create_dataset():    #Create dataset function
    """Build the frame-level dataset for the classes in ``classes_list``.

    For each class, the frames of all videos found in
    ``dataset_directory/<class_name>`` are pooled and up to
    ``images_per_class`` of them are drawn at random without replacement.

    Returns:
        (features, labels): ``features`` is a NumPy array of the sampled
        frames and ``labels`` a NumPy array holding the class index of each
        sampled frame, in the same order.
    """
    features = []  #Sampled frames of all classes
    labels = []    #Class index of every sampled frame

    for class_index, class_name in enumerate(classes_list):
        print(f'Extracting Data of Class: {class_name}')
        class_directory = os.path.join(dataset_directory, class_name)

        #os.listdir returns entries in arbitrary order; sorting keeps the
        #seeded sampling below repeatable from run to run
        class_frames = []
        for file_name in sorted(os.listdir(class_directory)):
            video_file_path = os.path.join(class_directory, file_name)
            class_frames.extend(frames_extraction(video_file_path))

        #random.sample raises ValueError when asked for more items than exist,
        #so cap the request and keep the label count in step with it
        sample_size = min(images_per_class, len(class_frames))
        if sample_size < images_per_class:
            print(f'Warning: class {class_name} only has {sample_size} frames (wanted {images_per_class})')
        features.extend(random.sample(class_frames, sample_size))
        labels.extend([class_index] * sample_size)

    features = np.asarray(features)   #Convert both to numpy arrays
    labels = np.array(labels)
    return features, labels

In [None]:
# Runs the frame extraction and sampling over every class in classes_list.
features, labels = create_dataset()   #Fetch data

In [None]:
# Report the shapes of the frame array and of the label array, one per line.
print(features.shape, labels.shape, sep="\n")

In [None]:
# Convert the integer class indices into one-hot vectors. num_classes is given
# explicitly so the vector width always equals the model's output size; left
# to itself, to_categorical infers the width from the largest label present.
one_hot_encoded_labels = to_categorical(labels, num_classes = model_output_size)

In [None]:
# Shuffled 80/20 split into training and test sets, seeded for repeatability.
# NOTE(review): the split is made over individual frames, so frames taken from
# the same video can land in both sets - confirm this is acceptable.
(features_train, features_test,
 labels_train, labels_test) = train_test_split(
    features,
    one_hot_encoded_labels,
    test_size = 0.2,
    shuffle = True,
    random_state = seed_constant,
)

In [None]:
def create_model():  #Create NN
    """Assemble the frame classifier as a Keras ``Sequential`` model.

    Five 3x3 convolutions with 64 filters each, batch normalisation, 2x2 max
    pooling and global average pooling feed two 288-unit dense layers, a
    second batch normalisation and a softmax layer of ``model_output_size``
    units. The model summary is printed before the uncompiled model is
    returned.
    """
    conv_settings = dict(filters = 64, kernel_size = (3, 3), activation = 'relu')

    network = Sequential()

    #Convolutional feature extractor; only the first layer declares the input shape
    network.add(Conv2D(input_shape = (image_height, image_width, 3), **conv_settings))
    for _ in range(4):
        network.add(Conv2D(**conv_settings))
    network.add(BatchNormalization())
    network.add(MaxPooling2D(pool_size = (2, 2)))
    network.add(GlobalAveragePooling2D())

    #Classifier head
    for _ in range(2):
        network.add(Dense(288, activation = 'relu'))
    network.add(BatchNormalization())
    network.add(Dense(model_output_size, activation = 'softmax'))

    network.summary()  #Show model summary

    return network


model = create_model()


In [None]:
# Draw the layer graph of the model, annotated with tensor shapes and layer names.
plot_model(model,show_shapes = True, show_layer_names = True)  #Plot model diagram

In [None]:
# Adam with a small step size. `learning_rate` is the supported argument name
# (`lr` is a deprecated alias that newer Keras releases reject), and the
# optimizer is taken from tf.keras so it comes from the same Keras
# implementation as the tensorflow.keras model it trains.
optimizer = tf.keras.optimizers.Adam(learning_rate = 0.0001)

# Stop once val_loss has not improved for 10 epochs and restore the best weights seen.
early_stopping_callback = EarlyStopping(monitor = 'val_loss', patience = 10, mode = 'min', restore_best_weights = True)

model.compile(loss = 'categorical_crossentropy', optimizer = optimizer, metrics = ["accuracy"]) #Choose optimizer, loss and metrics

# validation_split = 0.2 holds out a fifth of the training data; its loss is what early stopping monitors.
model_training_history = model.fit(x = features_train, y = labels_train, epochs = 40, batch_size = 16 , shuffle = True, validation_split = 0.2, callbacks = [early_stopping_callback])

In [None]:
from sklearn.metrics import classification_report

# Loss and accuracy on the held-out test set.
model_evaluation_history = model.evaluate(features_test, labels_test)

# Turn the softmax outputs and the one-hot targets back into class indices.
y_pred = model.predict(features_test, batch_size=4, verbose=1)
y_pred_bool = y_pred.argmax(axis=1)
l_test = labels_test.argmax(axis=1)

# Per-class report comparing true and predicted class indices.
print(classification_report(l_test, y_pred_bool))






In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix of true class indices against predicted class indices on the test set.
cm = confusion_matrix(l_test, y_pred_bool)
print(cm)

In [None]:
# Build a file name that records when the model was saved and how it scored,
# so that several saved models can be told apart (OPTIONAL).
current_date_time_dt = dt.datetime.now()
date_time_format = '%Y_%m_%d__%H_%M_%S'
current_date_time_string = current_date_time_dt.strftime(date_time_format)

# The evaluation result unpacks into the loss followed by the accuracy metric.
model_evaluation_loss, model_evaluation_accuracy = model_evaluation_history
model_name = f'Model___Date_Time_{current_date_time_string}___Loss_{model_evaluation_loss}___Accuracy_{model_evaluation_accuracy}.h5'

# Write the trained model to disk under that name.
model.save(model_name)

In [None]:
def plot_metric(metric_name_1, metric_name_2, plot_name):
    """Plot two recorded training metrics against the epoch index.

    Args:
        metric_name_1: Key in ``model_training_history.history``; drawn in blue.
        metric_name_2: Key in ``model_training_history.history``; drawn in red.
        plot_name: Title of the plot (converted with ``str``).
    """
    history = model_training_history.history
    first_series = history[metric_name_1]
    second_series = history[metric_name_2]

    # The x axis is the epoch index, sized by the first metric
    epoch_axis = range(len(first_series))

    # One line per metric, labelled with the metric's name
    for series, colour, label in (
        (first_series, 'blue', metric_name_1),
        (second_series, 'red', metric_name_2),
    ):
        plt.plot(epoch_axis, series, colour, label = label)

    plt.title(str(plot_name))
    plt.legend()

In [None]:
# Training loss (blue) against validation loss (red), per epoch.
plot_metric('loss', 'val_loss', 'Total Loss vs Total Validation Loss')

In [None]:
# Training accuracy (blue) against validation accuracy (red), per epoch.
plot_metric('accuracy', 'val_accuracy', 'Total Accuracy vs Total Validation Accuracy')

In [None]:
from collections import Counter

def get_first_mode(a):
    """Return the most frequent item of ``a``; ties go to the item seen first.

    Args:
        a: Iterable of hashable items. One-shot iterators are accepted
            because the input is traversed only once.

    Returns:
        The item with the highest count. When several items share that count,
        the one whose first occurrence comes earliest in ``a``.

    Raises:
        ValueError: If ``a`` contains no items.
    """
    c = Counter(a)
    if not c:
        raise ValueError("get_first_mode() arg is an empty sequence")
    # Counter keeps its keys in first-seen order and max() returns the first
    # maximal key it meets, so ties resolve to the earliest item.
    return max(c, key=c.get)

In [None]:
def frames_extraction2(video_path, frame_interval = 25):
    """Sample every ``frame_interval``-th frame of a video, starting with the first.

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.
        frame_interval: Frames whose index is a multiple of this value are
            kept. Defaults to 25, the spacing that used to be hard-coded.

    Returns:
        A list of the kept frames, each resized to ``image_height`` rows by
        ``image_width`` columns and divided by 255. The list is empty when no
        frame can be read.

    Raises:
        ValueError: If ``frame_interval`` is smaller than 1.
    """
    if frame_interval < 1:
        raise ValueError("frame_interval must be at least 1")

    frames_list = []
    vidObj = cv2.VideoCapture(video_path)

    # Index of the frame currently being read
    count = 0

    try:
        while True:
            success, image = vidObj.read()

            # A failed read is how the end of the video shows up, so it is
            # not reported as an error.
            if not success:
                break

            # Index 0 is a multiple of every interval, so the first frame is always kept
            if count % frame_interval == 0:
                # cv2.resize takes dsize as (width, height)
                image = cv2.resize(image, (image_width, image_height))
                frames_list.append(image / 255)

            count += 1
    finally:
        vidObj.release()    # Free the capture handle even if a frame fails to process

    return frames_list

In [None]:
#Evaluating a different dataset

from tqdm import tqdm
from statistics import mode


predict = []    # Predicted class index of every evaluated video
actual = []     # True class index of every evaluated video
dataset_directory2="UCF50"

cc=0    # Running count of evaluated videos

# Iterating through all the classes mentioned in the classes list
for class_index, class_name in enumerate(classes_list):
    print(f'Extracting Data of Class: {class_name}')

    # Getting the list of video files present in the specific class name directory
    files_list = os.listdir(os.path.join(dataset_directory2, class_name))

    # Iterating through all the files present in the files list
    for file_name in files_list:

        # Construct the complete video path
        video_file_path = os.path.join(dataset_directory2, class_name, file_name)

        # Sampled frames of the current video
        frames = frames_extraction2(video_file_path)

        # A video that yields no frames has nothing to vote on; skip it rather
        # than let the majority vote fail on an empty list
        if not frames:
            print(f'Skipping {video_file_path}: no frames could be read')
            continue

        # One batched forward pass per video. Sequential.predict_classes is no
        # longer available in current TensorFlow; the argmax over the softmax
        # outputs gives the same class indices.
        frame_probabilities = model.predict(np.asarray(frames), verbose = 0)
        temppred = np.argmax(frame_probabilities, axis = -1).tolist()

        # Video-level prediction = most frequent frame-level prediction
        video_prediction = get_first_mode(temppred)

        print (temppred)
        print ("mode", video_prediction, cc)
        cc+=1
        predict.append(video_prediction)
        actual.append(class_index)





In [None]:
# Video-level report for the second dataset: true class index against the
# majority-vote prediction of each video.
print(classification_report(actual, predict))


In [None]:
# Video-level confusion matrix for the second dataset (true vs. predicted class index).
print(confusion_matrix(actual, predict))