In [None]:
#!pip install wandb
#import wandb
#wandb.init()

In [None]:
#Importing Modules

#Importing Keras and the sub-modules needed
import keras
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.models import Sequential, Model 
from keras.layers import *
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping

#Importing miscellaneous modules
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
 
 #Importing sklearn modules to calculate different metrics and create different tables
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import multilabel_confusion_matrix
from tensorflow.keras.utils import to_categorical


In [None]:
#Access Google Drive, which is used as the location for all code, datasets and relevant files
#(google.colab is only importable inside a Colab runtime)
from google.colab import drive
drive.mount('/gdrive')
#Go to the root of the mounted Google Drive
%cd /gdrive   

In [None]:
#Navigate to the folder where all the datasets are.
#A single absolute path keeps this cell idempotent: it can be re-run at any time,
#whereas chained relative directory changes fail once the kernel is already inside the folder.
os.chdir('/gdrive/My Drive/Action Recognition')
print(os.getcwd())  #Echo the working directory that the relative dataset paths resolve against

In [None]:
#Dataset folder (relative to the working directory); create_data expects one sub-folder per class inside it
data_dir = "hmdb51/"

#Pixel size every extracted frame is resized to
img_height = 64
img_width = 64

#Number of frames/samples taken from each video
seq_len = 70

#Class names (sub-folder names) selected from the chosen dataset
classes = [
    "pullup",
    "punch",
    "dive",
    "fencing",
    "ride_bike",
    "golf",
]

In [None]:
def frames_extraction(video_path):
    """Read up to ``seq_len`` frames from a video and resize each of them.

    Relies on the notebook-level settings ``seq_len``, ``img_height`` and
    ``img_width``.

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.

    Returns:
        List of resized frames in reading order. The list holds fewer than
        ``seq_len`` frames (possibly none) when a read fails before enough
        frames were collected; callers use the length to detect that.
    """
    frames_list = []
    vidObj = cv2.VideoCapture(video_path)

    try:
        while len(frames_list) < seq_len:
            success, image = vidObj.read()                      #CV2 function used to read images from videos
            if not success:
                print("Defected frame")                         #Print message if frame not able to be read
                break
            #cv2.resize takes its target size as (width, height); this yields arrays of
            #shape (img_height, img_width, channels)
            frames_list.append(cv2.resize(image, (img_width, img_height)))
    finally:
        vidObj.release()                                        #Always free the capture handle, even on errors

    return frames_list
 
def create_data(input_dir):
    """Load the videos of the selected classes as frame arrays with one-hot labels.

    Relies on the notebook-level settings ``classes`` and ``seq_len`` and on
    ``frames_extraction``. Directory listings are sorted because
    ``os.listdir`` returns entries in arbitrary order; sorting keeps the
    sample order (and therefore any seeded split of the result) reproducible.

    Args:
        input_dir: Dataset folder containing one sub-folder per class name.

    Returns:
        Tuple ``(X, Y)`` of numpy arrays. ``X`` stacks the frame lists of every
        video that yielded exactly ``seq_len`` frames; ``Y`` holds the matching
        one-hot label rows, one column per entry of ``classes``.
    """
    X = []
    Y = []

    print(sorted(os.listdir(input_dir)))                        #Full class list of the dataset (test purposes)
    for class_index, c in enumerate(classes):                   #Iterate through list of chosen classes
        print(c)
        class_dir = os.path.join(input_dir, c)
        for f in sorted(os.listdir(class_dir)):                 #Every entry in the current class folder
            frames = frames_extraction(os.path.join(class_dir, f))
            if len(frames) != seq_len:                          #Skip videos that did not yield enough frames
                continue
            X.append(frames)
            label = [0] * len(classes)                          #One-hot label for the current class
            label[class_index] = 1
            Y.append(label)

    return np.asarray(X), np.asarray(Y)                         #Convert lists to numpy arrays


In [None]:
#Fetch the frame arrays (X) and one-hot labels (Y) for the chosen dataset and classes
X, Y = create_data(data_dir)

#print (X.shape)
#print (Y.shape)



In [None]:
#Split video data and labels into train (80%) and test (20%) sets; the fixed random_state makes the shuffle repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.20,
    shuffle=True,
    random_state=0,
)

#Show the shape of every split as a quick sanity check
for split_array in (X_train, y_train, X_test, y_test):
    print(split_array.shape)



In [None]:
model = Sequential()  #Model initiated and layers added whilst specifying hyperparameters

#ConvLSTM2D takes a whole clip of seq_len frames (img_height x img_width x 3 channels);
#return_sequences=False means only its last output is passed on to the following layers
model.add(ConvLSTM2D(filters = 64, kernel_size = (3, 3), return_sequences = False, data_format = "channels_last", input_shape = (seq_len, img_height, img_width, 3)))
model.add(Dropout(0.2))

model.add(Conv2D(filters = 128, kernel_size = (3, 3), activation = 'relu'))
model.add(Dropout(0.2))

model.add(Flatten())

model.add(Dense(256, activation="relu"))
model.add(Dense(256, activation="relu"))
model.add(Dropout(0.3))

#One softmax unit per selected class, so the output always matches the one-hot labels built from `classes`
model.add(Dense(len(classes), activation = "softmax"))
model.summary()  #Print summary of model

#Specify training algorithm and learning rate (`learning_rate` replaces the deprecated `lr` argument)
opt = keras.optimizers.SGD(learning_rate=0.001)
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=["accuracy"])  #Specify Loss and Accuracy metrics as well

In [None]:
#Early stopping: wait `patience` epochs without improvement in validation loss,
#then stop and revert to the best weights seen
earlystop = EarlyStopping(
    monitor='val_loss',
    patience=10,
    mode='min',
    restore_best_weights=True,
)
callbacks = [earlystop]

#Fit the model: number of epochs, batch size and the share of the training set held out for validation
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=40,
    batch_size=8,
    shuffle=True,
    validation_split=0.2,
    callbacks=callbacks,
)


In [None]:
model_evaluation_history = model.evaluate(X_test, y_test)  #Evaluate model on test set


from sklearn.metrics import classification_report #Produce report with extra metrics

y_pred = model.predict(X_test, batch_size=4, verbose=1)

y_pred = np.argmax(y_pred, axis = 1)
y_test = np.argmax(y_test, axis = 1)
 
print(classification_report(y_test, y_pred))

In [None]:
from sklearn.metrics import confusion_matrix  #Produce confusion matrix to show each class' performance
cm = confusion_matrix(y_test, y_pred)

print (cm)

In [None]:
def plot_metric(metric_name_1, metric_name_2, plot_name):
    """Draw two recorded training metrics against the epoch index on one plot.

    Reads the values from the notebook-level ``history`` object returned by
    ``model.fit``. The first metric is drawn in blue, the second in red, and
    each curve is labelled with its metric name in the legend.

    Args:
        metric_name_1: Key into ``history.history`` for the blue curve.
        metric_name_2: Key into ``history.history`` for the red curve.
        plot_name: Title of the plot (converted with ``str``).
    """
    recorded = history.history
    # Look both metrics up before drawing anything
    curves = [
        (metric_name_1, recorded[metric_name_1], 'blue'),
        (metric_name_2, recorded[metric_name_2], 'red'),
    ]

    # The x axis is the epoch index, sized by the first metric
    x_axis = range(len(curves[0][1]))
    for curve_label, curve_values, colour in curves:
        plt.plot(x_axis, curve_values, colour, label=curve_label)

    plt.title(str(plot_name))
    plt.legend()

In [None]:
#Plot training loss against validation loss per epoch (values come from the `history` returned by model.fit)
plot_metric('loss', 'val_loss', 'Total Loss vs Total Validation Loss')

In [None]:
#Plot training accuracy against validation accuracy per epoch (values come from the `history` returned by model.fit)
plot_metric('accuracy', 'val_accuracy', 'Total Accuracy vs Total Validation Accuracy')

In [None]:
#Evaluate on a new, different dataset with similar classes

data_dir2 = "UCF50/"  
 
#Reuse the helper functions defined earlier to extract frames and one-hot labels from the new dataset
#NOTE(review): create_data looks up sub-folders named after the entries of `classes`, so this
#presumably expects UCF50/ to contain folders with exactly those names - confirm
X1, Y1 = create_data(data_dir2)


In [None]:
#Evaluate the trained model on the whole of the new dataset (all of X1/Y1, no train/test split)
Eval_Hist = model.evaluate(X1, Y1)


In [None]:
Y2 = model.predict(X1, batch_size=4, verbose=1)  #produce extra metrics for prediction on new dataset

Y2 = np.argmax(Y2, axis = 1)
Y1 = np.argmax(Y1, axis = 1)
 
print(classification_report(Y1, Y2))

In [None]:
print(confusion_matrix(Y1, Y2))  #produce confusion matrix for prediction on new dataset