### Import Libraries

In [1]:
import os,cv2,math,random
import numpy as np
import datetime as dt
import tensorflow as tf
from collections import deque
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import *
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical,plot_model
from tensorflow.keras.callbacks import EarlyStopping
import pandas as pd

### Preprocess Data

In [2]:
# Spatial size (pixels) every frame is resized to.
IMG_HEIGHT, IMG_WIDTH = 100, 100

# Number of frames sampled from each video.
SEQ_LEN = 10

# Folder holding the videos, and the class names the model predicts.
DATA_DIR = 'data'
CLASSES = [
    'close',
    'open',
    'pick',
    'pitch',
    'yaw',
    'roll',
    'place',
]

In [3]:
class FrameExtractor:
    """Sample a fixed number of evenly spaced frames from every video in a folder.

    Each sampled frame is resized to (IMG_HEIGHT, IMG_WIDTH) and scaled by 1/255.
    The per-video frame lists are accumulated in ``frames_set``.
    """

    def __init__(self,dir_name,SEQ_LEN=10,IMG_HEIGHT=100,IMG_WIDTH=100):
        """
        dir_name   : folder whose files are read as videos
        SEQ_LEN    : number of frames sampled per video
        IMG_HEIGHT : height of each stored frame
        IMG_WIDTH  : width of each stored frame
        """
        self.dir_name = dir_name
        self.frames_set = []
        self.SEQ_LEN = SEQ_LEN
        self.IMG_WIDTH = IMG_WIDTH
        self.IMG_HEIGHT = IMG_HEIGHT

    def extract_frames(self):
        """Run get_frames() on every file in dir_name and append the results to frames_set.

        Videos that yield fewer than SEQ_LEN frames are skipped (with a printed
        notice) so that frames_set stays rectangular and can be turned into one array.
        """
        # Sorted so the order of frames_set does not depend on the file system.
        for video_name in sorted(os.listdir(self.dir_name)):
            # listdir returns bare names; the reader needs the path inside dir_name.
            video_path = os.path.join(self.dir_name,video_name)
            if not os.path.isfile(video_path):
                continue

            frames = self.get_frames(video_path)

            if len(frames) != self.SEQ_LEN:
                print(f"Skipping {video_path}: got {len(frames)} of {self.SEQ_LEN} frames")
                continue

            self.frames_set.append(frames)

    def get_frames(self,path):
        """Return up to SEQ_LEN resized, normalized frames read from the video at ``path``.

        The list is shorter than SEQ_LEN when a read fails before all frames are collected.
        """
        video_reader = cv2.VideoCapture(path)
        frames = []

        try:
            #Total number of frames
            video_frames_count = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))

            #Skip interval (at least 1 so short videos still advance)
            skip_count = max(video_frames_count // self.SEQ_LEN,1)

            for frame_counter in range(self.SEQ_LEN):
                #Set current frame position
                video_reader.set(cv2.CAP_PROP_POS_FRAMES,frame_counter*skip_count)

                #Read frame
                success,frame = video_reader.read()

                #Stop at the first frame that cannot be read
                if not success:
                    break

                #Resize frame; cv2.resize takes the target size as (width, height)
                resized_frame = cv2.resize(frame,(self.IMG_WIDTH,self.IMG_HEIGHT))

                #Normalize image to [0, 1] and keep the processed frame
                frames.append(resized_frame / 255)
        finally:
            # Release the capture even if reading or resizing raised.
            video_reader.release()

        return frames
    
    
class Build_DataSet:
    """Pairs a video folder with its class names; build() fills in the frame data."""

    def __init__(self,folder_name,classes):
        """Remember the folder and class names; no frames are read until build()."""
        self.data = None
        self.classes = np.array(classes)
        self.folder_name = folder_name

    def build(self):
        """Extract frames from folder_name and store them as an array in self.data."""
        extractor = FrameExtractor(self.folder_name)
        extractor.extract_frames()
        collected_frames = extractor.frames_set
        self.data = np.asarray(collected_frames)
            

### Split data


In [4]:
dataset = Build_DataSet(DATA_DIR,CLASSES)
# build() is what actually extracts the frames; without it dataset.data stays None.
dataset.build()
labels = dataset.classes
features = dataset.data


In [5]:
# One-hot encode the class indices 0..len(labels)-1 (one row per class name),
# then show the table with the class names as rows.
class_ids = np.arange(labels.shape[0])
ohe_labels = to_categorical(class_ids)
pd.DataFrame(data=ohe_labels,columns=labels).transpose()

Unnamed: 0,0,1,2,3,4,5,6
approach,1.0,0.0,0.0,0.0,0.0,0.0,0.0
pick,0.0,1.0,0.0,0.0,0.0,0.0,0.0
lift,0.0,0.0,1.0,0.0,0.0,0.0,0.0
move,0.0,0.0,0.0,1.0,0.0,0.0,0.0
bring down,0.0,0.0,0.0,0.0,1.0,0.0,0.0
place,0.0,0.0,0.0,0.0,0.0,1.0,0.0
move away,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [None]:
# Fixed random_state so the shuffled 70/30 split is the same on every run.
# NOTE(review): train_test_split needs features and ohe_labels to have the same
# length; ohe_labels has one row per class name, not per video - confirm how
# per-video labels are meant to be produced.
features_train,features_test,labels_train,labels_test = train_test_split(
    features,ohe_labels,test_size=0.3,shuffle=True,random_state=42)

### Model 1. CNN-LSTM

In [51]:
class CNN_LSTM:
    """Keras video classifier with helpers to configure, compile, train, evaluate, save and plot.

    Two architectures are available. Mind the method names: ``cnn_lstm`` (the one
    the constructor uses) stacks ConvLSTM2D layers, while ``convlstm`` builds a
    per-frame CNN (TimeDistributed Conv2D) followed by an LSTM.

    Typical order of calls: config_model() -> build() -> train() -> test() -> save() / plot().
    """

    def __init__(self):
        # Filled in by build(), train() and test() respectively.
        self.early_stopping = None
        self.history = None
        self.test_results = None
        self.cnn_lstm()

    def convlstm(self):
        """Replace self.model with a TimeDistributed CNN + LSTM network and print its summary."""
        self.model = Sequential()

        # Input is one clip of shape (SEQ_LEN, IMG_HEIGHT, IMG_WIDTH, 3).
        # Every conv uses a 3x3 kernel with 'same' padding, so only the pooling
        # layers shrink the spatial size.

        #1
        # The input shape belongs to the TimeDistributed wrapper (the model's first
        # layer), not to the wrapped Conv2D, which only ever sees single frames.
        self.model.add(TimeDistributed(Conv2D(filters=16,kernel_size=(3,3),activation='relu',padding='same'),
                                       input_shape=(SEQ_LEN,IMG_HEIGHT,IMG_WIDTH,3)))
        self.model.add(TimeDistributed(MaxPooling2D(pool_size=(4,4))))
        self.model.add(TimeDistributed(Dropout(0.25)))

        #2
        self.model.add(TimeDistributed(Conv2D(filters=32,kernel_size=(3,3),activation='relu',padding='same')))
        self.model.add(TimeDistributed(MaxPooling2D(pool_size=(4,4))))
        self.model.add(TimeDistributed(Dropout(0.25)))

        #3
        self.model.add(TimeDistributed(Conv2D(filters=64,kernel_size=(3,3),activation='relu',padding='same')))
        self.model.add(TimeDistributed(MaxPooling2D(pool_size=(2,2))))
        self.model.add(TimeDistributed(Dropout(0.25)))

        #4 (no dropout after the last conv block)
        self.model.add(TimeDistributed(Conv2D(filters=64,kernel_size=(3,3),activation='relu',padding='same')))
        self.model.add(TimeDistributed(MaxPooling2D(pool_size=(2,2))))

        #Flattening: one feature vector per frame
        self.model.add(TimeDistributed(Flatten()))

        #LSTM over the sequence of frame features
        self.model.add(LSTM(32))

        #Dense layer: one probability per class
        self.model.add(Dense(len(CLASSES),activation="softmax"))

        #Display summary
        self.model.summary()

    def cnn_lstm(self):
        """Replace self.model with a stacked ConvLSTM2D network and print its summary."""
        self.model = Sequential()

        # Input is one clip of shape (SEQ_LEN, IMG_HEIGHT, IMG_WIDTH, 3).
        # Each ConvLSTM2D uses a 3x3 kernel with the default padding; each
        # MaxPooling3D keeps the time axis (pool size 1) and pools the two
        # spatial axes by 2 and 3.

        #1
        self.model.add(ConvLSTM2D(filters=4,kernel_size=(3,3),activation='tanh',
                             data_format="channels_last",recurrent_dropout=0.2,
                             return_sequences=True,input_shape=(SEQ_LEN,IMG_HEIGHT,IMG_WIDTH,3)))
        self.model.add(MaxPooling3D(pool_size=(1,2,3),padding="same",data_format="channels_last"))
        self.model.add(TimeDistributed(Dropout(0.2)))

        #2
        self.model.add(ConvLSTM2D(filters=8,kernel_size=(3,3),activation='tanh',
                             data_format="channels_last",recurrent_dropout=0.2,
                             return_sequences=True))
        self.model.add(MaxPooling3D(pool_size=(1,2,3),padding="same",data_format="channels_last"))
        self.model.add(TimeDistributed(Dropout(0.2)))

        #3
        self.model.add(ConvLSTM2D(filters=14,kernel_size=(3,3),activation='tanh',
                             data_format="channels_last",recurrent_dropout=0.2,
                             return_sequences=True))
        self.model.add(MaxPooling3D(pool_size=(1,2,3),padding="same",data_format="channels_last"))
        self.model.add(TimeDistributed(Dropout(0.2)))

        #4 (no dropout after the last block)
        self.model.add(ConvLSTM2D(filters=16,kernel_size=(3,3),activation='tanh',
                             data_format="channels_last",recurrent_dropout=0.2,
                             return_sequences=True))
        self.model.add(MaxPooling3D(pool_size=(1,2,3),padding="same",data_format="channels_last"))

        #Flattening
        self.model.add(Flatten())

        #Dense layer: one probability per class
        self.model.add(Dense(len(CLASSES),activation="softmax"))

        #Display summary
        self.model.summary()


    def config_model(self,num_iterations,num_epocs=50,batch_size=4,learning_rate=0.0001,val_split=0.2,shuffle=True):
        """Store the training hyperparameters on the instance and print them.

        num_iterations is only stored and printed by this class. learning_rate is
        applied to the Adam optimizer when build() compiles the model.
        """
        #Model Hyperparameters
        self.num_epocs = num_epocs
        self.batch_size = batch_size
        self.num_iterations = num_iterations
        self.learning_rate = learning_rate
        self.loss_func = "categorical_crossentropy"
        self.optimizer = "Adam"  # name only; build() creates the optimizer object
        self.val_split = val_split
        self.shuffle = shuffle

        print('***************** Model hyperparameters ********************\n')
        print(f"\t\t\tNo. Iterations  : {self.num_iterations}\n\
                        No. epocs       : {self.num_epocs}\n\
                        Batch Size      : {self.batch_size}\n\
                        Learning Rate   : {self.learning_rate}\n\
                        Loss Function   : Cross Entropy Loss\n\
                        Optimizer       : Adam Optimizer\n ")

    def build(self):
        """Create the early-stopping callback and compile the model.

        Reads the values set by config_model(), so that must be called first.
        """
        self.early_stopping = EarlyStopping(monitor='val_loss',patience=10,mode='min',restore_best_weights=True)
        # Passing the optimizer by name would silently use Adam's default learning
        # rate, so build it explicitly with the configured one.
        optimizer = tf.keras.optimizers.Adam(learning_rate=self.learning_rate)
        self.model.compile(loss=self.loss_func,optimizer=optimizer,metrics=['accuracy'])

    def train(self,X,Y):
        """Fit the model on X/Y with the configured hyperparameters; the History goes to self.history."""
        # fit() needs a compiled model and the early-stopping callback; compile
        # here if the caller has not called build() yet.
        if getattr(self,"early_stopping",None) is None:
            self.build()

        print("Training the model...")
        self.history = self.model.fit(x=X,y=Y,epochs=self.num_epocs,batch_size=self.batch_size,
                                 shuffle=self.shuffle,validation_split=self.val_split,callbacks=[self.early_stopping])
        print("Training finished.")

    def test(self,X,Y):
        """Evaluate the model on X/Y, store the result in self.test_results and print it."""
        print("Testing the model...")
        self.test_results = self.model.evaluate(x=X,y=Y)
        print(self.test_results)
        print("Testing finished.")

    def save(self):
        """Save the model to a time-stamped .h5 file in the working directory.

        When test() has been run, its loss and accuracy are embedded in the file
        name; otherwise the name carries the time stamp only.
        """
        # %m is the month (%M is minutes, already used for the time part).
        time_format = "%Y_%m_%d_%H_%M_%S"
        time_string = dt.datetime.now().strftime(time_format)

        results = getattr(self,"test_results",None)
        if results is None:
            model_file_name = f"model-{time_string}.h5"
        else:
            loss,accuracy = results
            model_file_name = f"model-{time_string}__Loss-{loss}__Accuracy-{accuracy}.h5"

        print("Saving the model as ",model_file_name)

        self.model.save(model_file_name)

    def plot(self):
        """Plot training/validation loss and accuracy per epoch from the last train() run.

        Raises RuntimeError if train() has not been called.
        """
        history = getattr(self,"history",None)
        if history is None:
            raise RuntimeError("Nothing to plot: call train() first.")

        # The per-epoch metrics live in History.history. Early stopping can end
        # training before num_epocs, so the x axis follows the recorded length.
        metrics = history.history
        epochs = range(1,len(metrics['loss'])+1)

        fig,ax = plt.subplots(2)
        fig.suptitle("Training plots")

        panels = (('loss',"Loss"),('accuracy',"Accuracy"))
        for axis,(key,y_label) in zip(ax,panels):
            axis.plot(epochs,metrics[key],'blue',label=f"Training {y_label.lower()}")
            # Validation curves exist only when a validation split was used.
            val_key = f"val_{key}"
            if val_key in metrics:
                axis.plot(epochs,metrics[val_key],'orange',label=f"Validation {y_label.lower()}")
            axis.set_xlabel("Epochs")
            axis.set_ylabel(y_label)
            axis.legend()

        plt.show()

In [52]:
# The constructor builds the network through cnn_lstm() and prints its summary.
model = CNN_LSTM()

Model: "sequential_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv_lstm2d_40 (ConvLSTM2D)  (None, 10, 98, 98, 4)    1024      
                                                                 
 max_pooling3d_40 (MaxPoolin  (None, 10, 49, 33, 4)    0         
 g3D)                                                            
                                                                 
 time_distributed_30 (TimeDi  (None, 10, 49, 33, 4)    0         
 stributed)                                                      
                                                                 
 conv_lstm2d_41 (ConvLSTM2D)  (None, 10, 47, 31, 8)    3488      
                                                                 
 max_pooling3d_41 (MaxPoolin  (None, 10, 24, 11, 8)    0         
 g3D)                                                            
                                                     

### Model 2. LRCN

In [53]:
# Write a diagram of the network (with layer names and shapes) to model.png.
# As the output of this cell shows, plot_model needs pydot and graphviz installed.
plot_model(model.model,to_file="model.png",show_shapes=True,show_layer_names=True)

You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model/model_to_dot to work.


In [54]:
# Store and print the hyperparameters: 5 is num_iterations, every other value
# keeps config_model's default.
model.config_model(5)

***************** Model hyperparameters ********************

			No. Iterations  : 5
                        No. epocs       : 50
                        Batch Size      : 4
                        Learning Rate   : 0.0001
                        Loss Function   : Cross Entropy Loss
                        Optimizer       : Adam Optimizer
 


### Training the model

In [55]:
# Compile first: build() sets up the loss/optimizer and creates the
# early-stopping callback that train() hands to fit().
model.build()
model.train(features_train,labels_train)

# save() puts the test loss and accuracy into the file name, so evaluate first.
model.test(features_test,labels_test)
model.save()

NameError: name 'features_train' is not defined

### Testing the model

In [56]:
# Evaluate on the held-out split; test() stores the result in model.test_results and prints it.
model.test(features_test,labels_test)

NameError: name 'features_test' is not defined