# Gesture Recognition
In this group project, you are going to build a 3D Conv model that will be able to predict the 5 gestures correctly. Please import the following libraries to get started.

In [1]:
#Importing necessary libraries
import numpy as np
import os
from imageio import imread
import cv2
import datetime
import os

from keras.models import Sequential, Model
from keras.layers import Dense, GRU, LSTM, Flatten, TimeDistributed, Flatten, BatchNormalization, Activation, Dropout, GlobalAveragePooling2D
from keras.layers.convolutional import Conv3D, MaxPooling3D, Conv2D, MaxPooling2D
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from keras import optimizers
from keras.regularizers import l2

#### We set the random seed to reproduce the results.

In [2]:
# Additional imports used by the seeding calls below and by later cells.
import random as rn
import tensorflow as tf
from keras import backend as K
from keras.regularizers import l2

# Seed NumPy, Python's `random` module and TensorFlow with the same value
# so that repeated runs draw the same random numbers.
SEED = 30
np.random.seed(SEED)
rn.seed(SEED)
tf.random.set_seed(SEED)

#### We read the folder names for training and validation. We also set the `batch_size` here. 

In [3]:
# Each CSV line describes one video. The lines are read inside `with` blocks so
# the file handles are closed straight away, then shuffled once up front.
with open('train.csv') as train_file:
    train_doc = np.random.permutation(train_file.readlines())
with open('val.csv') as val_file:
    val_doc = np.random.permutation(val_file.readlines())
batch_size = 15 #experiment with the batch size

## Generator

In [4]:
# Every frame is resized to default_y x default_z (120 x 120) pixels.
default_y = default_z = 120
# Positions of the frames taken from each video folder: 0 through 29.
image_idx = list(range(30))

In [5]:
# Batch generator for the Conv3D architecture, plus its private helpers.
def _normalise_channels(image):
    """Min-max scale each colour channel of ``image`` to the range [0, 1].

    Every channel is scaled independently with its own minimum and maximum.
    A channel holding a single constant value maps to all zeros instead of
    the NaNs that a 0/0 division would produce.
    """
    lowest = image.min(axis=(0, 1), keepdims=True)
    spread = image.max(axis=(0, 1), keepdims=True) - lowest
    spread[spread == 0] = 1  # constant channel: avoid dividing by zero
    return (image - lowest) / spread


def _load_batch(source_path, lines, img_idx):
    """Load the videos described by ``lines`` into one batch.

    Args:
        source_path: directory that contains one sub-folder per video.
        lines: CSV rows of the form ``folder;...;label`` (';'-separated; the
            first field is the folder name, the third the class index).
        img_idx: positions of the frames to take from each folder.

    Returns:
        ``(batch_data, batch_labels)`` where ``batch_data`` has shape
        ``(len(lines), len(img_idx), default_y, default_z, 3)`` and
        ``batch_labels`` is the one-hot label matrix of shape ``(len(lines), 5)``.
    """
    batch_data = np.zeros((len(lines), len(img_idx), default_y, default_z, 3))
    batch_labels = np.zeros((len(lines), 5))
    for row, line in enumerate(lines):
        fields = line.strip().split(';')
        folder_path = source_path + '/' + fields[0]
        # os.listdir returns names in arbitrary order; sort so that img_idx
        # picks frames in a stable order (assumes the file names sort in
        # temporal order -- TODO confirm against the dataset).
        frames = sorted(os.listdir(folder_path))
        for idx, item in enumerate(img_idx):
            image = imread(folder_path + '/' + frames[item]).astype(np.float32)
            # cv2.resize takes dsize as (width, height); the result then has
            # shape (default_y, default_z, channels), matching batch_data.
            image = cv2.resize(image, (default_z, default_y))
            batch_data[row, idx] = _normalise_channels(image[:, :, :3])
        batch_labels[row, int(fields[2])] = 1
    return batch_data, batch_labels


def generator(source_path, folder_list, batch_size):
    """Yield ``(batch_data, batch_labels)`` pairs forever.

    Each pass over the data reshuffles ``folder_list`` and yields
    ``ceil(len(folder_list) / batch_size)`` batches: full batches first, then
    one smaller batch with the leftover videos when the sizes do not divide
    evenly. No empty batch is ever produced.

    Args:
        source_path: directory that contains one sub-folder per video.
        folder_list: CSV rows (``folder;...;label``), one per video.
        batch_size: number of videos per full batch.

    Yields:
        Tuples ``(batch_data, batch_labels)`` as built by ``_load_batch``.

    Raises:
        ValueError: if ``folder_list`` is empty (raised on the first ``next()``).
    """
    print( 'Source path = ', source_path, '; batch size =', batch_size)
    if len(folder_list) == 0:
        raise ValueError('folder_list is empty; there is nothing to generate')
    img_idx = image_idx # frame positions to use for every video
    while True:
        t = np.random.permutation(folder_list)
        # Stepping by batch_size makes the final slice the (possibly shorter)
        # remainder batch, read from the same offsets for data and labels.
        for start in range(0, len(t), batch_size):
            yield _load_batch(source_path, t[start:start + batch_size], img_idx)


Note here that a video is represented above in the generator as (number of images, height, width, number of channels). Take this into consideration while creating the model architecture.

In [6]:
# Timestamp taken once for this run.
curr_dt_time = datetime.datetime.now()

# Directories holding the training and validation videos.
train_path, val_path = 'train', 'val'

# One CSV row per video, so the row counts are the dataset sizes.
num_train_sequences, num_val_sequences = len(train_doc), len(val_doc)
num_epochs = 50 # choose the number of epochs

print('# training sequences =', num_train_sequences)
print('# validation sequences =', num_val_sequences)
print ('# epochs =', num_epochs)

# training sequences = 663
# validation sequences = 100
# epochs = 50


# Model building process
We build the model from the layers that Keras provides. Because this is a 3D convolution model, it uses `Conv3D` and `MaxPooling3D` rather than `Conv2D` and `MaxPooling2D`, and its last layer is a softmax. After defining the model, the next step is to `compile` it and then print its `summary`, which shows the total number of parameters that have to be trained.

### Let us create the `train_generator` and the `val_generator`, which will be passed to `model.fit`.

In [7]:
# One endless batch generator per data split; nothing is read from disk
# until the first batch is requested.
train_generator = generator(source_path=train_path,
                            folder_list=train_doc,
                            batch_size=batch_size)
val_generator = generator(source_path=val_path,
                          folder_list=val_doc,
                          batch_size=batch_size)

# Model 1

In [8]:
# Directory that collects this run's checkpoints; the timestamp in the name
# keeps separate runs apart.
model_name = 'model_init' + '_' + str(curr_dt_time).replace(' ','').replace(':','_') + '/'

# exist_ok=True makes this safe to re-run and avoids the check-then-create gap
# of os.path.exists followed by os.mkdir.
os.makedirs(model_name, exist_ok=True)

# The file name records the epoch number and the train/validation loss and accuracy.
filepath = model_name + 'model-{epoch:05d}-{loss:.5f}-{categorical_accuracy:.5f}-{val_loss:.5f}-{val_categorical_accuracy:.5f}.h5'

# Save the full model after every epoch. `save_freq='epoch'` replaces the
# deprecated `period=1` argument and has the same meaning.
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=False, save_weights_only=False, mode='auto', save_freq='epoch')

# Multiply the learning rate by 0.2 when val_loss has not improved for 4 epochs.
LR = ReduceLROnPlateau(monitor='val_loss', factor=0.2, verbose=1, patience=4)

# Callbacks handed to model.fit.
callbacks_list = [checkpoint, LR]



### The `steps_per_epoch` and `validation_steps` are used by `fit` to decide the number of `next()` calls it needs to make on each generator.

In [9]:
# Ceiling division: one step per full batch, plus one more step when a
# partial batch is left over.
steps_per_epoch = -(-num_train_sequences // batch_size)
validation_steps = -(-num_val_sequences // batch_size)

### Build the model.

In [10]:

# Model 1: three convolutional stages (two Conv3D layers, each followed by
# ReLU and batch normalisation, then max-pooling and light dropout), two
# dense layers with heavy dropout, and a 5-way softmax output.
model = Sequential([
    # Stage 1
    Conv3D(8, (2, 2, 2), padding='same',
           input_shape=(len(image_idx), default_z, default_y, 3)),
    Activation('relu'),
    BatchNormalization(),
    Conv3D(16, (2, 2, 2)),
    Activation('relu'),
    BatchNormalization(),
    MaxPooling3D(pool_size=(2, 2, 2)),
    Dropout(0.05),

    # Stage 2
    Conv3D(16, (2, 2, 2), padding='same'),
    Activation('relu'),
    BatchNormalization(),
    Conv3D(32, (2, 2, 2)),
    Activation('relu'),
    BatchNormalization(),
    MaxPooling3D(pool_size=(2, 2, 2)),
    Dropout(0.05),

    # Stage 3
    Conv3D(32, (2, 2, 2), padding='same'),
    Activation('relu'),
    BatchNormalization(),
    Conv3D(32, (2, 2, 2)),
    Activation('relu'),
    BatchNormalization(),
    MaxPooling3D(pool_size=(2, 2, 2)),
    Dropout(0.05),

    # Classifier head
    Flatten(),
    Dense(256),
    Activation('relu'),
    Dropout(0.5),
    Dense(512),
    Activation('relu'),
    Dropout(0.5),

    # One output per gesture class
    Dense(5),
    Activation('softmax'),
])

- No. Convolution Layers - 6
- Convolution Filter Size- (2,2,2)
- Pooling filter size - (2,2,2)
- Batch Size - 15
- Epochs - 10
- Image size - (120, 120)

### Compile the model and print the summary

In [11]:
# Adam with a learning rate of 0.001. Categorical cross-entropy matches the
# one-hot labels produced by the generator.
optimiser = tf.keras.optimizers.Adam(learning_rate = 0.001)
model.compile(optimizer=optimiser, loss='categorical_crossentropy', metrics=['categorical_accuracy'])
# summary() prints the table itself and returns None, so wrapping it in
# print() would only add a stray "None" line to the output.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv3d (Conv3D)             (None, 30, 120, 120, 8)   200       
                                                                 
 activation (Activation)     (None, 30, 120, 120, 8)   0         
                                                                 
 batch_normalization (BatchN  (None, 30, 120, 120, 8)  32        
 ormalization)                                                   
                                                                 
 conv3d_1 (Conv3D)           (None, 29, 119, 119, 16)  1040      
                                                                 
 activation_1 (Activation)   (None, 29, 119, 119, 16)  0         
                                                                 
 batch_normalization_1 (Batc  (None, 29, 119, 119, 16)  64       
 hNormalization)                                        

### Fit the Model

In [None]:
# Train for 10 epochs on batches drawn from the generators. The step counts
# tell Keras how many batches to pull per epoch and per validation pass.
model.fit(
    train_generator,
    epochs=10,
    initial_epoch=0,
    steps_per_epoch=steps_per_epoch,
    validation_data=val_generator,
    validation_steps=validation_steps,
    callbacks=callbacks_list,
    class_weight=None,
    workers=1,
    verbose=1,
)

Source path =  train ; batch size = 15
Epoch 1/10

Epoch 1: saving model to model_init_2022-05-1722_06_33.726637\model-00001-4.15101-0.32428-2.87815-0.17000.h5
Epoch 2/10
Epoch 2: saving model to model_init_2022-05-1722_06_33.726637\model-00002-1.61003-0.35445-3.40708-0.14000.h5
Epoch 3/10
Epoch 3: saving model to model_init_2022-05-1722_06_33.726637\model-00003-1.45171-0.43439-5.29215-0.16000.h5
Epoch 4/10
Epoch 4: saving model to model_init_2022-05-1722_06_33.726637\model-00004-1.37141-0.45852-5.08423-0.17000.h5
Epoch 5/10
Epoch 5: saving model to model_init_2022-05-1722_06_33.726637\model-00005-1.24391-0.53243-2.93043-0.19000.h5

Epoch 5: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 6/10
Epoch 6: saving model to model_init_2022-05-1722_06_33.726637\model-00006-1.08217-0.55053-2.31894-0.21000.h5
Epoch 7/10
Epoch 7: saving model to model_init_2022-05-1722_06_33.726637\model-00007-1.01134-0.62293-1.97046-0.31000.h5
Epoch 8/10
Epoch 8: saving model to model_

### Findings
 - Model is overfitting 
 - Train loss: 0.8216
 - Train categorical_accuracy: 0.7210 
 - val_loss: 1.2255 
 - val_categorical_accuracy: 0.6100

# Model 2

### Building the model

In [None]:

# Model 2: same layout as Model 1 but with stronger dropout after the
# convolutional stages (0.1, 0.1, 0.2) and a 256-unit second dense layer.
model = Sequential([
    # Stage 1
    Conv3D(8, (2, 2, 2), padding='same',
           input_shape=(len(image_idx), default_z, default_y, 3)),
    Activation('relu'),
    BatchNormalization(),
    Conv3D(16, (2, 2, 2)),
    Activation('relu'),
    BatchNormalization(),
    MaxPooling3D(pool_size=(2, 2, 2)),
    Dropout(0.1),

    # Stage 2
    Conv3D(16, (2, 2, 2), padding='same'),
    Activation('relu'),
    BatchNormalization(),
    Conv3D(32, (2, 2, 2)),
    Activation('relu'),
    BatchNormalization(),
    MaxPooling3D(pool_size=(2, 2, 2)),
    Dropout(0.1),

    # Stage 3
    Conv3D(32, (2, 2, 2), padding='same'),
    Activation('relu'),
    BatchNormalization(),
    Conv3D(32, (2, 2, 2)),
    Activation('relu'),
    BatchNormalization(),
    MaxPooling3D(pool_size=(2, 2, 2)),
    Dropout(0.2),

    # Classifier head
    Flatten(),
    Dense(256),
    Activation('relu'),
    Dropout(0.5),
    Dense(256),
    Activation('relu'),
    Dropout(0.5),

    # One output per gesture class
    Dense(5),
    Activation('softmax'),
])

### Compiling and printing the summary

In [None]:
# Adam with a learning rate of 0.001, passed as `learning_rate` (the `lr`
# alias is deprecated). Categorical cross-entropy matches the one-hot labels
# produced by the generator.
optimiser = tf.keras.optimizers.Adam(learning_rate = 0.001)
model.compile(optimizer=optimiser, loss='categorical_crossentropy', metrics=['categorical_accuracy'])
# summary() prints the table itself and returns None, so wrapping it in
# print() would only add a stray "None" line to the output.
model.summary()

### Fit the Model

In [None]:
# Train for 25 epochs on batches drawn from the generators. The step counts
# tell Keras how many batches to pull per epoch and per validation pass.
model.fit(
    train_generator,
    epochs=25,
    initial_epoch=0,
    steps_per_epoch=steps_per_epoch,
    validation_data=val_generator,
    validation_steps=validation_steps,
    callbacks=callbacks_list,
    class_weight=None,
    workers=1,
    verbose=1,
)

### Findings
- Model is overfitting
- Train loss: 0.5430
- Train categorical_accuracy: 0.7768
- val_loss: 1.1338
- val_categorical_accuracy: 0.6900

# Model 3

### Build the model

In [None]:

# Model 3: no dropout after the first convolutional stage, a (3, 3, 3)
# kernel in the last convolution, and batch normalisation after each dense
# layer (128 then 256 units).
model = Sequential([
    # Stage 1 (dropout deliberately left out here)
    Conv3D(8, (2, 2, 2), padding='same',
           input_shape=(len(image_idx), default_z, default_y, 3)),
    Activation('relu'),
    BatchNormalization(),
    Conv3D(16, (2, 2, 2)),
    Activation('relu'),
    BatchNormalization(),
    MaxPooling3D(pool_size=(2, 2, 2)),

    # Stage 2
    Conv3D(16, (2, 2, 2), padding='same'),
    Activation('relu'),
    BatchNormalization(),
    Conv3D(32, (2, 2, 2)),
    Activation('relu'),
    BatchNormalization(),
    MaxPooling3D(pool_size=(2, 2, 2)),
    Dropout(0.1),

    # Stage 3
    Conv3D(32, (2, 2, 2), padding='same'),
    Activation('relu'),
    BatchNormalization(),
    Conv3D(32, (3, 3, 3)),
    Activation('relu'),
    BatchNormalization(),
    MaxPooling3D(pool_size=(2, 2, 2)),
    Dropout(0.2),

    # Classifier head
    Flatten(),
    Dense(128, activation = 'relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(256, activation = 'relu'),
    BatchNormalization(),
    Dropout(0.5),

    # One output per gesture class
    Dense(5),
    Activation('softmax'),
])

### Compile the model and print the summary

In [None]:
# Adam with a learning rate of 0.001, passed as `learning_rate` (the `lr`
# alias is deprecated). Categorical cross-entropy matches the one-hot labels
# produced by the generator.
optimiser = tf.keras.optimizers.Adam(learning_rate = 0.001)
model.compile(optimizer=optimiser, loss='categorical_crossentropy', metrics=['categorical_accuracy'])
# summary() prints the table itself and returns None, so wrapping it in
# print() would only add a stray "None" line to the output.
model.summary()

In [None]:
# Train for 25 epochs on batches drawn from the generators. The step counts
# tell Keras how many batches to pull per epoch and per validation pass.
model.fit(
    train_generator,
    epochs=25,
    initial_epoch=0,
    steps_per_epoch=steps_per_epoch,
    validation_data=val_generator,
    validation_steps=validation_steps,
    callbacks=callbacks_list,
    class_weight=None,
    workers=1,
    verbose=1,
)

### Findings

- Overfitting was reduced in the model by removing the first dropout layer
- Train loss: 0.3063
- Train categorical_accuracy: 0.9005
- val_loss: 0.8488
- val_categorical_accuracy: 0.8200