# Gesture Recognition
In this group project, you are going to build a 3D Conv model that will be able to predict the 5 gestures correctly. Please import the following libraries to get started.

In [2]:
import numpy as np
import os
import datetime
import os
import cv2

We set the random seed so that the results don't vary drastically.

In [3]:
np.random.seed(30)
import random as rn
rn.seed(30)
from keras import backend as K
import tensorflow as tf
tf.random.set_seed(30)
tf.config.list_physical_devices('GPU')

2024-03-13 16:43:58.165985: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-13 16:43:58.166013: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-13 16:43:58.166037: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-03-13 16:43:58.170849: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-03-13 16:43:59.848564: I tensorflow/compiler/

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

## Model

In [4]:
from keras.models import Sequential, Model
from keras.layers import Dense, GRU, Flatten, TimeDistributed, Flatten, BatchNormalization, Activation, Dropout
from keras.layers import Conv3D, MaxPooling3D
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from keras.optimizers import Adam

# Parameters initialization
nb_rows = 120   # X dimension of the image
nb_cols = 120   # Y dimesnion of the image
#total_frames = 30
nb_frames = 30  # lenght of the video frames
nb_channel = 3 # numbe rof channels in images 3 for color(RGB) and 1 for Gray

nb_filters = [8, 16, 32, 64]
nb_dense = [256, 128, 5]

# Input
input_shape = (nb_frames, nb_rows, nb_cols, nb_channel)

# Define model
model = Sequential()

model.add(Conv3D(nb_filters[0], kernel_size=(3,3,3), input_shape=input_shape, padding='same'))
model.add(BatchNormalization())
model.add(Activation('relu'))

model.add(MaxPooling3D(pool_size=(2,2,2)))

model.add(Conv3D(nb_filters[1], kernel_size=(3,3,3), padding='same'))
model.add(BatchNormalization())
model.add(Activation('relu'))

model.add(MaxPooling3D(pool_size=(2,2,2)))

model.add(Conv3D(nb_filters[2], kernel_size=(1,3,3), padding='same'))
model.add(BatchNormalization())
model.add(Activation('relu'))

model.add(MaxPooling3D(pool_size=(2,2,2)))

model.add(Conv3D(nb_filters[3], kernel_size=(1,3,3), padding='same'))
model.add(Activation('relu'))
model.add(Dropout(0.25))

model.add(MaxPooling3D(pool_size=(2,2,2)))

#Flatten Layers
model.add(Flatten())

model.add(Dense(nb_dense[0], activation='relu'))
model.add(Dropout(0.5))

model.add(Dense(nb_dense[1], activation='relu'))
model.add(Dropout(0.5))

#softmax layer
model.add(Dense(nb_dense[2], activation='softmax'))

2024-03-13 16:44:01.901180: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-03-13 16:44:01.901310: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-03-13 16:44:01.901381: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

In [5]:
class DataGenerator:
    def __init__(self, width=120, height=120, frames=30, channel=3, crop=True, normalize=False, affine=False, flip=False, edge=False):
        self.width = width   # X dimension of the image
        self.height = height # Y dimesnion of the image
        self.frames = frames # length/depth of the video frames
        self.channel = channel # number of channels in images 3 for color(RGB) and 1 for Gray
        self.affine = affine # augment data with affine transform of the image
        self.flip = flip
        self.normalize =  normalize
        self.edge = edge # edge detection
        self.crop = crop

    # Helper function to generate a random affine transform on the image
    def __get_random_affine(self): # private method
        dx, dy = np.random.randint(-1.7, 1.8, 2)
        M = np.float32([[1, 0, dx], [0, 1, dy]])
        return M

    # Helper function to initialize all the batch image data and labels
    def __init_batch_data(self, batch_size): # private method
        batch_data = np.zeros((batch_size, self.frames, self.width, self.height, self.channel))
        batch_labels = np.zeros((batch_size,5)) # batch_labels is the one hot representation of the output
        return batch_data, batch_labels

    def __load_batch_images(self, source_path, folder_list, batch_num, batch_size, t): # private method

        batch_data,batch_labels = self.__init_batch_data(batch_size)
        # We will also build a agumented batch data
        if self.affine:
            batch_data_aug,batch_labels_aug = self.__init_batch_data(batch_size)
        if self.flip:
            batch_data_flip,batch_labels_flip = self.__init_batch_data(batch_size)

        #create a list of image numbers you want to use for a particular video
        img_idx = [x for x in range(0, self.frames)]

        for folder in range(batch_size): # iterate over the batch_size
            # read all the images in the folder
            imgs = sorted(os.listdir(source_path+'/'+ t[folder + (batch_num*batch_size)].split(';')[0]))
            # Generate a random affine to be used in image transformation for buidling agumented data set
            M = self.__get_random_affine()

            #  Iterate over the frames/images of a folder to read them in
            for idx, item in enumerate(img_idx):
                image = cv2.imread(source_path+'/'+ t[folder + (batch_num*batch_size)].strip().split(';')[0]+'/'+imgs[item], cv2.IMREAD_COLOR)
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

                #crop the images and resize them. Note that the images are of 2 different shape
                #and the conv3D will throw error if the inputs in a batch have different shapes
                if self.crop:
                    image = self.__crop(image)
                # If normalize is set normalize the image else use the raw image.
                if self.normalize:
                    resized = self.__normalize(self.__resize(image))
                else:
                    resized = self.__resize(image)
                # If the input is edge detected image then use the sobelx, sobely and laplacian as 3 channel of the edge detected image
                if self.edge:
                    resized = self.__edge(resized)

                batch_data[folder,idx] = resized
                if self.affine:
                    batch_data_aug[folder,idx] = self.__affine(resized, M)
                if self.flip:
                    batch_data_flip[folder,idx] = self.__flip(resized)

            batch_labels[folder, int(t[folder + (batch_num*batch_size)].strip().split(';')[2])] = 1

            if self.affine:
                batch_labels_aug[folder, int(t[folder + (batch_num*batch_size)].strip().split(';')[2])] = 1

            if self.flip:
                if int(t[folder + (batch_num*batch_size)].strip().split(';')[2])==0:
                    batch_labels_flip[folder, 1] = 1
                elif int(t[folder + (batch_num*batch_size)].strip().split(';')[2])==1:
                    batch_labels_flip[folder, 0] = 1
                else:
                    batch_labels_flip[folder, int(t[folder + (batch_num*batch_size)].strip().split(';')[2])] = 1

        if self.affine:
            batch_data = np.append(batch_data, batch_data_aug, axis = 0)
            batch_labels = np.append(batch_labels, batch_labels_aug, axis = 0)
        if self.flip:
            batch_data = np.append(batch_data, batch_data_flip, axis = 0)
            batch_labels = np.append(batch_labels, batch_labels_flip, axis = 0)

        return batch_data, batch_labels

    def generator(self, source_path, folder_list, batch_size): # public method
        print( 'Source path = ', source_path, '; batch size =', batch_size)
        while True:
            t = np.random.permutation(folder_list)
            num_batches = len(folder_list)//batch_size # calculate the number of batches
            for batch in range(num_batches): # we iterate over the number of batches
                # you yield the batch_data and the batch_labels, remember what does yield do
                yield self.__load_batch_images(source_path, folder_list, batch, batch_size, t)

            # Code for the remaining data points which are left after full batches
            if (len(folder_list) != batch_size*num_batches):
                batch_size = len(folder_list) - (batch_size*num_batches)
                yield self.__load_batch_images(source_path, folder_list, num_batches, batch_size, t)

    # Helper function to perfom affice transform on the image
    def __affine(self, image, M):
        return cv2.warpAffine(image, M, (image.shape[0], image.shape[1]))

    # Helper function to flip the image
    def __flip(self, image):
        return np.flip(image,1)

    # Helper function to normalise the data
    def __normalize(self, image):
        return image/127.5-1

    # Helper function to resize the image
    def __resize(self, image):
        return cv2.resize(image, (self.width,self.height), interpolation = cv2.INTER_AREA)

    # Helper function to crop the image
    def __crop(self, image):
        if image.shape[0] != image.shape[1]:
            return image[0:120, 20:140]
        else:
            return image

    # Helper function for edge detection
    def __edge(self, image):
        edge = np.zeros((image.shape[0], image.shape[1], image.shape[2]))
        edge[:,:,0] = cv2.Laplacian(cv2.GaussianBlur(image[:,:,0],(3,3),0),cv2.CV_64F)
        edge[:,:,1] = cv2.Laplacian(cv2.GaussianBlur(image[:,:,1],(3,3),0),cv2.CV_64F)
        edge[:,:,2] = cv2.Laplacian(cv2.GaussianBlur(image[:,:,2],(3,3),0),cv2.CV_64F)
        return edge

In [18]:
def train(batch_size, num_epochs, model: tf.keras.models.Model, train_generator, val_generator, train_doc, val_doc, model_save_path=None, optimiser=None):

    curr_dt_time = datetime.datetime.now()

    num_train_sequences = len(train_doc)
    print('# training sequences =', num_train_sequences)
    num_val_sequences = len(val_doc)
    print('# validation sequences =', num_val_sequences)
    print('# batch size =', batch_size)
    print('# epochs =', num_epochs)

    #optimizer = Adam(lr=rate)
    #write your optimizer
    if optimiser == None:
        optimiser = Adam()
    model.compile(optimizer=optimiser, loss='categorical_crossentropy', metrics=['categorical_accuracy'])
    print (model.summary())

    model_name = 'model_init' + '_' + str(curr_dt_time).replace(' ','').replace(':','_') + '/'

    if not os.path.exists(model_name):
        os.mkdir(model_name)

    filepath = model_name + 'model-{epoch:05d}-{loss:.5f}-{categorical_accuracy:.5f}-{val_loss:.5f}-{val_categorical_accuracy:.5f}.h5'

    checkpoint = ModelCheckpoint(
        filepath,
        monitor='val_loss',
        verbose=1,
        save_best_only=False,
        save_weights_only=False,
        mode='auto',
    )
    LR = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, cooldown=1, verbose=1)
    callbacks_list = [checkpoint, LR]

    if (num_train_sequences%batch_size) == 0:
        steps_per_epoch = int(num_train_sequences/batch_size)
    else:
        steps_per_epoch = (num_train_sequences//batch_size) + 1

    if (num_val_sequences%batch_size) == 0:
        validation_steps = int(num_val_sequences/batch_size)
    else:
        validation_steps = (num_val_sequences//batch_size) + 1

    model.fit(
        train_generator,
        steps_per_epoch=steps_per_epoch,
        epochs=num_epochs,
        verbose=1,
        callbacks=callbacks_list,
        validation_data=val_generator,
        validation_steps=validation_steps,
        class_weight=None,
        workers=1,
        initial_epoch=0,
    )

    # Save model (saved_model format)
    if model_save_path is None:
        model.save('saved_model')
    else:
        model.save(model_save_path)

    K.clear_session()

In [19]:
batch_size = 10
num_epochs = 20
train_path = './data/train/train'
val_path = './data/val/val'
train_doc = np.random.permutation(open('./data/train.csv').readlines())
val_doc = np.random.permutation(open('./data/val.csv').readlines())
model_save_path = 'saved_model'
tflite_save_path = f'{model_save_path}/model.tflite'

################ AP 0.22
# train_gen = DataGenerator(affine=True, flip=True, normalize=True, crop=True, edge=True)
# val_gen = DataGenerator()
################ AP 0.50
# train_gen = DataGenerator(affine=True, flip=True, normalize=True, crop=True, edge=False)
# val_gen = DataGenerator()
################ AP 0.48
# train_gen = DataGenerator(affine=True, flip=True, normalize=True, crop=False, edge=False)
# val_gen = DataGenerator()
################ AP 0.43
# train_gen = DataGenerator(affine=True, flip=False, normalize=True, crop=False, edge=False)
# val_gen = DataGenerator()
################ AP 0.41
# train_gen = DataGenerator(affine=False, flip=False, normalize=True, crop=False, edge=False)
# val_gen = DataGenerator()
################ AP 0.86
train_gen = DataGenerator(affine=False, flip=False, normalize=False, crop=False, edge=False)
val_gen = DataGenerator()

train_generator = train_gen.generator(train_path, train_doc, batch_size)
val_generator = val_gen.generator(val_path, val_doc, batch_size)

train(batch_size, num_epochs, model, train_generator, val_generator, train_doc, val_doc, model_save_path)

# training sequences = 663
# validation sequences = 100
# batch size = 10
# epochs = 20
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv3d (Conv3D)             (None, 30, 120, 120, 8)   656       
                                                                 
 batch_normalization (Batch  (None, 30, 120, 120, 8)   32        
 Normalization)                                                  
                                                                 
 activation (Activation)     (None, 30, 120, 120, 8)   0         
                                                                 
 max_pooling3d (MaxPooling3  (None, 15, 60, 60, 8)     0         
 D)                                                              
                                                                 
 conv3d_1 (Conv3D)           (None, 15, 60, 60, 16)    3472      
                                  

None
Source path =  ./data/train/train ; batch size = 10
Epoch 1/20

Epoch 1: saving model to model_init_2024-03-1317_45_32.240306/model-00001-0.07631-0.97134-1.15141-0.83000.h5
Epoch 2/20
 4/67 [>.............................] - ETA: 1s - loss: 0.0016 - categorical_accuracy: 1.0000    

  saving_api.save_model(


Epoch 2: saving model to model_init_2024-03-1317_45_32.240306/model-00002-0.95686-0.88557-1.84726-0.75000.h5
Epoch 3/20
Epoch 3: saving model to model_init_2024-03-1317_45_32.240306/model-00003-0.18489-0.92040-1.05545-0.82000.h5
Epoch 4/20
Epoch 4: saving model to model_init_2024-03-1317_45_32.240306/model-00004-0.34420-0.93035-0.79504-0.84000.h5
Epoch 5/20
Epoch 5: saving model to model_init_2024-03-1317_45_32.240306/model-00005-0.16818-0.94527-1.33440-0.81000.h5
Epoch 6/20
Epoch 6: saving model to model_init_2024-03-1317_45_32.240306/model-00006-0.11692-0.96020-1.01514-0.86000.h5

Epoch 6: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 7/20
Epoch 7: saving model to model_init_2024-03-1317_45_32.240306/model-00007-0.16581-0.95522-0.84664-0.82000.h5
Epoch 8/20
Epoch 8: saving model to model_init_2024-03-1317_45_32.240306/model-00008-0.13147-0.96020-0.59516-0.87000.h5
Epoch 9/20
Epoch 9: saving model to model_init_2024-03-1317_45_32.240306/model-00009-0.10227-0

INFO:tensorflow:Assets written to: saved_model/assets


## (テスト用) モデルをONNXに変換

In [29]:
onnx_save_path = f'{model_save_path}/model.onnx'

!python -m tf2onnx.convert --saved-model $model_save_path --output $onnx_save_path --opset 11
!onnxsim $onnx_save_path $onnx_save_path

!sbi4onnx \
--input_onnx_file_path $onnx_save_path \
--output_onnx_file_path $onnx_save_path \
--initialization_character_string batch

!sor4onnx \
--input_onnx_file_path $onnx_save_path \
--old_new "conv3d_input" "input" \
--mode inputs \
--search_mode prefix_match \
--output_onnx_file_path $onnx_save_path

!sor4onnx \
--input_onnx_file_path $onnx_save_path \
--old_new "dense_2" "output" \
--mode outputs \
--search_mode prefix_match \
--output_onnx_file_path $onnx_save_path

!sne4onnx \
--input_onnx_file_path saved_model/model.onnx \
--input_op_names StatefulPartitionedCall/sequential/conv3d/Conv3D__6:0 \
--output_op_names output \
--output_onnx_file_path saved_model/model.onnx

!sor4onnx \
--input_onnx_file_path saved_model/model.onnx \
--old_new "StatefulPartitionedCall/sequential/conv3d/Conv3D__6:0" "input" \
--mode inputs \
--search_mode prefix_match \
--output_onnx_file_path saved_model/model.onnx

!onnxsim saved_model/model.onnx saved_model/model.onnx


2024-03-13 18:27:53.908443: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-13 18:27:53.908470: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-13 18:27:53.908494: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-03-13 18:27:56,203 - INFO - Signatures found in model: [serving_default].
2024-03-13 18:27:56,203 - INFO - Output names: ['dense_2']
2024-03-13 18:27:56,359 - INFO - Using tensorflow=2.14.0, onnx=1.15.0, tf2onnx=1.16.1/15c810
2024-03-13 18:27:56,359 - INFO - Using opset <onnx, 11>
2024-03-13 18:27:56,376 - INFO - Computed 0 values for constant folding
202

## 推論用に ArgMax -> Bool 変換後処理の追加

In [None]:
# 保存したモデルのロード
model = tf.keras.models.load_model(model_save_path)
# ArgMax -> Bool を追加
class PostModel(tf.keras.Model):
    def __init__(self, model):
        super(PostModel, self).__init__()
        self.base_model = model

    def call(self, inputs):
        x = self.base_model(inputs)
        return tf.cast(tf.math.argmax(x, axis=1), dtype=tf.bool)

# 新しいモデルのインスタンスを生成
model = PostModel(model)