In [2]:
import numpy as np
import cv2
import tensorflow as tf
import matplotlib.pyplot as plt
import os
import math
import pandas as pd
import matplotlib.image as img
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential

In [3]:
# Utility functions
# return gray image
def rgb2gray(rgb):
    return np.dot(rgb[...,:3], [0.2989, 0.5870, 0.1140])

# training targets stored
targets_temp = pd.read_csv('/kaggle/input/gesturecsvs/jester-v1-train.csv',header=None,sep = ";").to_dict()
targets = {}
for index in range(len(targets_temp[0])):
  targets[targets_temp[0][index]] = targets_temp[1][index]

# validation targets stored
targets_validation_temp = pd.read_csv('/kaggle/input/gesturecsvs/jester-v1-train.csv',header=None,sep = ";").to_dict()
targets_validation = {}
# print(list(targets_validation_temp[0].values()).index(34870))
valDirs = os.listdir('/kaggle/working/val_data')
for dir in valDirs:
  indexDir = int(dir)
  targets_validation[indexDir] = targets_validation_temp[1][list(targets_validation_temp[0].values()).index(indexDir)]
    
# Finally, classes label you want to use all labels 
label = pd.read_csv('/kaggle/input/gesturecsvs/jester-v1-labels.csv',header=None, usecols=[0])
label.head()
targets_name = label[0].tolist()
len(targets_name)

# Get the data directories
path = "/kaggle/input/gesturevid/20bn50_part1/"
path_cv = "/kaggle/working/val_data/"

dirs = os.listdir(path)
dirs_cv = os.listdir(path_cv)

# number of samples for training and validation
print(len(dirs))
print(len(dirs_cv))

# The videos do not have the same number of frames, here we try to unify.

hm_frames = 30 # number of frames
# unify number of frames for each training
def get_unify_frames(path):
    offset = 0
    # pick frames
    frames = os.listdir(path)
    frames_count = len(frames)
    # unify number of frames 
    if hm_frames > frames_count:
        # duplicate last frame if video is shorter than necessary
        frames += [frames[-1]] * (hm_frames - frames_count)
    elif hm_frames < frames_count:
        # If there are more frames, then sample starting offset
        #diff = (frames_count - hm_frames)
        #offset = diff-1 
        frames = frames[0:hm_frames]
    return frames

# Resize frames
def resize_frame(frame):
    frame = img.imread(frame)
    frame = cv2.resize(frame, (64, 64))
    return frame

# Function to empty the RAM
def release_list(a):
   del a[:]
   del a
    
#Defining loss function in use
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()

# Loss
train_loss = tf.keras.metrics.Mean(name='train_loss')
valid_loss = tf.keras.metrics.Mean(name='valid_loss')
# Accuracy
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
valid_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='valid_accuracy')

@tf.function
def train_step(image, targets):
    with tf.GradientTape() as tape:
        # Make a prediction on all the batch
        predictions = model(image)
        # Get the error/loss on these predictions
        loss = loss_fn(targets, predictions)
    # Compute the gradient which respect to the loss
    grads = tape.gradient(loss, model.trainable_variables)
    # Change the weights of the model
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    # The metrics are accumulate over time. You don't need to average it yourself.
    train_loss(loss)
    train_accuracy(targets, predictions)
    

@tf.function
def valid_step(image, targets):
    predictions = model(image)
    t_loss = loss_fn(targets, predictions)
    # Set the metrics for the test
    valid_loss(t_loss)
    valid_accuracy(targets, predictions)



59281
500


In [4]:
# Adjusting validation data
with tf.device('/device:GPU:0'):
  counter_validation = 0
  cv_targets = []
  new_frames_cv = []
  for directory in dirs_cv:
      new_frame = []
      # Frames in each folder
      frames = get_unify_frames(path_cv+directory)
      if len(frames)==hm_frames:
          for frame in frames:
              new_frame.append(rgb2gray(cv2.imread(path_cv+directory+'/'+frame)))
              new_frames_cv.append(new_frame)
              if len(new_frame) == 15:
#                   print(directory)
                  new_frames_cv.append(new_frame)
                  cv_targets.append(targets_name.index(targets_validation[int(directory)]))
                  counter_validation +=1
                  new_frame = []

  # convert validation data to np float32
  cv_data = np.array(new_frames_cv[0:counter_validation], dtype=np.float32)
  print(cv_data.shape)

  #release new frames cv
  release_list(new_frames_cv)

  # Normalisation: validation
  print('old mean', cv_data.mean())
  scaler = StandardScaler()
  scaled_images_cv  = scaler.fit_transform(cv_data.reshape(-1, 15*64*64))
  print('new mean',scaled_images_cv.mean())
  scaled_images_cv  = scaled_images_cv.reshape(-1, 15, 64, 64, 1)
  print(scaled_images_cv.shape)

  cv_dataset = tf.data.Dataset.from_tensor_slices((scaled_images_cv, cv_targets))



(1000, 15, 64, 64)
old mean 109.43539
new mean 9.480318e-08
(1000, 15, 64, 64, 1)


In [5]:
# Convolutions
conv1 = tf.compat.v2.keras.layers.Conv3D(32, (3, 3, 3), activation='relu', name="conv1", data_format='channels_last')
pool1 = tf.keras.layers.MaxPool3D(pool_size=(2, 2, 2), data_format='channels_last')
conv2 = tf.compat.v2.keras.layers.Conv3D(64, (3, 3, 3), activation='relu', name="conv1", data_format='channels_last')
pool2 = tf.keras.layers.MaxPool3D(pool_size=(2, 2,2), data_format='channels_last')

# LSTM & Flatten
convLSTM =tf.keras.layers.ConvLSTM2D(40, (3, 3))
flatten =  tf.keras.layers.Flatten(name="flatten")

# Dense layers
d1 = tf.keras.layers.Dense(128, activation='relu', name="d1")
out = tf.keras.layers.Dense(27, activation='softmax', name="output")

# Create the 3D CNN model for hand gesture recognition
model = Sequential()
model.add(tf.compat.v2.keras.layers.Conv3D(32, (3, 3, 3), activation='relu', name="conv1", data_format='channels_last'))
model.add(pool1)
model.add(conv2)
model.add(pool2)
model.add(convLSTM)
model.add(flatten)
model.add(d1)
model.add(out)


In [6]:
ckpt = tf.train.Checkpoint(step=tf.Variable(1), optimizer=optimizer, model=model)
manager = tf.train.CheckpointManager(ckpt, '/kaggle/working/tf_ckpts', max_to_keep=10)
ckpt.restore(manager.latest_checkpoint)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7ff2d412c310>

In [None]:
# Every 4000 videos
delta = 4000
start = 0
loop = 1
for i in range(start, start+delta):
  print("##################### LOOP STARTED" + str(loop) + "#########################")
  counter_training = 0 
  training_targets = [] 
  new_frames = [] # training data after resize & unify
  # Preparing training data
  with tf.device('/device:GPU:0'):
    # Adjust training data
    for directory in dirs[start:start+delta]:
      new_frame = [] 
      frames = get_unify_frames(path+directory)
      if len(frames) == hm_frames: # just to be sure
          for frame in frames:
              new_frame.append(rgb2gray( tf.image.per_image_standardization(cv2.imread(path+directory+'/'+frame))))
              new_frames.append(new_frame)
              if len(new_frame) == 15: # partition each training on two trainings.
                  new_frames.append(new_frame) # append each partition to training data
                  training_targets.append(targets_name.index(targets[int(directory)]))
                  counter_training +=1
                  new_frame = []
              
    training_data = np.array(new_frames[0:counter_training], dtype=np.float32)
    # print(training_data.shape)    

    #release new frames
    release_list(new_frames)

    # Normalisation: training
    # print('old mean', training_data.mean())
    scaler = StandardScaler()
    scaled_images  = scaler.fit_transform(training_data.reshape(-1, 15*64*64))
    # print('new mean', scaled_images.mean())
    scaled_images  = scaled_images.reshape(-1, 15, 64, 64, 1)
    # print("scaled images shape:")
    # print(scaled_images.shape)

    # release training_data array
    del(training_data)

    # use tensorflow Dataset
    train_dataset = tf.data.Dataset.from_tensor_slices((scaled_images, training_targets))

    if (loop != 1):
      # Load the previous saved weights
      ckpt.restore(manager.latest_checkpoint)
    
    else:
      #Necessary step to build model once with data
      model(scaled_images[0:2])
      model.summary()
    
    #release 
    del(scaled_images)
    
    #Training 
    epoch = 10
    batch_size = 32
    b = 0
    training_acc = []
    validation_acc = []
    for epoch in range(epoch):
        # Training set
        for images_batch, targets_batch in train_dataset.batch(batch_size):
            train_step(images_batch, targets_batch)
            # print("--Train step done--")
            template = '\r Batch {}/{}, Loss: {}, Accuracy: {}'
            print(template.format(
                b, len(training_targets), train_loss.result(), 
                train_accuracy.result()*100
            ), end="")
            b += batch_size
        # Validation set
        for images_batch, targets_batch in cv_dataset.batch(batch_size):
            valid_step(images_batch, targets_batch)

        template = '\nEpoch {}, Valid Loss: {}, Valid Accuracy: {}'
        print(template.format(
            epoch+1,
            valid_loss.result(), 
            valid_accuracy.result()*100)
        )
        training_acc.append(float(train_accuracy.result()*100))
        validation_acc.append(float(valid_accuracy.result()*100))
        ckpt.step.assign_add(1)
        # save_path = manager.save()
        # print(" checkpoint for step {}: {}".format(int(ckpt.step), save_path))
        valid_loss.reset_states()
        valid_accuracy.reset_states()
        train_accuracy.reset_states()
        train_loss.reset_states()

    if (loop % 2 == 0):
      # plot Accuracy / epoch
      plt.plot([1,2,3,4,5,6,7,8,9,10],training_acc, '-' )
      plt.plot([1,2,3,4,5,6,7,8,9,10],validation_acc, '-' )

      plt.ylabel('Accuracy')
      plt.xlabel('Epochs')
      plt.show()
    # save the model for use in the application
    save_path = manager.save()
    
    #release
    del(train_dataset)
    
    # print(" checkpoint for step {}: {}".format(int(ckpt.step), save_path))    
    start = start + delta
    loop = loop + 1
    if(start > 50000):
      break

##################### LOOP STARTED1#########################
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1 (Conv3D)               multiple                  896       
_________________________________________________________________
max_pooling3d (MaxPooling3D) multiple                  0         
_________________________________________________________________
conv1 (Conv3D)               multiple                  55360     
_________________________________________________________________
max_pooling3d_1 (MaxPooling3 multiple                  0         
_________________________________________________________________
conv_lst_m2d (ConvLSTM2D)    multiple                  149920    
_________________________________________________________________
flatten (Flatten)            multiple                  0         
_____________________________________________________________