- Load and filter annotations.
- Create a directory structure for preprocessed data.
- Copy and rename frames based on their labels.
- (Optional) Apply data augmentation.
- Create a data generator for batch processing.
- Define and train your model.

In [1]:
import pandas as pd
import numpy as np
from glob import glob
import skimage.transform
from skimage import io
from sklearn.preprocessing import OneHotEncoder
import joblib

import tensorflow as tf

2024-09-30 11:21:00.755503: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-09-30 11:21:00.783601: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


####  Selecting Gestures

In [69]:
# Gesture classes kept for this project; every other label is filtered out.
gesture_list = [
    'Thumb Up',
    'Thumb Down',
    'No gesture',
    'Swiping Right',
    'Swiping Left',
    'Zooming In With Full Hand',
]

#### Train Data (based on chosen gestures)
- Create a new CSV file, `new_jester_train.csv`, containing only the training rows whose label is one of the chosen gestures.

#### Validation Data
- Create a new CSV file, `new_jester_val.csv`, containing only the validation rows whose label is one of the chosen gestures.

In [70]:
# Train data
# Prefix shared by every CSV / encoder artefact this notebook writes.
file_prefix = "new_jester"
# Clean BEFORE filtering and saving: stripping whitespace first lets labels
# such as "Thumb Up " match gesture_list, and dropping null/duplicate rows
# here means the CSV that is written to disk is the cleaned one.
df = (
    pd.read_csv('./annotations/jester-v1-train.csv', header=None, names=['id', 'labels'])  # no ; separator present in train data
    .dropna()
    .assign(labels=lambda frame: frame['labels'].str.strip())
    .drop_duplicates()
)
df = df[df['labels'].isin(gesture_list)]
df.to_csv('{}_train.csv'.format(file_prefix), sep=';', index=False)

In [71]:
# (rows, columns) of the filtered training annotations.
df.shape

(25538, 2)

In [72]:
# Preview the first rows of the filtered training annotations.
df.head()

Unnamed: 0,id,labels
7,136859,Thumb Up
8,68574,Swiping Right
13,20706,No gesture
14,42237,Thumb Down
22,94060,Thumb Down


In [73]:
# Column dtypes and non-null counts of the filtered training annotations.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 25538 entries, 7 to 118560
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   id      25538 non-null  object
 1   labels  25538 non-null  object
dtypes: object(2)
memory usage: 598.5+ KB


In [74]:
# Number of training samples per gesture label.
df['labels'].value_counts().reset_index()

Unnamed: 0,labels,count
0,Thumb Down,4390
1,Thumb Up,4373
2,No gesture,4278
3,Zooming In With Full Hand,4251
4,Swiping Left,4162
5,Swiping Right,4084


### The dataset is approximately balanced (4,084–4,390 samples per gesture)

In [75]:
# Tidy the in-memory training annotations: drop rows with missing values,
# drop exact duplicate rows, then trim leading/trailing spaces from labels.
df = (
    df
    .dropna()
    .drop_duplicates()
    .assign(labels=lambda frame: frame['labels'].str.strip())
)

#### Reads the training annotations from a CSV file, filters the gestures based on gesture_list, and saves the filtered data to a new CSV file.

#### Similarly, process the validation annotations

In [76]:
# Validation data
# Clean BEFORE filtering and saving: stripping whitespace first lets labels
# with stray spaces match gesture_list, and dropping null/duplicate rows here
# means the CSV that is written to disk is the cleaned one.
df = (
    pd.read_csv('./annotations/jester-v1-validation.csv', sep=';', header=None, names=['id', 'labels'])  # ; separator
    .dropna()
    .assign(labels=lambda frame: frame['labels'].str.strip())
    .drop_duplicates()
)
df = df[df['labels'].isin(gesture_list)]
df.to_csv('{}_val.csv'.format(file_prefix), sep=';', index=False)
print(df.shape)
print(df.head())

(3114, 2)
       id                     labels
0    9223                   Thumb Up
2   42920               Swiping Left
3  106485                 Thumb Down
6   35341  Zooming In With Full Hand
7   94928              Swiping Right


In [77]:
# Tidy the in-memory validation annotations: drop rows with missing values,
# drop exact duplicate rows, then trim leading/trailing spaces from labels.
df = (
    df
    .dropna()
    .drop_duplicates()
    .assign(labels=lambda frame: frame['labels'].str.strip())
)

In [78]:
# (rows, columns) of the validation annotations after clean-up.
df.shape

(3114, 2)

In [79]:
# Preview the first five rows of the cleaned validation annotations.
df.head(5)

Unnamed: 0,id,labels
0,9223,Thumb Up
2,42920,Swiping Left
3,106485,Thumb Down
6,35341,Zooming In With Full Hand
7,94928,Swiping Right


#### Creating a class for loading and preprocessing video frame data

In [88]:
class DataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` yielding batches of video clips with one-hot labels.

    Each sample is a directory ``base_dir + <id>`` of ``*.jpg`` frames. A batch
    is an array of shape ``(batch, frames_count, *image_dim, n_channels)``
    together with the one-hot encoded labels of those clips.

    Parameters
    ----------
    file_path : str
        ';'-separated CSV with ``id`` and ``labels`` columns. When the path
        contains ``"train"`` a new label encoder is fitted and saved to disk;
        otherwise the encoder previously saved for the same prefix is loaded.
    batch_size : int
        Number of clips per batch. A trailing partial batch is dropped.
    image_dim : tuple
        Size every frame is resized to when loaded.
    frames_count : int
        Number of frames every clip is padded / trimmed to.
    n_channels : int
        Channels per frame. 1 loads grayscale, 3 loads RGB, 4 loads RGBA; any
        other value loads grayscale and broadcasts it across the channels.
    base_dir : str
        Directory prefix (including trailing separator) of the clip folders.
    n_classes : int
        Only used in the file name of the saved encoder classes.
    validation : bool
        Accepted for backward compatibility; currently unused.
    """

    # Channel count -> ``color_mode`` understood by Keras ``load_img``.
    _COLOR_MODES = {1: 'grayscale', 3: 'rgb', 4: 'rgba'}

    def __init__(self, file_path, batch_size=2, image_dim=(256,256), frames_count=36, n_channels=1, base_dir='./20bn-jester-v1/', n_classes=27,validation=False):
        # Newer Keras versions expect Sequence subclasses to call the base
        # initialiser; on older versions this is a harmless no-op.
        super().__init__()
        self.image_dim = image_dim
        self.batch_size = batch_size
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = True
        self.frames_count = frames_count
        self.df = pd.read_csv(file_path,sep=";")
        # Ids are concatenated into directory paths, so they must be strings.
        self.df.id = self.df.id.map(str)
        # e.g. "new_jester_train.csv" -> "new_jester_encoder_joblib.joblib"
        encoder_path = "{}_encoder_joblib.joblib".format('_'.join(file_path.split('_')[:-1]))
        if "train" in file_path:
            try:
                self.encoder = OneHotEncoder(sparse_output=False)
            except TypeError:
                # scikit-learn < 1.2 only knows the old keyword name.
                self.encoder = OneHotEncoder(sparse=False)
            self.encoder.fit(self.df.labels.values[:,None])
            joblib.dump(self.encoder, encoder_path)
            np.save("encoder_classes_{}_npy.npy".format(n_classes),self.encoder.categories_)
        else:
            # NOTE: joblib.load unpickles the file; only load encoders that
            # were written by a trusted run of this class.
            self.encoder = joblib.load(encoder_path)
        self.base_dir = base_dir
        self.on_epoch_end()

    def __len__(self):
        """Return the number of full batches per epoch (the step count)."""
        return self.df.shape[0] // self.batch_size

    def __getitem__(self, index):
        """Generate the ``index``-th batch as an ``(X, y)`` pair."""
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        X, y = self.__data_generation(indexes)
        return X, y

    def on_epoch_end(self):
        """Rebuild the sample order, reshuffling it when ``self.shuffle`` is set."""
        self.indexes = np.arange(self.df.shape[0])
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, indexes):
        """Load the clips at the given row positions and encode their labels.

        Returns ``(X, encoded)`` where ``X`` holds the resized frames of every
        clip and ``encoded`` is the one-hot label matrix from the fitted encoder.
        """
        X = np.empty((len(indexes), self.frames_count, *self.image_dim, self.n_channels))
        labels = []
        # Honour n_channels instead of always loading grayscale; unknown
        # channel counts keep the previous grayscale-and-broadcast behaviour.
        color_mode = self._COLOR_MODES.get(self.n_channels, 'grayscale')
        for i, ID in enumerate(indexes):
            # ``indexes`` are positions produced by np.arange, hence iloc.
            row = self.df.iloc[ID]
            # glob returns paths in arbitrary order; sort so the frames are
            # stacked in file-name order.
            frame_paths = sorted(glob(self.base_dir + row["id"] + "/*.jpg"))
            files_list = self.standardize_frame_count(frame_paths, row)
            for idx, filename in enumerate(files_list):
                image = tf.keras.preprocessing.image.load_img(filename, color_mode=color_mode, target_size=self.image_dim)
                X[i, idx] = tf.keras.preprocessing.image.img_to_array(image)
            labels.append(row["labels"])
        encoded = self.encoder.transform(np.array(labels)[:,None])
        return X, encoded

    def standardize_frame_count(self,files,error_check):
        """Return ``files`` padded or trimmed to exactly ``self.frames_count`` items.

        Too few frames: the middle frame is repeated until the clip is long
        enough. Too many frames: the surplus is dropped from the start of the
        list. ``error_check`` identifies the sample and is only used in the
        error message.

        Raises
        ------
        IndexError
            If ``files`` is empty while frames are required.
        """
        files = list(files)
        shape = len(files)
        if shape < self.frames_count:
            if shape == 0:
                raise IndexError("no frames found for sample: {}".format(error_check))
            to_add = self.frames_count - shape
            mid = shape // 2
            # Keep the original middle frame AND insert ``to_add`` copies after
            # it, so the result has exactly ``frames_count`` entries.
            files = files[:mid + 1] + [files[mid]] * to_add + files[mid + 1:]
        elif shape > self.frames_count:
            files = files[shape - self.frames_count:]
        return files
    

In [96]:
# Keyword arguments passed to DataGenerator via ** unpacking.
params = dict(
    batch_size=56,
    n_classes=len(gesture_list),
    n_channels=3,
    image_dim=(32, 32),
)

In [97]:
# Order matters: the training generator (its path contains "train") fits the
# label encoder and saves it to disk; the validation generator then loads that
# saved encoder instead of fitting its own, so it must be created second.
training_generator = DataGenerator(file_path="{}_train.csv".format(file_prefix),**params)
validation_generator = DataGenerator(file_path="{}_val.csv".format(file_prefix),**params)
# Next step: model.fit(training_generator, validation_data=validation_generator, validation_steps=m, epochs=n, verbose=1)

