In [39]:
# Imports
import tensorflow as tf
from tensorflow.keras import layers, models, applications
from tensorflow.keras import regularizers
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.layers import RandomZoom, RandomRotation, RandomFlip, Rescaling, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, GlobalAveragePooling2D
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing import image
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import os
import random
import pandas as pd


In [40]:
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Set seed
SEED = 338424

# Global variables
IMG_SIZE = (64, 64)
BATCH_SIZE = 32
num_classes = 18 # Number of folders in dataset
AUTOTUNE = tf.data.AUTOTUNE

In [41]:
# Load Dataset
dataset_dir = 'dataset/hagridset'
full_ds = tf.keras.utils.image_dataset_from_directory(
    dataset_dir,
    shuffle=True,
    seed=SEED,
    image_size=(IMG_SIZE),
    batch_size=BATCH_SIZE,
    label_mode='categorical'
)

# Split into training, validation, and test sets
train_ratio = 0.7
val_ratio = 0.2
test_ratio = 0.1

# Total length of the dataset
total_size = len(full_ds)

# Compute indices for the splits
train_size = int(total_size * train_ratio)
val_size = int(total_size * val_ratio)
test_size = total_size - (train_size + val_size)

# # Split the dataset
# train_ds = full_ds.take(train_size)
# val_ds = full_ds.skip(train_size).take(val_size)
# test_ds = full_ds.skip(train_size + val_size)

# Split the dataset and shuffle
train_ds = full_ds.take(train_size).shuffle(train_size, seed=SEED)
val_ds = full_ds.skip(train_size).take(val_size).shuffle(val_size, seed=SEED)
test_ds = full_ds.skip(train_size + val_size).shuffle(test_size, seed=SEED)

# Count samples in each subset
def count_samples(dataset):
    sample_count = sum(1 for _ in dataset.unbatch())
    return sample_count

# Output the number of samples for each dataset
print(f'Using {count_samples(train_ds)} samples in the Training set')
print(f'Using {count_samples(val_ds)} samples in the Validation set')
print(f'Using {count_samples(test_ds)} samples in the Test set')

Found 125912 files belonging to 18 classes.
Using 88128 samples in the Training set
Using 25184 samples in the Validation set
Using 12600 samples in the Test set


In [42]:
# Get class names
class_names = full_ds.class_names
class_names

['call',
 'dislike',
 'fist',
 'four',
 'like',
 'mute',
 'ok',
 'one',
 'palm',
 'peace',
 'peace_inverted',
 'rock',
 'stop',
 'stop_inverted',
 'three',
 'three2',
 'two_up',
 'two_up_inverted']

# **Light CNN**

In [5]:
# Define the Light CNN Model from Scratch
def build_scratch_cnn_light():
    model = models.Sequential()
    model.add(tf.keras.Input(shape=(IMG_SIZE[0], IMG_SIZE[1], 3)))
    model.add(layers.Rescaling(1.0 / 255))  # Normalize pixel values
    model.add(layers.Conv2D(16, 3, padding='same', activation='relu'))  # Reduced filter size
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D())
    model.add(layers.Conv2D(32, 3, padding='same', activation='relu'))  # Smaller second layer
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D())
    model.add(layers.Conv2D(64, 3, padding='same', activation='relu'))  # Smaller third layer
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D())
    model.add(layers.Flatten())
    model.add(layers.Dense(64, activation='relu'))  # Reduced fully connected layer
    model.add(layers.Dropout(0.3))  # Reduced dropout

    model.add(layers.Dense(num_classes, activation='softmax'))

    model.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
                  metrics=['accuracy'])
    return model

# Instantiate and summarize the lighter model
scratch_model_light = build_scratch_cnn_light()
scratch_model_light.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 64, 64, 3)         0         
                                                                 
 conv2d (Conv2D)             (None, 64, 64, 16)        448       
                                                                 
 batch_normalization (BatchN  (None, 64, 64, 16)       64        
 ormalization)                                                   
                                                                 
 max_pooling2d (MaxPooling2D  (None, 32, 32, 16)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 32, 32, 32)        4640      
                                                                 
 batch_normalization_1 (Batc  (None, 32, 32, 32)       1

In [6]:
# Train Light CNN Model
history_custom = scratch_model_light.fit(
    train_ds,
    validation_data=val_ds,
    epochs=10,
    callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)]
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [7]:
# Evaluate the Light CNN Model
scratch_model_light.evaluate(test_ds)



[0.8618804812431335, 0.7296825647354126]

In [None]:
# Save the Deep CNN Model
scratch_model_light.save('scratch_model_deep.h5')

In [None]:
# Load the model
from tensorflow.keras.models import load_model

# Load the model from the .h5 file
scratch_model_light = load_model('scratch_model_light.h5')

In [9]:
# Testing own images
def predict_gesture(model, img_path, class_names):
    img = image.load_img(img_path, target_size=(64, 64))
    img_array = image.img_to_array(img)
    img_array = tf.expand_dims(img_array, 0)  # Create batch dimension

    predictions = model.predict(img_array)
    predicted_class = class_names[np.argmax(predictions)]
    return predicted_class

gesture_images = ['myImages/three2.png',
                  'myImages/rock.png',
                  'myImages/ok.jpg',
                  'myImages/two_up_inverted.png']

for img_path in gesture_images:
    predicted_gesture = predict_gesture(scratch_model_light, img_path, class_names)
    print(f"Prediction for {img_path}: {predicted_gesture}")

Prediction for myImages/three2.png: three2
Prediction for myImages/rock.png: peace_inverted
Prediction for myImages/ok.jpg: call
Prediction for myImages/two_up_inverted.png: two_up_inverted


# **Deep CNN**

In [14]:
# Define the Deep CNN Model from Scratch
def build_scratch_cnn():
    model = models.Sequential()
    model.add(tf.keras.Input(shape=(IMG_SIZE[0], IMG_SIZE[1], 3)))
    model.add(layers.Rescaling(1.0 / 255))  # Normalize pixel values
    model.add(layers.Conv2D(32, 3, padding='same', activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D())
    model.add(layers.Conv2D(32, 3, padding='same', activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D())
    model.add(layers.Conv2D(64, 3, padding='same', activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D())
    model.add(layers.Conv2D(64, 3, padding='same', activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D())
    model.add(layers.Conv2D(128, 3, padding='same', activation='relu',
                            kernel_regularizer=regularizers.l2(0.001)))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D())
    model.add(layers.Dropout(0.3))
    model.add(layers.Conv2D(128, 3, padding='same', activation='relu',
                            kernel_regularizer=regularizers.l2(0.001)))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D())
    model.add(layers.Dropout(0.3))
    model.add(layers.Flatten())
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dropout(0.5)) 
    
    model.add(layers.Dense(num_classes, activation='softmax'))

    model.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
                  metrics=['accuracy'])
    return model

# Instantiate and summarize the model
scratch_model_deep = build_scratch_cnn()
scratch_model_deep.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling_2 (Rescaling)     (None, 128, 128, 3)       0         
                                                                 
 conv2d_10 (Conv2D)          (None, 128, 128, 32)      896       
                                                                 
 batch_normalization_10 (Bat  (None, 128, 128, 32)     128       
 chNormalization)                                                
                                                                 
 max_pooling2d_10 (MaxPoolin  (None, 64, 64, 32)       0         
 g2D)                                                            
                                                                 
 conv2d_11 (Conv2D)          (None, 64, 64, 32)        9248      
                                                                 
 batch_normalization_11 (Bat  (None, 64, 64, 32)      

In [15]:
# Train Deep CNN Model
history_custom = scratch_model_deep.fit(
    train_ds,
    validation_data=val_ds,
    epochs=10,
    callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)]
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [16]:
# Evaluate the Deep CNN Model
scratch_model_deep.evaluate(test_ds)



[0.6875975131988525, 0.8775396943092346]

In [24]:
# Save the Deep CNN Model
scratch_model_deep.save('scratch_model_deep.h5')

In [25]:
# Load the model
from tensorflow.keras.models import load_model

# Load the model from the .h5 file
scratch_model_deep = load_model('scratch_model_deep.h5')

In [27]:
# Testing own images
def predict_gesture(model, img_path, class_names):
    img = image.load_img(img_path, target_size=(128, 128))
    img_array = image.img_to_array(img)
    img_array = tf.expand_dims(img_array, 0)  # Create batch dimension

    predictions = model.predict(img_array)
    predicted_class = class_names[np.argmax(predictions)]
    return predicted_class

gesture_images = ['myImages/three2.png',
                  'myImages/rock.png',
                  'myImages/ok.jpg',
                  'myImages/two_up_inverted.png']

for img_path in gesture_images:
    predicted_gesture = predict_gesture(scratch_model_deep, img_path, class_names)
    print(f"Prediction for {img_path}: {predicted_gesture}")

Prediction for myImages/three2.png: three2
Prediction for myImages/rock.png: rock
Prediction for myImages/ok.jpg: ok
Prediction for myImages/two_up_inverted.png: two_up_inverted


# **CNN Data Augmentation**

In [44]:
# Data Augmentation
data_augmentation_layers = tf.keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.1),
])

In [22]:
# Define the Deep CNN Model from Scratch
def build_scratch_cnn():
    model = models.Sequential()
    model.add(tf.keras.Input(shape=(IMG_SIZE[0], IMG_SIZE[1], 3)))
    model.add(data_augmentation_layers)
    model.add(layers.Rescaling(1.0 / 255))  # Normalize pixel values
    model.add(layers.Conv2D(32, 3, padding='same', activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D())
    model.add(layers.Conv2D(32, 3, padding='same', activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D())
    model.add(layers.Conv2D(64, 3, padding='same', activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D())
    model.add(layers.Conv2D(64, 3, padding='same', activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D())
    model.add(layers.Conv2D(128, 3, padding='same', activation='relu',
                            kernel_regularizer=regularizers.l2(0.001)))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D())
    model.add(layers.Dropout(0.3))
    model.add(layers.Conv2D(128, 3, padding='same', activation='relu',
                            kernel_regularizer=regularizers.l2(0.001)))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D())
    model.add(layers.Dropout(0.3))
    model.add(layers.Flatten())
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dropout(0.5)) 
    
    model.add(layers.Dense(num_classes, activation='softmax'))

    model.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
                  metrics=['accuracy'])
    return model

# Instantiate and summarize the model
scratch_model_da = build_scratch_cnn()
scratch_model_da.summary()

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 sequential_6 (Sequential)   (None, 64, 64, 3)         0         
                                                                 
 rescaling_3 (Rescaling)     (None, 64, 64, 3)         0         
                                                                 
 conv2d_16 (Conv2D)          (None, 64, 64, 32)        896       
                                                                 
 batch_normalization_16 (Bat  (None, 64, 64, 32)       128       
 chNormalization)                                                
                                                                 
 max_pooling2d_16 (MaxPoolin  (None, 32, 32, 32)       0         
 g2D)                                                            
                                                                 
 conv2d_17 (Conv2D)          (None, 32, 32, 32)       

In [23]:
# Train Deep CNN Model
history_custom = scratch_model_da.fit(
    train_ds,
    validation_data=val_ds,
    epochs=10,
    callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)]
)

Epoch 1/10


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [24]:
# Evaluate the CNN Data Augmentation Deep Model
scratch_model_da.evaluate(test_ds)



[0.958897054195404, 0.7878571152687073]

In [None]:
# Save the Deep CNN Model
scratch_model_da.save('scratch_model_deep.h5')

In [None]:
# Load the model
from tensorflow.keras.models import load_model

# Load the model from the .h5 file
scratch_model_da = load_model('scratch_model_deep.h5')

In [None]:
# Testing own images
def predict_gesture(model, img_path, class_names):
    img = image.load_img(img_path, target_size=(128, 128))
    img_array = image.img_to_array(img)
    img_array = tf.expand_dims(img_array, 0)  # Create batch dimension

    predictions = model.predict(img_array)
    predicted_class = class_names[np.argmax(predictions)]
    return predicted_class

gesture_images = ['myImages/three2.png',
                  'myImages/rock.png',
                  'myImages/ok.jpg',
                  'myImages/two_up_inverted.png']

for img_path in gesture_images:
    predicted_gesture = predict_gesture(scratch_model_da, img_path, class_names)
    print(f"Prediction for {img_path}: {predicted_gesture}")

# **Data Augmentation without Dropout/Regularization**

In [45]:
# Define the Deep CNN Model from Scratch
def build_scratch_cnn():
    model = models.Sequential()
    model.add(tf.keras.Input(shape=(IMG_SIZE[0], IMG_SIZE[1], 3)))
    model.add(data_augmentation_layers)
    model.add(layers.Rescaling(1.0 / 255))  # Normalize pixel values
   
    model.add(layers.Conv2D(32, 3, padding='same', activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D())
   
    model.add(layers.Conv2D(32, 3, padding='same', activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D())
   
    model.add(layers.Conv2D(64, 3, padding='same', activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D())
   
    model.add(layers.Conv2D(64, 3, padding='same', activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D())
    
    model.add(layers.Conv2D(128, 3, padding='same', activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D())
  
    model.add(layers.Conv2D(128, 3, padding='same', activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D())

    model.add(layers.Flatten())
    
    model.add(layers.Dense(num_classes, activation='softmax'))

    model.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
                  metrics=['accuracy'])
    return model

# Instantiate and summarize the model
scratch_model_da2 = build_scratch_cnn()
scratch_model_da2.summary()

Model: "sequential_14"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 sequential_13 (Sequential)  (None, 64, 64, 3)         0         
                                                                 
 rescaling_7 (Rescaling)     (None, 64, 64, 3)         0         
                                                                 
 conv2d_37 (Conv2D)          (None, 64, 64, 32)        896       
                                                                 
 batch_normalization_37 (Bat  (None, 64, 64, 32)       128       
 chNormalization)                                                
                                                                 
 max_pooling2d_37 (MaxPoolin  (None, 32, 32, 32)       0         
 g2D)                                                            
                                                                 
 conv2d_38 (Conv2D)          (None, 32, 32, 32)      

In [46]:
# Train Deep CNN Model
history_custom = scratch_model_da2.fit(
    train_ds,
    validation_data=val_ds,
    epochs=10,
    callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)]
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [47]:
# Evaluate the CNN Data Augmentation Deep Model
scratch_model_da2.evaluate(test_ds)



[0.5184229016304016, 0.8386508226394653]

In [48]:
# Save the Deep CNN Model
scratch_model_da2.save('scratch_model_da2.h5')

In [None]:
# Load the model
from tensorflow.keras.models import load_model

# Load the model from the .h5 file
scratch_model_da2 = load_model('scratch_model_da2.h5')

In [52]:
# Testing own images
def predict_gesture(model, img_path, class_names):
    img = image.load_img(img_path, target_size=(64, 64))
    img_array = image.img_to_array(img)
    img_array = tf.expand_dims(img_array, 0)  # Create batch dimension

    predictions = model.predict(img_array)
    predicted_class = class_names[np.argmax(predictions)]
    return predicted_class

gesture_images = ['myImages/three2.png',
                  'myImages/rock.png',
                  'myImages/ok.jpg',
                  'myImages/two_up_inverted.png']

for img_path in gesture_images:
    predicted_gesture = predict_gesture(scratch_model_da2, img_path, class_names)
    print(f"Prediction for {img_path}: {predicted_gesture}")

Prediction for myImages/three2.png: peace
Prediction for myImages/rock.png: rock
Prediction for myImages/ok.jpg: ok
Prediction for myImages/two_up_inverted.png: two_up_inverted
