# Hand Gesture CNN model creation



In [None]:
# Institution: Carleton University
# Course: OSS4009 Capstone 
# Term: F22 - W23
#
# Filename: 0 - Pre2 - cnn_model_creation.ipynb
#
# Students: Adam Thompson, Philippe Beaulieu
# Advisor:  Dr. Marzieh Amini
#
# Description: This program creates the model from images in a folder structure
#              and trains it; the trained model can then be converted to a
#              TensorFlow Lite model and saved.
#              You can test the model with an image or a live webcam stream in
#              the bottom code sections.
#              You can save the model as a TensorFlow Lite model after checking
#              the training and validation results.
#
#              This program will only look at the DEPTH images for the Tensorflow model.
#              The folder hierarchy matters when loading the images; it is as follows:
#
#     TRAIN
#       -DEPTH
#          -folder0
#             - image0.jpg
#             - image1.jpg
#             - image2.jpg
#             - ...
#          -folder1
#             - image0.jpg
#             - image1.jpg
#             - image2.jpg
#             - ...
#          -...
#       -RGB
#          -folder0
#             - image0.jpg
#             - image1.jpg
#             - image2.jpg
#             - ...
#          -folder1
#             - image0.jpg
#             - image1.jpg
#             - image2.jpg
#             - ...
#          -...
#     TEST -> follow the same structure as train
#

Setup

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import PIL
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

import pathlib
import os

import warnings
# Suppress every Python warning for the rest of the session (no category or
# module filter is given, so this is global). Comment this line out when
# debugging, since deprecation and runtime warnings are hidden as well.
warnings.filterwarnings('ignore')

Setting up the folder

In [2]:
# Root folder that holds one sub-folder of depth images per gesture class.
data_dir = pathlib.Path('TRAIN/DEPTH')

# Count the .jpg files sitting exactly one level below the root
# (i.e. <class_folder>/<image>.jpg) and report the total.
image_count = sum(1 for _ in data_dir.glob('*/*.jpg'))
print(image_count)

4500


Creating the dataset

In [3]:
# Batch size and target resolution applied to every image that is loaded.
batch_size = 32
img_height, img_width = 120, 160   # alternative kept for reference: 270 x 480


def _load_split(subset):
    """Load one split ("training" or "validation") of the images under data_dir."""
    return tf.keras.utils.image_dataset_from_directory(
        data_dir,
        labels='inferred',
        color_mode='rgb',
        # Both splits are requested with the same validation_split and seed.
        validation_split=0.2,
        subset=subset,
        seed=27,
        image_size=(img_height, img_width),
        batch_size=batch_size,
    )


train_ds = _load_split("training")
val_ds = _load_split("validation")

Found 4500 files belonging to 5 classes.
Using 3600 files for training.
Found 4500 files belonging to 5 classes.
Using 900 files for validation.


In [4]:
# Class labels exposed by the dataset loader; the printed output shows them
# matching the class sub-folder names.
# NOTE(review): read this before train_ds is re-wrapped with cache()/prefetch()
# further down -- presumably the wrapped dataset no longer exposes
# `class_names`; confirm.
class_names = train_ds.class_names
print(class_names)

['1finger', '2finger', 'fist', 'palm', 'thumb_up']


Visualize the data (optional: run this only if you are curious about the data)

In [None]:
# Show the first nine images of one training batch in a 3 x 3 grid,
# each titled with its class name.
GRID_ROWS, GRID_COLS = 3, 3

plt.figure(figsize=(10, 10))
for images, labels in train_ds.take(1):
    for i in range(GRID_ROWS * GRID_COLS):
        ax = plt.subplot(GRID_ROWS, GRID_COLS, i + 1)
        pixels = images[i].numpy().astype("uint8")   # float tensor -> displayable bytes
        caption = class_names[labels[i]]
        plt.imshow(pixels)
        plt.title(caption)
        plt.axis("off")

In [None]:
# Print the image-tensor and label-tensor shapes of the first batch only.
for image_batch, labels_batch in train_ds.take(1):
    print(image_batch.shape)
    print(labels_batch.shape)

Configure the dataset for performance

In [5]:
# Let tf.data choose the prefetch buffer size.
AUTOTUNE = tf.data.AUTOTUNE


def _cache_and_prefetch(dataset):
    """Chain cache() and then prefetch(AUTOTUNE) onto the given dataset."""
    return dataset.cache().prefetch(buffer_size=AUTOTUNE)


# An extra .shuffle(1024) between cache() and prefetch() on the training set
# is intentionally left disabled.
train_ds = _cache_and_prefetch(train_ds)
val_ds = _cache_and_prefetch(val_ds)

Create the model - network

In [6]:
# One output unit per gesture class.
num_classes = len(class_names)


def _conv_block(filters):
    """Return two 3x3 'same'-padded ReLU convolutions followed by a max-pool."""
    return [
        layers.Conv2D(filters, 3, padding='same', activation='relu', kernel_initializer='he_uniform'),
        layers.Conv2D(filters, 3, padding='same', activation='relu', kernel_initializer='he_uniform'),
        # layers.BatchNormalization() after each pair is left disabled.
        layers.MaxPooling2D(),
    ]


def _dense_relu(units):
    """Return a fully connected ReLU layer with He-uniform initialisation."""
    return layers.Dense(units, activation='relu', kernel_initializer='he_uniform')


model = Sequential([
    # Normalize the input data from [0, 255] to [0, 1].
    layers.Rescaling(1./255, input_shape=(img_height, img_width, 3)),
    # Four convolutional stages with a doubling filter count.
    *_conv_block(64),
    *_conv_block(128),
    *_conv_block(256),
    *_conv_block(512),
    layers.Flatten(),
    # The Dropout layers previously carried a stray `input_shape=(2,)`.
    # `input_shape` only applies to the first layer of a Sequential model and
    # (2,) never matched the real input here, so the argument is removed.
    layers.Dropout(0.5),
    _dense_relu(256),
    _dense_relu(128),
    _dense_relu(64),
    _dense_relu(32),
    layers.Dropout(0.3),
    # Softmax head: the model outputs class probabilities, not logits.
    layers.Dense(num_classes, activation='softmax'),
])

Compile the model

In [7]:
# The network's final Dense layer already applies a softmax activation, so the
# loss receives probabilities rather than logits. `from_logits` must therefore
# be False; with True the loss would apply a second softmax internally.
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])

# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 120, 160, 3)       0         
                                                                 
 conv2d (Conv2D)             (None, 120, 160, 64)      1792      
                                                                 
 conv2d_1 (Conv2D)           (None, 120, 160, 64)      36928     
                                                                 
 max_pooling2d (MaxPooling2D  (None, 60, 80, 64)       0         
 )                                                               
                                                                 
 conv2d_2 (Conv2D)           (None, 60, 80, 128)       73856     
                                                                 
 conv2d_3 (Conv2D)           (None, 60, 80, 128)       147584    
                                                        

Train the model

In [None]:
# Number of full passes over the training set.
epochs = 35

# Fit on the training split and evaluate on the validation split after each
# epoch; the returned History object keeps the per-epoch metrics.
history = model.fit(
    train_ds,
    epochs=epochs,
    validation_data=val_ds,
)


Epoch 1/35
Epoch 2/35
Epoch 3/35
Epoch 4/35
Epoch 5/35
Epoch 6/35
Epoch 7/35
Epoch 8/35
Epoch 9/35
Epoch 10/35
Epoch 11/35
Epoch 12/35
Epoch 13/35
Epoch 14/35
Epoch 15/35
Epoch 16/35
Epoch 17/35
Epoch 18/35
Epoch 19/35
Epoch 20/35
Epoch 21/35
Epoch 22/35
Epoch 23/35

Visualize training results

In [None]:
# Per-epoch metrics recorded by model.fit().
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

# Size the x-axis from the number of epochs actually recorded rather than the
# requested `epochs`: if the two differ (e.g. training stopped early or
# `epochs` was edited afterwards) plotting would fail with a length mismatch.
epochs_range = range(len(acc))

fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(8, 8))

# Left panel: accuracy curves.
acc_ax.plot(epochs_range, acc, label='Training Accuracy')
acc_ax.plot(epochs_range, val_acc, label='Validation Accuracy')
acc_ax.legend(loc='lower right')
acc_ax.set_title('Training and Validation Accuracy')
acc_ax.set_xlabel('Epoch')

# Right panel: loss curves.
loss_ax.plot(epochs_range, loss, label='Training Loss')
loss_ax.plot(epochs_range, val_loss, label='Validation Loss')
loss_ax.legend(loc='upper right')
loss_ax.set_title('Training and Validation Loss')
loss_ax.set_xlabel('Epoch')

plt.show()

In [None]:
#model.save('final_model.h5')

Predict on new data

In [None]:
# Image to classify, resized to the network's input resolution.
# Other sample paths used during development:
#   'TEST/RGB/2finger/frame596.jpg'
#   'TRAIN/DEPTH/1finger/img333.jpg'
img = tf.keras.utils.load_img('TEST/DEPTH/test1.png', target_size=(img_height, img_width))

# Prepare the image for prediction: convert to an array and add a batch axis.
img_array = tf.keras.utils.img_to_array(img)
img_array = tf.expand_dims(img_array, 0)  # Create a batch

# Run the prediction on the image array.
predictions = model.predict(img_array)

# The model's last layer is already a softmax, so the prediction row holds the
# class probabilities. Applying tf.nn.softmax again would squash them towards
# a uniform distribution and under-report the confidence.
score = predictions[0]

# Display the image.
plt.imshow(img)
plt.axis("off")

# Print the prediction result and the raw probability vector.
print(
    "This image most likely belongs to {} with a {:.2f} percent confidence."
    .format(class_names[np.argmax(score)], 100 * np.max(score)) )
print(predictions)
plt.show()


Convert the Keras Sequential model to a TensorFlow Lite model - and saving it

In [None]:
# Build a TensorFlow Lite flatbuffer from the trained Keras model.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Write the converted model bytes to disk.
pathlib.Path('model.tflite').write_bytes(tflite_model)

# Load the converted model from memory and show the signatures it exposes.
interpreter = tf.lite.Interpreter(model_content=tflite_model)
signatures = interpreter.get_signature_list()
print(signatures)
