https://www.kaggle.com/code/irfanarisani/asl-recognition-based-on-cnn-architecture

In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import os, shutil
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import splitfolders

# import the packages required for the CNN
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, BatchNormalizationV2
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.callbacks import ModelCheckpoint

In [2]:
# Stage the raw training images: duplicate Data/Train_ABCD as Data/input
dirData = os.path.join(os.getcwd(), "Data")

# Origin of the images and location of the working copy
source_folder = os.path.join(dirData, "Train_ABCD")
destination_folder = os.path.join(dirData, "input")

# Copy only once; an existing destination is taken as an earlier copy
if os.path.exists(destination_folder):
    print("Train folder has already been copied")
else:
    shutil.copytree(source_folder, destination_folder)
    print("Successfully copied Train folder to the input folder")

Successfully copied Train folder to the input folder


In [3]:
# Start clean: remove the output folders left behind by a previous run.
# Each folder is removed independently, so a missing 'splitdata' folder no
# longer prevents 'train' and 'val' from being cleaned up as well.
for stale_name in ('splitdata', 'train', 'val'):
    # ignore_errors keeps this best-effort: a folder that is absent is fine
    shutil.rmtree(os.path.join(dirData, stale_name), ignore_errors=True)

In [4]:
def split_data_train_val(dirData: str) -> None:
    """Create an 80/20 split of the images found under ``<dirData>/input``.

    The result is written to ``<dirData>/splitdata``. When that folder is
    already present the function reports it and returns without splitting.
    Any exception raised while splitting is caught and reported on stdout
    instead of being propagated.

    Args:
        dirData: The directory that contains the ``input`` folder.

    Returns:
        None
    """
    source_dir = os.path.join(dirData, 'input')
    target_dir = os.path.join(dirData, "splitdata")

    # Guard clause: an existing output folder means the split was done before
    if os.path.exists(target_dir):
        print("Train-val split already exists.")
        return

    try:
        splitfolders.ratio(source_dir, output=target_dir, ratio=(0.80, 0.20))
        print("Successfully split up the data")
    except Exception as err:
        # Best-effort: report the problem and let the notebook continue
        print("Splitting failed:", str(err))


# split the training data into two folders, named 'train' and 'val'
split_data_train_val(dirData)


Successfully split up the data


In [5]:
# Delete the working copy named 'input' that was created for the split.
input_dir = os.path.join(dirData, 'input')
try:
    shutil.rmtree(input_dir)
except FileNotFoundError:
    # Nothing to delete, e.g. this cell has already been run
    pass
except OSError as err:
    # Stay best-effort, but report real failures instead of hiding them
    print(f"Could not delete '{input_dir}': {err}")

In [6]:
def move_folder_up_two_levels(folder_path):
    """Move a folder out of its parent directory into its grandparent directory.

    For example ``<root>/splitdata/train`` ends up as ``<root>/train``.

    Args:
        folder_path (str): The path of the folder to be moved. A trailing
            path separator is tolerated.

    Returns:
        None

    Raises:
        FileNotFoundError: If the specified folder does not exist.
        FileExistsError: If the destination path already exists.
    """

    # Check if the folder exists
    if not os.path.exists(folder_path):
        raise FileNotFoundError(f"Folder '{folder_path}' does not exist.")

    # Normalise before taking the path apart: with a trailing separator
    # os.path.basename() returns '' and os.path.dirname() merely drops the
    # separator, which would produce the wrong destination.
    source_path = os.path.abspath(folder_path)

    # Destination = grandparent directory + the folder's own name
    parent_dir = os.path.dirname(source_path)
    grandparent_dir = os.path.dirname(parent_dir)
    new_folder_path = os.path.join(grandparent_dir, os.path.basename(source_path))

    # shutil.move() into an existing directory nests the source inside it
    # (e.g. train/train); refuse rather than silently create that layout.
    if os.path.exists(new_folder_path):
        raise FileExistsError(f"Destination '{new_folder_path}' already exists.")

    # Move the folder to the new destination
    shutil.move(source_path, new_folder_path)

    print(f"Folder moved successfully to {new_folder_path}")


In [7]:
# Build the paths component by component so the separator is right on every
# OS; a hard-coded '\\' is only a path separator on Windows.
move_folder_up_two_levels(os.path.join(dirData, 'splitdata', 'train'))
move_folder_up_two_levels(os.path.join(dirData, 'splitdata', 'val'))

Folder moved successfully to c:\Users\DhrCS\Documents\GitHub\American_Sign_language\Data\train
Folder moved successfully to c:\Users\DhrCS\Documents\GitHub\American_Sign_language\Data\val


In [8]:
# list the data directory to confirm that 'train' and 'val' now sit at its top level
os.listdir(dirData)

['splitdata',
 'Test',
 'Test_ABCD',
 'train',
 'Train_ABCD',
 'Train_Original',
 'val']

# Modelling

In [9]:
# Paths of the 'train' and 'val' image folders inside the data directory
dirTrain, dirVal = (os.path.join(dirData, subset) for subset in ('train', 'val'))

In [10]:
# Make sure both splits contain exactly the same class folders.
# os.listdir() returns entries in arbitrary order, so compare sorted lists,
# and fail loudly on a mismatch instead of leaving `labels` undefined.
trainLabels = sorted(os.listdir(dirTrain))
valLabels = sorted(os.listdir(dirVal))
if trainLabels != valLabels:
    raise ValueError(
        f"Class folders differ between train {trainLabels} and val {valLabels}"
    )
labels = trainLabels
labels

['A', 'B', 'C', 'D']

In [11]:
# Training images: scale pixels to [0, 1] and augment on the fly
setTrain = ImageDataGenerator(
    rescale=1./255,
    zoom_range=0.32,
    brightness_range=[0.6, 2.1],
    fill_mode='nearest',
    horizontal_flip=True,  # mirrored images stand in for the other hand
)

# Validation images: scaling only, no augmentation
setVal = ImageDataGenerator(rescale=1./255)

In [12]:
# Options shared by both generators, kept in one place so the training and
# validation streams cannot drift apart (image size, batch size, label format).
flowOptions = dict(target_size = (200, 200),
                   batch_size = 32,
                   class_mode = 'categorical',
                   seed = 250)

# generate new images for data training
genTrain = setTrain.flow_from_directory(dirTrain, **flowOptions)

# generate new images for data validation
genVal = setVal.flow_from_directory(dirVal, **flowOptions)

# The label -> index mapping must be identical for both generators; stop here
# on a mismatch rather than train against misaligned labels.
if genTrain.class_indices != genVal.class_indices:
    raise ValueError(
        f"Class indices differ: train={genTrain.class_indices}, "
        f"val={genVal.class_indices}"
    )
print(genTrain.class_indices)

Found 19184 images belonging to 4 classes.
Found 4800 images belonging to 4 classes.
{'A': 0, 'B': 1, 'C': 2, 'D': 3}


In [13]:
# NOTE(review): the original author describes this network as VGG-inspired
def mainModel(numClasses = None, inputSize = (200, 200, 3)):
    """Build the (uncompiled) CNN classifier.

    Three convolution stages, each followed by 2x2 max pooling, feed a
    512-unit dense layer and a softmax output layer.

    Args:
        numClasses (int, optional): Number of output classes. When omitted,
            the number of classes discovered by the global ``genTrain``
            generator is used, which is what the function always did before.
        inputSize (tuple): Shape of one input image as
            (height, width, channels).

    Returns:
        The assembled ``Sequential`` model.
    """
    if numClasses is None:
        # Backward-compatible default: one output unit per class folder
        numClasses = len(genTrain.class_indices)

    model = Sequential()

    model.add(Conv2D(32, (7, 7), activation = 'relu',
                     input_shape = inputSize))
    model.add(MaxPooling2D(2, 2))

    model.add(Conv2D(64, (5, 5), activation = 'relu'))
    model.add(MaxPooling2D(2, 2))

    model.add(Conv2D(128, (3, 3), activation = 'relu'))
    model.add(Dropout(0.3))
    model.add(MaxPooling2D(2, 2))

    model.add(Flatten())
    model.add(Dense(512, activation = 'relu'))
    model.add(Dropout(0.3))
    # One softmax unit per class
    model.add(Dense(numClasses, activation = 'softmax'))

    return model

# build the model and print its layer-by-layer summary
model = mainModel()
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 194, 194, 32)      4736      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 97, 97, 32)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 93, 93, 64)        51264     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 46, 46, 64)       0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 44, 44, 128)       73856     
                                                                 
 dropout (Dropout)           (None, 44, 44, 128)       0

In [14]:
# Configure training: Adam optimiser, categorical cross-entropy loss
# (matching the one-hot labels), and accuracy as the reported metric
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.0015),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [15]:
# train the model
# `batch_size` is deliberately not passed: genTrain already yields batches,
# and the Keras documentation says not to specify it for generator input.
# The returned History is kept so the learning curves can be inspected later.
# NOTE(review): 32 steps of 32 images cover only 1,024 of the 19,184 training
# images per epoch, and 2 validation steps only 64 of the 4,800 validation
# images - confirm this sub-sampling is intended.
history = model.fit(genTrain,
                    steps_per_epoch = 32,
                    epochs = 25,
                    validation_data = genVal,
                    validation_steps = 2,
                    verbose = 2)

: 

: 