# Sign Language Recognition - Second Model

### Importing needed Libraries

In [1]:
import cv2
import numpy as np

#Importing os to make it easier to work with file paths
import os  

### Creating Folders to Store Collected Training Data

In [2]:
currentDirectory = os.getcwd()

IMAGES_PATH = os.path.join(currentDirectory , 'Sign_Training_Images')

signs = np.array(['Peace', 'I Love You', 'Good', 'House'])

# Note that the number of frames specified here, however only 300 will be used for training and testing 
# and the first 50 will be disregarded. (As during the first few frames the hand movement is still getting adjusted and are not an accurate representation of the sign)
framesPerSign = 350


In [3]:
for sign in signs:
    try:
        os.makedirs(os.path.join(IMAGES_PATH, sign))
    except:
        pass

### Background Subtraction

In [4]:
background = None

In [5]:
def running_average(image, weight):
    
    global background

    if background is None:
        background = image.copy().astype("float")
        return


    cv2.accumulateWeighted(image, background, weight)

### Hand Segmentation

In [6]:
def HandSegmentation(image, threshold = 25):
    
    global background

    differenceImage = cv2.absdiff(background.astype("uint8"), image)


    thresholdImage = cv2.threshold(differenceImage, threshold, 255, cv2.THRESH_BINARY)[1]


    contours, _ = cv2.findContours(thresholdImage.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)


    if len(contours) == 0:
        
        return
    
    else:

        segmentedImage = max(contours, key=cv2.contourArea)
        
        return (thresholdImage, segmentedImage)

### Collecting Sign Images for Training and Testing

In [7]:
def collectTrainingData(sign):
    
    global IMAGES_PATH
    global framesPerSign
    
    weight = 0.5
    
    camera = cv2.VideoCapture(0)

    ROItop, ROIright, ROIbottom, ROIleft = 20, 460, 260, 725

    frameNumber = 0
    
    backgroundFrames = 100

    while(frameNumber < backgroundFrames + framesPerSign):

        
        ret, frame = camera.read()

        frame = cv2.flip(frame, 1)

        
        regionOfInterest = frame[ROItop : ROIbottom, ROIright : ROIleft]


        grayscaleImage = cv2.cvtColor(regionOfInterest, cv2.COLOR_BGR2GRAY)
        grayscaleImage = cv2.GaussianBlur(grayscaleImage, (7, 7), 0)


        if frameNumber < backgroundFrames:
            
            running_average(grayscaleImage, weight)
            cv2.putText(frame, "Collecting Background...",(160, 320), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0), 2)
        
        else:

            handRegion = HandSegmentation(grayscaleImage)
            
            if handRegion is not None:
                
                thresholdImage, segmentedImage = handRegion

                
                cv2.drawContours(frame, [segmentedImage + (ROIright, ROItop)], -1, (0, 0, 255))                    

                cv2.putText(frame, 'Collecting Frames for {} Image Number {}'.format(sign,frameNumber - backgroundFrames), (15,12), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)  
                cv2.imshow("Theshold Image", thresholdImage)
                cv2.imwrite(IMAGES_PATH + '/' + sign + '/'+ str(frameNumber - backgroundFrames)+'.jpg', thresholdImage)

        
        cv2.rectangle(frame, (ROIleft, ROItop), (ROIright, ROIbottom), (0,255,0), 2)

        
        frameNumber += 1


            
        cv2.imshow("Sign Language Recognition", frame)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

   
    camera.release()
    cv2.destroyAllWindows()
    cv2.waitKey(1)

In [8]:
collectTrainingData('Peace')

In [9]:
collectTrainingData('I Love You')

In [11]:
collectTrainingData('Good')

In [12]:
collectTrainingData('House')

### Building and Training CNN

In [13]:
# !pip install keras

In [8]:
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Activation, Dense, Flatten, BatchNormalization, Conv2D, MaxPool2D
from keras.optimizers import SGD
from keras.metrics import categorical_crossentropy
from keras.preprocessing.image import ImageDataGenerator
import warnings
from keras.callbacks import ReduceLROnPlateau
from keras.callbacks import ModelCheckpoint, EarlyStopping

warnings.simplefilter(action='ignore', category=FutureWarning)

In [24]:
signImages = ImageDataGenerator(preprocessing_function=tf.keras.applications.vgg16.preprocess_input,validation_split=0.2)

In [25]:
trainingData = signImages.flow_from_directory(directory=IMAGES_PATH, target_size=(64,64),subset="training", class_mode='categorical', batch_size=10,shuffle=True)
testingData = signImages.flow_from_directory(directory=IMAGES_PATH, target_size=(64,64), subset="validation",class_mode='categorical', batch_size=10, shuffle=True)

Found 960 images belonging to 4 classes.
Found 240 images belonging to 4 classes.


In [9]:
model = Sequential()

model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=(64,64,3)))
model.add(MaxPool2D(pool_size=(2, 2), strides=2))

model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding = 'same'))
model.add(MaxPool2D(pool_size=(2, 2), strides=2))

model.add(Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding = 'valid'))
model.add(MaxPool2D(pool_size=(2, 2), strides=2))

model.add(Flatten())

model.add(Dense(64,activation ="relu"))
model.add(Dense(128,activation ="relu"))
model.add(Dense(128,activation ="relu"))

model.add(Dense(4,activation ="softmax"))

In [10]:
model.compile(optimizer=SGD(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
reduceLearningRate = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=1, min_lr=0.0005)
earlyStopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=2, verbose=0, mode='auto')

In [28]:
model.fit(trainingData, epochs=10, callbacks=[reduceLearningRate, earlyStopping],  validation_data = testingData)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


<tensorflow.python.keras.callbacks.History at 0x7fd202352e10>

In [29]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 62, 62, 32)        896       
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 31, 31, 32)        0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 31, 31, 64)        18496     
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 15, 15, 64)        0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 13, 13, 128)       73856     
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 6, 6, 128)         0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 4608)             

### Saving / Loading model weights

In [30]:
model.save('Second_Sign_Language_Model.h5')

In [11]:
model.load_weights('Second_Sign_Language_Model.h5')

### Evaluating Model Prediction Accuracy

In [32]:
testImages, testImageslabels = next(testingData)

modelEvaluationScore = model.evaluate(testImages, testImageslabels, verbose=0)


print(f' The Model has a Loss of {modelEvaluationScore[0]} and an Accuracy of {modelEvaluationScore[1] * 100} %')


 The Model has a Loss of 0.0015983032062649727 and an Accuracy of 100.0 %


In [12]:
label_map = {0:'Good',1:'House',2:'I Love You',3:'Peace'}

predictions = model.predict(testImages, verbose=0)


In [34]:
print('Actual Labels')
print('--------------------------------')

for label in testImageslabels :
    print(f'{label_map[np.argmax(label)]}  ')

    
print('\n')    
print("prediction Labels")
print('--------------------------------')

for ind, i in enumerate(predictions):
    print(f'{label_map[np.argmax(i)]}  ')

Actual Labels
--------------------------------
House  
I Love You  
Peace  
I Love You  
I Love You  
House  
House  
I Love You  
I Love You  
House  


prediction Labels
--------------------------------
House  
I Love You  
Peace  
I Love You  
I Love You  
House  
House  
I Love You  
I Love You  
House  


### Making Predictions in Real Time

In [None]:
weight = 0.5

camera = cv2.VideoCapture(0)

ROItop, ROIright, ROIbottom, ROIleft = 20, 460, 260, 725

frameNumber = 0

backgroundFrames = 100

while(True):

        
    ret, frame = camera.read()

    frame = cv2.flip(frame, 1)

        
    regionOfInterest = frame[ROItop : ROIbottom, ROIright : ROIleft]


    grayscaleImage = cv2.cvtColor(regionOfInterest, cv2.COLOR_BGR2GRAY)
    grayscaleImage = cv2.GaussianBlur(grayscaleImage, (7, 7), 0)


    if frameNumber < backgroundFrames:
            
        running_average(grayscaleImage, weight)
        cv2.putText(frame, "Collecting Background...", (160, 320), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0), 2)
        
    else:

        handRegion = HandSegmentation(grayscaleImage)
            
        if handRegion is not None:
                
            thresholdImage, segmentedImage = handRegion

                
            cv2.drawContours(frame, [segmentedImage + (ROIright, ROItop)], -1, (0, 0, 255))                    

            
            cv2.imshow("Theshold Image", thresholdImage)
            
            thresholdImage = cv2.resize(thresholdImage, (64, 64))
            thresholdImage = cv2.cvtColor(thresholdImage, cv2.COLOR_GRAY2RGB)
            thresholdImage = np.reshape(thresholdImage, (1,thresholdImage.shape[0],thresholdImage.shape[1],3))
            
            
            prediction = model.predict(thresholdImage)
            
            cv2.putText(frame, label_map[np.argmax(prediction)], (180, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0), 2)
        
    cv2.rectangle(frame, (ROIleft, ROItop), (ROIright, ROIbottom), (0,255,0), 2)

        
    frameNumber+= 1


            
    cv2.imshow("Sign Language Recognition", frame)

    if cv2.waitKey(10) & 0xFF == ord('q'):
        break

camera.release()
cv2.destroyAllWindows()
cv2.waitKey(1)