# ASL Translation
The aim of this project is to identify the American Sign Language (ASL) hand signs shown to a webcam. It uses image processing and a convolutional neural network to classify the images. <br>
This is part of a project to build a personal assistant that can take commands given in ASL, which will be especially useful for people who have trouble hearing. <br>
This model has been trained to identify 9 letters, each mapped to one of the following commands. The personal assistant used is Alexa. <br>
A -> setting an alarm <br>
C -> confirm command <br>
D -> volume down <br>
H -> Hi! <br>
J -> Tell me a joke <br>
R -> Redo command <br>
T -> setting a timer <br>
U -> volume up <br>
W -> weather <br>


In [None]:
#imports for Google Drive access in Colab (this cell only imports; it does not itself mount the drive)
import os
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth, drive
from oauth2client.client import GoogleCredentials

In [None]:
#setting the line breaks in the output
from IPython.display import HTML, display

def set_css():
  """Display a <style> rule that sets white-space: pre-wrap on <pre> elements."""
  wrap_style = HTML('''
  <style>
    pre {
        white-space: pre-wrap;
    }
  </style>
  ''')
  display(wrap_style)

#hook set_css into the 'pre_run_cell' event so it runs before each cell
get_ipython().events.register('pre_run_cell', set_css)


This model has been developed using TensorFlow and Keras.

In [None]:
#importing the libraries

import tensorflow as tf
import keras
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model
from keras.models import Sequential
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.core import Dropout
from keras.layers.core import Flatten
from keras.layers.core import Dense
from keras.layers.normalization import BatchNormalization

import os
import cv2

from sklearn.model_selection import train_test_split
from collections import Counter

from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

In [None]:
#Drive folder that holds one sub-folder of training images per letter
BASE_DIR = "/content/drive/MyDrive/asl/train_new"
#the recognised letters; a letter's position in this list is its class index
LETTERS = list("ACDHJRTUW")

In [None]:
#checking for gpu
#tf.test.is_gpu_available() is deprecated; list the visible GPU devices
#instead and reduce the list to the same True/False answer
bool(tf.config.list_physical_devices('GPU'))

In [None]:
#IPython shell escape: runs the nvidia-smi command-line tool to report the GPU status
!nvidia-smi

# Data Preparation

The images are loaded from the folder in the Google drive. They are preprocessed and converted to a numpy array.

In [None]:
#accumulators filled by the loading loop: X collects the images, y their class labels
X, y = [], []

In [None]:
def preprocess_img(img: np.ndarray) -> np.ndarray:
  """
  A function to preprocess a colour image for the classifier

  This function resizes the image to 299x299, applies the Canny edge detection
  algorithm to it and converts the resulting single-channel edge map back to a
  3-channel image.

  Args:
  img: numpy array representation of the image (e.g. the result of cv2.imread)

  Returns:
  A numpy array of shape (299, 299, 3) holding the edge map

  Raises:
  ValueError: if img is None, which is what cv2.imread returns for a file it
  cannot read
  """

  #fail with a clear message instead of an opaque OpenCV error inside resize
  if img is None:
    raise ValueError("preprocess_img received None instead of an image")

  #resizing the image to 299x299
  img = cv2.resize(img, (299,299))

  #applying the Canny edge detector with hysteresis thresholds 40 and 110
  img = cv2.Canny(img, 40,110)

  #Canny yields a single-channel map; expand it to the 3 channels the model expects
  img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)

  #hand the processed image back to the caller
  return img


In [None]:
for ind, alpha in enumerate(LETTERS):
  #path of the images of each letter is of the form BASE_DIR/alpha.
  current_dir = os.path.join(BASE_DIR, alpha)

  #os.listdir returns names in arbitrary order; sorting keeps X/y (and so the
  #seeded train/validation split) identical from run to run
  for fname in sorted(os.listdir(current_dir)):
    #reading the image as a 3-channel colour image
    img = cv2.imread(os.path.join(current_dir, fname), 1)

    #cv2.imread returns None for anything it cannot decode (sub-folders,
    #hidden files, corrupt images); skip those instead of crashing in resize
    if img is None:
      print("skipping unreadable file ", fname)
      continue

    #preprocessing: resize to the 299x299 model input size
    img = cv2.resize(img, (299,299))

    #applying the Canny edge detector
    img = cv2.Canny(img, 40,110)

    #converting the single-channel edge map back to 3 channels
    img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)

    #adding the numpy array to X
    X.append(img)
    #adding the corresponding label (index of the letter in LETTERS) to y
    y.append(ind)

  #checking the progress
  print("done with ", alpha)
  



In [None]:
#turning the accumulated Python lists into numpy arrays
X, y = np.array(X), np.array(y)

#one shape per line: images first, labels second
print(X.shape, y.shape, sep="\n")

(3600, 299, 299, 3)
(3600,)


In [None]:
#holding out 15% of the samples for validation and training on the remaining 85%;
#random_state is fixed at 42
split_opts = {"test_size": 0.15, "random_state": 42}
X_train, X_val, y_train, y_val = train_test_split(X, y, **split_opts)

In [None]:
#X_val is the validation split produced by train_test_split, so label it as such
print("Training set size: ", X_train.shape)
print("Validation set size: ", X_val.shape)

In [None]:
#counting the samples per class index in each split to check how balanced they are
train_set_labels, val_set_labels = (
    dict(Counter(labels)) for labels in (y_train, y_val)
)

In [None]:
#class index -> number of samples in the training split
print(train_set_labels)

In [None]:
#class index -> number of samples in the validation split
print(val_set_labels)

Displaying pie charts of the class distribution in the training and validation sets

In [None]:
#slice labels are the class indices, slice sizes the per-class training counts
train_keys = [*train_set_labels]
train_values = [*train_set_labels.values()]

fig = plt.figure(figsize=(10, 7))
plt.pie(train_values, labels=train_keys)
plt.show()

In [None]:
#slice labels are the class indices, slice sizes the per-class validation counts
val_keys = [*val_set_labels]
val_values = [*val_set_labels.values()]

fig = plt.figure(figsize=(10, 7))
plt.pie(val_values, labels=val_keys)
plt.show()

# Developing the model

In [None]:
#clearing the Keras session before the model is built
tf.keras.backend.clear_session()

In [None]:
#since this is a image processing task, CNN model has been used.
#every layer is taken from tf.keras.layers so the tf.keras.Sequential container
#never mixes with layer classes imported from the standalone keras package
layers = tf.keras.layers

model = tf.keras.Sequential()

# 1st CONV block (input: 299x299 images with 3 channels)
model.add(layers.Input(shape = (299,299,3)))
model.add(layers.Conv2D(64, kernel_size=(5,5), strides=(3,3), activation='relu'))
model.add(layers.MaxPooling2D(pool_size=(2,2)))
model.add(layers.BatchNormalization())

# 2nd CONV block
model.add(layers.Conv2D(128, kernel_size=(3,3), strides=(3,3), activation='relu'))
model.add(layers.MaxPooling2D(pool_size=(2,2)))
model.add(layers.BatchNormalization())
model.add(layers.Dropout(0.2))

# 3rd CONV block
model.add(layers.Conv2D(256, kernel_size=(3,3), strides=(1,1), activation='relu'))
model.add(layers.MaxPooling2D(pool_size=(2,2)))
model.add(layers.BatchNormalization())
model.add(layers.Dropout(0.2))

#flatten
model.add(layers.Flatten())

#fully connected layers
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dropout(0.2))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(32, activation='relu'))

#output layer: one softmax unit per letter in LETTERS (9 classes), so the
#width follows the label list instead of a hard-coded number
model.add(layers.Dense(len(LETTERS), activation='softmax'))



In [None]:
#printing the summary of the model built above
model.summary()

In [None]:
#optimiser: Adam with a learning rate of 0.001
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

#categorical cross-entropy loss to go with the one-hot labels and softmax output;
#accuracy is the reported metric
model.compile(
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=optimizer,
)

In [None]:
#converting the y matrices to one-hot vectors; num_classes is pinned to the
#number of letters so both matrices always have one column per class, even if
#a split happens to miss the highest class index
y_train_one_hot = tf.keras.utils.to_categorical(y_train, num_classes=len(LETTERS))
y_val_one_hot = tf.keras.utils.to_categorical(y_val, num_classes=len(LETTERS))

In [None]:
#sanity check; expected to be (number of training samples, number of classes)
y_train_one_hot.shape

In [None]:
#training for 15 epochs with batches of 64, shuffling the training data and
#passing the validation split so it is evaluated alongside
history = model.fit(
    x=X_train,
    y=y_train_one_hot,
    validation_data=(X_val, y_val_one_hot),
    epochs=15,
    batch_size=64,
    validation_batch_size=64,
    shuffle=True,
    verbose=1,
)

# Evaluating the model on the validation set

In [None]:
#per-class scores of the model for every validation image
val_probs = model.predict(X_val)
#the predicted label is the index of the highest score along the last axis
pred_labels = np.argmax(val_probs, axis=-1)

In [None]:
# confusion matrix of the predictions on the validation set
# (rows: true class index, columns: predicted class index, per scikit-learn's convention)
print(confusion_matrix(y_val, pred_labels))

[[80  0  0  0  0  0  0  0  0]
 [ 0 60  0  0  0  0  0  0  0]
 [ 1  0 64  0  0  0  0  0  0]
 [ 0  0  0 56  0  0  0  0  0]
 [ 0  0  0  0 62  0  0  0  0]
 [ 0  0  0  0  0 52  0  1  0]
 [ 0  0  0  0  0  0 55  1  0]
 [ 0  0  0  0  0  0  0 53  4]
 [ 0  0  0  0  1  0  0  0 50]]


In [None]:
#classification report of precision-recall values, f1 score;
#classes are reported by index, i.e. by their position in LETTERS
print(classification_report(y_val, pred_labels))

              precision    recall  f1-score   support

           0       0.99      1.00      0.99        80
           1       1.00      1.00      1.00        60
           2       1.00      0.98      0.99        65
           3       1.00      1.00      1.00        56
           4       0.98      1.00      0.99        62
           5       1.00      0.98      0.99        53
           6       1.00      0.98      0.99        56
           7       0.96      0.93      0.95        57
           8       0.93      0.98      0.95        51

    accuracy                           0.99       540
   macro avg       0.98      0.98      0.98       540
weighted avg       0.99      0.99      0.99       540



# Testing

In [None]:
def predict_asl_sign(file_path: str)-> None:
  """
  A function to detect the ASL sign in the image

  The image is read from disk, run through preprocess_img, displayed and then
  classified with the trained model.

  Args:
  file_path: A string which is the filename to be processed.

  Returns:
  It returns None. It displays the image and prints the predicted class index
  and the matching letter.

  Raises:
  FileNotFoundError: if the file cannot be read as an image.
  """

  img = cv2.imread(file_path,1)
  #cv2.imread signals failure by returning None rather than raising
  if img is None:
    raise FileNotFoundError("could not read an image from " + file_path)

  img = preprocess_img(img)

  #displaying the image
  plt.imshow(img)

  #reshaping to a batch of one image to be fed into the model
  img = np.reshape(img, (1,299,299,3))

  #getting the predictions from the trained network, which is bound to `model`
  pred_class = np.argmax(model.predict(img), axis=-1)

  print("result: ", pred_class)
  #LETTERS maps the class index back to its letter
  print("letter is: ", LETTERS[pred_class[0]])


In [None]:
#sample image; going by the file name it presumably shows the sign for A
predict_asl_sign("/content/A.jpg")

In [None]:
#sample image; going by the file name it presumably shows the sign for D
predict_asl_sign("/content/D.jpg")

In [None]:
#sample image; going by the file name it presumably shows the sign for H
predict_asl_sign("/content/H.jpg")

In [None]:
#sample image; going by the file name it presumably shows the sign for T
predict_asl_sign("/content/T.jpg")

In [None]:
#sample image; going by the file name it presumably shows the sign for U
predict_asl_sign("/content/U.jpg")

In [None]:
#sample image; going by the file name it presumably shows the sign for W
predict_asl_sign("/content/W.jpg")

Saving the Keras model weights

In [None]:
#writing the model's weights via save_weights to an .h5 file, given as a relative path
model.save_weights("asl_edges_extended.h5")