# Sign to Speech Converter (initial notebook)
## This Jypyter Notebook shows how the code was built before building the main python script ('application.py').
## **Note** : This notebook is recommended to be tested on Google Colab if the required libraries are not installed on
## the system. Also the paths for creating folders correspond to the local machine on which this was tested.
## **Please ensure to make necessary changes to the paths before executing the cells** 

In [None]:
import cv2
import numpy as np
import tensorflow as tf
import glob
import pyttsx3

In [6]:
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Activation, Dense, Flatten, BatchNormalization, Conv2D, MaxPool2D, Dropout
from keras.optimizers import Adam, SGD
from keras.metrics import categorical_crossentropy
from keras.preprocessing.image import ImageDataGenerator
import warnings

from keras.callbacks import ReduceLROnPlateau
from keras.callbacks import ModelCheckpoint, EarlyStopping
warnings.simplefilter(action='ignore', category=FutureWarning)

In [7]:
import os   # accessing folder paths
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as img
import patoolib  # This is useful to extract zip files 
import cv2,glob 
import shutil   # this module is used to do file accessing operations

In [8]:
num_classes = {1:'1',2:'2',3:'3', 4:'4', 5:'5', 6:'6', 7:'7', 8:'8',
           9:'9'}
alpha_classes = {1:'A',2:'B',3:'C',4:'D',5:'E',6:'F',7:'G',8:'H',9:'I',
           10:'J',11:'K',12:'L',13:'M',14:'N',15:'O',16:'P',17:'Q',18:'R',19:'S',20:'T',21:'U',
           22:'V',23:'W',24:'X',25:'Y',26:'Z'}
mylist = [x for x in alpha_classes.values()]
print(mylist[8:])
words_data = {1:'All_The_Best', 2:'Hi!!', 3: 'I_Love_you', 4: 'No', 5:'Super!!', 6:'Yes'}

['I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']


In [None]:
# create directory for dataset
# first create directories inthe name of class keys
# You need to run this only once.
print('Creating Folders for data. Please wait...')
for dir_name in alpha_classes.values():
    !mkdir {'G:\\gestures\\train\\' + dir_name}
    !mkdir {'G:\\gestures\\test\\' + dir_name}
print('Done.')

In [None]:
# creates data for words
# RUN THIS ONLY ONCE 
print('Creating Folders for words data. Please wait....')
for dir_name in words_data.values():
    !mkdir {'G:\\gestures\\words_data\\test\\' + dir_name}
    !mkdir {'G:\\gestures\\words_data\\train\\' + dir_name}
    
print('Done!!!')

In [5]:
background = None
accumulated_weight = 0.7
mask_color = (0.0,0.0,0.0)

ROI_top = 100
ROI_bottom = 300
ROI_right = 300
ROI_left = 500

## Below are helper functions

In [6]:
# This function is used to calculate accumulated_weights in the frame
def cal_accum_avg(frame, accumulated_weight):

    global background
    
    if background is None:
        background = frame.copy().astype("float")
        return None

    cv2.accumulateWeighted(frame, background, accumulated_weight)

In [7]:
# This function segments the hand region found in the frame, if not found returns None.
def segment_hand(frame, threshold=50):
    global background
    
    diff = cv2.absdiff(background.astype("uint8"), frame)

    
    _ , thresholded = cv2.threshold(diff, threshold, 255,cv2.THRESH_BINARY)
    
    edges = cv2.Canny(thresholded, threshold1= 50, threshold2=250)
    cv2.imshow('edges',thresholded)
    
     #Fetching contours in the frame (These contours can be of hand
    #or any other object in foreground) …

    contours, hierarchy = cv2.findContours(thresholded.copy(), cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
    
    # If length of contours list = 0, means we didn't get any
    #contours...
    if len(contours) == 0:
        return None
    else:
        # The largest external contour should be the hand
        # contour_info = [(c, cv2.contourArea(c),) for c in contours[1]]

        #cntrs, heirs = cv2.findContours(thresholded.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)

        contour_info = [(c, cv2.contourArea(c),) for c in contours]
        #for c in contours[1]:
        #    contour_info.append((c,cv2.contourArea(c),))
        
        hand_segment_max_cont = max(contours, key=cv2.contourArea)
        
        # Returning the hand segment(max contour) and the
  # thresholded image of hand and contour_info list
    return (thresholded, hand_segment_max_cont, contour_info)

In [10]:
## Initialize tbase dir, train_dir, test_dir
#base_dir = 'G:\\gestures\\words_data\\' # un comment this to recognize words
base_dir = 'G:\\gestures\\'            # uncomment this to recognisze numbers
train_dir = os.path.join(base_dir, 'train')
test_dir = os.path.join(base_dir,'test')

# Here we create the data_set for word recognition
## **Note** : Run this cell only when you want to create data
### Make sure you have created corresponding directories  

In [None]:
import time
for element in alpha_classes.values():
    cam = cv2.VideoCapture(0)
    num_frames = 0
    num_imgs_taken = 0
    time.sleep(7)
    print('#################################################')
    print(f'Show sign for {element}!')

    print(f'Creating data for {element}.....')
    
    while True:
        ret, frame = cam.read()

        # flipping the frame to prevent inverted image of captured frame...
        frame = cv2.flip(frame, 1)

        frame_copy = frame.copy()

        roi = frame[ROI_top:ROI_bottom, ROI_right:ROI_left]

        gray_frame = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
        gray_frame = cv2.GaussianBlur(gray_frame, (9, 9), 0)

        if num_frames < 60:
            cal_accum_avg(gray_frame, accumulated_weight)
            if num_frames <= 59:
                cv2.putText(frame_copy, "FETCHING BACKGROUND...PLEASE WAIT",
                            (80, 400), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0,0,255), 2)
                
        #Time to configure the hand specifically into the ROI...
        elif num_frames <= 300: 

            hand = segment_hand(gray_frame)
            cv2.putText(frame_copy, "Adjust hand gesture for..",
                            (200, 400), cv2.FONT_HERSHEY_SCRIPT_SIMPLEX, 1, (0,0,255), 2)
                
            if hand is not None:
                
                thresholded, hand_segment, contour_info = hand

                # Draw contours around hand segment
                cv2.drawContours(frame_copy, [hand_segment + (ROI_right,
                ROI_top)], -1, (255, 0, 0),1)
                
                cv2.putText(frame_copy, str(num_frames)+"For" + str(element),
                            (70, 45), cv2.FONT_HERSHEY_SCRIPT_SIMPLEX, 1, (0,0,255), 2)

                # Also display the thresholded image
                cv2.imshow("Thresholded Hand Image", thresholded)
        
        else: 
            
            # Segmenting the hand region...
            hand = segment_hand(gray_frame)
            
            # Checking if we are able to detect the hand...
            if hand is not None:
                
                # unpack the thresholded img and the max_contour...
                thresholded, hand_segment,contour_info = hand

                # Drawing contours around hand segment
                cv2.drawContours(frame_copy, [hand_segment + (ROI_right,
                ROI_top)], -1, (255, 0, 0),1)
                
                cv2.putText(frame_copy, str(num_frames), (70, 45),cv2.FONT_HERSHEY_SIMPLEX, 1, (0,0,255), 2)
                
                cv2.putText(frame_copy,"Adjust hand gesture for..",(200, 400),cv2.FONT_HERSHEY_SCRIPT_SIMPLEX, 1,(0,0,                              255)                                                                                                                   , 2)
                # Displaying the thresholded image
                cv2.imshow("Thresholded Hand Image", thresholded)
                if num_imgs_taken <= 70:
                    cv2.imwrite(test_dir + '\\' +str(element)+"\\" + str(num_imgs_taken) + '.jpg',                                   thresholded)
                else:
                    break
                num_imgs_taken +=1
            else:
                cv2.putText(frame_copy, 'No hand detected...', (200, 400), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,0,255), 2)



        # Drawing ROI on frame copy
        cv2.rectangle(frame_copy, (ROI_left, ROI_top), (ROI_right,ROI_bottom), (255,128,0), 3)
        
        cv2.putText(frame_copy, "Sign languge recognition_ _ _", (10, 20), cv2.FONT_ITALIC, 0.5, (51,255,51), 1)
        
        # increment the number of frames for tracking
        num_frames += 1

        # Display the frame with segmented hand
        cv2.imshow("Sign Detection", frame_copy)

        # Closing windows with Esc key...(any other key with ord can be used too.)
        k = cv2.waitKey(1) & 0xFF

        if k == 27:
            break

    # Releasing the camera & destroying all the windows...

    cv2.destroyAllWindows()
    cam.release()
    
    print('Done!')

In [None]:
cam.release()

In [9]:
## Initialize tbase dir, train_dir, test_dir
#base_dir = 'G:\\gestures\\words_data\\' # un comment this to recognize words
base_dir = 'G:\\gestures\\alpha_data'            # uncomment this to recognisze numbers
train_dir = os.path.join(base_dir, 'train')
test_dir = os.path.join(base_dir,'test')

# Create image data generators for test and train batches

In [None]:
# This cell creates data generators for train and test images.
train_batches = ImageDataGenerator(rescale=1./255, rotation_range=40, zoom_range=0.2,
      horizontal_flip=True).flow_from_directory(directory=train_dir, target_size=(64,64), class_mode='categorical', batch_size=100,shuffle=True)

test_batches = ImageDataGenerator(rescale=1./255, rotation_range=40, zoom_range=0.2,
      horizontal_flip=True).flow_from_directory(directory=test_dir, target_size=(64,64), class_mode='categorical', batch_size=10, shuffle=True)

# Plot few images to check

In [None]:
imgs, labels = next(train_batches)
print(words_data)
#Plotting the images...
def plotImages(images_arr):
    fig, axes = plt.subplots(1, 10, figsize=(50,50))
    axes = axes.flatten()
    for img, ax in zip( images_arr, axes):
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        ax.imshow(img)
        ax.axis('off')
    plt.tight_layout()
    plt.show()


plotImages(imgs)
print('Actual labels:')

for i in labels:
    print(alpha_classes[np.argmax(i) + 1],end = '  ')


# Initializing the SEQUENTIAL model

In [None]:
model = Sequential()

model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=(64,64,3)))
model.add(MaxPool2D(pool_size=(2, 2), strides=2))

model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding = 'same'))
model.add(MaxPool2D(pool_size=(2, 2), strides=2))

model.add(Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding = 'valid'))
model.add(MaxPool2D(pool_size=(2, 2), strides=2))

model.add(Flatten())

model.add(Dense(64,activation ="relu"))
model.add(Dense(128,activation ="relu"))
#model.add(Dropout(0.2))
model.add(Dense(128,activation ="relu"))
#model.add(Dropout(0.3))
model.add(Dense(26,activation ="softmax"))

In [10]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 62, 62, 32)        896       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 31, 31, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 31, 31, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 15, 15, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 13, 13, 128)       73856     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 6, 6, 128)         0         
_________________________________________________________________
flatten (Flatten)            (None, 4608)              0

In [None]:
#model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
#reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=1, min_lr=0.0001)
#early_stop = EarlyStopping(monitor='val_loss', min_delta=0, patience=2, verbose=0, mode='auto')

In [None]:
model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
#model.compile(optimizer=SGD(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
#reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=1, min_lr=0.0005)
#early_stop = EarlyStopping(monitor='val_loss', min_delta=0, patience=2, verbose=0, mode='auto')

In [None]:
BATCH_SIZE = 100
epochs=20
print(int(np.ceil(train_batches.n/ float(BATCH_SIZE))))

history = model.fit(
    train_batches,
    steps_per_epoch=int(np.ceil(train_batches.n/ float(BATCH_SIZE))),
    epochs=epochs,
    validation_data=test_batches,
    validation_steps=int(np.ceil(test_batches.n/ float(BATCH_SIZE)))
)

In [None]:
#history2 = model.fit(train_batches, epochs=10, callbacks=[reduce_lr, early_stop],  validation_data = test_batches)
#history2 = model.fit(train_batches, epochs=25,  validation_data = test_batches)

## Here we print the accuracy of the model

In [None]:
# For getting next batch of testing imgs...
imgs, labels = next(test_batches)

scores = model.evaluate(imgs, labels, verbose=0)
print(f'{model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%')


#Once the model is fitted we save the model using model.save()  function.


#model.save('best_model_dataflair3.h5')

In [None]:
acc = history2.history['accuracy']
val_acc = history2.history['val_accuracy']

loss = history2.history['loss']
val_loss = history2.history['val_loss']

epochs_range = range(25)

plt.figure(figsize=(8, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()

In [None]:
model.save('alpha_model.h5')

# Prediction with model

In [9]:
model = keras.models.load_model('new_words_model.h5')
background = None
text_to_speak = {1:'All The Best', 2:'Hello', 3: 'I Love you', 4: 'No', 5:'Super!!', 6:'Yes'} # used to speak the sign
text_to_speak_numbers = num_classes
text_to_speak_alpha = alpha_classes

In [23]:
def predict():
    
    cam = cv2.VideoCapture(0)
    num_frames =0
    pred = None
    while True:
        ret, frame = cam.read()

        # flipping the frame to prevent inverted image of captured
        #frame...
        
        frame = cv2.flip(frame, 1)

        frame_copy = frame.copy()

        # ROI from the frame
        roi = frame[ROI_top:ROI_bottom, ROI_right:ROI_left]

        gray_frame = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
        gray_frame = cv2.GaussianBlur(gray_frame, (9, 9), 0)


        if num_frames < 70:
            
            cal_accum_avg(gray_frame, accumulated_weight)
            
            cv2.putText(frame_copy, "FETCHING BACKGROUND...PLEASE WAIT",
    (80, 400), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0,0,255), 2)
        
        else: 
            # segmenting the hand region
            hand = segment_hand(gray_frame)
            
            # Checking if we are able to detect the hand...
            if hand is not None:
                
                thresholded, hand_segment,contour_info = hand

                # Drawing contours around hand segment
                cv2.drawContours(frame_copy, [hand_segment + (ROI_right,
        ROI_top)], -1, (255, 0, 0),1)
                
                cv2.imshow("Thesholded Hand Image", thresholded)
                
                thresholded = cv2.resize(thresholded, (64, 64))
                thresholded = cv2.cvtColor(thresholded,
    cv2.COLOR_GRAY2RGB)
                thresholded = np.reshape(thresholded,
    (1,thresholded.shape[0],thresholded.shape[1],3))

                prev = text_to_speak[np.argmax(pred) + 1]
                pred = model.predict(thresholded)
                #print(pred)
                cv2.putText(frame_copy, text_to_speak[np.argmax(pred) + 1],
    (300, 45), cv2.FONT_HERSHEY_SCRIPT_SIMPLEX, 1, (0,0,255), 2)
                
                if text_to_speak[np.argmax(pred) + 1] != prev:
                    engine = pyttsx3.init()
                    engine.say(text_to_speak[np.argmax(pred) + 1])
                    engine.runAndWait()
                    prev_num_frames = num_frames
                        
                
        # Draw ROI on frame_copy
        cv2.rectangle(frame_copy, (ROI_left, ROI_top), (ROI_right,
        ROI_bottom), (255,128,0), 3)

        # incrementing the number of frames for tracking
        num_frames += 1
        #print(pred)
        #if pred != None:
            

        # Display the frame with segmented hand
        cv2.putText(frame_copy, "Indian sign language recognition_ _ _",
        (10, 20), cv2.FONT_ITALIC, 0.5, (51,255,51), 1)
        cv2.imshow("Sign Detection", frame_copy)


        # Close windows with Esc
        k = cv2.waitKey(1) & 0xFF

        if k == 27:
            break

    # Release the camera and destroy all the windows
    cam.release()
    cv2.destroyAllWindows()

In [13]:
cam.release()

NameError: name 'cam' is not defined

# Prediction with tkinter app

In [10]:
model = keras.models.load_model('new_words_model.h5')

In [14]:
import tkinter as tk
from PIL import ImageTk, Image
from tkinter import filedialog
import numpy as np
import tensorflow 
import cv2

In [15]:
def load_img():
    global img, image_data
    for img_display in frame.winfo_children():
        img_display.destroy()

    image_data = filedialog.askopenfilename(initialdir="/", title="Choose an image",
                                       filetypes=(("all files", "*.*"), ("jpg files", "*.jpg")))
    basewidth = 150
    img = Image.open(image_data)
    wpercent = (basewidth / float(img.size[0]))
    hsize = int((float(img.size[1]) * float(wpercent)))
    img = img.resize((basewidth, hsize), Image.ANTIALIAS)
    img = ImageTk.PhotoImage(img)
    file_name = image_data.split('/')
    panel = tk.Label(frame, text= str(file_name[len(file_name)-1]).upper()).pack()
    panel_image = tk.Label(frame, image=img).pack()

In [16]:
text_to_speak = {1:'All The Best', 2:'Hello', 3: 'I Love you', 4: 'No', 5:'Super!!', 6:'Yes'}

In [17]:
def classify():
    original = cv2.imread(image_data)
    thresholded = cv2.resize(original, (64, 64))
    
    thresholded = np.reshape(thresholded,
(1,thresholded.shape[0],thresholded.shape[1],3))

    pred = model.predict(thresholded)
    
    string = text_to_speak[np.argmax(pred) + 1]

    table = tk.Label(frame, text="Predicted sign ").pack()

    result = tk.Label(frame, text= string.upper()).pack()


In [24]:
root = tk.Tk()
root.title('Sign predictor')
#root.iconbitmap('class.ico')
root.resizable(False, False)

tit = tk.Label(root, text="Sign predictor", padx=25, pady=6, font=("", 12)).pack()

canvas = tk.Canvas(root, height=400, width=600, bg='#76c3fb')
canvas.pack()

frame = tk.Frame(root, bg='#d776fb')
frame.place(relwidth=0.8, relheight=0.7, relx=0.1, rely=0.1)

chose_image = tk.Button(root, text='Choose Image',
                        padx=20, pady=10,
                        fg="white", bg="#8c04b5", command=load_img)
chose_image.pack(side=tk.LEFT)

live_pred = tk.Button(root, text='Live prediction',
                        padx=20, pady=10,
                        fg="white", bg="#0a7c2e",command=predict)
live_pred.pack(side=tk.RIGHT)



class_image = tk.Button(root, text='Classify Image',
                            padx=20, pady=10,
                            fg="white", bg="#0475b5",command=classify)
class_image.pack(side=tk.LEFT)

 
root.mainloop()

Exception in Tkinter callback
Traceback (most recent call last):
  File "C:\Users\suhas\Anaconda3\lib\site-packages\PIL\Image.py", line 2770, in open
    fp.seek(0)
AttributeError: 'str' object has no attribute 'seek'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\suhas\Anaconda3\lib\tkinter\__init__.py", line 1705, in __call__
    return self.func(*args)
  File "<ipython-input-15-aac2fa9a9967>", line 9, in load_img
    img = Image.open(image_data)
  File "C:\Users\suhas\Anaconda3\lib\site-packages\PIL\Image.py", line 2772, in open
    fp = io.BytesIO(fp.read())
AttributeError: 'str' object has no attribute 'read'
