In [1]:
import matplotlib.pyplot as plt
import mediapipe as mp
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.preprocessing import LabelBinarizer
from tensorflow.keras.layers import Input, Conv2D, Dense, Flatten, Dropout, BatchNormalization, MaxPool2D
from tensorflow.keras.models import Sequential


In [2]:
# Sign-language letter dataset; J and Z are excluded because signing them requires motion.
train_df, test_df = map(
    pd.read_csv,
    ("Datasets/sign_mnist_train.csv", "Datasets/sign_mnist_test.csv"),
)

In [3]:
# Peek at the first five training rows: a `label` column followed by pixel1..pixel784.
train_df.head(n=5)

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,3,107,118,127,134,139,143,146,150,153,...,207,207,207,207,206,206,206,204,203,202
1,6,155,157,156,156,156,157,156,158,158,...,69,149,128,87,94,163,175,103,135,149
2,2,187,188,188,187,187,186,187,188,187,...,202,201,200,199,198,199,198,195,194,195
3,2,211,211,212,212,211,210,211,210,210,...,235,234,233,231,230,226,225,222,229,163
4,13,164,167,170,172,176,179,180,184,185,...,92,105,105,108,133,163,157,163,164,179


In [4]:
# Peek at the first five test rows; the layout matches the training frame.
test_df.head(n=5)

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,6,149,149,150,150,150,151,151,150,151,...,138,148,127,89,82,96,106,112,120,107
1,5,126,128,131,132,133,134,135,135,136,...,47,104,194,183,186,184,184,184,182,180
2,10,85,88,92,96,105,123,135,143,147,...,68,166,242,227,230,227,226,225,224,222
3,0,203,205,207,206,207,209,210,209,210,...,154,248,247,248,253,236,230,240,253,255
4,3,188,191,193,195,199,201,202,203,203,...,26,40,64,48,29,46,49,46,46,53


In [5]:
# Split the integer class labels off the pixel columns. `pop` removes the
# column in place, so both frames hold only pixel data afterwards.
# Re-running this cell without reloading the CSVs raises KeyError.
train_labels = train_df.pop('label')
test_labels = test_df.pop('label')

# One-hot encode the labels. The binarizer is fitted on the training labels
# only and then reused (transform, not fit_transform) for the test labels, so
# both splits share exactly the same class-to-column mapping.
label_binarizer = LabelBinarizer()
y_train = label_binarizer.fit_transform(train_labels)
y_test = label_binarizer.transform(test_labels)

In [6]:
# Raw pixel matrices as NumPy arrays, one flattened image per row
# (the label column has already been removed from both frames).
x_train, x_test = train_df.values, test_df.values


In [7]:
# Scale pixel values by 1/255 and reshape every flat 784-value row into a
# 28x28 single-channel image, matching the CNN's input shape.
# The label arrays are left unchanged.
x_train, x_test = (
    (flat_pixels / 255).reshape(-1, 28, 28, 1) for flat_pixels in (x_train, x_test)
)
x_train.shape

(27455, 28, 28, 1)

In [8]:
# Augment the training images on the fly with small random geometric
# transformations (no pixel noise is added) so the model sees more variation.
datagen = ImageDataGenerator(
    # Active augmentations.
    rotation_range=10,        # rotate by up to 10 degrees
    zoom_range=0.1,           # zoom in/out by up to 10%
    width_shift_range=0.1,    # shift horizontally by up to 10% of the width
    height_shift_range=0.1,   # shift vertically by up to 10% of the height
    # Explicitly disabled: flips and every centering/normalisation/whitening option.
    horizontal_flip=False,
    vertical_flip=False,
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
)

# NOTE(review): per the Keras docs, fit() is only required when a featurewise_*
# option or zca_whitening is enabled; all are off here. Kept to preserve behaviour.
datagen.fit(x_train)

In [9]:
# CNN for 28x28 single-channel sign images: three conv -> batch-norm -> max-pool
# stages (75, 50, 25 filters), then a 512-unit dense layer and a 24-way softmax.
model = Sequential([
    # Stage 1: 28x28 -> 14x14
    Conv2D(75, (3, 3), strides=1, padding='same', activation='relu', input_shape=(28, 28, 1)),
    BatchNormalization(),
    MaxPool2D((2, 2), strides=2, padding='same'),
    # Stage 2: 14x14 -> 7x7, with dropout applied before the normalisation
    Conv2D(50, (3, 3), strides=1, padding='same', activation='relu'),
    Dropout(0.2),
    BatchNormalization(),
    MaxPool2D((2, 2), strides=2, padding='same'),
    # Stage 3: 7x7 -> 4x4
    Conv2D(25, (3, 3), strides=1, padding='same', activation='relu'),
    BatchNormalization(),
    MaxPool2D((2, 2), strides=2, padding='same'),
    # Classifier head: one output unit per letter class
    Flatten(),
    Dense(units=512, activation='relu'),
    Dropout(0.3),
    Dense(units=24, activation='softmax'),
])

In [10]:
# The labels are one-hot encoded (LabelBinarizer output), so the loss must be
# `categorical_crossentropy`; the sparse variant expects integer class ids
# and does not match one-hot targets.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [11]:
# Compile with categorical cross-entropy (the labels are one-hot) and print the architecture.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

# Train for 5 epochs on augmented batches of 128 images; the test split is
# used as validation data after every epoch.
train_batches = datagen.flow(x_train, y_train, batch_size=128)
history = model.fit(
    train_batches,
    epochs=5,
    validation_data=(x_test, y_test),
)

# Persist the trained network to an HDF5 file.
model.save('smnist.h5')

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 28, 28, 75)        750       
                                                                 
 batch_normalization (Batch  (None, 28, 28, 75)        300       
 Normalization)                                                  
                                                                 
 max_pooling2d (MaxPooling2  (None, 14, 14, 75)        0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 14, 14, 50)        33800     
                                                                 
 dropout (Dropout)           (None, 14, 14, 50)        0         
                                                                 
 batch_normalization_1 (Bat  (None, 14, 14, 50)        2

  saving_api.save_model(


In [12]:
# Disabled cell: imports for the optional webcam demo in the cells below
# (OpenCV capture, MediaPipe hand detection, loading the saved Keras model).
# import os
# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
# import tensorflow as tf
# import cv2
# import mediapipe as mp
# from keras.models import load_model
# import numpy as np
# import time

In [13]:
# Disabled cell: webcam demo setup. Loads the saved model, creates the
# MediaPipe hand detector, opens camera 0 and reads one frame to get its size.
# model = load_model('smnist.h5')

# mphands = mp.solutions.hands
# hands = mphands.Hands()
# mp_drawing = mp.solutions.drawing_utils

# cap = cv2.VideoCapture(0)
# _, frame = cap.read()
# h, w, c = frame.shape

# analysisframe = ''
# Letter for each model output index: 24 entries, J and Z are absent.
# letterpred = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y']


In [14]:
# Disabled cell: live webcam preview. Every frame is passed to the MediaPipe
# hand detector; for each detected hand a green bounding box (with a 20 px
# margin) and the hand landmarks are drawn. The loop ends when ESC is pressed.
# while True:
#     _, frame = cap.read()

#     k = cv2.waitKey(1)
#     if k%256 == 27:
#         print("Escape hit, closing...")
#         break

#     framergb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
#     result = hands.process(framergb)
#     hand_landmarks = result.multi_hand_landmarks
#     if hand_landmarks:
#         for handLMs in hand_landmarks:
# Bounding box of all landmarks; landmark coordinates are multiplied by the
# frame width/height to get pixel positions.
#             x_max = 0
#             y_max = 0
#             x_min = w
#             y_min = h
#             for lm in handLMs.landmark:
#                 x, y = int(lm.x * w), int(lm.y * h)
#                 if x > x_max:
#                     x_max = x
#                 if x < x_min:
#                     x_min = x
#                 if y > y_max:
#                     y_max = y
#                 if y < y_min:
#                     y_min = y
# Grow the box by 20 px on every side.
#             y_min -= 20
#             y_max += 20
#             x_min -= 20
#             x_max += 20
#             cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
#             mp_drawing.draw_landmarks(frame, handLMs, mphands.HAND_CONNECTIONS)
#     cv2.imshow("Frame", frame)
# # cv2.imshow("Last Frame", frame)
# cap.release()
# cv2.destroyAllWindows()

In [15]:
# Disabled cell: capture loop. ESC exits; SPACE grabs the current frame,
# locates the hand, crops it in grayscale, resizes the crop to 28x28 and builds
# `pixeldata` (values divided by 255, reshaped to (-1, 28, 28, 1)).
# NOTE(review): the crop below runs even when no hand was detected, in which
# case x_min/x_max/y_min/y_max are stale or undefined - confirm before enabling.
# NOTE(review): after the 20 px margin x_min/y_min can become negative, which
# changes the meaning of the slice - consider clamping to the frame bounds.
# while True:
#     _, frame = cap.read()
#     # print("a")
#     k = cv2.waitKey(1)
#     if k%256 == 27:
#         # ESC pressed
#         # print("a")
#         print("Escape hit, closing...")
#         break
    
#     elif k%256 == 32:
#         # SPACE pressed
#         # print("b")
#         print("Space pressed")
#         analysisframe = frame
#         showframe = analysisframe
#         cv2.imshow("Frame", showframe)
#         framergbanalysis = cv2.cvtColor(analysisframe, cv2.COLOR_BGR2RGB)
#         resultanalysis = hands.process(framergbanalysis)
#         hand_landmarksanalysis = resultanalysis.multi_hand_landmarks
#         if hand_landmarksanalysis:
#             for handLMsanalysis in hand_landmarksanalysis:
#                 x_max = 0
#                 y_max = 0
#                 x_min = w
#                 y_min = h
#                 for lmanalysis in handLMsanalysis.landmark:
#                     x, y = int(lmanalysis.x * w), int(lmanalysis.y * h)
#                     if x > x_max:
#                         x_max = x
#                     if x < x_min:
#                         x_min = x
#                     if y > y_max:
#                         y_max = y
#                     if y < y_min:
#                         y_min = y
#                 y_min -= 20
#                 y_max += 20
#                 x_min -= 20
#                 x_max += 20 
#         print("analysis frame")
#         analysisframe = cv2.cvtColor(analysisframe, cv2.COLOR_BGR2GRAY)
#         analysisframe = analysisframe[y_min:y_max, x_min:x_max]
#         analysisframe = cv2.resize(analysisframe,(28,28))
 

# Flatten the 28x28 crop row by row into a list of 784 pixel values.
# Note that `k` (the key code above) is reused here for the pixel value.
#         nlist = []
#         rows,cols = analysisframe.shape
#         for i in range(rows):
#             for j in range(cols):
#                 k = analysisframe[i,j]
#                 nlist.append(k)
        
# Wrap the pixels in a one-row DataFrame with columns 0..783, then scale and
# reshape them the same way as the training data.
#         datan = pd.DataFrame(nlist).T
#         colname = []
#         for val in range(784):
#             colname.append(val)
#         datan.columns = colname
#         print("pixel data")
#         pixeldata = datan.values
#         pixeldata = pixeldata / 255
#         pixeldata = pixeldata.reshape(-1,28,28,1)

In [16]:
# Disabled cell: classify `pixeldata` (built in the capture cell) with the
# trained model and print the three most confident letters with their scores
# as percentages, then pause for 5 seconds.
# NOTE(review): the top-3 entries are found by comparing scores for equality,
# so tied scores would all be reported under the first matching rank - confirm
# this is acceptable before enabling.
# prediction = model.predict(pixeldata)
# predarray = np.array(prediction[0])
# letter_prediction_dict = {letterpred[i]: predarray[i] for i in range(len(letterpred))}
# predarrayordered = sorted(predarray, reverse=True)
# high1 = predarrayordered[0]
# high2 = predarrayordered[1]
# high3 = predarrayordered[2]
# for key,value in letter_prediction_dict.items():
#     if value==high1:
#         print("Predicted Character 1: ", key)
#         print('Confidence 1: ', 100*value)
#     elif value==high2:
#         print("Predicted Character 2: ", key)
#         print('Confidence 2: ', 100*value)
#     elif value==high3:
#         print("Predicted Character 3: ", key)
#         print('Confidence 3: ', 100*value)
# time.sleep(5)