In [37]:
import numpy as np
import cv2
from collections import deque
from mnist import MNIST
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D,MaxPooling2D
from keras import backend as K
from keras.callbacks import ModelCheckpoint
import os

In [29]:
# Read the EMNIST archives stored under ./data and return numpy arrays.
emnist = MNIST('data', return_type='numpy')

# python-mnist ships six EMNIST variants:
#   letters, balanced, byclass, bymerge, digits, mnist
# Only the "letters" split is used in this notebook.
emnist.select_emnist('letters')

# Training images and their labels.
images, labels = emnist.load_training()

In [30]:
# Restore the 2-D image layout: one 28x28 grid per sample.
images = images.reshape(-1, 28, 28)
# Column vector of labels, one row per sample.
labels = labels.reshape(-1, 1)

# Make labels run from 0-25 instead of 1-26 so they can be one-hot encoded
# with 26 classes. The shift is guarded so that re-running this cell does
# not subtract a second time (which would produce -1..24 and corrupt the
# encoding): once the labels are 0-based their minimum is 0 and the shift
# is skipped.
if labels.min() >= 1:
    labels = labels - 1

In [31]:
# Hold out 25% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    images,
    labels,
    test_size=0.25,
    random_state=111,
)

# Convert pixels to float32 and rescale them by 1/255.
X_train, X_test = (
    pixels.astype('float32') / 255 for pixels in (X_train, X_test)
)

# One-hot encode the 26 letter classes.
y_train, y_test = (
    to_categorical(targets, num_classes=26) for targets in (y_train, y_test)
)

### MLP Model

In [19]:
# Multilayer perceptron: flatten each image, pass it through two 512-unit
# ReLU layers (each followed by 20% dropout), and finish with a 26-way
# softmax over the letter classes.
model = Sequential([
    Flatten(input_shape=X_train.shape[1:]),
    Dense(512, activation='relu'),
    Dropout(0.2),
    Dense(512, activation='relu'),
    Dropout(0.2),
    Dense(26, activation='softmax'),
])

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Baseline: accuracy of the freshly initialised (untrained) network.
# evaluate() returns [loss, accuracy]; index 1 is the accuracy.
accuracy = 100 * model.evaluate(X_test, y_test, verbose=0)[1]

print('Before Training - Test accuracy: %.4f%%' % accuracy)

Before Training - Test accuracy: 3.6506%


In [21]:
# Final saved model and the intermediate "best epoch" checkpoint.
file_path = 'emnist_mlp_model.h5'
checkpoint_path = 'emnist.model.best.hdf5'

if os.path.exists(file_path):
    # A trained model is already cached on disk: load it instead of leaving
    # `model` with its untrained weights, so the evaluation below is
    # meaningful and a full re-run of the notebook stays cheap.
    model = load_model(file_path)
else:
    # Save the weights after an epoch only when the monitored validation
    # loss improves (save_best_only=True).
    checkpoint = ModelCheckpoint(filepath=checkpoint_path,
                                 verbose=1, save_best_only=True)

    # Train the MLP model; the last 20% of the training data is used for
    # validation.
    model.fit(X_train, y_train, batch_size=128, epochs=10,
              validation_split=0.2, callbacks=[checkpoint],
              verbose=1, shuffle=True)

    # Restore the best checkpointed weights into the MLP model ...
    model.load_weights(checkpoint_path)
    # ... and save the complete model for the recognition system.
    model.save(file_path)

# evaluate() returns [loss, accuracy]; index 1 is the accuracy.
accuracy = 100 * model.evaluate(X_test, y_test, verbose=0)[1]

print('After Training - Test accuracy: %.4f%%' % accuracy)

Train on 74880 samples, validate on 18720 samples
Epoch 1/10

Epoch 00001: val_loss improved from inf to 0.28722, saving model to emnist.model.best.hdf5
Epoch 2/10

Epoch 00002: val_loss did not improve from 0.28722
Epoch 3/10

Epoch 00003: val_loss did not improve from 0.28722
Epoch 4/10

Epoch 00004: val_loss did not improve from 0.28722
Epoch 5/10

Epoch 00005: val_loss did not improve from 0.28722
Epoch 6/10

Epoch 00006: val_loss did not improve from 0.28722
Epoch 7/10

Epoch 00007: val_loss did not improve from 0.28722
Epoch 8/10

Epoch 00008: val_loss did not improve from 0.28722
Epoch 9/10

Epoch 00009: val_loss did not improve from 0.28722
Epoch 10/10

Epoch 00010: val_loss did not improve from 0.28722
After Training - Test accuracy: 91.6122%


### CNN Model

In [32]:
# Conv2D layers expect a trailing channel axis, so give every image the
# shape (28, 28, 1). Reshaping an already 4-D array is a no-op, which keeps
# this cell safe to re-run.
CNN_IMAGE_SHAPE = (-1, 28, 28, 1)
X_train = np.reshape(X_train, CNN_IMAGE_SHAPE)
X_test = np.reshape(X_test, CNN_IMAGE_SHAPE)

In [35]:
# Build Model
# Shape of a single sample as produced by the reshape cell: 28x28 pixels
# with one channel. It is defined here because no other cell in the
# notebook defines `input_shape`, so a fresh kernel would otherwise fail
# with a NameError.
input_shape = (28, 28, 1)

# Two 3x3 convolutions (32 then 64 filters), 2x2 max-pooling and dropout,
# followed by a 128-unit dense layer and a 26-way softmax.
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=input_shape))
model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(26, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [38]:
# Final saved model and the intermediate "best epoch" checkpoint.
file_path = 'emnist_cnn_model.h5'
checkpoint_path = 'emnist.cnn_model.best.hdf5'

if os.path.exists(file_path):
    # A trained CNN is already cached on disk: load it instead of leaving
    # `model` with its untrained weights, so the evaluation below is
    # meaningful and a full re-run of the notebook stays cheap.
    model = load_model(file_path)
else:
    # Save the weights after an epoch only when the monitored validation
    # loss improves (save_best_only=True).
    checkpoint = ModelCheckpoint(filepath=checkpoint_path,
                                 verbose=1, save_best_only=True)

    # Train the CNN model; the last 20% of the training data is used for
    # validation.
    model.fit(X_train, y_train, batch_size=128, epochs=10,
              validation_split=0.2, callbacks=[checkpoint],
              verbose=1, shuffle=True)

    # Restore the best checkpointed weights into the CNN model ...
    model.load_weights(checkpoint_path)
    # ... and save the complete model for the recognition system.
    model.save(file_path)

# evaluate() returns [loss, accuracy]; index 1 is the accuracy.
accuracy = 100 * model.evaluate(X_test, y_test, verbose=0)[1]

print('Test accuracy: %.4f%%' % accuracy)

Train on 74880 samples, validate on 18720 samples
Epoch 1/10

Epoch 00001: val_loss improved from inf to 0.35838, saving model to emnist.cnn_model.best.hdf5
Epoch 2/10

Epoch 00002: val_loss improved from 0.35838 to 0.28767, saving model to emnist.cnn_model.best.hdf5
Epoch 3/10

Epoch 00003: val_loss improved from 0.28767 to 0.26527, saving model to emnist.cnn_model.best.hdf5
Epoch 4/10

Epoch 00004: val_loss improved from 0.26527 to 0.23931, saving model to emnist.cnn_model.best.hdf5
Epoch 5/10

Epoch 00005: val_loss improved from 0.23931 to 0.22714, saving model to emnist.cnn_model.best.hdf5
Epoch 6/10

Epoch 00006: val_loss improved from 0.22714 to 0.21925, saving model to emnist.cnn_model.best.hdf5
Epoch 7/10

Epoch 00007: val_loss improved from 0.21925 to 0.21562, saving model to emnist.cnn_model.best.hdf5
Epoch 8/10

Epoch 00008: val_loss improved from 0.21562 to 0.20868, saving model to emnist.cnn_model.best.hdf5
Epoch 9/10

Epoch 00009: val_loss improved from 0.20868 to 0.20809

### Recognition System

In [None]:
# Real-time letter recognition: track a blue object in the webcam feed,
# record its path as a drawn stroke, and when the object disappears classify
# the stroke with both saved models and overlay the predictions.
cnn_model = load_model('emnist_cnn_model.h5')
mlp_model = load_model('emnist_mlp_model.h5')

# Keys are (class index + 1); 27 is the placeholder shown before any
# prediction has been made.
letter_mapping = { 1: 'a', 2: 'b', 3: 'c', 4: 'd', 5: 'e', 6: 'f', 7: 'g', 8: 'h', 9: 'i', 10: 'j',
11: 'k', 12: 'l', 13: 'm', 14: 'n', 15: 'o', 16: 'p', 17: 'q', 18: 'r', 19: 's', 20: 't',
21: 'u', 22: 'v', 23: 'w', 24: 'x', 25: 'y', 26: 'z', 27: '-'}

# define the upper and lower boundaries for a color to be considered 'Blue'
blueLower = np.array([100,60,60])
blueUpper = np.array([140,255,255])

# define a 5x5 kernel for erosion and dilation
kernel = np.ones((5,5),np.uint8)

# define the blackboard (canvas the stroke is drawn on) and the letter crop
blackboard = np.zeros((480,640,3),dtype=np.uint8)
alphabet = np.zeros((200,200,3),dtype=np.uint8)

# deque storing the tracked centre points of the current stroke
points = deque(maxlen=512)

# prediction variables; 26 maps to the '-' placeholder via letter_mapping
pred1 = 26
pred2 = 26

index = 0

# margin (in pixels) added around the stroke's bounding box before cropping
crop_margin = 10

# Webcam number 0 is used to capture the frames
camera = cv2.VideoCapture(0)
if not camera.isOpened():
    raise RuntimeError('Could not open webcam 0')

try:
    # keep looping until 'q' is pressed or the camera stops delivering frames
    while True:
        # grab the current frame; stop cleanly if the read failed instead of
        # passing None to cv2.flip
        (grabbed,frame) = camera.read()
        if not grabbed or frame is None:
            break
        frame = cv2.flip(frame, 1) # flip horizontally

        # convert to HSV so that blue objects can be masked by colour range
        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)

        # keep pixels inside the blue boundaries, then clean up the mask
        blueMask = cv2.inRange(hsv,blueLower,blueUpper)
        blueMask = cv2.erode(blueMask,kernel,iterations=2) # erode mask
        blueMask = cv2.morphologyEx(blueMask,cv2.MORPH_OPEN,kernel) # remove noise
        blueMask = cv2.dilate(blueMask,kernel,iterations=1) # dilate mask

        # find contours of the blue object. findContours returns
        # (image, contours, hierarchy) on OpenCV 3 and (contours, hierarchy)
        # on OpenCV 4; index -2 is the contour list in both.
        cnts = cv2.findContours(blueMask.copy(),cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)[-2]
        center = None

        if len(cnts)>0:
            # the largest contour is taken to be the tracked object
            cnt = max(cnts,key=cv2.contourArea)
            # enclosing circle around the found contour
            ((x,y),radius) = cv2.minEnclosingCircle(cnt)
            # draw the circle around the contour
            cv2.circle(frame,(int(x),int(y)),int(radius),(0,255,255),2)
            # centroid from image moments; a degenerate contour has zero
            # area (m00 == 0), so fall back to the circle centre there
            M = cv2.moments(cnt)
            if M['m00'] != 0:
                center = (int(M['m10']/M['m00']), int(M['m01']/M['m00']))
            else:
                center = (int(x),int(y))
            points.appendleft(center)

        elif len(points) != 0:
            # the object left the frame: treat the stroke as finished
            blackboard_gray = cv2.cvtColor(blackboard,cv2.COLOR_BGR2GRAY) # change BGR to GRAY
            blur1 = cv2.medianBlur(blackboard_gray,15) # blur image by median filter
            blur1 = cv2.GaussianBlur(blur1,(5,5),0) # blur image by Gaussian filter
            thresh1 = cv2.threshold(blur1,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)[1] # Otsu threshold
            blackboard_cnts = cv2.findContours(thresh1.copy(),cv2.RETR_TREE,cv2.CHAIN_APPROX_NONE)[-2]
            if len(blackboard_cnts)>=1:
                cnt = max(blackboard_cnts,key=cv2.contourArea)

                # ignore tiny blobs
                if cv2.contourArea(cnt)>1000:
                    # (x,y) is the upper-left corner, w the width, h the height
                    x,y,w,h = cv2.boundingRect(cnt)
                    # pad the box, clamped to the image: a negative start
                    # index would wrap around and yield an empty or wrong
                    # crop that cv2.resize cannot handle
                    top = max(y-crop_margin,0)
                    left = max(x-crop_margin,0)
                    bottom = min(y+h+crop_margin,blackboard_gray.shape[0])
                    right = min(x+w+crop_margin,blackboard_gray.shape[1])
                    alphabet = blackboard_gray[top:bottom,left:right]
                    newImage = cv2.resize(alphabet,(28,28))
                    newImage = np.array(newImage)
                    newImage = newImage.astype('float32')/255

                    pred1 = mlp_model.predict(newImage.reshape(1,28,28))[0]
                    pred1 = np.argmax(pred1)

                    pred2 = cnn_model.predict(newImage.reshape(1,28,28,1))[0]
                    pred2 = np.argmax(pred2)
            # empty the points deque and blackboard
            points = deque(maxlen=512)
            blackboard = np.zeros((480,640,3),dtype=np.uint8)

        # connect consecutive points with a line; copy to a list first
        # because indexing into the middle of a deque is linear-time
        trail = list(points)
        for start,end in zip(trail,trail[1:]):
            if start is None or end is None:
                continue
            cv2.line(frame,start,end,(0,0,0),2)
            cv2.line(blackboard,start,end,(255,255,255),8)

        # put the results on the screen
        cv2.putText(frame,'Multilayer perceptron: '+str(letter_mapping[int(pred1)+1]),
                   (10,410),cv2.FONT_HERSHEY_SIMPLEX,0.7,(255,255,255),2)
        cv2.putText(frame,'Convolution Neural Network: '+str(letter_mapping[int(pred2)+1]),
                   (10,440),cv2.FONT_HERSHEY_SIMPLEX,0.7,(255,255,255),2)

        # show the frame
        cv2.imshow('Alphabets Recognition in Real Time',frame)

        # stop the loop when the 'q' key is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # always close the windows and release the camera, even if an error
    # interrupted the loop
    cv2.destroyAllWindows()
    camera.release()