In [1]:
#import required libraries
import numpy as np
import pandas as pd 
import os
import cv2
import matplotlib.pyplot as plt

In [2]:
#function to process images for training
def processed_img(folder_name):
    train_img = []
    files = os.listdir('extracted_images/'+folder_name)
    for file in files[:1000]:
        img = cv2.imread('extracted_images/'+folder_name+'/'+file, cv2.IMREAD_GRAYSCALE) #read the image in grayscale
        img_inv = 255 -  img #inverts the image
        img_bin, thresh = cv2.threshold(img_inv,127,255,cv2.THRESH_BINARY) #convert the image to binary
        img_con, h = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) #add contours to the image
        max_size = 0
        for (i,j) in enumerate(img_con):
            x,y,w,h = cv2.boundingRect(j) #add rectangles to the countours found
            max_size = max(w*h, max_size)
            if max_size == w*h: #extract the rectangle of maximum size
                x_max,y_max,w_max,h_max = x,y,w,h
            img_crop = thresh[y_max:y_max+h_max+10, x_max:x_max+w_max+10] #extracting the area within the bounding box
            img_resize = cv2.resize(img_crop, (28,28))
            img_resize = np.reshape(img_resize,(784,1))
            train_img.append(img_resize)
    return train_img

In [3]:
#list of the symbols
folders = sorted(os.listdir('extracted_images'))
print(folders)

['!', '(', ')', '+', ',', '-', '0']


In [4]:
data = processed_img(folders[0]) #dataframe storing the image array and label
for i in range(len(data)):
    data[i] = np.append(data[i],[folders[-1]]) #adding label column
count = 10
for folder in folders[:-1]:
    train_img = processed_img(folder)
    if folder in ['0','1','2','3','4','5','6','7','8','9']:
        for i in range(len(train_img)):
            train_img[i] = np.append(train_img[i],[folder])
    else:
        for i in range(len(train_img)):
            train_img[i] = np.append(train_img[i],[str(count)])
        count+=1
    data = np.concatenate((data,train_img))

In [5]:
import pandas as pd
df=pd.DataFrame(data,index=None)
df.to_csv('train_final.csv',index=False)

In [6]:
data_df = pd.read_csv('train_final.csv')
data_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,775,776,777,778,779,780,781,782,783,784
0,255,255,255,255,146,36,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,255,232,132,32,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,255,255,255,255,146,36,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,36,146,255,146,36,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,255,255,255,255,146,36,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [7]:
y = data_df[['784']]
y = np.array(y)
data_df.drop(data_df.columns[[784]],axis=1,inplace=True)

In [8]:
import keras
from keras.models import Model
from keras.layers import *
from keras import optimizers
from keras.layers import Input, Dense
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.utils import np_utils

In [9]:
data_df.shape

(8922, 784)

In [10]:
len(folders)

7

In [11]:
#list of image array of shape (28,28)
l=[]
for i in range(data_df.shape[0]):
    l.append(np.array(data_df[i:i+1]).reshape(28,28,1))

In [12]:
#defining the layers of the model
model = Sequential()
model.add(Conv2D(30, (5,5), input_shape = (28,28,1),activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(15, (3,3),activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128,activation='relu'))
model.add(Dense(50,activation='relu'))
model.add(Dense(7,activation='softmax'))


In [13]:
model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])

In [14]:
for i in y:
    if i[0]>7:
        i[0]=i[0]-10

cat = np_utils.to_categorical(y, num_classes=7) 

In [15]:
import tensorflow as tf
tf.config.run_functions_eagerly(True)

In [16]:
model.fit(np.array(l), cat,epochs=10,batch_size=100,shuffle=True,verbose=1)

Epoch 1/10




Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2022f02a740>

In [17]:
#function to process images for testing
def processed_img_test(folder_name):
    test_img = []
    files = os.listdir('extracted_images/'+folder_name)
    for file in files[1000:1100]:
        img = cv2.imread('extracted_images/'+folder_name+'/'+file, cv2.IMREAD_GRAYSCALE)
        img_inv = 255 -  img
        img_bin, thresh = cv2.threshold(img_inv,127,255,cv2.THRESH_BINARY)
        img_con, h = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
        w = int(28)
        h = int(28)
        max_size = 0
        for (i,j) in enumerate(img_con):
            x,y,w,h = cv2.boundingRect(j)
            max_size = max(w*h, max_size)
            if max_size == w*h:
                x_max,y_max,w_max,h_max = x,y,w,h
            img_crop = thresh[y_max:y_max+h_max+10, x_max:x_max+w_max+10]
            img_resize = cv2.resize(img_crop, (28,28))
            img_resize = np.reshape(img_resize,(784,1))
            test_img.append(img_resize)
    return test_img

In [18]:
test_data = processed_img_test(folders[0])
for i in range(len(test_data)):
    test_data[i] = np.append(test_data[i],[folders[-1]])
count = 10
for folder in folders[:-1]:
    test_img = processed_img_test(folder)
    if folder in ['0','1','2','3','4','5','6','7','8','9']:
        for i in range(len(test_img)):
            test_img[i] = np.append(test_img[i],[folder])
    else:
        for i in range(len(test_img)):
            test_img[i] = np.append(test_img[i],[str(count)])
        count+=1
    test_data = np.concatenate((test_data,test_img))

In [19]:
test_df=pd.DataFrame(test_data,index=None)
test_df.to_csv('test_final.csv',index=False)
test_data_df = pd.read_csv('test_final.csv')
y_test = test_data_df[['784']]
y_test = np.array(y_test)
test_data_df.drop(test_data_df.columns[[784]],axis=1,inplace=True)

l_test=[]
for i in range(test_data_df.shape[0]):
    l_test.append(np.array(test_data_df[i:i+1]).reshape(28,28,1))
    
for i in y_test:
    if i[0]>7:
        i[0]=i[0]-10
cat_test = np_utils.to_categorical(y_test, num_classes=7) 

In [20]:
#accuracy on test data
results = model.evaluate(np.array(l_test),cat_test,batch_size=100)
print("The loss and test accuracy on test data: ",results)





The loss and test accuracy on test data:  [0.03376661241054535, 0.9898876547813416]


In [21]:
labels = model.predict(np.array(l_test[:2]))
print("Original labels:",y_test[:2])
print("Predicted labels: ",np.argmax(labels,axis=-1))

Original labels: [[0]
 [0]]
Predicted labels:  [0 0]


In [22]:
labels = model.predict(np.array(l_test[710:720]))
print("Original labels:",y_test[710:720])
print("Predicted labels: ",np.argmax(labels,axis=-1))

Original labels: [[4]
 [4]
 [4]
 [4]
 [4]
 [4]
 [4]
 [4]
 [4]
 [4]]
Predicted labels:  [4 4 4 2 2 2 4 4 4 4]
