# OCR using CNN

Importing the libraries

In [72]:
from keras.layers import Dense, Dropout, Convolution2D, MaxPooling2D
from keras.layers import Flatten
from keras.models import Sequential
import scipy
import numpy as np
import os
import keras.utils

Loading data form EMNIST database

In [73]:
EMNIST = scipy.io.loadmat(os.path.join(os.getcwd(), "matlab", "emnist-digits.mat"))

x_train = EMNIST["dataset"][0][0][0][0][0][0].astype("float64")
y_train = EMNIST["dataset"][0][0][0][0][0][1]

x_test = EMNIST['dataset'][0][0][1][0][0][0].astype("float64")
y_test = EMNIST['dataset'][0][0][1][0][0][1]

# Scaling data
x_train = (x_train - np.mean(x_train)) /np.std(x_train)
x_test =  (x_test - np.mean(x_train)) /np.std(x_train)

nb_classes = 10

y_train = keras.utils.to_categorical(y_train, nb_classes)
y_test = keras.utils.to_categorical(y_test, nb_classes)

print("Fraction test {}, Fraction train {}".format(len(y_test)/(len(y_test) + len(y_train)), len(y_train)/(len(y_test) + len(y_train))))

Fraction test 0.14285714285714285, Fraction train 0.8571428571428571


In [74]:
height = 28
width = 28
input_shape = (height, width, 1) # Dimensions of the input image : 28 x 28, third dimension 1 is added for compatibility with Convolution2D function

x_train_scaled = x_train[:len(x_train)].reshape(len(x_train), height, width, 1)
x_test_scaled = x_test[:len(x_test)].reshape(len(x_test), height, width, 1)

In [75]:
activation = "relu"


nb_filters = 32 # number of convolutional filters to use
pool_size = (2, 2) # size of pooling area for max pooling
kernel_size = (3, 3) # convolution kernel size
model = Sequential()

model.add(Convolution2D(nb_filters,
                        kernel_size,
                        padding='valid',
                        input_shape=input_shape,
                        activation='relu'))

model.add(Convolution2D(nb_filters,
                        kernel_size,
                        activation='relu'))

model.add(MaxPooling2D(pool_size=pool_size))
model.add(Dropout(0.25))
model.add(Flatten())

model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(nb_classes, activation='softmax'))

model.compile(loss='categorical_crossentropy',
            optimizer='SGD',
            metrics=['accuracy'])

batch = 256
epoch = 10
model.fit(x_train_scaled, y_train,
            batch_size=batch,
            epochs=epoch,
            verbose=1,
            validation_data=(x_test_scaled, y_test))

score = model.evaluate(x_test_scaled, y_test, verbose=0)


  super().__init__(


Epoch 1/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m279s[0m 289ms/step - accuracy: 0.1738 - loss: 2.2428 - val_accuracy: 0.6888 - val_loss: 7.3493
Epoch 2/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m242s[0m 258ms/step - accuracy: 0.5410 - loss: 1.6929 - val_accuracy: 0.8107 - val_loss: 11.2838
Epoch 3/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m235s[0m 250ms/step - accuracy: 0.7151 - loss: 1.0686 - val_accuracy: 0.8482 - val_loss: 14.2322
Epoch 4/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m227s[0m 242ms/step - accuracy: 0.7830 - loss: 0.7500 - val_accuracy: 0.8688 - val_loss: 15.2371
Epoch 5/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m207s[0m 221ms/step - accuracy: 0.8193 - loss: 0.6058 - val_accuracy: 0.8816 - val_loss: 15.7013
Epoch 6/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m186s[0m 198ms/step - accuracy: 0.8410 - loss: 0.5245 - val_accuracy: 0.8903 - val_loss: 15.8343

In [76]:
y_pred = model.predict(x_train_scaled[1].reshape(1, height, width, 1))

print(y_train[1])
print(y_pred)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 206ms/step
[0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
[[4.0305740e-06 7.2966868e-06 4.2318193e-06 3.5341807e-05 3.8730315e-04
  3.2485841e-04 6.3729985e-08 1.1710874e-02 2.0888053e-02 9.6663797e-01]]
