# Computer Vision Model for OCR


This notebook trains the digit-classification model used for OCR and saves it as `ocr_model.h5` (written to `./assets/`).

Extra dependencies needed: scikit-learn


In [10]:
import numpy as np
import os
import tensorflow as tf
from tensorflow import keras
from keras.utils.np_utils import to_categorical
from keras.layers import Dropout, Dense, Flatten, Conv2D, MaxPooling2D
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import cv2

### Load & Prepare the digits dataset


In [35]:
# Source dataset: https://www.kaggle.com/datasets/karnikakapoor/digits
# Original dataset is from Chars74K: http://www.ee.surrey.ac.uk/CVSSP/demos/chars74k/

dataset_path = "./assets/Digits/"
image_size = (64, 64)  # (width, height) every image is resized to

# Expected layout: one sub-folder per class, named "0", "1", ..., each holding that
# class's images. Only numeric directories count as classes, so a stray .DS_Store or
# .ipynb_checkpoints entry cannot inflate data_classes.
class_dirs = sorted(
    (
        entry
        for entry in os.listdir(dataset_path)
        if entry.isdecimal() and os.path.isdir(os.path.join(dataset_path, entry))
    ),
    key=int,
)
data_classes = len(class_dirs)
# The folder number is used directly as the label, so the folders must be exactly 0..N-1.
if not class_dirs or [int(name) for name in class_dirs] != list(range(data_classes)):
    raise ValueError(
        f"Expected class folders named 0, 1, 2, ... in {dataset_path!r}, found {class_dirs}"
    )

data_X = []
data_y = []
unreadable = []
for label, class_dir in enumerate(class_dirs):
    class_path = os.path.join(dataset_path, class_dir)
    # sorted(): os.listdir returns entries in arbitrary order; a fixed order keeps the
    # dataset identical from run to run.
    for filename in sorted(os.listdir(class_path)):
        file_path = os.path.join(class_path, filename)
        pic = cv2.imread(file_path)
        if pic is None:
            # cv2.imread does not raise on a corrupt or non-image file, it returns None,
            # which would otherwise fail inside cv2.resize with an opaque assertion.
            unreadable.append(file_path)
            continue
        data_X.append(cv2.resize(pic, image_size))
        data_y.append(label)

if unreadable:
    print(f"Skipped {len(unreadable)} unreadable file(s), e.g. {unreadable[0]}")
if not data_X:
    raise ValueError(f"No readable images found under {dataset_path!r}")

# Labels and images
data_X = np.array(data_X)
data_y = np.array(data_y)

assert len(data_X) == len(data_y), "images and labels are out of sync"
print("Total Datapoints = ",len(data_X))

Total Datapoints =  10160


In [36]:
# random_state pins the shuffle so the same images land in the test set on every run.
# The evaluation cells further down score models loaded from disk on test_X; with an
# unseeded split, a re-run would hand them a "test" set containing images those saved
# models were trained on. stratify keeps each class's share equal in both sets.
train_X, test_X, train_y, test_y = train_test_split(
    data_X, data_y, test_size=0.15, random_state=42, stratify=data_y
)
print("Training Set Shape = ",train_X.shape)
print("Test Set Shape = ",test_X.shape)

# Preprocessing the images for neuralnet

def Prep(img):
    """Convert one BGR image to a contrast-equalised grayscale array scaled to [0, 1].

    Args:
        img: Colour image in OpenCV's BGR channel order, as returned by ``cv2.imread``
            (8-bit; ``cv2.equalizeHist`` only accepts 8-bit single-channel input).

    Returns:
        A 2-D float array with the same height and width as ``img``.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # BGR -> single channel
    equalized = cv2.equalizeHist(gray)  # spread the intensity histogram to enhance contrast
    return equalized / 255  # 0..255 -> 0..1

# Run every image through Prep (grayscale, equalise, scale to 0..1)
train_X = np.array([Prep(img) for img in train_X])
test_X = np.array([Prep(img) for img in test_X])

#Reshaping the images
# Conv2D expects a channel axis: (N, H, W) -> (N, H, W, 1)
train_X = np.expand_dims(train_X, axis=-1)
test_X = np.expand_dims(test_X, axis=-1)

#Augmentation
datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    width_shift_range=0.1,   # fraction of image width
    height_shift_range=0.1,  # fraction of image height
    zoom_range=0.2,
    shear_range=0.1,
    rotation_range=10,       # degrees
)
# NOTE(review): per the Keras docs, fit() is only required when featurewise_center,
# featurewise_std_normalization or zca_whitening is enabled; none are set here, so this
# call looks removable. Confirm before deleting.
datagen.fit(train_X)

# One hot encoding of the labels
train_y = to_categorical(train_y, data_classes)
test_y = to_categorical(test_y, data_classes)

Training Set Shape =  (8636, 64, 64, 3)
Test Set Shape =  (1524, 64, 64, 3)


### Neural Network Architecture


In [43]:
# Model Architecture is based on LeNet-5
#
# Seed Python's `random`, NumPy and TensorFlow before the first layer is created, so the
# initial weights are the same on every run. Cells executed in order after this one
# inherit the seeded RNG state (bit-exact results can still vary across hardware and
# TensorFlow versions).
keras.utils.set_random_seed(42)

INPUT_SHAPE = (64, 64, 1)  # height, width, channels: 64x64 grayscale images
NUM_CLASSES = 10  # width of the softmax output layer

model = keras.Sequential([
    # Stage 1: two 5x5 convolutions (60 filters each), then 2x2 max-pooling: 64x64 -> 32x32
    Conv2D(60, (5, 5), padding="same", activation="relu", input_shape=INPUT_SHAPE),
    Conv2D(60, (5, 5), padding="same", activation="relu"),
    MaxPooling2D(pool_size=(2, 2)),

    # Stage 2: two 3x3 convolutions (30 filters each), then 2x2 max-pooling: 32x32 -> 16x16
    Conv2D(30, (3, 3), padding="same", activation="relu"),
    Conv2D(30, (3, 3), padding="same", activation="relu"),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    Dropout(0.5),

    # Classifier head: flatten, one hidden dense layer, softmax over the classes
    Flatten(),
    Dense(100, activation="relu"),
    Dropout(0.5),
    Dense(NUM_CLASSES, activation="softmax"),
])

model.summary()

Model: "sequential_13"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_56 (Conv2D)          (None, 64, 64, 60)        1560      
                                                                 
 conv2d_57 (Conv2D)          (None, 64, 64, 60)        90060     
                                                                 
 max_pooling2d_28 (MaxPoolin  (None, 32, 32, 60)       0         
 g2D)                                                            
                                                                 
 conv2d_58 (Conv2D)          (None, 32, 32, 30)        16230     
                                                                 
 conv2d_59 (Conv2D)          (None, 32, 32, 30)        8130      
                                                                 
 max_pooling2d_29 (MaxPoolin  (None, 16, 16, 30)       0         
 g2D)                                                

In [45]:
# Training hyper-parameters
BATCH_SIZE = 32
STEPS_PER_EPOCH = 200  # batches drawn per epoch, i.e. 200 * 32 = 6400 augmented images
EPOCHS = 30

#Compiling the model
opt = keras.optimizers.RMSprop(learning_rate=0.001, rho=0.9, epsilon=1e-08, weight_decay=0.0)
model.compile(
    optimizer=opt,
    loss="categorical_crossentropy",  # labels are one-hot encoded
    metrics=["accuracy"],
)

#Fit the model
# datagen.flow yields augmented batches; steps_per_epoch decides where one epoch ends.
train_batches = datagen.flow(train_X, train_y, batch_size=BATCH_SIZE)
history = model.fit(
    train_batches,
    steps_per_epoch=STEPS_PER_EPOCH,
    epochs=EPOCHS,
    verbose=2,  # one summary line per epoch
)

# Write the trained model to disk; the evaluation cell below reloads this file.
model.save("./assets/ocr_model.h5")

Epoch 1/30


2023-04-02 06:20:52.179655: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


200/200 - 138s - loss: 0.3916 - accuracy: 0.8816 - 138s/epoch - 691ms/step
Epoch 2/30
200/200 - 941s - loss: 0.2460 - accuracy: 0.9287 - 941s/epoch - 5s/step
Epoch 3/30
200/200 - 109s - loss: 0.2274 - accuracy: 0.9333 - 109s/epoch - 546ms/step
Epoch 4/30
200/200 - 94s - loss: 0.1834 - accuracy: 0.9459 - 94s/epoch - 469ms/step
Epoch 5/30
200/200 - 93s - loss: 0.1587 - accuracy: 0.9511 - 93s/epoch - 467ms/step
Epoch 6/30
200/200 - 94s - loss: 0.1488 - accuracy: 0.9561 - 94s/epoch - 470ms/step
Epoch 7/30
200/200 - 94s - loss: 0.1356 - accuracy: 0.9614 - 94s/epoch - 470ms/step
Epoch 8/30
200/200 - 95s - loss: 0.1292 - accuracy: 0.9619 - 95s/epoch - 474ms/step
Epoch 9/30
200/200 - 97s - loss: 0.1189 - accuracy: 0.9656 - 97s/epoch - 486ms/step
Epoch 10/30
200/200 - 1906s - loss: 0.1153 - accuracy: 0.9681 - 1906s/epoch - 10s/step
Epoch 11/30
200/200 - 332s - loss: 0.1128 - accuracy: 0.9673 - 332s/epoch - 2s/step
Epoch 12/30
200/200 - 2108s - loss: 0.1158 - accuracy: 0.9675 - 2108s/epoch - 11s

In [46]:
# Image quality is 64x64 pixels in the model
# Reload the file written by the training cell, so the number below describes the saved
# model rather than whatever `model` currently holds in memory.
model = keras.models.load_model("./assets/ocr_model.h5")
score = model.evaluate(x=test_X, y=test_y, verbose=0)
test_accuracy = score[1]  # evaluate() returns [loss, accuracy]
print('Test Accuracy =', test_accuracy)

Test Accuracy = 0.9973753094673157


In [29]:
# Image quality is 32x32 pixels in the small model
# NOTE(review): ocr_model_small.h5 is not written anywhere in this notebook, so it must
# already exist in ./assets/. The saved output of this cell also appears to come from an
# earlier kernel state (its execution count precedes the data-loading cells). Confirm
# both before trusting the number.
model = keras.models.load_model("./assets/ocr_model_small.h5")

# test_X was prepared at the resolution of the main model. When this model expects a
# different input size, evaluate on a resized copy instead of failing on a shape mismatch;
# when the sizes already agree, test_X is used untouched.
# NOTE(review): resizing after Prep() is presumably not the exact pipeline the small model
# was trained with, so treat the score as approximate whenever the resize branch runs.
model_height, model_width = model.input_shape[1:3]
if tuple(test_X.shape[1:3]) == (model_height, model_width):
    test_X_resized = test_X
else:
    # cv2.resize takes (width, height); each image is resized as a 2-D array and the
    # channel axis is re-added afterwards.
    test_X_resized = np.stack(
        [cv2.resize(img, (model_width, model_height)) for img in test_X[..., 0]]
    )[..., np.newaxis]
    print("Resized test images from", tuple(test_X.shape[1:3]), "to", (model_height, model_width))

score = model.evaluate(test_X_resized, test_y, verbose=0)
print('Test Accuracy =', score[1])

Test Accuracy = 0.9947506785392761
