In [3]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
import matplotlib.pyplot as plt

In [4]:
# Download the UCI letter-recognition data straight from the archive.
# header=None: no row of the file is treated as column names, so the
# columns end up addressed by integer position.
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/letter-recognition/letter-recognition.data'
dataset = pd.read_csv(
    url,
    header=None,
)

In [6]:
# Separate the label column from the feature columns.
# Column 0 of every row is the label ...
y = dataset.iloc[:, 0].values
# ... and every column from index 1 onwards is a feature.
X = dataset.iloc[:, 1:].values

In [7]:
# Peek at the first entry of the label array
print(y[0])

T


In [8]:
# Turn the labels into numeric class ids.
# The fitted encoder is kept around so predictions can be mapped back
# to the original labels later with inverse_transform().
label_encoder = LabelEncoder().fit(y)
y = label_encoder.transform(y)

In [6]:
# Show the first label again; once the encoding cell has run it is a numeric id
print(y[0])

19


In [10]:
# Hold out 20% of the samples for testing; the fixed random_state makes
# the split repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

In [11]:
# Rescale both splits by the same constant.
# NOTE(review): 15.0 is presumably the maximum raw feature value, which
# would map features into [0, 1] - confirm against the dataset description.
# The names are rebound in place, so re-running this cell divides again.
X_train, X_test = X_train / 15.0, X_test / 15.0

In [12]:
# Sequential model: layers are stacked one after another, and the output of
# each layer is the input of the next.
#
# The input shape is declared with an explicit Input object rather than the
# `input_shape=` argument on the first Dense layer, which current Keras
# versions warn about. The architecture is otherwise unchanged.
model = Sequential([
    tf.keras.Input(shape=(16,)),        # each sample is a vector of 16 features
    Dense(128, activation='relu'),      # first hidden layer, 128 neurons
    Dropout(0.5),                       # drop half of the activations while training
    Dense(64, activation='relu'),       # second hidden layer, 64 neurons
    Dropout(0.5),
    Dense(26, activation='softmax'),    # softmax yields a probability for each of the 26 classes
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [13]:
# Configure training.
# sparse_categorical_crossentropy takes the integer class ids in `y`
# directly, so the labels do not need to be one-hot encoded.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [14]:
# Train the network.
# batch_size: number of samples processed before the model weights are updated.
# verbose:    how much progress output is shown during training
#             (0 would show nothing; 1 shows a progress bar per epoch).
# NOTE(review): the test split is also used as validation data here.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=12,
    verbose=1,
)

Epoch 1/50
[1m1334/1334[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.1150 - loss: 3.0218 - val_accuracy: 0.5238 - val_loss: 1.8111
Epoch 2/50
[1m1334/1334[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 5ms/step - accuracy: 0.3753 - loss: 1.9698 - val_accuracy: 0.6077 - val_loss: 1.4233
Epoch 3/50
[1m1334/1334[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 5ms/step - accuracy: 0.4754 - loss: 1.6790 - val_accuracy: 0.6465 - val_loss: 1.2254
Epoch 4/50
[1m1334/1334[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - accuracy: 0.5285 - loss: 1.4995 - val_accuracy: 0.6780 - val_loss: 1.1453
Epoch 5/50
[1m1334/1334[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 8ms/step - accuracy: 0.5564 - loss: 1.4178 - val_accuracy: 0.6950 - val_loss: 1.0464
Epoch 6/50
[1m1334/1334[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.5725 - loss: 1.3644 - val_accuracy: 0.7180 - val_loss: 0.9959
Epoch 7/50
[

In [15]:
# Score the trained model on the held-out split; with the compile settings
# above, evaluate() returns the loss followed by the accuracy metric.
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test accuracy: {accuracy}")
print(f"Test loss: {loss}")

[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8329 - loss: 0.5585
Test accuracy: 0.8317499756813049
Test loss: 0.5519418716430664


In [16]:
# Save the trained model to 'ocr_model.h5' (path is relative to the working directory).
# NOTE(review): the .h5 extension selects the legacy HDF5 format; switching to
# the native .keras format would also require changing the path used when loading.
model.save('ocr_model.h5')



In [17]:
# Load the trained model back from 'ocr_model.h5'; this rebinds `model`
# to the reloaded copy.
# NOTE(review): this import would conventionally live in the import cell at the top.
from tensorflow.keras.models import load_model
model = load_model('ocr_model.h5')



In [18]:
# Select the records to classify: the first 1000 rows of the scaled test
# features (fewer if the test split is smaller than that).
sample_records = X_test[:1000]

In [19]:
# Perform classification: run the model on the selected records. Each row of
# `predictions` holds the softmax outputs for one record.
predictions = model.predict(sample_records)

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step


In [20]:
# Decode the true labels of the whole test split back to letters.
actual_letters = label_encoder.inverse_transform(y_test)

# For every classified record pick the class with the highest score,
# then map those class ids back to letters as well.
predicted_labels = predictions.argmax(axis=1)
predicted_letters = label_encoder.inverse_transform(predicted_labels)

In [21]:
# Calculate accuracy on the classified sample.
# The predictions were made on the leading rows of X_test, so they must be
# compared with the matching leading rows of y_test. Comparing against `y`
# (the labels of the full, unsplit dataset) would pair each prediction with
# an unrelated record, because the test split is not the first rows of `y`.
accuracy = np.mean(predicted_labels == y_test[:len(predicted_labels)])

In [22]:
# Two-column listing: the predicted letter next to the true letter, one
# line per classified record.
print("Predicted Labels\tActual Labels")
for i, predicted in enumerate(predicted_letters):
    print(f"{predicted}\t\t\t{actual_letters[i]}")

Predicted Labels	Actual Labels
D			D
D			D
V			V
B			B
H			H
N			N
R			E
Q			Q
X			R
N			N
Q			Q
O			O
N			N
D			D
I			I
M			M
U			U
S			S
O			O
A			A
X			X
A			A
K			K
S			S
O			O
V			V
Y			Y
J			J
D			D
V			V
N			D
V			V
K			K
Y			F
N			N
I			I
G			G
T			T
H			H
K			K
J			J
O			O
M			M
T			T
B			B
Q			Q
Z			Z
C			C
D			D
X			X
K			C
G			G
Q			Q
F			F
N			N
Y			Y
Z			Z
K			K
C			C
E			T
M			M
W			V
Q			G
R			M
D			D
T			T
H			H
P			P
N			N
C			C
O			O
X			X
X			X
Q			Q
V			V
F			P
V			V
G			G
W			W
O			H
P			P
O			O
I			I
G			G
X			X
Y			X
Y			P
W			W
O			Q
D			D
D			N
Q			Q
T			T
T			T
Y			V
Y			Y
I			T
V			V
J			J
M			U
D			D
L			L
M			M
Y			Y
E			E
M			M
A			A
Y			Y
K			K
Y			Y
U			U
U			U
U			U
K			C
Q			Q
I			I
A			A
E			E
D			D
T			T
S			P
Q			Q
Z			Z
G			G
X			X
G			G
F			F
M			M
S			S
E			E
E			E
K			K
Z			Z
K			X
O			O
E			E
U			U
I			I
R			R
O			O
M			M
J			J
F			F
K			K
M			M
N			N
G			T
D			D
H			H
R			R
L			L
Z			Z
F			F
V			V
W			W
E			E
T			T
W			W
Y			Y
Q			Q
L			L
F		