In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.utils import set_random_seed

In [2]:
# Location of the letter-recognition data file; it is fetched over the network by read_csv.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/letter-recognition/letter-recognition.data"

# Column labels supplied explicitly: the target letter first, then 16 feature columns.
column_names = [
    "letter",
    "x-box", "y-box", "width", "height",
    "onpix", "x-bar", "y-bar", "x2bar",
    "y2bar", "xybar", "x2ybr", "xy2br",
    "x-ege", "xegvy", "y-ege", "yegvx",
]

data = pd.read_csv(url, names=column_names)

In [3]:
# Target: the 'letter' column. Features: every other column of `data`.
y = data['letter']
X = data.drop(columns=['letter'])

In [4]:
# Encode the letter labels as integer class ids.
# Fit on the raw 'letter' column rather than on `y` itself: `y` is overwritten
# here, so fitting on it would make a second run of this cell re-fit the encoder
# on already-encoded integers and inverse_transform would stop returning letters.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(data['letter'])

In [7]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [8]:
# Seed the Python, NumPy and backend random generators before any weights are
# created, so weight initialisation and batch shuffling repeat between runs
# (bit-exact results may still vary across hardware).
set_random_seed(42)

# Fully connected classifier: two ReLU hidden layers followed by a softmax
# layer with one unit per letter class.
# The input is declared with an explicit Input object (Keras 3 warns when
# `input_shape=` is passed to a layer), and both the input width and the number
# of output units are derived from the data instead of being hard-coded.
model = Sequential([
    Input(shape=(X.shape[1],)),  # one input per feature column (16 here)
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(len(label_encoder.classes_), activation='softmax'),  # 26 classes for letters A-Z
])

In [9]:
# The sparse categorical loss takes the integer class ids produced by the label
# encoder directly, so no one-hot encoding of the targets is needed.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [10]:
# Train for 10 epochs in mini-batches of 32, setting aside 10% of the training
# rows for per-epoch validation metrics.
model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=32,
    validation_split=0.1,
)

Epoch 1/10
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.3162 - loss: 2.5406 - val_accuracy: 0.6969 - val_loss: 1.1360
Epoch 2/10
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.7153 - loss: 1.0084 - val_accuracy: 0.7500 - val_loss: 0.8782
Epoch 3/10
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.7724 - loss: 0.7931 - val_accuracy: 0.7894 - val_loss: 0.7475
Epoch 4/10
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.7971 - loss: 0.6926 - val_accuracy: 0.7975 - val_loss: 0.6704
Epoch 5/10
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8255 - loss: 0.6117 - val_accuracy: 0.8356 - val_loss: 0.5759
Epoch 6/10
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8428 - loss: 0.5325 - val_accuracy: 0.8450 - val_loss: 0.5513
Epoch 7/10
[1m450/450[0m 

<keras.src.callbacks.history.History at 0x1dc46945390>

In [11]:
# Score the trained model on the held-out test split; the result unpacks into
# the loss followed by the accuracy metric configured at compile time.
loss, accuracy = model.evaluate(x=X_test, y=y_test)
print(f'Test Accuracy: {accuracy * 100:.2f}%')

[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 947us/step - accuracy: 0.8805 - loss: 0.3709
Test Accuracy: 88.20%


In [12]:
# Persist the trained model to a .keras file in the current working directory.
model.save(filepath='ocr_model.keras')

In [13]:
# Reload the saved model from disk (load_model is imported in the notebook's
# top import cell) to check that the .keras file written earlier is usable.
loaded_model = load_model('ocr_model.keras')

In [14]:
# One row per sample; each row holds 16 feature values, matching the model's 16 inputs.
new_data = np.array([[2, 3, 5, 4, 7, 8, 3, 4, 5, 6, 7, 8, 3, 4, 5, 6]])
# Predict the class probabilities: one row of per-class scores for each input row
predicted_probabilities = loaded_model.predict(new_data)
# Take the arg-max along the class axis so each row gets its own class index.
# Without `axis=1`, np.argmax flattens the whole array, which is only correct
# when exactly one sample is passed.
predicted_class_index = np.argmax(predicted_probabilities, axis=1)
# Convert the predicted class indices back to the original letters
predicted_letter = label_encoder.inverse_transform(predicted_class_index)
print(f"The predicted letter is: {predicted_letter[0]}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
The predicted letter is: Q
