In [13]:
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

In [15]:
# Load the OCR Letter Recognition dataset
data = fetch_openml(name='letter', version=1, as_frame=True, parser='auto')

In [16]:
# Split the dataset into features and target
X = data.data
y = data.target

In [17]:
# Preprocess the data
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)
y = to_categorical(y)

In [18]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [19]:
# Scale the data for better convergence
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [20]:
# Define the model
model = Sequential()
model.add(Dense(128, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dense(64, activation='relu'))
model.add(Dense(len(label_encoder.classes_), activation='softmax'))

In [21]:
# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

In [22]:
# Train the model
model.fit(X_train, y_train, batch_size=32, epochs=10, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1c95ed0d410>

In [23]:
# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print('Test Loss:', loss)
print('Test Accuracy:', accuracy)

Test Loss: 0.1942858248949051
Test Accuracy: 0.9397500157356262


In [24]:
# Make predictions
predictions = model.predict(X_test)
predicted_labels = np.argmax(predictions, axis=1)
predicted_letters = label_encoder.inverse_transform(predicted_labels)



In [25]:
# Print some predicted and actual letters
for i in range(10):
    predicted_label = predicted_letters[i]
    actual_label = label_encoder.inverse_transform([np.argmax(y_test[i])])
    print('Predicted:', predicted_label, 'Actual:', actual_label)

Predicted: K Actual: ['K']
Predicted: I Actual: ['I']
Predicted: I Actual: ['I']
Predicted: C Actual: ['O']
Predicted: T Actual: ['T']
Predicted: D Actual: ['D']
Predicted: Y Actual: ['Y']
Predicted: N Actual: ['N']
Predicted: X Actual: ['X']
Predicted: E Actual: ['E']
