In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense


In [2]:
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/letter-recognition/letter-recognition.data"

# Column names based on UCI documentation
columns = ['letter', 'x-box', 'y-box', 'width', 'high', 'onpix', 'x-bar', 'y-bar',
           'x2bar', 'y2bar', 'xybar', 'x2ybr', 'xy2br', 'x-ege', 'xegvy', 'y-ege', 'yegvx']

df = pd.read_csv(url, names=columns)

print(df.head())


  letter  x-box  y-box  width  high  onpix  x-bar  y-bar  x2bar  y2bar  xybar  \
0      T      2      8      3     5      1      8     13      0      6      6   
1      I      5     12      3     7      2     10      5      5      4     13   
2      D      4     11      6     8      6     10      6      2      6     10   
3      N      7     11      6     6      3      5      9      4      6      4   
4      G      2      1      3     1      1      8      6      6      6      6   

   x2ybr  xy2br  x-ege  xegvy  y-ege  yegvx  
0     10      8      0      8      0      8  
1      3      9      2      8      4     10  
2      3      7      3      7      3      9  
3      4     10      6     10      2      8  
4      5      9      1      7      5     10  


In [3]:
# Split features and target
X = df.drop('letter', axis=1)
y = df['letter']

# Encode labels to 0-25
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# One-hot encode target
y_categorical = to_categorical(y_encoded)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y_categorical, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [4]:
model = Sequential([
    Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dense(26, activation='softmax')  # 26 classes for A-Z
])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [5]:
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])


In [6]:
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.1)


Epoch 1/20
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.4286 - loss: 2.1562 - val_accuracy: 0.7494 - val_loss: 0.8448
Epoch 2/20
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.7797 - loss: 0.7505 - val_accuracy: 0.8294 - val_loss: 0.5820
Epoch 3/20
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8499 - loss: 0.5414 - val_accuracy: 0.8687 - val_loss: 0.4603
Epoch 4/20
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8744 - loss: 0.4322 - val_accuracy: 0.8775 - val_loss: 0.3985
Epoch 5/20
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8966 - loss: 0.3621 - val_accuracy: 0.8988 - val_loss: 0.3311
Epoch 6/20
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9094 - loss: 0.3073 - val_accuracy: 0.9150 - val_loss: 0.2948
Epoch 7/20
[1m450/450[0m 

In [7]:
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.4f}")


[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 914us/step - accuracy: 0.9497 - loss: 0.1578
Test Accuracy: 0.9475


In [8]:
import numpy as np

predictions = model.predict(X_test[:5])
predicted_classes = np.argmax(predictions, axis=1)
actual_classes = np.argmax(y_test[:5], axis=1)

print("Predicted:", label_encoder.inverse_transform(predicted_classes))
print("Actual:   ", label_encoder.inverse_transform(actual_classes))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
Predicted: ['X' 'L' 'A' 'E' 'Q']
Actual:    ['T' 'L' 'A' 'E' 'Q']


In [14]:
# # Predict class probabilities
# y_pred_probs = model.predict(X_test)

# # Convert predictions and true labels from one-hot to class labels
# y_pred_classes = np.argmax(y_pred_probs, axis=1)
# y_true_classes = np.argmax(y_test, axis=1)


In [13]:
# from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

# # Accuracy
# acc = accuracy_score(y_true_classes, y_pred_classes)

# # Precision, Recall, F1-score (macro, micro, weighted)
# precision_macro = precision_score(y_true_classes, y_pred_classes, average='macro')
# recall_macro = recall_score(y_true_classes, y_pred_classes, average='macro')
# f1_macro = f1_score(y_true_classes, y_pred_classes, average='macro')

# # Full classification report (includes per-class metrics)
# report = classification_report(y_true_classes, y_pred_classes, target_names=label_encoder.classes_)

# # Confusion matrix
# conf_matrix = confusion_matrix(y_true_classes, y_pred_classes)


In [17]:
# print(f"Accuracy: {acc:.4f}")
# print(f"Macro Precision: {precision_macro:.4f}")
# print(f"Macro Recall: {recall_macro:.4f}")
# print(f"Macro F1 Score: {f1_macro:.4f}")

# print("\nClassification Report:")
# print(report)

# print("\nConfusion Matrix:")
# print(conf_matrix)
