In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import accuracy_score, classification_report


In [3]:
file_path = r"Alphabets_data.csv"
df = pd.read_csv(file_path)
label_encoder = LabelEncoder()
df["letter"] = label_encoder.fit_transform(df["letter"])  # Convert A-Z to 0-25
scaler = MinMaxScaler()
X = scaler.fit_transform(df.drop(columns=["letter"]))
y = df["letter"]  # Encoded labels
y = to_categorical(y, num_classes=26)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)


In [4]:
model = Sequential([
    Dense(128, activation='relu', input_shape=(X_train.shape[1],)),  # First hidden layer
    Dense(64, activation='relu'),  # Second hidden layer
    Dense(26, activation='softmax')  # Output layer (26 classes for letters A-Z)
])
model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test), verbose=1)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.2543 - loss: 2.7480 - val_accuracy: 0.6037 - val_loss: 1.5075
Epoch 2/20
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.6000 - loss: 1.4267 - val_accuracy: 0.6650 - val_loss: 1.2235
Epoch 3/20
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.6669 - loss: 1.2006 - val_accuracy: 0.6950 - val_loss: 1.1102
Epoch 4/20
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7001 - loss: 1.0939 - val_accuracy: 0.7063 - val_loss: 1.0323
Epoch 5/20
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7199 - loss: 1.0075 - val_accuracy: 0.7372 - val_loss: 0.9529
Epoch 6/20
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7356 - loss: 0.9404 - val_accuracy: 0.7465 - val_loss: 0.8965
Epoch 7/20
[1m500/500[0m 

In [5]:
y_pred = np.argmax(model.predict(X_test), axis=1)
y_true = np.argmax(y_test, axis=1)  # Convert one-hot encoding back to label indices
accuracy = accuracy_score(y_true, y_pred)
print(f"Test Accuracy: {accuracy:.3f}")
print(classification_report(y_true, y_pred, target_names=label_encoder.classes_))


[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step   
Test Accuracy: 0.848
              precision    recall  f1-score   support

           A       0.83      0.94      0.88       158
           B       0.86      0.83      0.84       153
           C       0.93      0.80      0.86       147
           D       0.82      0.86      0.84       161
           E       0.75      0.86      0.80       154
           F       0.78      0.85      0.81       155
           G       0.70      0.79      0.74       155
           H       0.78      0.66      0.71       147
           I       0.94      0.81      0.87       151
           J       0.91      0.87      0.89       149
           K       0.80      0.81      0.81       148
           L       0.88      0.91      0.90       152
           M       0.89      0.97      0.93       158
           N       0.95      0.89      0.92       157
           O       0.84      0.72      0.77       151
           P       0.93      0.85   

In [6]:
neurons_list = [64, 128, 256]
learning_rates = [0.001, 0.0005]
best_score = 0
best_params = {}
for neurons in neurons_list:
    for lr in learning_rates:
        print(f"Training with neurons={neurons}, learning_rate={lr}")
        model = Sequential([
            Dense(neurons, activation='relu', input_shape=(X_train.shape[1],)),
            Dense(neurons // 2, activation='relu'),
            Dense(26, activation='softmax')  # 26 output classes
        ])
        optimizer = Adam(learning_rate=lr)
        model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
        history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test), verbose=0)
        _, accuracy = model.evaluate(X_test, y_test, verbose=0)
        print(f"Accuracy: {accuracy:.4f}\n")
        if accuracy > best_score:
            best_score = accuracy
            best_params = {'neurons': neurons, 'learning_rate': lr}
print(f"\nBest Parameters: {best_params}, Best Accuracy: {best_score:.4f}")


Training with neurons=64, learning_rate=0.001


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Accuracy: 0.7383

Training with neurons=64, learning_rate=0.0005
Accuracy: 0.7003

Training with neurons=128, learning_rate=0.001
Accuracy: 0.7883

Training with neurons=128, learning_rate=0.0005
Accuracy: 0.7462

Training with neurons=256, learning_rate=0.001
Accuracy: 0.8465

Training with neurons=256, learning_rate=0.0005
Accuracy: 0.7862


Best Parameters: {'neurons': 256, 'learning_rate': 0.001}, Best Accuracy: 0.8465


In [7]:
best_neurons = best_params['neurons']
best_lr = best_params['learning_rate']
print(f"Training best model with neurons={best_neurons}, learning_rate={best_lr}")
best_model = Sequential([
    Dense(best_neurons, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(best_neurons // 2, activation='relu'),
    Dense(26, activation='softmax')  # Output layer for 26 classes
])
optimizer = Adam(learning_rate=best_lr)
best_model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
best_model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test), verbose=1)
y_pred_best = np.argmax(best_model.predict(X_test), axis=1)
y_true_best = np.argmax(y_test, axis=1)
accuracy_best = accuracy_score(y_true_best, y_pred_best)
print(f"Best Model Accuracy: {accuracy_best:.3f}")
print(classification_report(y_true_best, y_pred_best, target_names=label_encoder.classes_))


Training best model with neurons=256, learning_rate=0.001
Epoch 1/20
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.3051 - loss: 2.5744 - val_accuracy: 0.6198 - val_loss: 1.2882
Epoch 2/20
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.6578 - loss: 1.2264 - val_accuracy: 0.7063 - val_loss: 1.0550
Epoch 3/20
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7214 - loss: 1.0223 - val_accuracy: 0.7638 - val_loss: 0.9005
Epoch 4/20
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7522 - loss: 0.9085 - val_accuracy: 0.7682 - val_loss: 0.8576
Epoch 5/20
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7624 - loss: 0.8427 - val_accuracy: 0.7670 - val_loss: 0.8041
Epoch 6/20
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.7788 - loss: 0.7742 - val_accura

In [8]:
df = pd.read_csv(r"Alphabets_data.csv")
df.info(), df.head()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20000 entries, 0 to 19999
Data columns (total 17 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   letter  20000 non-null  object
 1   xbox    20000 non-null  int64 
 2   ybox    20000 non-null  int64 
 3   width   20000 non-null  int64 
 4   height  20000 non-null  int64 
 5   onpix   20000 non-null  int64 
 6   xbar    20000 non-null  int64 
 7   ybar    20000 non-null  int64 
 8   x2bar   20000 non-null  int64 
 9   y2bar   20000 non-null  int64 
 10  xybar   20000 non-null  int64 
 11  x2ybar  20000 non-null  int64 
 12  xy2bar  20000 non-null  int64 
 13  xedge   20000 non-null  int64 
 14  xedgey  20000 non-null  int64 
 15  yedge   20000 non-null  int64 
 16  yedgex  20000 non-null  int64 
dtypes: int64(16), object(1)
memory usage: 2.6+ MB


(None,
   letter  xbox  ybox  width  height  onpix  xbar  ybar  x2bar  y2bar  xybar  \
 0      T     2     8      3       5      1     8    13      0      6      6   
 1      I     5    12      3       7      2    10     5      5      4     13   
 2      D     4    11      6       8      6    10     6      2      6     10   
 3      N     7    11      6       6      3     5     9      4      6      4   
 4      G     2     1      3       1      1     8     6      6      6      6   
 
    x2ybar  xy2bar  xedge  xedgey  yedge  yedgex  
 0      10       8      0       8      0       8  
 1       3       9      2       8      4      10  
 2       3       7      3       7      3       9  
 3       4      10      6      10      2       8  
 4       5       9      1       7      5      10  )