In [37]:
import pandas as pd
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam, SGD, RMSprop
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
from scikeras.wrappers import KerasClassifier
from functools import partial
import warnings
warnings.filterwarnings('ignore')

In [2]:
df=pd.read_csv("Alphabets_data.csv")
df

Unnamed: 0,letter,xbox,ybox,width,height,onpix,xbar,ybar,x2bar,y2bar,xybar,x2ybar,xy2bar,xedge,xedgey,yedge,yedgex
0,T,2,8,3,5,1,8,13,0,6,6,10,8,0,8,0,8
1,I,5,12,3,7,2,10,5,5,4,13,3,9,2,8,4,10
2,D,4,11,6,8,6,10,6,2,6,10,3,7,3,7,3,9
3,N,7,11,6,6,3,5,9,4,6,4,4,10,6,10,2,8
4,G,2,1,3,1,1,8,6,6,6,6,5,9,1,7,5,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19995,D,2,2,3,3,2,7,7,7,6,6,6,4,2,8,3,7
19996,C,7,10,8,8,4,4,8,6,9,12,9,13,2,9,3,7
19997,T,6,9,6,7,5,6,11,3,7,11,9,5,2,12,2,4
19998,S,2,3,4,2,1,8,7,2,6,10,6,8,1,9,5,8


In [65]:
# Data Exploration

In [3]:
df.isna().any()

letter    False
xbox      False
ybox      False
width     False
height    False
onpix     False
xbar      False
ybar      False
x2bar     False
y2bar     False
xybar     False
x2ybar    False
xy2bar    False
xedge     False
xedgey    False
yedge     False
yedgex    False
dtype: bool

In [4]:
df.isnull().any()

letter    False
xbox      False
ybox      False
width     False
height    False
onpix     False
xbar      False
ybar      False
x2bar     False
y2bar     False
xybar     False
x2ybar    False
xy2bar    False
xedge     False
xedgey    False
yedge     False
yedgex    False
dtype: bool

In [5]:
df.duplicated().any()

True

In [6]:
df.drop_duplicates(inplace=True)
df

Unnamed: 0,letter,xbox,ybox,width,height,onpix,xbar,ybar,x2bar,y2bar,xybar,x2ybar,xy2bar,xedge,xedgey,yedge,yedgex
0,T,2,8,3,5,1,8,13,0,6,6,10,8,0,8,0,8
1,I,5,12,3,7,2,10,5,5,4,13,3,9,2,8,4,10
2,D,4,11,6,8,6,10,6,2,6,10,3,7,3,7,3,9
3,N,7,11,6,6,3,5,9,4,6,4,4,10,6,10,2,8
4,G,2,1,3,1,1,8,6,6,6,6,5,9,1,7,5,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19994,T,5,8,7,7,7,7,9,4,8,7,7,8,3,10,8,6
19995,D,2,2,3,3,2,7,7,7,6,6,6,4,2,8,3,7
19996,C,7,10,8,8,4,4,8,6,9,12,9,13,2,9,3,7
19997,T,6,9,6,7,5,6,11,3,7,11,9,5,2,12,2,4


In [67]:
# Encoding labels

In [7]:
label_encoder = LabelEncoder()
df['letter'] = label_encoder.fit_transform(df['letter'])

In [69]:
# Splitting features and labels

In [8]:
X = df.drop(columns=['letter'])
y = df['letter']

In [None]:
# Standardization

In [9]:
scaler = StandardScaler()
scaler.fit(X)
X_standardized = scaler.transform(X)

In [71]:
# Splitting dataset into training and test sets

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X_standardized, y, test_size=0.2, random_state=42)

In [73]:
# Function to create model

In [11]:
def create_model(learning_rate=0.001, neurons=32, activation='relu', optimizer='adam', dropout_rate=0.2):
    model = Sequential([
        Dense(neurons, activation=activation, input_shape=(X_train.shape[1],)),
        Dropout(dropout_rate),
        Dense(neurons, activation=activation),
        Dropout(dropout_rate),
        Dense(len(np.unique(y)), activation='softmax')
    ])
    
    if optimizer == 'adam':
        opt = Adam(learning_rate=learning_rate)
    elif optimizer == 'sgd':
        opt = SGD(learning_rate=learning_rate)
    elif optimizer == 'rmsprop':
        opt = RMSprop(learning_rate=learning_rate)
    else:
        opt = Adam(learning_rate=learning_rate)
    
    model.compile(optimizer=opt,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model



In [None]:
# Wrapping model with KerasClassifier

In [31]:
model = KerasClassifier(model=partial(create_model), verbose=0)

In [None]:
# Function to tune each hyperparameter separately

In [33]:
def tune_hyperparameter(param_grid):
    grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=3, n_jobs=-1)
    grid_result = grid.fit(X_train, y_train)
    return grid_result.best_params_

In [55]:
best_learning_rate = tune_hyperparameter({'model__learning_rate': [0.001, 0.01, 0.1]})['model__learning_rate']
best_learning_rate

0.01

In [57]:
best_neurons = tune_hyperparameter({'model__neurons': [32, 64, 128]})['model__neurons']
best_neurons

128

In [59]:
best_activation = tune_hyperparameter({'model__activation': ['relu', 'tanh', 'sigmoid']})['model__activation']
best_activation

'tanh'

In [61]:
best_optimizer = tune_hyperparameter({'model__optimizer': ['adam', 'sgd', 'rmsprop']})['model__optimizer']
best_optimizer

'adam'

In [63]:
best_dropout = tune_hyperparameter({'model__dropout_rate': [0.2, 0.3, 0.4]})['model__dropout_rate']
best_dropout

0.2

In [None]:
# Training the final model with best parameters

In [49]:
final_model = create_model(
    best_learning_rate, best_neurons, best_activation,
    best_optimizer, best_dropout
)

In [75]:
# Training with optimal batch size and epochs

In [51]:
final_model.fit(X_train, y_train, epochs=30, batch_size=32, verbose=1)

Epoch 1/30
[1m467/467[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 5ms/step - accuracy: 0.5905 - loss: 1.3847
Epoch 2/30
[1m467/467[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.7637 - loss: 0.7826
Epoch 3/30
[1m467/467[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.7904 - loss: 0.6873
Epoch 4/30
[1m467/467[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.8073 - loss: 0.6534
Epoch 5/30
[1m467/467[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.8097 - loss: 0.6454
Epoch 6/30
[1m467/467[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.8261 - loss: 0.5831
Epoch 7/30
[1m467/467[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.8250 - loss: 0.6106
Epoch 8/30
[1m467/467[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.8292 - loss: 0.5871
Epoch 9/30
[1m467/467[0m [32m━━━━━━━━

<keras.src.callbacks.history.History at 0x26caa98adb0>

In [77]:
# Evaluation

In [53]:
y_pred = np.argmax(final_model.predict(X_test), axis=1)
accuracy = accuracy_score(y_test, y_pred)
print("Final Model Accuracy:", accuracy)
print("Classification Report:")
print(classification_report(y_test, y_pred))


[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
Final Model Accuracy: 0.9094804499196572
Classification Report:
              precision    recall  f1-score   support

           0       0.96      0.98      0.97       144
           1       0.87      0.83      0.85       156
           2       0.93      0.94      0.93       147
           3       0.92      0.85      0.88       163
           4       0.85      0.93      0.89       144
           5       0.85      0.96      0.90       146
           6       0.95      0.77      0.85       173
           7       0.81      0.83      0.82       133
           8       0.92      0.97      0.95       108
           9       0.97      0.89      0.93       150
          10       0.86      0.93      0.89       149
          11       0.94      0.93      0.94       147
          12       0.92      0.94      0.93       143
          13       0.94      0.92      0.93       138
          14       0.88      0.91      0.89       