In [13]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report

df = pd.read_csv('Alphabets_data.csv')
df.head(5)


Unnamed: 0,letter,xbox,ybox,width,height,onpix,xbar,ybar,x2bar,y2bar,xybar,x2ybar,xy2bar,xedge,xedgey,yedge,yedgex
0,T,2,8,3,5,1,8,13,0,6,6,10,8,0,8,0,8
1,I,5,12,3,7,2,10,5,5,4,13,3,9,2,8,4,10
2,D,4,11,6,8,6,10,6,2,6,10,3,7,3,7,3,9
3,N,7,11,6,6,3,5,9,4,6,4,4,10,6,10,2,8
4,G,2,1,3,1,1,8,6,6,6,6,5,9,1,7,5,10


In [14]:
# 1. Data Exploration and Preprocessing
print("Dataframe shape:", df.shape)
print("Number of unique classes:", df["letter"].nunique())

# Handling missing values (if any)
df.dropna(inplace=True)

# Separate features (X) and target variable (y)
X = df.drop('letter', axis=1)
y = df['letter']

# Normalize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


Dataframe shape: (20000, 17)
Number of unique classes: 26


In [15]:
# 2. Model Implementation
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Initialize the basic ANN model
mlp_base = MLPClassifier(hidden_layer_sizes=(100,), max_iter=300, random_state=42)  # Default parameters
mlp_base.fit(X_train, y_train)
y_pred_base = mlp_base.predict(X_test)



In [16]:
# 3. Hyperparameter Tuning (Grid Search example)
from sklearn.model_selection import GridSearchCV
parameter_space = {
    'hidden_layer_sizes': [(50,), (100,), (50, 50)],
    'activation': ['tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'alpha': [0.0001, 0.05],
    'learning_rate': ['constant','adaptive'],
}
mlp_tuned = MLPClassifier(max_iter=300,random_state=42)
clf = GridSearchCV(mlp_tuned, parameter_space, n_jobs=-1, cv=3)
clf.fit(X_train, y_train)




In [17]:
# 4. Evaluation
print("Classification Report for the baseline model:")
print(classification_report(y_test, y_pred_base))

y_pred_tuned = clf.predict(X_test)
print("Classification Report for the tuned model:")
print(classification_report(y_test, y_pred_tuned))

Classification Report for the baseline model:
              precision    recall  f1-score   support

           A       0.97      0.99      0.98       149
           B       0.91      0.95      0.93       153
           C       0.96      0.93      0.95       137
           D       0.96      0.97      0.96       156
           E       0.93      0.96      0.94       141
           F       0.94      0.95      0.94       140
           G       0.94      0.93      0.93       160
           H       0.95      0.85      0.90       144
           I       0.98      0.95      0.97       146
           J       0.95      0.97      0.96       149
           K       0.89      0.93      0.91       130
           L       0.95      0.97      0.96       155
           M       0.99      0.98      0.98       168
           N       0.96      0.94      0.95       151
           O       0.93      0.99      0.96       145
           P       0.98      0.96      0.97       173
           Q       0.98      0.97  