In [14]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
import warnings
warnings.filterwarnings("ignore")

In [2]:
df=pd.read_csv("Alphabets_data.csv")
df

Unnamed: 0,letter,xbox,ybox,width,height,onpix,xbar,ybar,x2bar,y2bar,xybar,x2ybar,xy2bar,xedge,xedgey,yedge,yedgex
0,T,2,8,3,5,1,8,13,0,6,6,10,8,0,8,0,8
1,I,5,12,3,7,2,10,5,5,4,13,3,9,2,8,4,10
2,D,4,11,6,8,6,10,6,2,6,10,3,7,3,7,3,9
3,N,7,11,6,6,3,5,9,4,6,4,4,10,6,10,2,8
4,G,2,1,3,1,1,8,6,6,6,6,5,9,1,7,5,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19995,D,2,2,3,3,2,7,7,7,6,6,6,4,2,8,3,7
19996,C,7,10,8,8,4,4,8,6,9,12,9,13,2,9,3,7
19997,T,6,9,6,7,5,6,11,3,7,11,9,5,2,12,2,4
19998,S,2,3,4,2,1,8,7,2,6,10,6,8,1,9,5,8


In [3]:
df.isna().sum()

letter    0
xbox      0
ybox      0
width     0
height    0
onpix     0
xbar      0
ybar      0
x2bar     0
y2bar     0
xybar     0
x2ybar    0
xy2bar    0
xedge     0
xedgey    0
yedge     0
yedgex    0
dtype: int64

In [5]:
# Summarize key features
print("Number of samples:", len(df))
print("Number of features:", len(df.columns) - 1)  # Excluding the target column
print("Classes:", df['letter'].unique())

# Normalize the data 
scaler = StandardScaler()
scaled_data = scaler.fit_transform(df.drop('letter', axis=1))


Number of samples: 20000
Number of features: 16
Classes: ['T' 'I' 'D' 'N' 'G' 'S' 'B' 'A' 'J' 'M' 'X' 'O' 'R' 'F' 'C' 'H' 'W' 'L'
 'P' 'E' 'V' 'Y' 'Q' 'U' 'K' 'Z']


In [9]:
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(scaled_data, df['letter'], test_size=0.2, random_state=42)

# Initialize the ANN model
ann_model = MLPClassifier(hidden_layer_sizes=(100,), activation='relu', solver='adam', random_state=42)

# Train the model
ann_model.fit(X_train, y_train)

# Make predictions
y_pred = ann_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


Accuracy: 0.9565


In [12]:
# Define the parameter grid
param_grid = {
    'hidden_layer_sizes': [(50,), (100,), (200,)],
    'activation': ['relu', 'tanh'],
    'solver': ['adam', 'sgd'],
    'learning_rate': ['constant', 'adaptive'],
}

# Initialize the grid search
grid_search = GridSearchCV(MLPClassifier(random_state=42), param_grid, cv=3, scoring='accuracy')

# Perform grid search
grid_search.fit(X_train, y_train)

# Get the best parameters
best_params = grid_search.best_params_
print("Best Parameters:", best_params)

# Train the model with the best parameters
best_ann_model = MLPClassifier(random_state=42, **best_params)
best_ann_model.fit(X_train, y_train)

# Make predictions with the tuned model
y_pred_tuned = best_ann_model.predict(X_test)

# Evaluate the tuned model
accuracy_tuned = accuracy_score(y_test, y_pred_tuned)
print("Tuned Model Accuracy:", accuracy_tuned)


Best Parameters: {'activation': 'tanh', 'hidden_layer_sizes': (200,), 'learning_rate': 'constant', 'solver': 'adam'}
Tuned Model Accuracy: 0.972


In [15]:
# Evaluation before tuning
print("Before Tuning:")
print(classification_report(y_test, y_pred))

# Evaluation after tuning
print("After Tuning:")
print(classification_report(y_test, y_pred_tuned))


Before Tuning:
              precision    recall  f1-score   support

           A       0.98      0.99      0.98       149
           B       0.91      0.94      0.93       153
           C       0.98      0.94      0.96       137
           D       0.95      0.97      0.96       156
           E       0.94      0.96      0.95       141
           F       0.94      0.94      0.94       140
           G       0.92      0.94      0.93       160
           H       0.93      0.85      0.89       144
           I       0.98      0.95      0.96       146
           J       0.95      0.97      0.96       149
           K       0.89      0.93      0.91       130
           L       0.97      0.97      0.97       155
           M       0.99      0.98      0.99       168
           N       0.97      0.93      0.95       151
           O       0.93      0.98      0.96       145
           P       0.98      0.95      0.96       173
           Q       0.98      0.96      0.97       166
           R