In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.preprocessing import StandardScaler,LabelEncoder
from sklearn.impute import SimpleImputer
import keras
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import classification_report,accuracy_score,precision_score,recall_score,f1_score
import warnings
warnings.filterwarnings('ignore')

# Data Exploration and Pre-Processing

In [3]:
df=pd.read_csv('Alphabets_data.csv')
df.head()

Unnamed: 0,letter,xbox,ybox,width,height,onpix,xbar,ybar,x2bar,y2bar,xybar,x2ybar,xy2bar,xedge,xedgey,yedge,yedgex
0,T,2,8,3,5,1,8,13,0,6,6,10,8,0,8,0,8
1,I,5,12,3,7,2,10,5,5,4,13,3,9,2,8,4,10
2,D,4,11,6,8,6,10,6,2,6,10,3,7,3,7,3,9
3,N,7,11,6,6,3,5,9,4,6,4,4,10,6,10,2,8
4,G,2,1,3,1,1,8,6,6,6,6,5,9,1,7,5,10


In [4]:
df.shape

(20000, 17)

In [5]:
df.columns

Index(['letter', 'xbox', 'ybox', 'width', 'height', 'onpix', 'xbar', 'ybar',
       'x2bar', 'y2bar', 'xybar', 'x2ybar', 'xy2bar', 'xedge', 'xedgey',
       'yedge', 'yedgex'],
      dtype='object')

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20000 entries, 0 to 19999
Data columns (total 17 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   letter  20000 non-null  object
 1   xbox    20000 non-null  int64 
 2   ybox    20000 non-null  int64 
 3   width   20000 non-null  int64 
 4   height  20000 non-null  int64 
 5   onpix   20000 non-null  int64 
 6   xbar    20000 non-null  int64 
 7   ybar    20000 non-null  int64 
 8   x2bar   20000 non-null  int64 
 9   y2bar   20000 non-null  int64 
 10  xybar   20000 non-null  int64 
 11  x2ybar  20000 non-null  int64 
 12  xy2bar  20000 non-null  int64 
 13  xedge   20000 non-null  int64 
 14  xedgey  20000 non-null  int64 
 15  yedge   20000 non-null  int64 
 16  yedgex  20000 non-null  int64 
dtypes: int64(16), object(1)
memory usage: 2.6+ MB


In [7]:
df.describe()

Unnamed: 0,xbox,ybox,width,height,onpix,xbar,ybar,x2bar,y2bar,xybar,x2ybar,xy2bar,xedge,xedgey,yedge,yedgex
count,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0
mean,4.02355,7.0355,5.12185,5.37245,3.50585,6.8976,7.50045,4.6286,5.17865,8.28205,6.454,7.929,3.0461,8.33885,3.69175,7.8012
std,1.913212,3.304555,2.014573,2.26139,2.190458,2.026035,2.325354,2.699968,2.380823,2.488475,2.63107,2.080619,2.332541,1.546722,2.567073,1.61747
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,3.0,5.0,4.0,4.0,2.0,6.0,6.0,3.0,4.0,7.0,5.0,7.0,1.0,8.0,2.0,7.0
50%,4.0,7.0,5.0,6.0,3.0,7.0,7.0,4.0,5.0,8.0,6.0,8.0,3.0,8.0,3.0,8.0
75%,5.0,9.0,6.0,7.0,5.0,8.0,9.0,6.0,7.0,10.0,8.0,9.0,4.0,9.0,5.0,9.0
max,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0


In [8]:
# check for missing values
df.isnull().sum()

letter    0
xbox      0
ybox      0
width     0
height    0
onpix     0
xbar      0
ybar      0
x2bar     0
y2bar     0
xybar     0
x2ybar    0
xy2bar    0
xedge     0
xedgey    0
yedge     0
yedgex    0
dtype: int64

In [9]:
# No missing values.

## Data Normalization

In [11]:
# standard scaler 
scaler=StandardScaler()

In [12]:
X = df.drop("letter", axis=1) # features
y = df["letter"] # target colums
print(X.shape)
print(y.shape)

(20000, 16)
(20000,)


In [13]:
X_scaled=scaler.fit_transform(X)

In [14]:
# convert target colums to numeric using label encoder
le=LabelEncoder()
y=le.fit_transform(y)

In [15]:
# SPlit the datset into train and test

In [21]:
x_train,x_test,y_train,y_test=train_test_split(X_scaled,y,train_size=0.8,random_state=50)

In [31]:
print(x_train.shape)
print(x_test.shape)
print(y_train.shape)
print(y_test.shape)

(16000, 16)
(4000, 16)
(16000,)
(4000,)


# ANN Model Implementation

In [34]:
# Creating a basic ANN model.

In [36]:
model = Sequential()
model.add(Dense(64, input_dim=x_train.shape[1], activation='relu'))  # Hidden Layer 1
model.add(Dense(32, activation='relu'))  # Hidden Layer 2
model.add(Dense(26, activation='softmax')) # 26 output layers for alphabets
model.summary()# Output Layer (for classification)

In [38]:
# Compile the model

model.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=['accuracy'])

In [40]:
# train the model

In [42]:
history=model.fit(x_train,y_train,epochs=10,batch_size=32,validation_data=(x_test,y_test))

Epoch 1/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.3127 - loss: 2.5140 - val_accuracy: 0.7017 - val_loss: 1.0952
Epoch 2/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 851us/step - accuracy: 0.7225 - loss: 0.9909 - val_accuracy: 0.7763 - val_loss: 0.7986
Epoch 3/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 835us/step - accuracy: 0.7893 - loss: 0.7357 - val_accuracy: 0.8098 - val_loss: 0.6648
Epoch 4/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 873us/step - accuracy: 0.8256 - loss: 0.5949 - val_accuracy: 0.8325 - val_loss: 0.5797
Epoch 5/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 873us/step - accuracy: 0.8466 - loss: 0.5256 - val_accuracy: 0.8445 - val_loss: 0.5251
Epoch 6/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 996us/step - accuracy: 0.8585 - loss: 0.4652 - val_accuracy: 0.8620 - val_loss: 0.4714
Epoch 7/10
[1m500

## Evaluate the model

In [45]:
test_loss, test_accuracy = model.evaluate(x_test, y_test)
print(f"Test Accuracy: {test_accuracy * 100}%")
print(f"Test Loss: {test_loss * 100}%")

[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 687us/step - accuracy: 0.8999 - loss: 0.3290
Test Accuracy: 89.30000066757202%
Test Loss: 35.63053011894226%


# Hyperparameter Tuning

In [48]:

# Function to build the model (required for GridSearchCV)
def build_model(hidden_units=32, activation='relu', optimizer='adam'):
    model = Sequential([
        Dense(hidden_units, activation=activation, input_dim=x_train.shape[1]),
        Dense(hidden_units // 2, activation=activation),
        Dense(26, activation='softmax')  # Output layer
    ])
    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

In [50]:
param_grid = {
    'hidden_units': [32, 64, 128],
    'activation': ['relu', 'tanh'],
    'optimizer': ['adam', 'sgd'],
    'epochs': [10, 20],
    'batch_size': [16, 32]
}

In [52]:

# List to store results
results = []

# Perform grid search manually
for hidden_units in param_grid['hidden_units']:
    for activation in param_grid['activation']:
        for optimizer in param_grid['optimizer']:
            for epochs in param_grid['epochs']:
                for batch_size in param_grid['batch_size']:
                    print(f"Testing combination: hidden_units={hidden_units}, activation={activation}, "
                          f"optimizer={optimizer}, epochs={epochs}, batch_size={batch_size}")
                    
                    # Build and train model
                    model = build_model(hidden_units, activation, optimizer)
                    history = model.fit(x_train, y_train, epochs=epochs, batch_size=batch_size, verbose=0, validation_split=0.2)
                    
                    # Evaluate on the test set
                    test_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=0)
                    
                    # Store the result
                    results.append({
                        'hidden_units': hidden_units,
                        'activation': activation,
                        'optimizer': optimizer,
                        'epochs': epochs,
                        'batch_size': batch_size,
                        'test_accuracy': test_accuracy
                    })

Testing combination: hidden_units=32, activation=relu, optimizer=adam, epochs=10, batch_size=16
Testing combination: hidden_units=32, activation=relu, optimizer=adam, epochs=10, batch_size=32
Testing combination: hidden_units=32, activation=relu, optimizer=adam, epochs=20, batch_size=16
Testing combination: hidden_units=32, activation=relu, optimizer=adam, epochs=20, batch_size=32
Testing combination: hidden_units=32, activation=relu, optimizer=sgd, epochs=10, batch_size=16
Testing combination: hidden_units=32, activation=relu, optimizer=sgd, epochs=10, batch_size=32
Testing combination: hidden_units=32, activation=relu, optimizer=sgd, epochs=20, batch_size=16
Testing combination: hidden_units=32, activation=relu, optimizer=sgd, epochs=20, batch_size=32
Testing combination: hidden_units=32, activation=tanh, optimizer=adam, epochs=10, batch_size=16
Testing combination: hidden_units=32, activation=tanh, optimizer=adam, epochs=10, batch_size=32
Testing combination: hidden_units=32, activa

In [54]:
# Find the best combination
best_combination = max(results, key=lambda x: x['test_accuracy'])

In [56]:
# Display results
print("Best Hyperparameter Combination:")
print(best_combination)

Best Hyperparameter Combination:
{'hidden_units': 128, 'activation': 'relu', 'optimizer': 'adam', 'epochs': 20, 'batch_size': 16, 'test_accuracy': 0.9497500061988831}


# Evaluation

In [59]:
# Predictions on the test set
y_pred = model.predict(x_test).argmax(axis=1)

[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 602us/step


In [61]:
# Evaluation Metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

In [63]:
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-Score: {f1:.2f}")

Accuracy: 0.80
Precision: 0.81
Recall: 0.80
F1-Score: 0.80


In [65]:
# Classification Report
print("Classification Report:\n", classification_report(y_test, y_pred))

Classification Report:
               precision    recall  f1-score   support

           0       0.85      0.91      0.88       152
           1       0.71      0.80      0.75       159
           2       0.84      0.77      0.80       143
           3       0.79      0.85      0.82       161
           4       0.70      0.87      0.78       145
           5       0.81      0.80      0.80       158
           6       0.74      0.61      0.67       169
           7       0.85      0.63      0.72       142
           8       0.93      0.76      0.84       159
           9       0.91      0.81      0.86       149
          10       0.75      0.81      0.78       155
          11       0.85      0.82      0.83       131
          12       0.82      0.82      0.82       164
          13       0.91      0.86      0.89       154
          14       0.80      0.77      0.78       147
          15       0.90      0.85      0.87       194
          16       0.80      0.71      0.75       150
   

# ---- The End ----