### Classification Using Artificial Neural Networks with Hyperparameter Tuning on Alphabets Data
#### Overview
##### In this assignment, you will be tasked with developing a classification model using Artificial Neural Networks (ANNs) to classify data points from the "Alphabets_data.csv" dataset into predefined categories of alphabets. This exercise aims to deepen your understanding of ANNs and the significant role hyperparameter tuning plays in enhancing model performance.

#### Data Exploration and Preprocessing

In [8]:
import pandas as pd
import numpy as np
# Load the dataset
df = pd.read_csv('Alphabets_data.csv')
df.head()

Unnamed: 0,letter,xbox,ybox,width,height,onpix,xbar,ybar,x2bar,y2bar,xybar,x2ybar,xy2bar,xedge,xedgey,yedge,yedgex
0,T,2,8,3,5,1,8,13,0,6,6,10,8,0,8,0,8
1,I,5,12,3,7,2,10,5,5,4,13,3,9,2,8,4,10
2,D,4,11,6,8,6,10,6,2,6,10,3,7,3,7,3,9
3,N,7,11,6,6,3,5,9,4,6,4,4,10,6,10,2,8
4,G,2,1,3,1,1,8,6,6,6,6,5,9,1,7,5,10


In [9]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20000 entries, 0 to 19999
Data columns (total 17 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   letter  20000 non-null  object
 1   xbox    20000 non-null  int64 
 2   ybox    20000 non-null  int64 
 3   width   20000 non-null  int64 
 4   height  20000 non-null  int64 
 5   onpix   20000 non-null  int64 
 6   xbar    20000 non-null  int64 
 7   ybar    20000 non-null  int64 
 8   x2bar   20000 non-null  int64 
 9   y2bar   20000 non-null  int64 
 10  xybar   20000 non-null  int64 
 11  x2ybar  20000 non-null  int64 
 12  xy2bar  20000 non-null  int64 
 13  xedge   20000 non-null  int64 
 14  xedgey  20000 non-null  int64 
 15  yedge   20000 non-null  int64 
 16  yedgex  20000 non-null  int64 
dtypes: int64(16), object(1)
memory usage: 2.6+ MB


In [10]:
# Summarize key features
print("Number of samples:", len(df))
print("Number of features:", len(df.columns) - 1)  # Excluding the target column
print("Number of classes:", len(df['letter'].unique()))

Number of samples: 20000
Number of features: 16
Number of classes: 26


In [11]:
# Split features and target variable
X = df.iloc[:,1:16]
y = df['letter']

In [12]:
### Normalization
from sklearn.preprocessing import StandardScaler
# Scale the features
scaler = StandardScaler()
X = scaler.fit_transform(X)

### label encoding
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

In [13]:
#### Split into training and testing
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.20, random_state=41)
X_train.shape, X_test.shape, y_train.shape, y_test.shape 

((16000, 15), (4000, 15), (16000,), (4000,))

### Model Implementation

In [17]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Initialize the ANN
model = Sequential()

# Add the input layer and the first hidden layer
model.add(Dense(units=16, activation='relu', input_dim=X_train.shape[1]))

# Add the second hidden layer (optional)
model.add(Dense(units=8, activation='relu'))

# Add the output layer
model.add(Dense(units=len(np.unique(y)), activation='softmax'))

# Compile the ANN
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Summary of the model
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [18]:
# Train the model on the training set
history = model.fit(X_train, y_train, epochs=50, batch_size=10, validation_split=0.1)

Epoch 1/50
[1m1440/1440[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.1551 - loss: 2.8232 - val_accuracy: 0.5138 - val_loss: 1.6126
Epoch 2/50
[1m1440/1440[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.5505 - loss: 1.4812 - val_accuracy: 0.6025 - val_loss: 1.2772
Epoch 3/50
[1m1440/1440[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.6288 - loss: 1.2077 - val_accuracy: 0.6456 - val_loss: 1.1296
Epoch 4/50
[1m1440/1440[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.6663 - loss: 1.0840 - val_accuracy: 0.6956 - val_loss: 1.0346
Epoch 5/50
[1m1440/1440[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.7015 - loss: 0.9882 - val_accuracy: 0.7144 - val_loss: 0.9680
Epoch 6/50
[1m1440/1440[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.7240 - loss: 0.9379 - val_accuracy: 0.7362 - val_loss: 0.9263
Epoch 7/50
[1m1

In [19]:
### evaluate the model
# Evaluate the model on the train set
loss, accuracy = model.evaluate(X_train, y_train)
print(f'train Accuracy: {accuracy * 100:.2f}%')

# Make predictions on the train set
yhat_train = model.predict(X_train)
y_pred_classes = yhat_train.argmax(axis=-1)

from sklearn.metrics import confusion_matrix, classification_report
print(confusion_matrix(y_train, y_pred_classes ))
print(classification_report(y_train, y_pred_classes ))

[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.8155 - loss: 0.6019
train Accuracy: 81.24%
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[[554   5   0   0   0   3  12   3   1   1   5   1   1   0   0   5   5   8
    5   2   6   6   5   4   1   0]
 [  1 537   0  17   0   7   2   1   3   1   2   0   0   0   0   2   1  20
   20   0   0   1   0   1   2   1]
 [  0   0 473   0  45   2  53   4   0   0   3   1   2   0   6   0   0   0
    2   3  13   1   2   0   0   0]
 [  0  48   0 488   5   9   9   5   3   4   3   1   4  14   4   8   0  28
   12   2   1   0   0   0   0   0]
 [  1   6   4   2 452   3  43   2   3   0   3   0   0   0   0   0   9   2
   16  10   0   0   0  18   4  28]
 [  0  12   0   4   2 516   1   1  24   1   0   0   0   1   0   9   2   0
   14  16   0   0   4   1  10   0]
 [  2   3  50   5   5   7 425   2   1   4   9   6   3   0  20   6  34   3
    6   0   1   5   7   0   6   4]
 [  4   9   7  25   0   7   6 

In [20]:
# Make predictions on the test set
yhat_test = model.predict(X_test)
y_pred_classes1 = yhat_test.argmax(axis=-1)

from sklearn.metrics import confusion_matrix, classification_report
print(confusion_matrix(y_test, y_pred_classes1 ))
print(classification_report(y_test, y_pred_classes1 ))

[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[[141   2   0   0   0   1   3   0   0   0   0   0   0   0   0   0   2   0
    0   0   2   2   0   0   3   0]
 [  0 116   0   3   1   0   2   0   1   0   1   0   0   1   0   2   0  12
    7   0   0   0   0   0   1   0]
 [  0   0  93   0   6   2  15   1   0   0   1   0   1   0   3   1   0   0
    0   0   3   0   0   0   0   0]
 [  0  11   0 111   2   2   3   1   1   1   1   3   2   4   2   1   0   6
    6   0   0   0   0   0   0   0]
 [  0   2   1   2 122   2   7   1   3   0   2   0   0   0   0   0   3   1
    3   3   0   0   0   5   0   5]
 [  0   6   0   1   1 131   1   1   5   0   0   0   0   0   0   2   0   0
    1   3   0   0   0   0   5   0]
 [  1   0  11   1   0   4 117   0   1   0   3   2   1   0   5   1   4   0
    2   0   1   2   2   0   0   1]
 [  3   1   0   4   0   1   1  98   1   0   4   0   1   5   8   2   0  12
    0   1   2   2   0   4   1   0]
 [  0   2   0   0   0   2   0   0 130   1   0   0   0

### Hyperparameter Tuning

In [35]:
import tensorflow as tf
import numpy as np
from sklearn.model_selection import ParameterGrid

def create_model(hidden_layers=1, units=16, activation='relu', learning_rate=0.001):  # Model Creation Function
    model = Sequential()
    model.add(Dense(units=units, activation=activation, input_dim=X_train.shape[1]))

    for _ in range(hidden_layers - 1):
        model.add(Dense(units=units, activation=activation))

    model.add(Dense(units=len(np.unique(y)), activation='softmax'))
    
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

In [36]:
def evaluate_model(hidden_layers, units, activation, learning_rate):        #Model Evaluation Function
    model = create_model(hidden_layers=hidden_layers, units=units, activation=activation, learning_rate=learning_rate)
    model.fit(X_train, y_train, epochs=10, batch_size=10, verbose=0)
    _, accuracy = model.evaluate(X_test, y_test, verbose=0)
    return accuracy

In [37]:
#Perform Hyperparameter Tuning
param_grid = {                                        
    'hidden_layers': [1, 2, 3],
    'units': [8, 16, 32],
    'activation': ['relu', 'tanh'],
    'learning_rate': [0.001, 0.01]
}

best_score = 0
best_params = {}

for params in ParameterGrid(param_grid):
    score = evaluate_model(**params)
    print(f"Params: {params} - Score: {score}")
    if score > best_score:
        best_score = score
        best_params = params

print(f"Best Score: {best_score}")
print(f"Best Params: {best_params}")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Params: {'activation': 'relu', 'hidden_layers': 1, 'learning_rate': 0.001, 'units': 8} - Score: 0.7092499732971191
Params: {'activation': 'relu', 'hidden_layers': 1, 'learning_rate': 0.001, 'units': 16} - Score: 0.7817500233650208
Params: {'activation': 'relu', 'hidden_layers': 1, 'learning_rate': 0.001, 'units': 32} - Score: 0.8424999713897705
Params: {'activation': 'relu', 'hidden_layers': 1, 'learning_rate': 0.01, 'units': 8} - Score: 0.7319999933242798
Params: {'activation': 'relu', 'hidden_layers': 1, 'learning_rate': 0.01, 'units': 16} - Score: 0.800000011920929
Params: {'activation': 'relu', 'hidden_layers': 1, 'learning_rate': 0.01, 'units': 32} - Score: 0.8659999966621399
Params: {'activation': 'relu', 'hidden_layers': 2, 'learning_rate': 0.001, 'units': 8} - Score: 0.7045000195503235
Params: {'activation': 'relu', 'hidden_layers': 2, 'learning_rate': 0.001, 'units': 16} - Score: 0.7952499985694885
Params: {'activation': 'relu', 'hidden_layers': 2, 'learning_rate': 0.001, 'uni

In [38]:
# Best parameters from the tuning process
best_hidden_layers = best_params['hidden_layers']
best_units = best_params['units']
best_activation = best_params['activation']
best_learning_rate = best_params['learning_rate']

# Create the final model with the best parameters
final_model = create_model(
    hidden_layers=best_hidden_layers,
    units=best_units,
    activation=best_activation,
    learning_rate=best_learning_rate
)

# Train the final model on the full training data
final_model.fit(X_train, y_train, epochs=10, batch_size=10, verbose=1)


Epoch 1/10
[1m1600/1600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.4213 - loss: 2.1376
Epoch 2/10
[1m1600/1600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.7426 - loss: 0.9408
Epoch 3/10
[1m1600/1600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.7995 - loss: 0.7230
Epoch 4/10
[1m1600/1600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.8292 - loss: 0.6010
Epoch 5/10
[1m1600/1600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.8470 - loss: 0.5338
Epoch 6/10
[1m1600/1600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.8649 - loss: 0.4719
Epoch 7/10
[1m1600/1600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.8815 - loss: 0.4285
Epoch 8/10
[1m1600/1600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.8870 - loss: 0.3903
Epoch 9/10
[1m1600/1600

<keras.src.callbacks.history.History at 0x28a81426990>

In [39]:
# Evaluate the final model on the test data
loss, accuracy = final_model.evaluate(X_test, y_test, verbose=1)
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")

[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8870 - loss: 0.3711
Test Loss: 0.36852511763572693
Test Accuracy: 0.8880000114440918


In [40]:
# Make predictions on the train set
yhat_train = final_model.predict(X_train)
y_pred_classes = yhat_train.argmax(axis=-1)

from sklearn.metrics import confusion_matrix, classification_report
print(confusion_matrix(y_train, y_pred_classes ))
print(classification_report(y_train, y_pred_classes ))

[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 948us/step
[[603   1   0   0   0   1   6   1   0   4   0   1   1   0   1   3   2   3
    0   0   0   0   1   0   3   2]
 [  0 573   0  13   0   2   1   5   0   0   0   0   2   1   0   0   0  12
    5   0   0   1   0   3   1   0]
 [  0   0 549   0  13   0  22   0   0   0   3   1   0   0  11   0   0   4
    0   0   7   0   0   0   0   0]
 [  0  16   0 562   0   1   3  12   0   2   2   0   1   4   9   0   0  23
    5   3   3   0   0   2   0   0]
 [  1   5   1   0 527   8  16   2   1   0   0   4   0   0   0   0   9   2
    5   8   0   0   0   5   1  11]
 [  0   5   0   3   4 549   0   3   4   3   0   0   0   0   3  22   1   0
    8  10   0   0   0   0   2   1]
 [  1   4   0   6   2   1 548   0   1   4   3   1   2   0  14   0  15   4
    1   0   1   2   4   0   0   0]
 [  2  13   2  14   0   0   2 487   0   5   3   0   3   4   6   4   6  27
    0   0   3   0   0   2   0   0]
 [  3   4   0   1   0   5   0   0 568  18   0   2  

In [41]:
# Make predictions on the test set
yhat_test = final_model.predict(X_test)
y_pred_classes1 = yhat_test.argmax(axis=-1)

from sklearn.metrics import confusion_matrix, classification_report
print(confusion_matrix(y_test, y_pred_classes1 ))
print(classification_report(y_test, y_pred_classes1 ))

[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[[152   1   0   0   0   0   1   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   1   0   0   1   0]
 [  2 125   0   5   0   0   0   2   1   1   1   0   0   0   0   0   0   8
    1   0   1   0   0   0   0   0]
 [  2   0 102   1   5   0   7   0   0   0   1   0   0   0   5   0   0   0
    0   1   2   0   0   0   0   0]
 [  0   9   0 124   1   0   0   8   0   0   0   0   1   0   4   0   0   5
    1   2   1   0   0   1   0   0]
 [  0   1   0   0 132   1   5   0   0   0   3   3   0   0   0   0   3   2
    2   1   0   0   0   2   0   7]
 [  0   1   0   0   3 138   0   0   2   2   0   0   0   0   0   3   0   0
    3   4   0   0   0   0   1   0]
 [  0   2   0   3   1   0 143   0   0   0   1   0   0   1   2   0   1   2
    1   0   0   1   0   1   0   0]
 [  2   3   0   2   0   0   0 127   0   0   2   1   0   1   2   1   2   6
    0   0   1   0   1   0   0   0]
 [  0   0   0   1   0   1   0   0 135   4   0   0   0