# ARTIFICIAL NEURAL NETWORKS

### Classification Using Artificial Neural Networks with Hyperparameter Tuning on Alphabets Data

### Overview

#### In this assignment, you will be tasked with developing a classification model using Artificial Neural Networks (ANNs) to classify data points from the "Alphabets_data.csv" dataset into predefined categories of alphabets. This exercise aims to deepen your understanding of ANNs and the significant role hyperparameter tuning plays in enhancing model performance.

### Dataset: "Alphabets_data.csv"

#### The dataset provided, "Alphabets_data.csv", consists of labeled data suitable for a classification task aimed at identifying different alphabets. Before using this data in your model, you'll need to preprocess it to ensure optimal performance.

## Tasks

### 1. Data Exploration and Preprocessing

#### ●	Begin by loading and exploring the "Alphabets_data.csv" dataset. Summarize its key features such as the number of samples, features, and classes.


#### ●	Execute the necessary data preprocessing steps, including data normalization and handling of missing values.

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense
import tensorflow as tf
import random as rn

In [None]:
# Load the dataset from the working directory; the bare name on the last line
# makes the notebook render the resulting DataFrame.
alphabets = pd.read_csv("Alphabets_data.csv")
alphabets

Unnamed: 0,letter,xbox,ybox,width,height,onpix,xbar,ybar,x2bar,y2bar,xybar,x2ybar,xy2bar,xedge,xedgey,yedge,yedgex
0,T,2,8,3,5,1,8,13,0,6,6,10,8,0,8,0,8
1,I,5,12,3,7,2,10,5,5,4,13,3,9,2,8,4,10
2,D,4,11,6,8,6,10,6,2,6,10,3,7,3,7,3,9
3,N,7,11,6,6,3,5,9,4,6,4,4,10,6,10,2,8
4,G,2,1,3,1,1,8,6,6,6,6,5,9,1,7,5,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19995,D,2,2,3,3,2,7,7,7,6,6,6,4,2,8,3,7
19996,C,7,10,8,8,4,4,8,6,9,12,9,13,2,9,3,7
19997,T,6,9,6,7,5,6,11,3,7,11,9,5,2,12,2,4
19998,S,2,3,4,2,1,8,7,2,6,10,6,8,1,9,5,8


In [None]:
# Show the dtype of every column.
alphabets.dtypes

Unnamed: 0,0
letter,object
xbox,int64
ybox,int64
width,int64
height,int64
onpix,int64
xbar,int64
ybar,int64
x2bar,int64
y2bar,int64


In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
alphabets.describe()

Unnamed: 0,xbox,ybox,width,height,onpix,xbar,ybar,x2bar,y2bar,xybar,x2ybar,xy2bar,xedge,xedgey,yedge,yedgex
count,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0
mean,4.02355,7.0355,5.12185,5.37245,3.50585,6.8976,7.50045,4.6286,5.17865,8.28205,6.454,7.929,3.0461,8.33885,3.69175,7.8012
std,1.913212,3.304555,2.014573,2.26139,2.190458,2.026035,2.325354,2.699968,2.380823,2.488475,2.63107,2.080619,2.332541,1.546722,2.567073,1.61747
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,3.0,5.0,4.0,4.0,2.0,6.0,6.0,3.0,4.0,7.0,5.0,7.0,1.0,8.0,2.0,7.0
50%,4.0,7.0,5.0,6.0,3.0,7.0,7.0,4.0,5.0,8.0,6.0,8.0,3.0,8.0,3.0,8.0
75%,5.0,9.0,6.0,7.0,5.0,8.0,9.0,6.0,7.0,10.0,8.0,9.0,4.0,9.0,5.0,9.0
max,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0


In [None]:
# Count the missing values in each column.
alphabets.isnull().sum()

Unnamed: 0,0
letter,0
xbox,0
ybox,0
width,0
height,0
onpix,0
xbar,0
ybar,0
x2bar,0
y2bar,0


In [None]:
# Number of distinct target classes present in the "letter" column.
alphabets["letter"].nunique()

26

In [None]:
from sklearn.preprocessing import LabelEncoder

# Learn the letter -> integer id mapping, then store the encoded target
# alongside the original column.
label_encoder = LabelEncoder()
label_encoder.fit(alphabets['letter'])
alphabets['letter_encoded'] = label_encoder.transform(alphabets['letter'])

# Print the learned mapping so encoded predictions can be read back as letters.
mapping = {
    letter: code
    for letter, code in zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_))
}
print(mapping)

{'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4, 'F': 5, 'G': 6, 'H': 7, 'I': 8, 'J': 9, 'K': 10, 'L': 11, 'M': 12, 'N': 13, 'O': 14, 'P': 15, 'Q': 16, 'R': 17, 'S': 18, 'T': 19, 'U': 20, 'V': 21, 'W': 22, 'X': 23, 'Y': 24, 'Z': 25}


In [None]:
# Render the DataFrame again to inspect the newly added letter_encoded column.
alphabets

Unnamed: 0,letter,xbox,ybox,width,height,onpix,xbar,ybar,x2bar,y2bar,xybar,x2ybar,xy2bar,xedge,xedgey,yedge,yedgex,letter_encoded
0,T,2,8,3,5,1,8,13,0,6,6,10,8,0,8,0,8,19
1,I,5,12,3,7,2,10,5,5,4,13,3,9,2,8,4,10,8
2,D,4,11,6,8,6,10,6,2,6,10,3,7,3,7,3,9,3
3,N,7,11,6,6,3,5,9,4,6,4,4,10,6,10,2,8,13
4,G,2,1,3,1,1,8,6,6,6,6,5,9,1,7,5,10,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19995,D,2,2,3,3,2,7,7,7,6,6,6,4,2,8,3,7,3
19996,C,7,10,8,8,4,4,8,6,9,12,9,13,2,9,3,7,2
19997,T,6,9,6,7,5,6,11,3,7,11,9,5,2,12,2,4,19
19998,S,2,3,4,2,1,8,7,2,6,10,6,8,1,9,5,8,18


In [None]:
# Separate the encoded target from the feature columns (both label columns
# are removed from the feature matrix).
y = alphabets["letter_encoded"]
X = alphabets.drop(columns=["letter", "letter_encoded"])

print("\n\nIndependent Variables\n\n", X)
print("\n\nDependent Variable\n\n", y)



Independent Variables

        xbox  ybox  width  height  onpix  xbar  ybar  x2bar  y2bar  xybar  \
0         2     8      3       5      1     8    13      0      6      6   
1         5    12      3       7      2    10     5      5      4     13   
2         4    11      6       8      6    10     6      2      6     10   
3         7    11      6       6      3     5     9      4      6      4   
4         2     1      3       1      1     8     6      6      6      6   
...     ...   ...    ...     ...    ...   ...   ...    ...    ...    ...   
19995     2     2      3       3      2     7     7      7      6      6   
19996     7    10      8       8      4     4     8      6      9     12   
19997     6     9      6       7      5     6    11      3      7     11   
19998     2     3      4       2      1     8     7      2      6     10   
19999     4     9      6       6      2     9     5      3      1      8   

       x2ybar  xy2bar  xedge  xedgey  yedge  yedgex  
0      

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardize every feature column and keep the original column names so the
# scaled data stays readable as a DataFrame.
# NOTE(review): the scaler is fitted on all rows of X, i.e. before any
# train/test split has been made.
scaler = StandardScaler().fit(X)
scaled_values = scaler.transform(X)
X_std_data = pd.DataFrame(scaled_values, columns=X.columns)
X_std_data

Unnamed: 0,xbox,ybox,width,height,onpix,xbar,ybar,x2bar,y2bar,xybar,x2ybar,xy2bar,xedge,xedgey,yedge,yedgex
0,-1.057698,0.291877,-1.053277,-0.164704,-1.144013,0.544130,2.365097,-1.714360,0.344994,-0.917071,1.347774,0.034125,-1.305948,-0.219082,-1.438153,0.122911
1,0.510385,1.502358,-1.053277,0.719730,-0.687476,1.531305,-1.075326,0.137561,-0.495072,1.895968,-1.312807,0.514764,-0.448492,-0.219082,0.120081,1.359441
2,-0.012309,1.199738,0.435910,1.161947,1.138672,1.531305,-0.645273,-0.973591,0.344994,0.690380,-1.312807,-0.446513,-0.019764,-0.865626,-0.269477,0.741176
3,1.555774,1.199738,0.435910,0.277513,-0.230939,-0.936631,0.644886,-0.232823,0.344994,-1.720796,-0.932724,0.995402,1.266419,1.074008,-0.659036,0.122911
4,-1.057698,-1.826464,-1.053277,-1.933571,-1.144013,0.544130,-0.645273,0.507945,0.344994,-0.917071,-0.552641,0.514764,-0.877220,-0.865626,0.509640,1.359441
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19995,-1.057698,-1.523844,-1.053277,-1.049137,-0.687476,0.050543,-0.215220,0.878329,0.344994,-0.917071,-0.172558,-1.888428,-0.448492,-0.219082,-0.269477,-0.495354
19996,1.555774,0.897117,1.428701,1.161947,0.225598,-1.430218,0.214833,0.507945,1.605094,1.494105,0.967691,2.437316,-0.448492,0.427463,-0.269477,-0.495354
19997,1.033079,0.594497,0.435910,0.719730,0.682135,-0.443044,1.504991,-0.603207,0.765028,1.092242,0.967691,-1.407789,-0.448492,2.367097,-0.659036,-2.350149
19998,-1.057698,-1.221224,-0.556881,-1.491354,-1.144013,0.544130,-0.215220,-0.973591,0.344994,0.690380,-0.172558,0.034125,-0.877220,0.427463,0.509640,0.122911


In [None]:
from sklearn.model_selection import train_test_split

# Split the raw features first (stratified so every letter keeps its share in
# both partitions), then fit the scaler on the training rows only. Fitting the
# scaler on the full dataset would let test-set statistics leak into training.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=107
)

scaler = StandardScaler().fit(X_train_raw)
X_train = pd.DataFrame(
    scaler.transform(X_train_raw), columns=X_train_raw.columns, index=X_train_raw.index
)
X_test = pd.DataFrame(
    scaler.transform(X_test_raw), columns=X_test_raw.columns, index=X_test_raw.index
)

print("X Train Size : ", X_train.shape)
print("X Test Size : ", X_test.shape)
print("y Train Size : ", y_train.shape)
print("y Test Size : ", y_test.shape)

X Train Size :  (14000, 16)
X Test Size :  (6000, 16)
y Train Size :  (14000,)
y Test Size :  (6000,)


In [None]:
import warnings
warnings.filterwarnings("ignore")

model = Sequential()
model.add(Dense(64, input_dim = X_train.shape[1], activation = "relu"))
model.add(Dense(1, activation = "sigmoid"))

In [None]:
model.compile(loss = "binary_crossentropy",
              optimizer = "adam",
              metrics = ["accuracy"])

In [None]:
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.5)

Epoch 1/20
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.0448 - loss: -29.2341 - val_accuracy: 0.0380 - val_loss: -170.4301
Epoch 2/20
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.0439 - loss: -271.5046 - val_accuracy: 0.0380 - val_loss: -711.7996
Epoch 3/20
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.0357 - loss: -934.9463 - val_accuracy: 0.0380 - val_loss: -1735.7657
Epoch 4/20
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.0380 - loss: -2119.5403 - val_accuracy: 0.0380 - val_loss: -3299.2397
Epoch 5/20
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.0397 - loss: -3890.9099 - val_accuracy: 0.0380 - val_loss: -5397.4048
Epoch 6/20
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.0421 - loss: -6138.5303 - val_accuracy: 0.0380 - val_los

In [None]:
# Evaluate the model on the held-out test set (not on the training data).
# With metrics=["accuracy"], evaluate() returns [loss, accuracy].
scores = model.evaluate(X_test, y_test)
print("Test accuracy: %.2f%%" % (scores[1] * 100))

# Make predictions on the test set: the model outputs one score per class,
# so the predicted class id is the index of the largest score.
y_test_prob = model.predict(X_test)
y_test_pred = np.argmax(y_test_prob, axis=1)

# Display predictions, mapped back from class ids to the original letters.
print("Predictions on test set:")
print(label_encoder.inverse_transform(y_test_pred[:10]))

[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.0389 - loss: -78796.0859
compile_metrics: 3.83%
[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
Predictions on test set:
[[1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]]


In [None]:
# Print the installed TensorFlow version.
import tensorflow as tf
print(tf.__version__)

2.17.0


In [None]:
# Install the keras package with pip.
# NOTE(review): keras is already imported in an earlier cell; installs normally belong before the imports.
pip install keras



In [None]:
# Install the keras_tuner package with pip.
# NOTE(review): nothing in the visible cells imports keras_tuner - confirm it is still needed.
pip install keras_tuner



In [None]:
# Install keras and tensorflow with pip in a single command.
pip install keras tensorflow



In [None]:
!pip uninstall keras tensorflow
!pip install tensorflow

Found existing installation: keras 3.4.1
Uninstalling keras-3.4.1:
  Would remove:
    /usr/local/lib/python3.10/dist-packages/keras-3.4.1.dist-info/*
    /usr/local/lib/python3.10/dist-packages/keras/*
Proceed (Y/n)? Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/pip/_internal/cli/base_command.py", line 179, in exc_logging_wrapper
    status = run_func(*args)
  File "/usr/local/lib/python3.10/dist-packages/pip/_internal/commands/uninstall.py", line 106, in run
    uninstall_pathset = req.uninstall(
  File "/usr/local/lib/python3.10/dist-packages/pip/_internal/req/req_install.py", line 722, in uninstall
    uninstalled_pathset.remove(auto_confirm, verbose)
  File "/usr/local/lib/python3.10/dist-packages/pip/_internal/req/req_uninstall.py", line 364, in remove
    if auto_confirm or self._allowed_to_proceed(verbose):
  File "/usr/local/lib/python3.10/dist-packages/pip/_internal/req/req_uninstall.py", line 404, in _allowed_to_proceed
    return ask("Pr

In [None]:
# Install scikeras, the package the KerasClassifier wrapper is imported from.
!pip install scikeras



In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
# Import KerasClassifier from scikeras
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.model_selection import train_test_split
import numpy as np

# Tune on the real alphabet data: X_train / X_test / y_train / y_test come from
# the stratified split made earlier in the notebook. (Generating random binary
# dummy data here would overwrite X and y and tune a model for a different task.)
n_features = X_train.shape[1]
n_classes = len(np.unique(y_train))


# Define the model creation function
def create_model(learning_rate=0.001, dropout_rate=0.0, activation_function='relu',
                 init='uniform', neuron1=4, neuron2=2, input_dim=8, num_classes=2):
    """Build and compile a two-hidden-layer classifier.

    Args:
        learning_rate: Step size for the Adam optimizer.
        dropout_rate: Dropout fraction applied after each hidden layer.
        activation_function: Activation used by both hidden layers.
        init: Kernel initializer used by both hidden layers.
        neuron1: Number of units in the first hidden layer.
        neuron2: Number of units in the second hidden layer.
        input_dim: Number of input features.
        num_classes: Number of target classes; sets the width of the
            softmax output layer.

    Returns:
        A compiled Keras ``Sequential`` model that expects integer class ids
        as targets.
    """
    model = Sequential()
    model.add(Dense(neuron1, input_dim=input_dim, kernel_initializer=init, activation=activation_function))
    model.add(Dropout(dropout_rate))
    model.add(Dense(neuron2, kernel_initializer=init, activation=activation_function))
    model.add(Dropout(dropout_rate))
    # One softmax unit per class, paired with the sparse categorical loss so
    # the integer-encoded labels can be used without one-hot encoding.
    model.add(Dense(num_classes, activation='softmax'))

    # Use TensorFlow's Adam optimizer
    adam = Adam(learning_rate=learning_rate)
    model.compile(loss='sparse_categorical_crossentropy', optimizer=adam, metrics=['accuracy'])
    return model


# Create the KerasClassifier. `model=` replaces the deprecated `build_fn=`;
# the model__ arguments are forwarded to create_model, and verbose=0 silences
# the per-epoch progress output of every individual fit.
model = KerasClassifier(
    model=create_model,
    model__input_dim=n_features,
    model__num_classes=n_classes,
    verbose=0,
)

# Define the grid search parameters. The grid is deliberately small:
# 2**6 = 64 candidates x 3 folds = 192 fits. Degenerate options (an all-zero
# initializer, softmax as a hidden activation) are left out.
learning_rate = [0.001, 0.01]
dropout_rate = [0.0, 0.2]
activation_function = ['relu', 'tanh']
init = ['glorot_uniform', 'he_uniform']
neuron1 = [64, 128]
neuron2 = [32, 64]
batch_size = [64]
epochs = [20]

# Parameters of create_model are routed with the 'model__' prefix;
# batch_size and epochs are parameters of the wrapper itself.
param_grids = dict(model__learning_rate=learning_rate, model__dropout_rate=dropout_rate,
                   model__activation_function=activation_function, model__init=init,
                   model__neuron1=neuron1, model__neuron2=neuron2,
                   batch_size=batch_size, epochs=epochs)

# Build and fit the GridSearchCV
cv = KFold(n_splits=3, shuffle=True, random_state=42)
grid = GridSearchCV(estimator=model, param_grid=param_grids, cv=cv, verbose=1)
grid_result = grid.fit(X_train, y_train)

# Summarize the results
print('Best: {}, using {}'.format(grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print('{:.4f} (+/-{:.4f}) with: {}'.format(mean, stdev, param))

# GridSearchCV refits the best configuration on all of X_train by default,
# so score() here reports that model's accuracy on the untouched test set.
print('Test accuracy of best model: {:.4f}'.format(grid_result.score(X_test, y_test)))

Fitting 5 folds for each of 8748 candidates, totalling 43740 fits
Epoch 1/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.4015 - loss: 0.7600
Epoch 2/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.4035 - loss: 0.7559 
Epoch 3/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4273 - loss: 0.7428 
Epoch 4/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4725 - loss: 0.7222 
Epoch 5/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3342 - loss: 0.7723 
Epoch 6/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4331 - loss: 0.7326 
Epoch 7/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4735 - loss: 0.7165 
Epoch 8/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5198 - loss: 0.6997 



[1m1/2[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m0s[0m 55ms/step



[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 6/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.4538 - loss: 0.7018 
Epoch 7/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.6688 - loss: 0.6774 
Epoch 8/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.4216 - loss: 0.6944  
Epoch 9/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.5432 - loss: 0.6969 
Epoch 10/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.4865 - loss: 0.6897 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step
Epoch 1/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.4290 - loss: 0.7290
Epoch 2/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.4620 - loss: 0.7104  
Epoch 3/10
[1m7/7[0m [32m━━━━━━━━━━━━━━