In [21]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input

# Load dataset
data = pd.read_csv('Alphabets_data.csv')

# Check the column names
print("Column names in the dataset:")
print(data.columns)

# Data exploration
# DataFrame.info() prints its summary itself and returns None, so it must not
# be wrapped in print() (that would emit a stray "None" line).
data.info()
print(data.describe())

# Assuming 'letter' is the target variable
target_column = 'letter'
print(data[target_column].value_counts())  # Show the distribution of target classes

# Preprocessing
# Handle missing values by dropping rows with any missing values
data = data.dropna()

# Separate features and target
X = data.drop(columns=[target_column])  # Every column except the label
y = data[target_column]  # The label column only

# Encode target labels as integers (required by sparse categorical loss)
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Train-test split.
# The split happens BEFORE scaling so that the scaler's mean/variance are
# estimated from training rows only; fitting it on the full dataset would leak
# test-set statistics into the training features.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42
)

# Normalize the features: fit on the training split, apply to both splits
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix scaled with the training-set statistics; kept so any
# later code that refers to X_scaled continues to work.
X_scaled = scaler.transform(X)

# Build a basic ANN model function
def build_ann_model(optimizer='adam', activation='relu', neurons=32):
    """Create and compile a small fully connected classifier.

    The input width is taken from the module-level ``X_train`` and the number
    of output units from the count of distinct values in the module-level
    ``y_encoded``.

    Args:
        optimizer: Optimizer passed straight to ``model.compile``.
        activation: Activation used by both hidden layers.
        neurons: Number of units in each of the two hidden layers.

    Returns:
        The compiled ``Sequential`` model (not yet trained).
    """
    n_features = X_train.shape[1]
    n_classes = len(np.unique(y_encoded))

    # Input -> hidden -> hidden -> softmax over the classes
    network = Sequential([
        Input(shape=(n_features,)),
        Dense(neurons, activation=activation),
        Dense(neurons, activation=activation),
        Dense(n_classes, activation='softmax'),
    ])

    # Sparse loss because the targets are integer class ids, not one-hot rows
    network.compile(
        optimizer=optimizer,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

# List of hyperparameters for manual tuning
optimizers = ['adam', 'sgd']
activations = ['relu', 'tanh']
neurons_list = [32, 64]
# Start below any achievable accuracy so the first configuration is always
# recorded and best_params can never be left empty.
best_accuracy = float('-inf')
best_params = {}

# Hold out part of the TRAINING data for model selection. Choosing
# hyperparameters by test-set accuracy and then reporting accuracy on that same
# test set gives an optimistically biased estimate, so the test set is only
# touched once, after the configuration has been chosen.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42
)

# Manual Hyperparameter Tuning
for optimizer in optimizers:
    for activation in activations:
        for neurons in neurons_list:
            # Drop state left over from the previously built model so memory
            # does not accumulate across the grid.
            tf.keras.backend.clear_session()

            # Build and train the model on the fitting portion only
            model = build_ann_model(optimizer=optimizer, activation=activation, neurons=neurons)
            model.fit(X_fit, y_fit, epochs=10, batch_size=20, verbose=0)

            # Evaluate the model on the validation portion
            y_pred = np.argmax(model.predict(X_val), axis=-1)
            accuracy = accuracy_score(y_val, y_pred)
            print(f"Optimizer: {optimizer}, Activation: {activation}, Neurons: {neurons}, Accuracy: {accuracy}")

            # Remember the best performing configuration (validation accuracy)
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_params = {'optimizer': optimizer, 'activation': activation, 'neurons': neurons}

# Output the best results (accuracy here is the validation accuracy)
print(f"Best Hyperparameters: {best_params}")
print(f"Best Accuracy: {best_accuracy}")

# Retrain the chosen configuration on the full training set, then evaluate it
# once on the untouched test set.
tf.keras.backend.clear_session()
best_model = build_ann_model(**best_params)
best_model.fit(X_train, y_train, epochs=10, batch_size=20, verbose=0)
y_pred = np.argmax(best_model.predict(X_test), axis=-1)

# Evaluation metrics
# Report rows are labelled with the original class names instead of the
# integer codes; `labels` is passed explicitly so the names stay aligned even
# if some class happens to be absent from the test split.
class_ids = np.arange(len(label_encoder.classes_))
class_names = [str(name) for name in label_encoder.classes_]
print("Classification Report:")
print(classification_report(y_test, y_pred, labels=class_ids, target_names=class_names))

# Final Accuracy Score
accuracy = accuracy_score(y_test, y_pred)
print(f"Final Model Accuracy: {accuracy}")


Column names in the dataset:
Index(['letter', 'xbox', 'ybox', 'width', 'height', 'onpix', 'xbar', 'ybar',
       'x2bar', 'y2bar', 'xybar', 'x2ybar', 'xy2bar', 'xedge', 'xedgey',
       'yedge', 'yedgex'],
      dtype='object')
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20000 entries, 0 to 19999
Data columns (total 17 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   letter  20000 non-null  object
 1   xbox    20000 non-null  int64 
 2   ybox    20000 non-null  int64 
 3   width   20000 non-null  int64 
 4   height  20000 non-null  int64 
 5   onpix   20000 non-null  int64 
 6   xbar    20000 non-null  int64 
 7   ybar    20000 non-null  int64 
 8   x2bar   20000 non-null  int64 
 9   y2bar   20000 non-null  int64 
 10  xybar   20000 non-null  int64 
 11  x2ybar  20000 non-null  int64 
 12  xy2bar  20000 non-null  int64 
 13  xedge   20000 non-null  int64 
 14  xedgey  20000 non-null  int64 
 15  yedge   20000 non-null  int64 
 16  yedgex  20