In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical

# Load the dataset
data = pd.read_csv(r"D:\Assignments\Neural networks\Alphabets_data.csv")

# Explore the dataset
print(data.head())
print(data.info())
print(data.describe())

# Check for missing values
print(data.isnull().sum())

# Handle missing values (if any)
data = data.dropna()

# Separate features and labels
X = data.drop("letter", axis=1)
y = data["letter"]

# Normalize the data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Encode the labels if they are not integers
if y.dtype == 'object':
    y = pd.factorize(y)[0]

# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Check the shapes of the training and test sets
print(X_train.shape, X_test.shape)
print(y_train.shape, y_test.shape)

# Convert labels to categorical if necessary
y_train_cat = to_categorical(y_train)
y_test_cat = to_categorical(y_test)

# Build the ANN model
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(32, activation='relu'),
    Dense(y_train_cat.shape[1], activation='softmax')  # Adjust output layer based on the number of classes
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train_cat, epochs=50, validation_split=0.2, batch_size=32)

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test_cat)
print(f"Test Accuracy: {accuracy * 100:.2f}%")


  letter  xbox  ybox  width  height  onpix  xbar  ybar  x2bar  y2bar  xybar  \
0      T     2     8      3       5      1     8    13      0      6      6   
1      I     5    12      3       7      2    10     5      5      4     13   
2      D     4    11      6       8      6    10     6      2      6     10   
3      N     7    11      6       6      3     5     9      4      6      4   
4      G     2     1      3       1      1     8     6      6      6      6   

   x2ybar  xy2bar  xedge  xedgey  yedge  yedgex  
0      10       8      0       8      0       8  
1       3       9      2       8      4      10  
2       3       7      3       7      3       9  
3       4      10      6      10      2       8  
4       5       9      1       7      5      10  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20000 entries, 0 to 19999
Data columns (total 17 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   letter  20000 non-null  object
 1   xbo

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.2613 - loss: 2.6366 - val_accuracy: 0.6541 - val_loss: 1.2454
Epoch 2/50
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6923 - loss: 1.0991 - val_accuracy: 0.7444 - val_loss: 0.9161
Epoch 3/50
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7657 - loss: 0.8069 - val_accuracy: 0.7884 - val_loss: 0.7509
Epoch 4/50
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8021 - loss: 0.6818 - val_accuracy: 0.8150 - val_loss: 0.6511
Epoch 5/50
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 994us/step - accuracy: 0.8248 - loss: 0.5966 - val_accuracy: 0.8344 - val_loss: 0.5779
Epoch 6/50
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 984us/step - accuracy: 0.8449 - loss: 0.5292 - val_accuracy: 0.8487 - val_loss: 0.5230
Epoch 7/50
[1m400/400[0m [32m━━━