In [17]:
!pip uninstall -y umap-learn
!pip install scikit-learn==1.4.2 scikeras==0.13.0






In [18]:
import sklearn
import scikeras
# Show which versions are actually loaded in this kernel.
print(f"scikit-learn version: {sklearn.__version__}")
print(f"scikeras version: {scikeras.__version__}")

scikit-learn version: 1.4.2
scikeras version: 0.13.0


In [19]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from scikeras.wrappers import KerasClassifier

In [11]:
# Load & preprocess data
# The last column is used as the class label and every other column as a feature.
# NOTE(review): the classification report further down shows 16 strongly imbalanced
# classes; confirm that the last column really is the intended target.
df = pd.read_csv("/content/Alphabets_data.csv")

# Work on a copy of the features so the encoding below neither mutates `df`
# nor raises pandas' SettingWithCopyWarning.
X = df.iloc[:, :-1].copy()
y = df.iloc[:, -1]

# Encode a string target as integer class ids. Building a new Series (rather than
# assigning back through `df.iloc[:, -1] = ...`) guarantees an integer dtype; on
# pandas >= 2.0 the in-place iloc assignment keeps the column's object dtype.
if y.dtype == 'object':
    le = LabelEncoder()
    y = pd.Series(le.fit_transform(y), index=y.index, name=y.name)

# Integer-encode any remaining string feature columns, one encoder per column.
non_numeric_cols = X.select_dtypes(include=['object']).columns
if len(non_numeric_cols):
    X[non_numeric_cols] = X[non_numeric_cols].apply(LabelEncoder().fit_transform)

# Split first, then fit the scaler on the training rows only, so that no statistics
# from the test rows leak into training. The split depends only on the row count and
# random_state, so the same rows land in train/test as when splitting scaled data.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Kept for compatibility with code that expects the fully scaled feature matrix.
X_scaled = scaler.transform(X)

In [12]:
# Build ANN
def create_model(hidden_layers=1, neurons=32, activation='relu', learning_rate=0.001):
    """Build and compile a fully connected softmax classifier.

    Reads the notebook globals ``X_train`` (input width) and ``y`` (number of
    output classes), so both must exist before this function is called.

    Args:
        hidden_layers: Number of hidden Dense layers. One hidden layer is always
            added and ``hidden_layers - 1`` more follow, so values below 1 still
            produce a single hidden layer.
        neurons: Units in each hidden layer.
        activation: Activation used by every hidden layer.
        learning_rate: Learning rate passed to the Adam optimizer.

    Returns:
        A compiled ``Sequential`` model using sparse categorical cross-entropy
        and reporting accuracy.
    """
    # sparse_categorical_crossentropy indexes the softmax output by label value, so
    # the output layer needs max(label) + 1 units. This equals the number of unique
    # labels when they are contiguous from 0, and stays valid when they are not.
    n_classes = int(np.max(y)) + 1

    model = Sequential()
    model.add(keras.Input(shape=(X_train.shape[1],)))
    model.add(Dense(neurons, activation=activation))
    for _ in range(hidden_layers - 1):
        model.add(Dense(neurons, activation=activation))
    model.add(Dense(n_classes, activation='softmax'))
    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

In [13]:
# Train default model
# Seed Python, NumPy and the Keras backend so weight initialisation and batch
# shuffling are repeatable between runs (GPU kernels may still add small noise).
keras.utils.set_random_seed(42)
default_model = create_model()
# 20% of the training rows are held out by Keras for validation during fitting.
default_model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2, verbose=0)
# evaluate() returns the loss followed by the compiled metrics (accuracy here).
loss, acc = default_model.evaluate(X_test, y_test)
print(f"Default Model Accuracy: {acc:.4f}")

[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5400 - loss: 1.1765
Default Model Accuracy: 0.5600


In [2]:
from scikeras.wrappers import KerasClassifier


In [4]:
def create_model(hidden_layers=1, neurons=32, activation='relu', learning_rate=0.001):
    """Build and compile a dense softmax classifier for the grid search.

    This redefinition replaces any earlier ``create_model`` in the kernel. It reads
    the notebook globals ``X_train`` (input width) and ``y_train`` (number of
    output classes), so both must exist before this function is called.

    Args:
        hidden_layers: Number of hidden Dense layers. The first one is always
            added, so values below 1 still yield one hidden layer.
        neurons: Units in each hidden layer.
        activation: Activation used by every hidden layer.
        learning_rate: Learning rate passed to the Adam optimizer.

    Returns:
        A compiled ``keras.Sequential`` model using sparse categorical
        cross-entropy and reporting accuracy.
    """
    # The sparse loss uses the label value as an index into the softmax output, so
    # size the output by max(label) + 1. This matches the unique-label count when
    # labels are contiguous from 0 and remains valid when some ids are missing.
    n_classes = int(np.max(y_train)) + 1

    model = keras.Sequential()
    model.add(keras.Input(shape=(X_train.shape[1],)))
    model.add(Dense(neurons, activation=activation))

    for _ in range(hidden_layers - 1):
        model.add(Dense(neurons, activation=activation))

    model.add(Dense(n_classes, activation='softmax'))

    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        metrics=['accuracy']
    )
    return model


In [6]:
from sklearn.model_selection import GridSearchCV


In [14]:
# Assuming you already loaded your DataFrame as `df`
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Features are every column but the last; the last column is the class label.
# Copy the features so the encoding below does not write into a slice of `df`
# (which raises SettingWithCopyWarning) and `df` itself stays untouched.
X = df.iloc[:, :-1].copy()
y = df.iloc[:, -1]

# Encode label if needed. A fresh Series is built instead of assigning through
# `df.iloc[:, -1] = ...`, because on pandas >= 2.0 that in-place assignment keeps
# the object dtype and the labels would not be a numeric array.
if y.dtype == 'object':
    le = LabelEncoder()
    y = pd.Series(le.fit_transform(y), index=y.index, name=y.name)

# Encode non-numeric features in X (one LabelEncoder fit per column)
non_numeric_cols = X.select_dtypes(include=['object']).columns
if len(non_numeric_cols):
    X[non_numeric_cols] = X[non_numeric_cols].apply(LabelEncoder().fit_transform)

# Train-test split on the raw features; the row assignment depends only on the
# number of rows and random_state, so it matches a split of pre-scaled data.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize: fit on the training rows only so test statistics do not leak in.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Kept for compatibility with code that expects the fully scaled feature matrix.
X_scaled = scaler.transform(X)


In [15]:
# Grid search
# Wrap the Keras builder as a scikit-learn estimator. The builder arguments
# (hidden_layers, neurons, activation, learning_rate) are declared here with their
# defaults so GridSearchCV can override them by name.
model = KerasClassifier(
    model=create_model,
    hidden_layers=1,
    neurons=32,
    activation='relu',
    learning_rate=0.001,
    epochs=20,
    batch_size=32,
    verbose=0,
    # Fixed seed so every candidate starts from a reproducible state and the
    # selected hyperparameters do not change from run to run.
    random_state=42
)

# 2 * 2 * 2 * 2 * 1 * 2 = 32 candidate configurations.
param_grid = {
    'hidden_layers': [1, 2],
    'neurons': [32, 64],
    'activation': ['relu', 'tanh'],
    'learning_rate': [0.001, 0.01],
    'epochs': [20],
    'batch_size': [32, 64]
}

# cv=2 keeps the search to 64 fits in total; with the default refit, the best
# configuration is retrained on all of X_train afterwards.
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=2, verbose=1)
grid_result = grid.fit(X_train, y_train)
print("Best Hyperparameters:", grid_result.best_params_)
best_model = grid_result.best_estimator_
# Test-set predictions, consumed by the evaluation cell.
y_pred = best_model.predict(X_test)

Fitting 2 folds for each of 32 candidates, totalling 64 fits




Best Hyperparameters: {'activation': 'tanh', 'batch_size': 32, 'epochs': 20, 'hidden_layers': 2, 'learning_rate': 0.01, 'neurons': 64}


In [16]:
# Evaluation
from sklearn.metrics import classification_report
# Some classes receive no predictions (or have no test samples), which makes
# precision/recall undefined for them. zero_division=0 reports those scores as 0,
# the same value the default produces, without the repeated undefined-metric warnings.
print(classification_report(y_test, y_pred, zero_division=0))
print("Accuracy:", accuracy_score(y_test, y_pred))
# Weighted F1 averages per-class F1 by class support.
print("F1 Score:", f1_score(y_test, y_pred, average='weighted', zero_division=0))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.20      0.25      0.22         4
           2       0.00      0.00      0.00         6
           3       0.50      0.23      0.31        31
           4       0.54      0.35      0.43        94
           5       0.53      0.48      0.51       196
           6       0.51      0.56      0.54       353
           7       0.61      0.54      0.57       712
           8       0.78      0.84      0.81      1596
           9       0.58      0.39      0.47       485
          10       0.50      0.57      0.53       308
          11       0.49      0.67      0.57       175
          12       0.28      0.38      0.32        29
          13       0.14      0.11      0.12         9
          14       0.00      0.00      0.00         1
          15       0.00      0.00      0.00         0

    accuracy                           0.64      4000
   macro avg       0.35   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
