In [113]:
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import optuna

In [114]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f'Using device {device}')

Using device cuda


In [115]:
# Load the dataset from the CSV in the working directory and preview the
# first rows (the bare last expression is what the cell displays).
df = pd.read_csv('fmnist_small.csv')
df.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,9,0,0,0,0,0,0,0,0,0,...,0,7,0,50,205,196,213,165,0,0
1,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,1,0,0,0,...,142,142,142,21,0,3,0,0,0,0
3,8,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,8,0,0,0,0,0,0,0,0,0,...,213,203,174,151,188,10,0,0,0,0


In [116]:
class CustomDataset(Dataset):
    """In-memory dataset that pairs a feature tensor with a label tensor.

    Both inputs are converted to tensors once, at construction time:
    ``x`` becomes ``float32`` and ``y`` becomes ``long``.
    """

    def __init__(self, x, y):
        """Store ``x`` as float32 features and ``y`` as long labels."""
        self.features = torch.tensor(data=x, dtype=torch.float32)
        self.labels = torch.tensor(data=y, dtype=torch.long)

    def __len__(self):
        """Return the number of samples (size of the first feature axis)."""
        return self.features.size(0)

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        sample = self.features[index]
        target = self.labels[index]
        return sample, target

In [117]:
# Split the frame positionally: every column after the first becomes the
# feature matrix, and the first column becomes the target vector.
x = df.iloc[:, 1:].values
y = df.iloc[:, 0].values

In [118]:
# Hold out 20% of the rows; the fixed random_state keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [119]:
# Divide both splits by 255.0 to rescale the pixel columns.
# NOTE: this rebinds x_train / x_test from themselves, so re-running the cell
# without re-running the split divides the values a second time.
x_train, x_test = x_train / 255.0, x_test / 255.0

In [120]:

# Wrap each split in a CustomDataset so it can be fed to a DataLoader.
train_data = CustomDataset(x=x_train, y=y_train)
test_data = CustomDataset(x=x_test, y=y_test)

In [121]:
class MyNN(nn.Module):
    """Fully connected classifier with a configurable stack of hidden blocks.

    Every hidden block is ``Linear -> BatchNorm1d -> ReLU -> Dropout``. The
    stack is followed by one final ``Linear`` layer producing
    ``output_layer`` values per sample.

    Args:
        input_layer: Number of input features.
        output_layer: Number of output units.
        hidden_layers: Number of hidden blocks to build. With ``0`` the
            network is a single ``Linear(input_layer, output_layer)``.
        neurons_per_layer_list: Width of each hidden block; entries
            ``0 .. hidden_layers - 1`` are read.
        dropout_rate: Probability passed to every ``nn.Dropout``.

    Raises:
        IndexError: If ``neurons_per_layer_list`` has fewer than
            ``hidden_layers`` entries.
    """

    def __init__(self,input_layer,output_layer,hidden_layers,neurons_per_layer_list,dropout_rate):
        super().__init__()
        layers=[]
        # Width feeding the next Linear layer. Tracking it explicitly (rather
        # than reusing the loop index after the loop) keeps the output layer
        # correct even when no hidden block is built.
        in_features=input_layer
        for i in range(hidden_layers):
            width=neurons_per_layer_list[i]
            layers.append(nn.Linear(in_features,width))
            layers.append(nn.BatchNorm1d(width))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout_rate))
            in_features=width
        layers.append(nn.Linear(in_features,output_layer))
        self.model=nn.Sequential(*layers)

    def forward(self,x):
        """Apply the sequential stack to ``x`` and return its output."""
        return self.model(x)

In [122]:
def objective(trial):
    """Optuna objective: train one sampled MLP configuration and score it.

    Samples the number of hidden layers, the width of each layer, the epoch
    count, learning rate, dropout rate, batch size, optimizer and weight
    decay from ``trial``; trains a ``MyNN`` on the notebook-level
    ``train_data``; and returns the accuracy measured on ``test_data``.
    Relies on the notebook-level names ``train_data``, ``test_data``,
    ``device`` and ``MyNN``.

    Args:
        trial: Optuna trial used to draw the hyperparameters.

    Returns:
        float: ``correct / total`` over all samples in ``test_data``.

    NOTE(review): the value being maximised is computed on ``test_data``, so
    the best score reported by the study is selected on that same split. A
    separate validation split would give an unbiased final estimate.
    """
    # ---- search space: next hyperparameter values for this trial ----------
    hidden_layers=trial.suggest_int("hidden_layers",1,5)
    neurons_per_layer_list=[
        trial.suggest_int(f"neural_layer_{i}",32,128,step=8)
        for i in range(hidden_layers)
    ]
    epochs=trial.suggest_int("epochs",10,50,step=10)
    learning_rate=trial.suggest_float("learning_rate",1e-5,1e-1,log=True)
    dropout_rate=trial.suggest_float("dropout_rate",0.1,0.5,step=0.1)
    batch_size=trial.suggest_categorical("batch_size",[16,32,64,128])
    optimizer_name=trial.suggest_categorical("optimizer",['Adam','SGD','RMSprop'])
    # The key is spelled "weigh_decay" (sic); it is kept unchanged because it
    # is the name under which the parameter appears in the study results.
    weight_decay=trial.suggest_float("weigh_decay",1e-5,1e-3,log=True)

    # ---- data loaders ------------------------------------------------------
    # Pinned host memory only speeds up host-to-GPU copies; skip it on CPU.
    pin_memory=device.type=='cuda'
    train_dataloader=DataLoader(train_data,batch_size=batch_size,pin_memory=pin_memory,shuffle=True)
    test_dataloader=DataLoader(test_data,batch_size=batch_size,pin_memory=pin_memory,shuffle=False)

    # ---- model, loss, optimizer -------------------------------------------
    input_layer=784
    output_layer=10

    model=MyNN(input_layer,output_layer,hidden_layers,neurons_per_layer_list,dropout_rate)
    model.to(device)

    criterion=nn.CrossEntropyLoss()

    if optimizer_name == 'Adam':
        optimizer=optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    elif optimizer_name == 'SGD':
        optimizer=optim.SGD(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    else:
        optimizer=optim.RMSprop(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # ---- training loop -----------------------------------------------------
    # Make the mode explicit so dropout / batch-norm are in training behaviour.
    model.train()
    for epoch in range(epochs):
        for batch_features,batch_labels in train_dataloader:
            batch_features,batch_labels=batch_features.to(device),batch_labels.to(device)

            optimizer.zero_grad()
            # Call the module itself (not .forward) so registered hooks run.
            outputs=model(batch_features)
            loss=criterion(outputs,batch_labels)
            loss.backward()
            optimizer.step()

    # ---- evaluation --------------------------------------------------------
    model.eval()
    total=0
    correct=0

    with torch.no_grad():
        for batch_features,batch_labels in test_dataloader:
            batch_features,batch_labels=batch_features.to(device),batch_labels.to(device)
            outputs=model(batch_features)
            # Index of the largest output along dim 1 is the predicted class.
            _,predicted=torch.max(outputs,1)
            total=total+batch_labels.shape[0]
            correct=correct+(predicted==batch_labels).sum().item()

    accuracy=correct/total
    return accuracy



In [123]:
# Create the study; the objective's return value is to be maximised.
study = optuna.create_study(direction='maximize')

[I 2025-03-20 17:45:44,963] A new study created in memory with name: no-name-1c46f228-298d-42a2-aa95-957ac80f9db5


In [124]:
# Run the search: ten trials of `objective`.
study.optimize(objective, n_trials=10)

[I 2025-03-20 17:45:45,776] Trial 0 finished with value: 0.8075 and parameters: {'hidden_layers': 3, 'neural_layer_0': 104, 'neural_layer_1': 32, 'neural_layer_2': 72, 'epochs': 10, 'learning_rate': 0.005405681788968536, 'dropout_rate': 0.1, 'batch_size': 128, 'optimizer': 'Adam', 'weigh_decay': 1.366353290776703e-05}. Best is trial 0 with value: 0.8075.
[I 2025-03-20 17:45:54,261] Trial 1 finished with value: 0.815 and parameters: {'hidden_layers': 3, 'neural_layer_0': 48, 'neural_layer_1': 40, 'neural_layer_2': 112, 'epochs': 40, 'learning_rate': 0.07492014681503498, 'dropout_rate': 0.2, 'batch_size': 32, 'optimizer': 'SGD', 'weigh_decay': 3.603833707695593e-05}. Best is trial 1 with value: 0.815.
[I 2025-03-20 17:45:55,371] Trial 2 finished with value: 0.8041666666666667 and parameters: {'hidden_layers': 2, 'neural_layer_0': 56, 'neural_layer_1': 120, 'epochs': 10, 'learning_rate': 0.009684405652817954, 'dropout_rate': 0.5, 'batch_size': 64, 'optimizer': 'Adam', 'weigh_decay': 8.805

In [125]:
# Report the best trial's parameters and its objective value.
print(
    f"The best trial is : {study.best_params}"
)
print(
    f"The best accuracy value is : {study.best_value}"
)


The best trial is : {'hidden_layers': 5, 'neural_layer_0': 96, 'neural_layer_1': 104, 'neural_layer_2': 104, 'neural_layer_3': 72, 'neural_layer_4': 128, 'epochs': 30, 'learning_rate': 0.007282340573085661, 'dropout_rate': 0.30000000000000004, 'batch_size': 16, 'optimizer': 'SGD', 'weigh_decay': 1.0701208471293877e-05}
The best accuracy value is : 0.8258333333333333


In [126]:
# Show the best trial's parameters, then list the sampled width of each
# hidden layer, one line per layer.
best_trial = study.best_trial
print(f"The best trial is : {best_trial.params}")

hidden_layers = best_trial.params['hidden_layers']
print("Neuron counts per layer:")
for i in range(hidden_layers):
    layer_name = f"neural_layer_{i}"
    # Skip layers for which no width was recorded in this trial.
    if layer_name not in best_trial.params:
        continue
    print(f"Layer {i+1}: {best_trial.params[layer_name]} neurons")

The best trial is : {'hidden_layers': 5, 'neural_layer_0': 96, 'neural_layer_1': 104, 'neural_layer_2': 104, 'neural_layer_3': 72, 'neural_layer_4': 128, 'epochs': 30, 'learning_rate': 0.007282340573085661, 'dropout_rate': 0.30000000000000004, 'batch_size': 16, 'optimizer': 'SGD', 'weigh_decay': 1.0701208471293877e-05}
Neuron counts per layer:
Layer 1: 96 neurons
Layer 2: 104 neurons
Layer 3: 104 neurons
Layer 4: 72 neurons
Layer 5: 128 neurons
