## **HyperParametr Tuning using Optuna**

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt

In [2]:
torch.manual_seed(42)

<torch._C.Generator at 0x71cc40759250>

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device} ")

Using device: cuda 


In [5]:
df = pd.read_csv("fashion-mnist_train.csv")
df.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,5,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,0,1,2,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [6]:
X = df.iloc[:, 1: ].values
y = df.iloc[:, 0].values

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [10]:
## Scaling the features
X_train = X_train/255.0
X_test = X_test/255.0

## **Our Custome Dataset and DataLoader Class**

In [7]:
class MyCustomDataset(Dataset):
    def __init__(self, features, lable):
        self.features = torch.tensor(features, dtype=torch.float32)
        self.lable = torch.tensor(lable, dtype=torch.long)

    def __len__(self):
        return len(self.features)

    def __getitem__(self, index):
        return self.features[index], self.lable[index]

In [11]:
train_dataset = MyCustomDataset(X_train, y_train)
test_dataset = MyCustomDataset(X_test, y_test)

## **DataLoader**

In [13]:
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4, pin_memory=True)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4, pin_memory=True)

## **Tuning On only Nuron and Layers**

In [14]:
class MyNN(nn.Module):
    def __init__(self, input_dim, output_dim, num_hidden_layers, neurons_per_layer):
        super().__init__()
        layers = []
        for i in range(num_hidden_layers):
            layers.append(nn.Linear(input_dim, neurons_per_layer))
            layers.append(nn.BatchNorm1d(neurons_per_layer))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(0.3))
            input_dim = neurons_per_layer

        layers.append(nn.Linear(neurons_per_layer, output_dim))

        self.model = nn.Sequential(*layers)

    def forward(self, x):
        return self.model(x)

In [15]:
def objective(trial):

    # next hyperparameter values from the search space
    num_hidden_layer = trial.suggest_int("neuron_per_layer", 1, 5)
    neurons_per_layer = trial.suggest_int("neurons_per_layer", 8, 128, step=8)

    # model init
    input_dim = 784
    output_dim = 10

    model = MyNN(input_dim, output_dim, num_hidden_layer, neurons_per_layer)
    model.to(device)

    # params init
    learning_rate = 0.01
    epochs = 50

    loss_functions = nn.CrossEntropyLoss()

    ## optimizer
    optimzer = optim.SGD(model.parameters(), lr=learning_rate, weight_decay=1e-4)
    # training loop

    for epoch in range(epochs):
        total_epoch_loss = 0
        for batch_features, batch_labels in train_dataloader:
            ## move dataset into the gpu
            batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)
            ## forward
            output = model(batch_features)
    
            ## loss
            loss = loss_functions(output, batch_labels)
    
            ## backward
            optimzer.zero_grad()
            loss.backward()
    
            ## update the weight
            optimzer.step()
    
        #     total_epoch_loss = total_epoch_loss + loss.item()
    
        # avg_loss = total_epoch_loss/len(train_dataloader)
        # print(f'Epoch: {epoch + 1} , Loss: {avg_loss}')
    # evaluations
    total = 0
    correct = 0
    
    with torch.no_grad():
    
      for batch_features, batch_labels in test_dataloader:
        batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)
    
        outputs = model(batch_features)
        # print(outputs[0])
    
        _, predicted = torch.max(outputs, 1)
        # print(predicted)
    
        total = total + batch_labels.shape[0]
        # print((predicted == batch_labels).sum().item())
    
        correct = correct + (predicted == batch_labels).sum().item()
        # break
    
    accuracy = correct/total

    return accuracy

In [18]:
import optuna
study = optuna.create_study(direction="maximize")

[I 2025-11-26 15:48:16,440] A new study created in memory with name: no-name-f9fd3f27-b813-4d93-a660-0ebaa42b56c5


In [19]:
study.optimize(objective, n_trials=10)

[I 2025-11-26 15:50:39,998] Trial 0 finished with value: 0.868 and parameters: {'neuron_per_layer': 2, 'neurons_per_layer': 120}. Best is trial 0 with value: 0.868.
[I 2025-11-26 15:52:39,401] Trial 1 finished with value: 0.8473333333333334 and parameters: {'neuron_per_layer': 4, 'neurons_per_layer': 80}. Best is trial 0 with value: 0.868.
[I 2025-11-26 15:54:33,414] Trial 2 finished with value: 0.8603333333333333 and parameters: {'neuron_per_layer': 4, 'neurons_per_layer': 128}. Best is trial 0 with value: 0.868.
[I 2025-11-26 15:56:26,658] Trial 3 finished with value: 0.85225 and parameters: {'neuron_per_layer': 4, 'neurons_per_layer': 88}. Best is trial 0 with value: 0.868.
[I 2025-11-26 15:58:40,654] Trial 4 finished with value: 0.7073333333333334 and parameters: {'neuron_per_layer': 5, 'neurons_per_layer': 24}. Best is trial 0 with value: 0.868.
[I 2025-11-26 16:00:59,428] Trial 5 finished with value: 0.8571666666666666 and parameters: {'neuron_per_layer': 3, 'neurons_per_layer': 