In [1]:
import ipywidgets as widgets
# Builds a progress-bar widget and discards it: it is not the last expression
# of the cell, so nothing is rendered.
# NOTE(review): presumably a smoke test that ipywidgets is installed - confirm or remove.
widgets.IntProgress()
# Both imports bind the name `tqdm`; the second one wins, so the
# `tqdm.auto` variant is the one in effect after this cell.
from tqdm import tqdm
from tqdm.auto import tqdm

In [2]:
import torch

# Report the installed PyTorch build, whether CUDA is usable and, when it is,
# the name of the current CUDA device.
cuda_available = torch.cuda.is_available()
print(torch.__version__)
print(cuda_available)
if cuda_available:
    print(torch.cuda.get_device_name())

2.9.1+cu128
True
NVIDIA GeForce RTX 5060 Ti


In [3]:
# Pick the compute device: CUDA when it is usable, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

device(type='cuda')

In [4]:
import numpy as np 
import pandas as pd 
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import Dataset , DataLoader
import torch.nn as nn 
import torch.optim as optim
import matplotlib.pyplot as plt 

In [5]:
# Seed PyTorch's global random number generator; the returned Generator
# object is what the cell displays.
torch.manual_seed(42)

<torch._C.Generator at 0x1f841c441f0>

In [6]:
# Load the dataset from a CSV located in the notebook's working directory,
# then display its (rows, columns) shape.
df = pd.read_csv("fmnist_small.csv")
df.shape

(6000, 785)

In [7]:
# Preview the first rows: one `label` column followed by the pixel columns.
df.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,9,0,0,0,0,0,0,0,0,0,...,0,7,0,50,205,196,213,165,0,0
1,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,1,0,0,0,...,142,142,142,21,0,3,0,0,0,0
3,8,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,8,0,0,0,0,0,0,0,0,0,...,213,203,174,151,188,10,0,0,0,0


In [8]:
# Separate the class column (target) from the remaining pixel columns (features).
target_column = 'label'
y = df[target_column]
X = df.drop(columns = [target_column])

In [9]:
# Sanity check: the feature frame should have one column fewer than `df`.
X.shape

(6000, 784)

In [10]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical across runs.
X_train , X_test , y_train , y_test = train_test_split(
    X,
    y,
    test_size = 0.2,
    random_state = 42,
)

In [11]:
# Scale the pixel values by 1/255.
# NOTE(review): assumes raw pixel values are in 0..255 - confirm against the data source.
# NOTE: this cell rebinds its own inputs, so running it a second time divides
# the already-scaled values by 255 again; re-run the split cell first.
X_train = X_train / 255.0
X_test = X_test / 255.0

In [12]:
# Convert the pandas objects to NumPy arrays for the tensor conversion below.
X_train , X_test = X_train.to_numpy() , X_test.to_numpy()
y_train , y_test = y_train.to_numpy() , y_test.to_numpy()

In [13]:
class CustomDataset(Dataset):
    """In-memory dataset that pairs a feature tensor with a label tensor.

    Args:
        features: array-like of per-sample feature rows; stored as a float tensor.
        labels: array-like of per-sample class indices; stored as a long tensor.
    """

    def __init__(self , features , labels):
        # Convert once up front so item access is a plain tensor lookup.
        self.features = torch.tensor(features , dtype = torch.float32)
        self.labels = torch.tensor(labels , dtype = torch.int64)

    def __len__(self):
        """Return the number of samples (length of the feature tensor)."""
        sample_count = len(self.features)
        return sample_count

    def __getitem__(self , index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        sample = self.features[index]
        target = self.labels[index]
        return sample , target

In [14]:
# Wrap the train and test arrays in CustomDataset instances.
train_dataset = CustomDataset(X_train , y_train)
test_dataset = CustomDataset(X_test , y_test)

In [15]:
# Build the notebook-level data loaders.
# Pinned (page-locked) host memory only helps when batches are copied to a CUDA
# device, and PyTorch warns when pinning is requested without one. Since
# `device` may fall back to the CPU, pinning follows the selected device.
use_pinned_memory = device.type == 'cuda'

train_data_loader = DataLoader(
    dataset = train_dataset,
    batch_size = 32,
    shuffle = True,  # reshuffle the training samples every epoch
    pin_memory = use_pinned_memory
)

test_data_loader = DataLoader(
    dataset = test_dataset,
    batch_size = 32,
    pin_memory = use_pinned_memory
)

In [16]:
# Derive the network's input width and number of classes from the training data.
_ , input_dim = X_train.shape
output_dim = np.unique(y_train).size
output_dim

10

In [17]:
class MyNN(nn.Module):
    """Fully connected classifier with a configurable stack of hidden blocks.

    Each hidden block is Linear -> BatchNorm1d -> ReLU -> Dropout. The final
    layer is a plain Linear that emits one raw score (logit) per class.

    Args:
        input_dim: number of input features per sample.
        output_dim: number of output scores (classes).
        num_hidden_layers: number of hidden blocks; 0 yields a single Linear
            layer mapping ``input_dim`` directly to ``output_dim``.
        neurons_per_layer: width of every hidden block.
        dropout_rate: dropout probability applied after each hidden block.
    """

    def __init__(self , input_dim , output_dim , num_hidden_layers , neurons_per_layer , dropout_rate):
        super().__init__()

        layers = []  # layers of the network, in forward order
        in_features = input_dim  # width feeding the next Linear layer
        for _ in range(num_hidden_layers):
            layers.append(nn.Linear(in_features = in_features , out_features = neurons_per_layer))
            layers.append(nn.BatchNorm1d(neurons_per_layer))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout_rate))
            in_features = neurons_per_layer

        # Output layer. Sized from the running width rather than from
        # neurons_per_layer, so the network is still valid with zero hidden blocks.
        layers.append(nn.Linear(in_features , output_dim))

        self.model = nn.Sequential(*layers)  # *layers unpacks the list into positional modules

    def forward(self , x):
        """Return the class logits for a batch ``x`` of shape (batch, input_dim)."""
        return self.model(x)

In [18]:
# Objective function for Optuna.
# Every trial samples eight hyper-parameters: network depth and width, dropout
# rate, number of epochs, batch size, optimizer type, learning rate and weight decay.
def objective(trial):
    """Train one MyNN configuration and return its accuracy on ``test_dataset``.

    Relies on the notebook-level ``train_dataset``, ``test_dataset``,
    ``device`` and ``MyNN``.

    Args:
        trial: Optuna trial used to sample this run's hyper-parameters.

    Returns:
        float: fraction of ``test_dataset`` samples classified correctly.

    NOTE(review): the score is computed on ``test_dataset``, so the
    hyper-parameters are selected on the same data that would later be used to
    report final accuracy. Consider carving out a separate validation split.
    """
    # Sample the next hyper-parameter values from the search space.
    num_hidden_layers = trial.suggest_int("num_hidden_layers" , 1 , 5)
    # step = 8 restricts the width to 8, 16, 24, ..., 128.
    num_neurons_per_layer = trial.suggest_int("num_neurons_per_layer" , 8 , 128 , step = 8)
    epochs = trial.suggest_int('epochs' , 10 , 80 , step = 10)
    learning_rate = trial.suggest_float('learning_rate' , 1e-5 , 1e-1 , log = True)
    dropout_rate = trial.suggest_float('dropout_rate' , 0.1 , 0.5 , step = 0.1)
    batch_size = trial.suggest_categorical('batch_size' , [16 , 32 , 64 , 128])
    optimizer_name = trial.suggest_categorical('optimizer' , ['Adam' , 'SGD' , 'RMSprop'])
    weight_decay = trial.suggest_float('weight_decay' , 1e-5 , 1e-3 , log = True)

    # Pinned host memory only pays off for copies to a CUDA device (and PyTorch
    # warns when it is requested without one), so it follows the selected device.
    use_pinned_memory = device.type == 'cuda'

    # Data loaders are rebuilt per trial because the batch size is tuned.
    train_data_loader = DataLoader(
        dataset = train_dataset,
        batch_size = batch_size,
        shuffle = True,
        pin_memory = use_pinned_memory
    )
    test_data_loader = DataLoader(
        dataset = test_dataset,
        batch_size = batch_size,
        pin_memory = use_pinned_memory
    )

    # Derive the model dimensions from the data instead of hard-coding them.
    # Labels are class indices (as CrossEntropyLoss requires), so the number of
    # classes is the largest index plus one.
    input_dimension = train_dataset.features.shape[1]
    output_dimension = int(train_dataset.labels.max().item()) + 1
    model = MyNN(
        input_dimension,
        output_dimension,
        num_hidden_layers,
        num_neurons_per_layer,
        dropout_rate
    )
    model.to(device)

    # Loss and optimizer selection.
    criterion = nn.CrossEntropyLoss()
    optimizer_classes = {
        'Adam': optim.Adam,
        'SGD': optim.SGD,
        'RMSprop': optim.RMSprop,
    }
    optimizer = optimizer_classes[optimizer_name](
        model.parameters() , lr = learning_rate , weight_decay = weight_decay
    )

    # Training loop.
    for _ in range(epochs):
        model.train()
        for batch_features , batch_labels in train_data_loader:
            # Move the batch to the compute device; non_blocking lets copies
            # from pinned memory overlap with work already queued on the GPU.
            batch_features = batch_features.to(device , non_blocking = True)
            batch_labels = batch_labels.to(device , non_blocking = True)

            outputs = model(batch_features)
            loss = criterion(outputs , batch_labels)

            optimizer.zero_grad()  # clear gradients left over from the previous step
            loss.backward()
            optimizer.step()

    # Evaluation: eval() switches BatchNorm/Dropout to inference behaviour.
    model.eval()
    total = 0
    correct = 0

    with torch.no_grad():
        for batch_features , batch_labels in test_data_loader:
            batch_features = batch_features.to(device , non_blocking = True)
            batch_labels = batch_labels.to(device , non_blocking = True)

            outputs = model(batch_features)
            predicted = outputs.argmax(dim = 1)  # index of the highest logit per sample
            total += batch_labels.size(0)
            correct += (predicted == batch_labels).sum().item()

    # Value Optuna optimises.
    accuracy = correct / total
    return accuracy

In [19]:
# import optuna 
import optuna

In [20]:
# Create the Optuna study and run the search.
# The sampler is seeded so the sequence of suggested hyper-parameters is the
# same on every fresh run; an unseeded sampler explores different trials each time.
# NOTE: the resulting trial scores can still vary slightly between runs if the
# training itself is not fully deterministic (e.g. on GPU).
sampler = optuna.samplers.TPESampler(seed = 42)
study = optuna.create_study(direction = 'maximize' , sampler = sampler)
study.optimize(objective , n_trials = 10)

[I 2026-01-08 08:55:24,402] A new study created in memory with name: no-name-a1dd8ce9-286c-4029-b6d0-00f73c18728d
[I 2026-01-08 08:55:56,526] Trial 0 finished with value: 0.7233333333333334 and parameters: {'num_hidden_layers': 5, 'num_neurons_per_layer': 48, 'epochs': 60, 'learning_rate': 0.00033448667386069185, 'dropout_rate': 0.2, 'batch_size': 32, 'optimizer': 'SGD', 'weight_decay': 6.897843641716928e-05}. Best is trial 0 with value: 0.7233333333333334.
[I 2026-01-08 08:56:00,989] Trial 1 finished with value: 0.5066666666666667 and parameters: {'num_hidden_layers': 1, 'num_neurons_per_layer': 112, 'epochs': 10, 'learning_rate': 0.05628220290384867, 'dropout_rate': 0.2, 'batch_size': 16, 'optimizer': 'RMSprop', 'weight_decay': 0.00026827346351222976}. Best is trial 0 with value: 0.7233333333333334.
[I 2026-01-08 08:56:16,961] Trial 2 finished with value: 0.825 and parameters: {'num_hidden_layers': 5, 'num_neurons_per_layer': 96, 'epochs': 50, 'learning_rate': 0.0003820452502546164, 

In [21]:
# Best objective value (accuracy returned by `objective`) found across all trials.
study.best_value

0.8333333333333334

In [22]:
# Hyper-parameter values of the trial that achieved the best objective value.
study.best_params

{'num_hidden_layers': 5,
 'num_neurons_per_layer': 88,
 'epochs': 60,
 'learning_rate': 0.00024061499890310414,
 'dropout_rate': 0.4,
 'batch_size': 16,
 'optimizer': 'RMSprop',
 'weight_decay': 0.00016316223648709323}