<a href="https://colab.research.google.com/github/SangamSilwal/DeepLearning_DSeries/blob/main/T7_hyperparameter_Tuning_optuna.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import Dataset,DataLoader
import torch.nn as nn
import torch.optim as optim

In [2]:
# Pick the compute device: use CUDA when PyTorch reports it, otherwise the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

cuda


In [4]:
# Read the CSV: column 0 is used as the label, every remaining column as a feature.
df = pd.read_csv('/content/fmnist_small.csv')
y = df.iloc[:, 0].values
X = df.iloc[:, 1:].values

# Hold out 20% of the rows for testing (fixed seed so the split is repeatable).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scale features by 1/255 (presumably 8-bit pixel intensities -> [0, 1]; confirm against the data).
X_train, X_test = X_train / 255.0, X_test / 255.0

In [7]:
# Creating a custom Dataset class (used together with DataLoader) for batched operation.
# The model must receive its input in batches because it applies the BatchNormalization technique.

class CustomDataset(Dataset):
  """In-memory dataset that stores features as float32 and labels as int64 tensors."""

  def __init__(self,features,labels):
    """Convert ``features`` and ``labels`` to tensors once, at construction time."""
    self.features = torch.tensor(data=features,dtype=torch.float32)
    self.labels = torch.tensor(data=labels,dtype=torch.long)

  def __len__(self):
    """Return the number of samples, i.e. the length of the feature tensor."""
    n_samples = len(self.features)
    return n_samples

  def __getitem__(self, index):
    """Return the ``(features, label)`` pair stored at position ``index``."""
    sample = self.features[index]
    target = self.labels[index]
    return sample, target

# Wrap each split in a CustomDataset so a DataLoader can batch it later.
train_dataset, test_dataset = (
    CustomDataset(features=X_train, labels=y_train),
    CustomDataset(features=X_test, labels=y_test),
)

In [8]:
# ANN architecture whose depth, width and dropout are parameters, so Optuna can tune them as hyperparameters
class ANN(nn.Module):
  """Fully connected classifier with configurable depth, width and dropout.

  Every hidden layer is the stack Linear -> BatchNorm1d -> ReLU -> Dropout.
  A final Linear layer maps to ``output_dim`` raw scores (no softmax is applied here).
  """

  def __init__(self,input_dim,output_dim,num_hidden_layers,neurons_per_layer,dropout_rate):
    """Build the layer stack.

    Args:
      input_dim: number of input features per sample.
      output_dim: number of output scores per sample.
      num_hidden_layers: how many hidden stacks to create (0 yields a single Linear layer).
      neurons_per_layer: width of every hidden layer.
      dropout_rate: probability passed to each ``nn.Dropout``.
    """
    super().__init__()
    layers = []

    # Width of the tensor entering the next Linear layer; starts at the input size.
    in_features = input_dim
    for _ in range(num_hidden_layers):
      layers.append(nn.Linear(in_features,neurons_per_layer))
      layers.append(nn.BatchNorm1d(neurons_per_layer))
      layers.append(nn.ReLU())
      layers.append(nn.Dropout(dropout_rate))
      in_features = neurons_per_layer

    # Use the running width (not neurons_per_layer) so the output layer is still
    # correctly sized when there are no hidden layers.
    layers.append(nn.Linear(in_features,output_dim))
    self.model = nn.Sequential(*layers)

  def forward(self,x):
    """Run ``x`` through the sequential stack and return the raw output scores."""
    return self.model(x)

In [9]:
from torch.optim import optimizer
#Objective Function for hyperparameter Tuning
def objective(trial):
  """Train one ANN with trial-suggested hyperparameters and return its accuracy.

  Args:
    trial: Optuna trial used to sample the hyperparameters below.

  Returns:
    Fraction of samples in ``test_dataset`` classified correctly (float in [0, 1]).

  NOTE(review): the score is computed on ``test_dataset``, so hyperparameters are
  selected against the same data that serves as the test set. Consider scoring on
  a separate validation split instead.
  """
  # Search space. The keys "neuron_per_layer" and "dropour_rate" are kept exactly
  # as written because they become the keys of study.best_params.
  num_hidden_layers = trial.suggest_int("num_hidden_layers",1,5)
  neurons_per_layer = trial.suggest_int("neuron_per_layer",8,128,step=8)
  epochs = trial.suggest_int("epochs",10,50,step=10)
  learning_rate = trial.suggest_float("learning_rate",1e-5,1e-1,log=True)
  dropout_rate = trial.suggest_float("dropour_rate",0.1,0.5,step=0.1)
  batch_size = trial.suggest_categorical("batch_size",[16,32,64])
  optimizer_name = trial.suggest_categorical("optimizer",['Adam','SGD','RMSprop'])
  weight_decay = trial.suggest_float("weight_decay",1e-5,1e-3,log=True)

  # Data loaders.
  # BatchNorm1d raises in training mode when a batch holds a single sample, so the
  # trailing batch is dropped only when it would contain exactly one row.
  drop_last = len(train_dataset) % batch_size == 1
  # Pinned host memory only benefits host-to-GPU copies; skip it when running on CPU.
  pin_memory = device.type == "cuda"
  train_loader = DataLoader(train_dataset,batch_size=batch_size,shuffle=True,
                            pin_memory=pin_memory,drop_last=drop_last)
  test_loader = DataLoader(test_dataset,batch_size=batch_size,shuffle=False,
                           pin_memory=pin_memory)

  # Model initialization
  input_dim = 784
  output_dim = 10

  model = ANN(input_dim,output_dim,num_hidden_layers,neurons_per_layer,dropout_rate)
  model.to(device)

  # Loss and optimizer selection; any name other than Adam/SGD falls back to RMSprop.
  criterion = nn.CrossEntropyLoss()
  optimizer_cls = {"Adam": optim.Adam, "SGD": optim.SGD}.get(optimizer_name, optim.RMSprop)
  optimizer = optimizer_cls(model.parameters(),lr=learning_rate,weight_decay=weight_decay)

  # Training loop
  model.train()  # explicit: dropout active, batch-norm uses batch statistics
  for epoch in range(epochs):
    for batch_features,batch_labels in train_loader:
      batch_features,batch_labels = batch_features.to(device),batch_labels.to(device)
      outputs = model(batch_features)
      loss = criterion(outputs,batch_labels)
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

  # Model evaluation: switch to eval mode so dropout is disabled and batch-norm
  # uses its running statistics; gradients are not needed here.
  model.eval()
  total = 0
  correct = 0
  with torch.no_grad():
    for batch_features, batch_labels in test_loader:
      batch_features,batch_labels = batch_features.to(device),batch_labels.to(device)
      outputs = model(batch_features)
      # Index of the largest score along dim 1 is the predicted class.
      _,predicted = torch.max(outputs,1)
      total = total + batch_labels.shape[0]
      correct = correct + (predicted == batch_labels).sum().item()

  accuracy = correct/total
  return accuracy




In [10]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.6.0-py3-none-any.whl.metadata (17 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.10.1-py3-none-any.whl.metadata (11 kB)
Downloading optuna-4.6.0-py3-none-any.whl (404 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m404.7/404.7 kB[0m [31m12.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.10.1-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, optuna
Successfully installed colorlog-6.10.1 optuna-4.6.0


In [11]:
import optuna
# Create an in-memory study that maximises the value returned by `objective`,
# then run 7 trials of it.
study = optuna.create_study(direction='maximize')
study.optimize(func=objective, n_trials=7)

[I 2025-12-01 08:14:23,096] A new study created in memory with name: no-name-f9f671f9-37ca-4427-a8ad-f7e9e779944f
[I 2025-12-01 08:14:32,485] Trial 0 finished with value: 0.69 and parameters: {'num_hidden_layers': 2, 'neuron_per_layer': 112, 'epochs': 10, 'learning_rate': 0.0003617367193490889, 'dropour_rate': 0.2, 'batch_size': 32, 'optimizer': 'SGD', 'weight_decay': 5.1415990645892665e-05}. Best is trial 0 with value: 0.69.
[I 2025-12-01 08:14:36,959] Trial 1 finished with value: 0.8091666666666667 and parameters: {'num_hidden_layers': 1, 'neuron_per_layer': 16, 'epochs': 30, 'learning_rate': 0.0001098624247693269, 'dropour_rate': 0.2, 'batch_size': 64, 'optimizer': 'RMSprop', 'weight_decay': 3.9566676737632546e-05}. Best is trial 1 with value: 0.8091666666666667.
[I 2025-12-01 08:14:56,068] Trial 2 finished with value: 0.8275 and parameters: {'num_hidden_layers': 5, 'neuron_per_layer': 96, 'epochs': 20, 'learning_rate': 0.05150433106691456, 'dropour_rate': 0.1, 'batch_size': 16, 'op

In [12]:
# Best objective value across all trials (the objective returns test-set accuracy).
study.best_value

0.83

In [13]:
# Hyperparameter values of the trial that produced the best objective value.
study.best_params

{'num_hidden_layers': 4,
 'neuron_per_layer': 64,
 'epochs': 40,
 'learning_rate': 0.0017794322602305555,
 'dropour_rate': 0.4,
 'batch_size': 16,
 'optimizer': 'Adam',
 'weight_decay': 1.5392180358834937e-05}