<a href="https://colab.research.google.com/github/Dhruvit-Agrawal/PyTorch/blob/main/8_HyperaprameterTuning_optuna.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt


In [2]:
torch.manual_seed(42)

<torch._C.Generator at 0x78e906f841b0>

In [3]:
device='cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [5]:
df=pd.read_csv('/content/drive/MyDrive/PyTorch/Fashion MNIST/fmnist_small.csv')
df.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,9,0,0,0,0,0,0,0,0,0,...,0,7,0,50,205,196,213,165,0,0
1,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,1,0,0,0,...,142,142,142,21,0,3,0,0,0,0
3,8,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,8,0,0,0,0,0,0,0,0,0,...,213,203,174,151,188,10,0,0,0,0


In [6]:
#train test split
x=df.drop('label',axis=1)
y=df['label']
x_train,x_test,y_train,y_test=train_test_split(x,y, test_size=0.2)

In [7]:
#scaling
scaler=StandardScaler()
x_train=scaler.fit_transform(x_train)
x_test=scaler.transform(x_test)

In [8]:
#custom dataset and dataLoader classes
from torch.utils.data import Dataset, DataLoader

In [14]:
#custom dataset
class FashionDataset(Dataset):
  def __init__(self,x,y):
    self.x=torch.tensor(x,dtype=torch.float32)   # features in float
    self.y=torch.tensor(y.values,dtype=torch.long)      # labels in 'long'

  def __len__(self):
    return len(self.x)

  def __getitem__(self,idx):
    return self.x[idx], self.y[idx]

In [16]:
#creating datasets
train_dataset=FashionDataset(x_train,y_train)
test_dataset=FashionDataset(x_test,y_test)

In [17]:
#creating data loaders
train_loader=DataLoader(train_dataset,batch_size=32,shuffle=True, pin_memory=True)
test_loader=DataLoader(test_dataset,batch_size=32,shuffle=False, pin_memory=True)

In [28]:
class MyNNModel(nn.Module):
  def __init__(self,input_dim ,output_dim, num_hidden_layer,num_neurons_per_layer):

    super().__init__()
    layer=[]

    for i in range(num_hidden_layer):
      #layer
      if i==0:
        layer.append(nn.Linear(input_dim,num_neurons_per_layer))
      else:
        layer.append(nn.Linear(num_neurons_per_layer,num_neurons_per_layer))

      #batch normalisation
      layer.append(nn.BatchNorm1d(num_neurons_per_layer))
      #activation function
      layer.append(nn.ReLU())
      #dropouts
      layer.append(nn.Dropout(0.2))

    #output layer
    layer.append(nn.Linear(num_neurons_per_layer,output_dim))
    layer.append(nn.LogSoftmax(dim=1))

    #model
    self.network=nn.Sequential(*layer)  # " * " used to unpack the layer list

  def forward(self,x):

    return self.network(x)


In [18]:
x_train.shape[1]

784

In [33]:
#oblective function
def objective(trial):

  #extract parameter values
  num_hidden_layer=trial.suggest_int("num_hidden_layer",1,5)  #range-->1,5
  num_neurons_per_layer=trial.suggest_categorical("num_neurons_per_layer",[8,16,32,64,72,128])
  epoch=trial.suggest_int("epochs", 75,100,step=5)
  learning_rate=trial.suggest_float("learning_rate", 0.001,0.1,log=True)
  batch_size=trial.suggest_categorical("batch_size",[32,64])
  weight_decay=trial.suggest_float("weight_decay", 0.00001,0.001,log=True)
  optimizer_name=trial.suggest_categorical("optimizer",["adam","sgd"])

  #dataset dataloader
  train_loader=DataLoader(train_dataset,batch_size=batch_size,shuffle=True, pin_memory=True)
  test_loader=DataLoader(test_dataset,batch_size=batch_size,shuffle=False, pin_memory=True)


  #model init
  input_size=x_train.shape[1]
  output_size=10
  model=MyNNModel(input_size ,output_size, num_hidden_layer,num_neurons_per_layer)
  model.to(device)   # shifted to gpu

  #loss function
  loss_fn=nn.CrossEntropyLoss()

  #optimiser selection

   # Instantiate the optimizer based on the optimizer_name
  if optimizer_name == "adam":
      optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
  else:  # optimizer_name == "sgd"
      optimizer = optim.SGD(model.parameters(), lr=learning_rate,, weight_decay=weight_decay)


  #training loop
  for e in range(epochs):
    for batch_x,batch_y in train_loader:

      #moving to gpu
      batch_x=batch_x.to(device)
      batch_y=batch_y.to(device)

      #forward pass
      y_hat=model(batch_x)

      #zeroing gradients
      optimizer.zero_grad()

      #loss
      loss=loss_fn(y_hat,batch_y)

      #backward pass
      loss.backward()

      #update weights
      optimizer.step()



  #evaluation
  model.eval()

  with torch.no_grad():
    accuracy=[]

    for batch_x,batch_y in test_loader:

      #moving to gpu
      batch_x=batch_x.to(device)
      batch_y=batch_y.to(device)

      #prediction
      y_pred_pvalues=model(batch_x)
      y_pred=torch.argmax(y_pred_pvalues,dim=1)

      #accuracy
      batch_accuracy= (y_pred==batch_y).float().mean()
      accuracy.append(batch_accuracy.item())

    avg_accuracy= np.mean(accuracy)

  #return accuracy
  return avg_accuracy

In [21]:
%%capture
!pip install optuna

In [22]:
import optuna

#creating study
study=optuna.create_study(direction="maximize", study_name="Tuning MyNNModel")    #since trial object is returning "Accuracy"
                                                  # if "loss" then use "minimise"


[I 2025-01-25 09:03:49,136] A new study created in memory with name: Tuning MyNNModel


In [34]:
#optimization
study.optimize(objective,n_trials=10)

[I 2025-01-25 09:14:03,813] Trial 5 finished with value: 0.8264802631578947 and parameters: {'num_hidden_layer': 4, 'num_neurons_per_layer': 32, 'optimizer': 'adam'}. Best is trial 5 with value: 0.8264802631578947.
[I 2025-01-25 09:14:56,099] Trial 6 finished with value: 0.6455592105263158 and parameters: {'num_hidden_layer': 4, 'num_neurons_per_layer': 8, 'optimizer': 'sgd'}. Best is trial 5 with value: 0.8264802631578947.
[I 2025-01-25 09:15:49,996] Trial 7 finished with value: 0.8289473684210527 and parameters: {'num_hidden_layer': 3, 'num_neurons_per_layer': 72, 'optimizer': 'adam'}. Best is trial 7 with value: 0.8289473684210527.
[I 2025-01-25 09:16:44,158] Trial 8 finished with value: 0.8388157894736842 and parameters: {'num_hidden_layer': 3, 'num_neurons_per_layer': 128, 'optimizer': 'adam'}. Best is trial 8 with value: 0.8388157894736842.
[I 2025-01-25 09:17:45,668] Trial 9 finished with value: 0.8256578947368421 and parameters: {'num_hidden_layer': 5, 'num_neurons_per_layer': 

In [1]:
study.best_params()

NameError: name 'study' is not defined

In [None]:
study.best_value()