### Pytorch + Optuna

An open source hyperparameter optimization framework to automate hyperparameter search

In [19]:
!pip install optuna



In [20]:
!pip install category_encoders



In [21]:
# importing libraries
import numpy as np
import pandas as pd

import optuna
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
from torch.utils.data import DataLoader,Dataset
import torch.optim as optimizer

from sklearn.preprocessing import StandardScaler,OneHotEncoder,OrdinalEncoder,FunctionTransformer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split,cross_val_score,KFold
import category_encoders as ce
from sklearn.metrics import r2_score, mean_squared_error

### Intial setup

In [22]:
data=pd.read_csv('/content/modified.csv')

In [23]:
y=np.log1p(data['price'])
x=data.drop(columns=['price'],axis=1)

In [24]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)
x_train_final, x_val, y_train_final, y_val = train_test_split(
    x_train, y_train, test_size=0.2, random_state=42)
print(type(x_train_final),x_train_final.shape)
print(type(x_val),x_val.shape)
print(type(x_test),x_test.shape)
print(type(y_train_final),y_train_final.shape)
print(type(y_val),y_val.shape)
print(type(y_test),y_test.shape)

<class 'pandas.core.frame.DataFrame'> (2435, 12)
<class 'pandas.core.frame.DataFrame'> (609, 12)
<class 'pandas.core.frame.DataFrame'> (761, 12)
<class 'pandas.core.series.Series'> (2435,)
<class 'pandas.core.series.Series'> (609,)
<class 'pandas.core.series.Series'> (761,)


In [25]:
# transformation
def transformation():

  transformer=ColumnTransformer(transformers=[
    ('tnf1',OneHotEncoder(drop='first',sparse_output=False,),['agePossession','type']),
    ('tnf2',OrdinalEncoder(categories=[['no','yes']]),['servant room']),
    ('tnf3',OrdinalEncoder(categories=[['no','yes']]),['pooja room']),
     ('tnf4',OrdinalEncoder(categories=[['groundfloor','mid floor','high floor','low floor','hometop']]),['floor_type']),
    ('tnf5',OrdinalEncoder(categories=[['low','normal','semi_luxrious','luxrious']]),['luxury']),
    ('tnf6',OrdinalEncoder(categories=[['1','0','2','3','3+']]),['balcony']),
    ('tnf7',OrdinalEncoder(categories=[['unfurnished', 'semifurnished', 'furnished']]),['furnishing_type']),
    ('tnf8',ce.TargetEncoder(),['sector']),
    ('tnf9',StandardScaler(),['built_up_area'])
    ],remainder='passthrough')

  return transformer

In [26]:
transformer=transformation()
transformer.fit(x_train_final,y_train_final) # y_train required for target encoder
x_train_transformed=transformer.transform(x_train_final)
x_val_transform=transformer.transform(x_val)
x_test_transformed=transformer.transform(x_test)
print(type(x_train_transformed),x_train_transformed.shape)
print(type(x_val_transform),x_val_transform.shape)
print(type(x_test_transformed),x_test_transformed.shape)

<class 'numpy.ndarray'> (2435, 15)
<class 'numpy.ndarray'> (609, 15)
<class 'numpy.ndarray'> (761, 15)


In [27]:
# changing into numpy array
y_train_final=y_train_final.values
y_val=y_val.values
y_test=y_test.values

### ANN + optuna

In [28]:
# customdataset
class customdataset(Dataset):
  def __init__(self,x,y):
    self.x_tensor=torch.tensor(x,dtype=torch.float32)
    self.y_tensor=torch.tensor(y,dtype=torch.float32)

  def __len__(self):
    return self.x_tensor.shape[0]

  def __getitem__(self, index):

    return self.x_tensor[index],self.y_tensor[index]

In [29]:
# making dataset_object
train_data=customdataset(x_train_transformed,y_train_final)
valid_data=customdataset(x_val_transform,y_val)

In [30]:
#checking device avilability
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [31]:
'''
num_hidden_layers,
num_nueron_layer,
batchnorm,
dropout,
batch_size,
activation function'''

'\nnum_hidden_layers,\nnum_nueron_layer,\nbatchnorm,\ndropout,\nbatch_size,\nactivation function'

In [32]:
# defning custom Ann subclass
class customAnn(nn.Module):

  # intialization
  def __init__(
              self,input_dim,output_dim,
              num_hidden_layers,nueron_per_layer,
              activation,
              batch_bool,drop_bool,
              drop_rate
              ):
    super().__init__()

    layers=[]

    for i in range(num_hidden_layers):
      '''
      num_hidden_layers = 4 in ist trial
      so this loop as for i in range(4):
          i=0 # ist hidden layer
          i=1 # 2nd hidden layer
          i=2 # 3rd hidden layer
          i=3 # 4th hidden layer
      again in next trial optuna picks 5 as given in search_space

      '''
      out_feature=nueron_per_layer[i]
      '''
      (from objective function)
      nueron_per_layer = []
      for i in range(num_hidden_layers):
        if i == 0:
        nueron = 16  # Simulated trial.suggest_int('nueron_per_layer0', 10, 18, step=2)
        else:
          nueron = 12  # Simulated trial.suggest_int('nueron_per_layer{i}', 2, 18, step=2)

      nueron_per_layer.append(nueron)

      output:
      nueron_per_layer = [16, 12, 12, 12, 12]

      so we select as out_feature=nueron_per_layer[i] for each layer
      ist hidden layer = nueron_per_layer[i] i=0 : out_feature=16 and so_on____

      '''
      # defining architecture of a hidden layer
      layers.append(nn.Linear(out_features=out_feature,in_features=input_dim))

      if drop_bool:
        layers.append(nn.Dropout1d(drop_rate))
      if batch_bool:
        layers.append(nn.BatchNorm1d(out_feature,affine=True))

      if activation=='ReLU':
        layers.append(nn.ReLU())

      elif activation=='LeakyReLU':
        layers.append(nn.LeakyReLU())

      elif activation=='GELU':
        layers.append(nn.GELU())

      input_dim=out_feature

    # ouptut layer
    layers.append(nn.Linear(out_features=1,in_features=input_dim))

    self.model=nn.Sequential(*layers)


  def forward(self,x_train_transformed):
    x_tensor=torch.tensor(x_train_transformed,dtype=torch.float32)

    return self.model(x_tensor).squeeze(dim=1)

In [33]:
def init_weights(parameter):
      # Intialized weights and bias
      if isinstance(parameter,nn.Linear):
        init.kaiming_uniform_(parameter.weight, nonlinearity='relu')

        if parameter.bias is  not None:
          init.zeros_(parameter.bias)

      # intializing gamma and betta of batchnorm
      elif isinstance(parameter,nn.BatchNorm1d):
        init.ones_(parameter.weight)
        init.zeros_(parameter.bias)

In [34]:
# global_list for storing logs
train_r2_list = []
val_r2_list = []
trial_numbers = []

def objective(trial):
  #  defining hyperparameter and it's search space

  ''' Pick an integer between 4 and 8 (inclusive) for
   the num_hidden_layers parameter.'''
  num_hidden_layers=trial.suggest_int('num_hidden_layers',4,8)

  '''
  Defining no.of nuerons per layer and storing trial object into
  nueron_per_layer list
  '''
  nueron_per_layer=[]

  for i in range(num_hidden_layers):
    if i==0: # ist hidden layer input_dim =15
      nueron=trial.suggest_int(f'nueron_per_layer{i}',10,18,step=2)
    else:
      nueron=trial.suggest_int(f'nueron_per_layer{i}',2,18,step=2)

    nueron_per_layer.append(nueron)

  # batch_size
  batch_size=trial.suggest_categorical('batch_size',[64,128])

  # param init
  activation_name = trial.suggest_categorical('activation', ['ReLU', 'LeakyReLU','GELU'])
  batch_norm=trial.suggest_categorical('batch1dnorm',[True,False])
  drop_layer=trial.suggest_categorical('drop1d',[True,False])
  learning_rate=trial.suggest_float('learning_rate',1e-4,1e-1,log=True)
  keep_prob=trial.suggest_float('keep_prob',0.1,0.8)
  drop_rate=1-keep_prob
  epochs=trial.suggest_int('No.of.epochs',100,1000,step=100)

  # model init
  input_dim=15
  output_dim=1
  model=customAnn(
      input_dim=input_dim,
      output_dim=output_dim,
      num_hidden_layers=num_hidden_layers,
      nueron_per_layer=nueron_per_layer,
      activation=activation_name,
      batch_bool=batch_norm,
      drop_bool=drop_layer,
      drop_rate=drop_rate,


  ).apply(init_weights).to(device)




  # DataLLoader
  train_loader=DataLoader(train_data,batch_size=batch_size,shuffle=True,num_workers=2,pin_memory=True)
  valid_loader=DataLoader(valid_data,batch_size=batch_size,shuffle=False,num_workers=2,pin_memory=True)

  #optimizer
  optim=optimizer.Adam(model.parameters(),lr=learning_rate)
  loss_fxn=nn.MSELoss()

  # training

  model.train()

  for i in range(epochs):
    for batch_features,batch_labels in train_loader:
      batch_features,batch_labels=batch_features.to(device),batch_labels.to(device)

      # forward propogation
      output=model(batch_features)

      # loss
      loss=loss_fxn(output,batch_labels)

      # clear gadient after next backward pass
      optim.zero_grad()

      # calculating gradient
      loss.backward()

      # updation_step
      optim.step()

  # evaluation
  model.eval()

  batch_actual=[]
  batch_pred=[]
  train_preds=[]
  train_actual=[]
  with torch.inference_mode():
    for x_batch, y_batch in train_loader:
          x_batch, y_batch = x_batch.to(device), y_batch.to(device)
          preds = model(x_batch)
          train_preds.append(preds)
          train_actual.append(y_batch)

    for batch_features,batch_labels in valid_loader:
      batch_features,batch_labels=batch_features.to(device),batch_labels.to(device)
      valid_pred=model(batch_features)
      batch_pred.append(valid_pred) # store predictions
      batch_actual.append(batch_labels) # storing actual value

  # batch_actual and batch_loss are list stores predictions from each batch as separate tensors
  train_preds = torch.expm1(torch.cat(train_preds))
  train_actuals = torch.expm1(torch.cat(train_actual))
  val_preds = torch.expm1(torch.cat(batch_pred))
  val_actuals = torch.expm1(torch.cat(batch_actual))

  # Calculate R² scores
  train_r2 = r2_score(train_actuals.cpu().numpy(), train_preds.cpu().numpy())
  val_r2 = r2_score(val_actuals.cpu().numpy(), val_preds.cpu().numpy())
  # Log values
  train_r2_list.append(train_r2)
  val_r2_list.append(val_r2)
  trial_numbers.append(trial.number)

  print(f"[Trial {trial.number}] Train R2: {train_r2:.4f}, Valid R2: {val_r2:.4f}")
  accuracy_score=val_r2

  return accuracy_score

In [35]:
study = optuna.create_study(direction='maximize')

[I 2025-06-09 05:10:10,282] A new study created in memory with name: no-name-80efc111-08ef-4f69-abed-605861a6b554


In [None]:
import time
start_time=time.time()
study.optimize(objective, n_trials=100)
end_time=time.time()
print(f'Time taken in bayesian search {(end_time-start_time)/60:.4f} min')

  x_tensor=torch.tensor(x_train_transformed,dtype=torch.float32)
[I 2025-06-09 05:15:42,003] Trial 0 finished with value: -0.05816531181335449 and parameters: {'num_hidden_layers': 8, 'nueron_per_layer0': 10, 'nueron_per_layer1': 6, 'nueron_per_layer2': 4, 'nueron_per_layer3': 18, 'nueron_per_layer4': 2, 'nueron_per_layer5': 2, 'nueron_per_layer6': 18, 'nueron_per_layer7': 14, 'batch_size': 64, 'activation': 'GELU', 'batch1dnorm': True, 'drop1d': True, 'learning_rate': 0.00014164394886910145, 'keep_prob': 0.18921778736670122, 'No.of.epochs': 900}. Best is trial 0 with value: -0.05816531181335449.


[Trial 0] Train R2: -0.0514, Valid R2: -0.0582


  x_tensor=torch.tensor(x_train_transformed,dtype=torch.float32)
[I 2025-06-09 05:19:42,276] Trial 1 finished with value: 0.7016950845718384 and parameters: {'num_hidden_layers': 8, 'nueron_per_layer0': 16, 'nueron_per_layer1': 2, 'nueron_per_layer2': 16, 'nueron_per_layer3': 4, 'nueron_per_layer4': 6, 'nueron_per_layer5': 10, 'nueron_per_layer6': 6, 'nueron_per_layer7': 2, 'batch_size': 64, 'activation': 'GELU', 'batch1dnorm': True, 'drop1d': False, 'learning_rate': 0.0031887297548378836, 'keep_prob': 0.5973346604169879, 'No.of.epochs': 700}. Best is trial 1 with value: 0.7016950845718384.


[Trial 1] Train R2: 0.7708, Valid R2: 0.7017


  x_tensor=torch.tensor(x_train_transformed,dtype=torch.float32)
[I 2025-06-09 05:22:17,538] Trial 2 finished with value: 0.7954999804496765 and parameters: {'num_hidden_layers': 4, 'nueron_per_layer0': 10, 'nueron_per_layer1': 18, 'nueron_per_layer2': 16, 'nueron_per_layer3': 6, 'batch_size': 64, 'activation': 'GELU', 'batch1dnorm': False, 'drop1d': False, 'learning_rate': 0.012640063796796575, 'keep_prob': 0.5334588679211031, 'No.of.epochs': 600}. Best is trial 2 with value: 0.7954999804496765.


[Trial 2] Train R2: 0.8558, Valid R2: 0.7955


  x_tensor=torch.tensor(x_train_transformed,dtype=torch.float32)
[I 2025-06-09 05:23:14,122] Trial 3 finished with value: 0.07606077194213867 and parameters: {'num_hidden_layers': 6, 'nueron_per_layer0': 14, 'nueron_per_layer1': 16, 'nueron_per_layer2': 4, 'nueron_per_layer3': 18, 'nueron_per_layer4': 4, 'nueron_per_layer5': 8, 'batch_size': 64, 'activation': 'LeakyReLU', 'batch1dnorm': False, 'drop1d': False, 'learning_rate': 0.05474263688399951, 'keep_prob': 0.7035239993113495, 'No.of.epochs': 200}. Best is trial 2 with value: 0.7954999804496765.


[Trial 3] Train R2: -0.1103, Valid R2: 0.0761


  x_tensor=torch.tensor(x_train_transformed,dtype=torch.float32)
[I 2025-06-09 05:27:01,544] Trial 4 finished with value: 0.7476454973220825 and parameters: {'num_hidden_layers': 8, 'nueron_per_layer0': 12, 'nueron_per_layer1': 8, 'nueron_per_layer2': 12, 'nueron_per_layer3': 10, 'nueron_per_layer4': 2, 'nueron_per_layer5': 18, 'nueron_per_layer6': 4, 'nueron_per_layer7': 4, 'batch_size': 128, 'activation': 'ReLU', 'batch1dnorm': False, 'drop1d': False, 'learning_rate': 0.0019111338118524802, 'keep_prob': 0.3878554931077325, 'No.of.epochs': 1000}. Best is trial 2 with value: 0.7954999804496765.


[Trial 4] Train R2: 0.8517, Valid R2: 0.7476


  x_tensor=torch.tensor(x_train_transformed,dtype=torch.float32)
[I 2025-06-09 05:29:53,440] Trial 5 finished with value: -0.05295908451080322 and parameters: {'num_hidden_layers': 4, 'nueron_per_layer0': 14, 'nueron_per_layer1': 12, 'nueron_per_layer2': 16, 'nueron_per_layer3': 14, 'batch_size': 64, 'activation': 'ReLU', 'batch1dnorm': True, 'drop1d': True, 'learning_rate': 0.0021917615103394412, 'keep_prob': 0.10228610685816009, 'No.of.epochs': 500}. Best is trial 2 with value: 0.7954999804496765.


[Trial 5] Train R2: -0.0460, Valid R2: -0.0530


  x_tensor=torch.tensor(x_train_transformed,dtype=torch.float32)
