In [None]:
! pip install optuna

Collecting optuna
  Downloading optuna-4.2.1-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.15.2-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.2.1-py3-none-any.whl (383 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m383.6/383.6 kB[0m [31m9.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.15.2-py3-none-any.whl (231 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m231.9/231.9 kB[0m [31m17.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, alembic, optuna
Successfully installed alembic-1.15.2 colorlog-6.9.0 optuna-4.2.1


In [None]:
from sklearn.datasets import fetch_openml

# Fetch Fashion-MNIST from OpenML as plain arrays (as_frame=False).
fashion_mnist = fetch_openml(name='Fashion-MNIST', version=1, as_frame=False)

# Features stay as delivered; the targets are cast to integers.
X = fashion_mnist.data
y = fashion_mnist.target.astype(int)

print(f"Data shape: {X.shape}, Labels shape: {y.shape}")


Data shape: (70000, 784), Labels shape: (70000,)


In [None]:
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import torch.nn as nn
from sklearn.model_selection import train_test_split

In [None]:
# Seed PyTorch's global random number generator; the call returns the
# Generator object, which is what the cell displays.
torch.manual_seed(123)

<torch._C.Generator at 0x7e8a88196430>

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

cuda


In [None]:
# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split identical between runs.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
class dataset(Dataset):
  """Map-style dataset that pairs feature rows with integer class labels.

  Both inputs are converted to tensors once, at construction time, so
  indexing afterwards only slices the stored tensors.
  """

  def __init__(self, x, y):
    """Store the features as float32 and the labels as int64 tensors.

    Args:
      x: array-like of features, one sample per row.
      y: array-like of integer labels, one per sample.

    Raises:
      ValueError: if ``x`` and ``y`` do not hold the same number of samples.
    """
    self.x = torch.tensor(x, dtype=torch.float32)
    self.y = torch.tensor(y, dtype=torch.long)

    # Fail early: __len__ is driven by x alone, so a shorter y would only
    # surface later as an IndexError and a longer y would be silently ignored.
    if len(self.x) != len(self.y):
      raise ValueError(
          f"x and y must have the same number of samples, "
          f"got {len(self.x)} and {len(self.y)}"
      )

  def __len__(self):
    """Return the number of samples."""
    return len(self.x)

  def __getitem__(self, index):
    """Return the ``(features, label)`` pair stored at ``index``."""
    return self.x[index], self.y[index]

In [None]:
# Sanity check: (number of training samples, number of features per sample).
x_train.shape

(56000, 784)

In [None]:
# Wrap each split in the tensor-backed dataset class.
train_dataset, test_dataset = dataset(x_train, y_train), dataset(x_test, y_test)

In [None]:
# Training batches are reshuffled every epoch; pin_memory=True asks the loader
# to return batches in page-locked host memory for faster host-to-GPU copies.
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, pin_memory=True)
# Accuracy does not depend on sample order, so evaluation batches are not shuffled.
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False, pin_memory=True)

In [None]:
# New model definition

class Model(nn.Module):
  """Fully connected classifier with a configurable stack of hidden blocks.

  Each hidden block is ``Linear -> BatchNorm1d -> ReLU -> Dropout``. A final
  ``Linear`` layer maps to ``output_dim`` raw scores (no activation is applied
  after it).
  """

  def __init__(self,input_dim,output_dim,num_hidden_layer,neuron_per_layer,dropout=0.2):
    """Build the layer stack.

    Args:
      input_dim: number of features per input sample.
      output_dim: number of output scores per sample.
      num_hidden_layer: how many hidden blocks to stack (0 gives a single
        linear layer from ``input_dim`` to ``output_dim``).
      neuron_per_layer: width of every hidden block.
      dropout: drop probability used in every hidden block (defaults to 0.2).
    """
    super().__init__()

    layers = []
    in_features = input_dim

    for _ in range(num_hidden_layer):
      layers.append(nn.Linear(in_features,neuron_per_layer))
      layers.append(nn.BatchNorm1d(neuron_per_layer))
      layers.append(nn.ReLU())
      layers.append(nn.Dropout(p=dropout))
      in_features = neuron_per_layer

    # Size the output layer from the running width rather than from
    # neuron_per_layer, so the network is also valid with zero hidden blocks.
    layers.append(nn.Linear(in_features,output_dim))

    self.model = nn.Sequential(*layers)

  def forward(self,x):
    """Run ``x`` through the layer stack and return the output scores."""
    return self.model(x)

In [None]:
# Objective function for Optuna

def objective(trial):
  """Optuna objective: train an MLP with a sampled architecture and score it.

  Samples the number of hidden layers and the width of each layer, trains a
  fresh ``Model`` for 10 epochs with Adam (lr=0.01) on the module-level
  ``train_loader``, and measures accuracy on the module-level ``test_loader``.

  Args:
    trial: the Optuna trial used to sample ``num_hidden_layer`` and
      ``neuron_per_layer``.

  Returns:
    The fraction of samples in ``test_loader`` that were classified correctly.

  Note:
    The returned score comes from the test split, so the study's best value
    is selected on the same data it is reported on and is an optimistic
    estimate. Consider scoring on a separate validation split instead.
  """
  num_hidden_layer = trial.suggest_int('num_hidden_layer',1,5)
  neuron_per_layer = trial.suggest_int('neuron_per_layer',8,128,step=8)

  input_dim = 784
  output_dim = 10

  model = Model(input_dim,output_dim,num_hidden_layer,neuron_per_layer)
  model.to(device)

  lr = 0.01
  epochs = 10

  lossfn = nn.CrossEntropyLoss()
  optimizer = optim.Adam(model.parameters(), lr=lr)

  # Make the training mode explicit (enables dropout and batch-norm updates).
  model.train()

  for _ in range(epochs):

    for batch_features, batch_labels in train_loader:

      # The loaders pin host memory; non_blocking=True lets the copy to the
      # GPU be asynchronous. On CPU this is a no-op.
      batch_features = batch_features.to(device, non_blocking=True)
      batch_labels = batch_labels.to(device, non_blocking=True)

      optimizer.zero_grad()

      loss = lossfn(model(batch_features), batch_labels)
      loss.backward()

      optimizer.step()

  model.eval()

  total = 0
  correct = 0

  with torch.no_grad():

    for batch_features, batch_labels in test_loader:

      batch_features = batch_features.to(device, non_blocking=True)
      batch_labels = batch_labels.to(device, non_blocking=True)

      # Predicted class = index of the largest score in each row.
      predicted = model(batch_features).argmax(dim=1)

      total += batch_labels.size(0)
      correct += (predicted == batch_labels).sum().item()

  return correct / total

In [None]:
import optuna

# Seed the sampler so the sequence of suggested hyperparameters is repeatable.
# TPESampler is Optuna's default sampler, so the search algorithm is unchanged.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=10)

[I 2025-04-06 15:45:28,790] A new study created in memory with name: no-name-8d198f52-76a8-4ddc-851e-73607ebbe51a
[I 2025-04-06 15:45:50,606] Trial 0 finished with value: 0.8846428571428572 and parameters: {'num_hidden_layer': 3, 'neuron_per_layer': 64}. Best is trial 0 with value: 0.8846428571428572.
[I 2025-04-06 15:46:07,146] Trial 1 finished with value: 0.8797142857142857 and parameters: {'num_hidden_layer': 4, 'neuron_per_layer': 96}. Best is trial 0 with value: 0.8846428571428572.
[I 2025-04-06 15:46:18,456] Trial 2 finished with value: 0.8861428571428571 and parameters: {'num_hidden_layer': 1, 'neuron_per_layer': 128}. Best is trial 2 with value: 0.8861428571428571.
[I 2025-04-06 15:46:33,534] Trial 3 finished with value: 0.8749285714285714 and parameters: {'num_hidden_layer': 3, 'neuron_per_layer': 56}. Best is trial 2 with value: 0.8861428571428571.
[I 2025-04-06 15:46:46,285] Trial 4 finished with value: 0.8718571428571429 and parameters: {'num_hidden_layer': 2, 'neuron_per_l

In [None]:
# Highest accuracy returned by `objective` across the trials of `study`.
study.best_value

0.8878571428571429

In [None]:
# Hyperparameters of the trial that produced the best value in `study`.
study.best_params

{'num_hidden_layer': 5, 'neuron_per_layer': 120}

In [None]:
# New model definition (the dropout rate is now a constructor argument)

class Model2(nn.Module):
  """Fully connected classifier whose dropout rate is chosen by the caller.

  Each hidden block is ``Linear -> BatchNorm1d -> ReLU -> Dropout``. A final
  ``Linear`` layer maps to ``output_dim`` raw scores (no activation is applied
  after it).
  """

  def __init__(self,input_dim,output_dim,num_hidden_layer,neuron_per_layer,dropout):
    """Build the layer stack.

    Args:
      input_dim: number of features per input sample.
      output_dim: number of output scores per sample.
      num_hidden_layer: how many hidden blocks to stack (0 gives a single
        linear layer from ``input_dim`` to ``output_dim``).
      neuron_per_layer: width of every hidden block.
      dropout: drop probability used in every hidden block.
    """
    super().__init__()

    layers = []
    in_features = input_dim

    for _ in range(num_hidden_layer):
      layers.append(nn.Linear(in_features,neuron_per_layer))
      layers.append(nn.BatchNorm1d(neuron_per_layer))
      layers.append(nn.ReLU())
      layers.append(nn.Dropout(p=dropout))
      in_features = neuron_per_layer

    # Size the output layer from the running width rather than from
    # neuron_per_layer, so the network is also valid with zero hidden blocks.
    layers.append(nn.Linear(in_features,output_dim))

    self.model = nn.Sequential(*layers)

  def forward(self,x):
    """Run ``x`` through the layer stack and return the output scores."""
    return self.model(x)

In [None]:
# Objective function for Optuna

def objective2(trial):
  """Optuna objective that tunes architecture and training settings together.

  Samples the depth, width, epoch count, learning rate, dropout rate, batch
  size and optimizer type. It then trains a fresh ``Model2`` on the
  module-level ``train_dataset`` and measures accuracy on the module-level
  ``test_dataset``.

  Args:
    trial: the Optuna trial used to sample the hyperparameters
      ``num_hidden_layer``, ``neuron_per_layer``, ``epochs``, ``lr``,
      ``dropout``, ``batch_size`` and ``optimizer``.

  Returns:
    The fraction of samples in ``test_dataset`` that were classified correctly.

  Note:
    The returned score comes from the test split, so the study's best value
    is selected on the same data it is reported on and is an optimistic
    estimate. Consider scoring on a separate validation split instead.
  """
  num_hidden_layer = trial.suggest_int('num_hidden_layer',1,5)
  neuron_per_layer = trial.suggest_int('neuron_per_layer',8,128,step=8)
  epochs = trial.suggest_int('epochs',10,50,step=10)
  lr = trial.suggest_float('lr',1e-5,1e-1,log=True)
  dropout = trial.suggest_float('dropout',0.1,0.5,step=0.1)
  batch_size = trial.suggest_categorical('batch_size',[128,256,512])
  optimizer_name = trial.suggest_categorical('optimizer',['Adam','SGD'])

  # Per-trial loaders, because the batch size is one of the tuned values.
  # Only the training data is shuffled; accuracy does not depend on order.
  train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)
  test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, pin_memory=True)

  input_dim = 784
  output_dim = 10

  model = Model2(input_dim,output_dim,num_hidden_layer,neuron_per_layer,dropout)
  model.to(device)

  lossfn = nn.CrossEntropyLoss()

  if optimizer_name == 'Adam':
    optimizer = optim.Adam(model.parameters(), lr=lr)
  else:
    optimizer = optim.SGD(model.parameters(), lr=lr)

  # Make the training mode explicit (enables dropout and batch-norm updates).
  model.train()

  for _ in range(epochs):

    for batch_features, batch_labels in train_loader:

      # The loaders pin host memory; non_blocking=True lets the copy to the
      # GPU be asynchronous. On CPU this is a no-op.
      batch_features = batch_features.to(device, non_blocking=True)
      batch_labels = batch_labels.to(device, non_blocking=True)

      optimizer.zero_grad()

      loss = lossfn(model(batch_features), batch_labels)
      loss.backward()

      optimizer.step()

  model.eval()

  total = 0
  correct = 0

  with torch.no_grad():

    for batch_features, batch_labels in test_loader:

      batch_features = batch_features.to(device, non_blocking=True)
      batch_labels = batch_labels.to(device, non_blocking=True)

      # Predicted class = index of the largest score in each row.
      predicted = model(batch_features).argmax(dim=1)

      total += batch_labels.size(0)
      correct += (predicted == batch_labels).sum().item()

  return correct / total

In [None]:
# Seed the sampler so the sequence of suggested hyperparameters is repeatable.
# TPESampler is Optuna's default sampler, so the search algorithm is unchanged.
study2 = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study2.optimize(objective2, n_trials=20)

[I 2025-04-06 16:19:46,016] A new study created in memory with name: no-name-88f1af95-09a4-4893-8037-3fcbca8b5811
[I 2025-04-06 16:20:11,657] Trial 0 finished with value: 0.8017857142857143 and parameters: {'num_hidden_layer': 1, 'neuron_per_layer': 88, 'epochs': 20, 'lr': 0.0003983399390918529, 'dropout': 0.30000000000000004, 'batch_size': 128, 'optimizer': 'SGD'}. Best is trial 0 with value: 0.8017857142857143.
[I 2025-04-06 16:20:27,633] Trial 1 finished with value: 0.8115714285714286 and parameters: {'num_hidden_layer': 1, 'neuron_per_layer': 8, 'epochs': 20, 'lr': 0.00011778306070844803, 'dropout': 0.2, 'batch_size': 256, 'optimizer': 'Adam'}. Best is trial 1 with value: 0.8115714285714286.
[I 2025-04-06 16:21:03,449] Trial 2 finished with value: 0.5157857142857143 and parameters: {'num_hidden_layer': 3, 'neuron_per_layer': 40, 'epochs': 50, 'lr': 7.240239610195713e-05, 'dropout': 0.4, 'batch_size': 512, 'optimizer': 'SGD'}. Best is trial 1 with value: 0.8115714285714286.
[I 2025-

In [None]:
# Highest accuracy returned by `objective2` across the trials of `study2`.
study2.best_value

In [None]:
# Hyperparameters of the trial that produced the best value in `study2`.
study2.best_params