In [30]:
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt

In [31]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [32]:
torch.manual_seed(42)

<torch._C.Generator at 0x79430bb81490>

In [33]:
df = pd.read_csv('/content/drive/MyDrive/Fashion MNIST dataset/fashion-mnist_train.csv')
df

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,5,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,0,1,2,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59995,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
59996,1,0,0,0,0,0,0,0,0,0,...,73,0,0,0,0,0,0,0,0,0
59997,8,0,0,0,0,0,0,0,0,0,...,160,162,163,135,94,0,0,0,0,0
59998,8,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [34]:
x = df.iloc[:, 1:].values
y = df.iloc[:,0].values


In [35]:
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.2, random_state=42)

In [36]:
# scaling the features
x_train = x_train/255.0
x_test = x_test/255.0

In [37]:
x_train

array([[0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.00392157, 0.        ,
        0.        ]])

In [38]:
# create customDataset class
class CustomDataset(Dataset):
  def __init__(self, features, labels):
    self.features = torch.tensor(features, dtype=torch.float32)
    self.labels = torch.tensor(labels, dtype=torch.long)

  def __len__(self):
    return len(self.features)

  def __getitem__(self, index):
    return self.features[index], self.labels[index]

In [39]:
# create train_dataset object
train_dataset = CustomDataset(x_train, y_train)

In [40]:
train_dataset[0]

(tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.2275,
         0.5333, 0.0000, 0.0

In [41]:
# create test_dataset object
test_dataset = CustomDataset(x_test, y_test)

In [42]:
train_dataset[0]

(tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.2275,
         0.5333, 0.0000, 0.0

In [43]:
# create train and test loader

# train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
# test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [44]:
# we are doing these to reduce overfitting
# 1) BatchNormalization
# 2) Dropout
# 3) L2 Regularization

# to improve the accuracy
# 1) 83% -----> when run on cpu
# 2) 89% -----> when run on GPU -------> training acc = 98%
# 3) 89% -----> when reduced overfit -----> training acc = 93%

In [45]:
class NeuralNetwork(nn.Module):
  def __init__(self, input_dim, output_dim, num_hidden_layer, neurons_per_layer, dropout_rate):
    super().__init__()

    layers = []

    for i in range(num_hidden_layer):
      layers.append(nn.Linear(input_dim, neurons_per_layer))
      layers.append(nn.BatchNorm1d(neurons_per_layer))
      layers.append(nn.ReLU())
      layers.append(nn.Dropout(dropout_rate))
      input_dim = neurons_per_layer

    layers.append(nn.Linear(neurons_per_layer, output_dim))

    self.model = nn.Sequential(*layers)

  def forward(self, x):
    return self.model(x)

In [50]:
# Objective function
def objective(trial):
  # next hyperparameter values from the search space
  num_hidden_layer = trial.suggest_int("num_hidden_layer", 1, 5)
  neurons_per_layer = trial.suggest_int("neurons_per_layer", 8, 128, step=8)
  learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-1, log=True)
  epochs = trial.suggest_int("epochs", 10, 50, step=10)
  dropout_rate = trial.suggest_float("dropout_rate", 0.1, 0.5, step=0.1)
  batch_size = trial.suggest_categorical("batch_size", [16,32,64,128])
  optimizer_name = trial.suggest_categorical("optimizer", ['adam','SGD','RMSprop'])
  weight_decay = trial.suggest_float("weight_decay", 1e-5, 1e-3, log=True)

  train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
  test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

  # model init
  input_dim = 784
  output_dim = 10

  model = NeuralNetwork(input_dim, output_dim, num_hidden_layer, neurons_per_layer, dropout_rate)
  model = model.to(device)

  # params init
  # learning_rate = 0.01
  # epochs = 50

  # optimizer selection
  criterian = nn.CrossEntropyLoss()
  optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=1e-4)

  if optimizer_name == 'adam':
    optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
  elif optimizer_name == 'SGD':
    optim.SGD(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
  else:
    optim.RMSprop(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

  # training loop
  for epoch in range(epochs):

    for batch_features, batch_label in train_loader:

      # move to GPU
      batch_features = batch_features.to(device)
      batch_label = batch_label.to(device)

      # forward
      out = model(batch_features)

      # loss calculation
      loss = criterian(out, batch_label)

      # back prop
      optimizer.zero_grad()
      loss.backward()

      # update grad
      optimizer.step()

  # evaluation
  model.eval()

  total = 0
  correct = 0

  with torch.no_grad():

    for batch_features, batch_label in test_loader:
      # move to GPU
      batch_features = batch_features.to(device)
      batch_label = batch_label.to(device)

      out = model(batch_features)

      _, pred = torch.max(out.data, 1)

      total = total + batch_label.shape[0]
      correct = correct + (pred == batch_label).sum().item()

    accuracy = correct/total

  return accuracy

In [47]:
!pip install optuna



In [48]:
import optuna
study = optuna.create_study(direction='maximize')

[I 2025-12-22 15:32:43,973] A new study created in memory with name: no-name-3f2b9823-aa51-4066-92de-fde7568874d7


In [51]:
study.optimize(objective, n_trials=10)

[I 2025-12-22 15:37:41,698] Trial 1 finished with value: 0.7894166666666667 and parameters: {'num_hidden_layer': 5, 'neurons_per_layer': 128, 'learning_rate': 6.811862669111406e-05, 'epochs': 40, 'dropout_rate': 0.2, 'batch_size': 32, 'optimizer': 'SGD', 'weight_decay': 0.00030850674805956975}. Best is trial 1 with value: 0.7894166666666667.
[I 2025-12-22 15:43:41,461] Trial 2 finished with value: 0.8869166666666667 and parameters: {'num_hidden_layer': 3, 'neurons_per_layer': 72, 'learning_rate': 0.0008981650883097245, 'epochs': 50, 'dropout_rate': 0.1, 'batch_size': 16, 'optimizer': 'adam', 'weight_decay': 0.00010262342210302129}. Best is trial 2 with value: 0.8869166666666667.
[I 2025-12-22 15:45:45,437] Trial 3 finished with value: 0.8249166666666666 and parameters: {'num_hidden_layer': 1, 'neurons_per_layer': 32, 'learning_rate': 0.00020453242317005683, 'epochs': 50, 'dropout_rate': 0.5, 'batch_size': 32, 'optimizer': 'adam', 'weight_decay': 6.0304548353419486e-05}. Best is trial 2

In [52]:
study.best_trial

FrozenTrial(number=8, state=<TrialState.COMPLETE: 1>, values=[0.8874166666666666], datetime_start=datetime.datetime(2025, 12, 22, 15, 51, 7, 232158), datetime_complete=datetime.datetime(2025, 12, 22, 15, 51, 50, 211710), params={'num_hidden_layer': 2, 'neurons_per_layer': 80, 'learning_rate': 0.06569254027720253, 'epochs': 40, 'dropout_rate': 0.2, 'batch_size': 128, 'optimizer': 'adam', 'weight_decay': 0.0008468918088756364}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'num_hidden_layer': IntDistribution(high=5, log=False, low=1, step=1), 'neurons_per_layer': IntDistribution(high=128, log=False, low=8, step=8), 'learning_rate': FloatDistribution(high=0.1, log=True, low=1e-05, step=None), 'epochs': IntDistribution(high=50, log=False, low=10, step=10), 'dropout_rate': FloatDistribution(high=0.5, log=False, low=0.1, step=0.1), 'batch_size': CategoricalDistribution(choices=(16, 32, 64, 128)), 'optimizer': CategoricalDistribution(choices=('adam', 'SGD', 'RMSprop')

In [53]:
study.best_params

{'num_hidden_layer': 2,
 'neurons_per_layer': 80,
 'learning_rate': 0.06569254027720253,
 'epochs': 40,
 'dropout_rate': 0.2,
 'batch_size': 128,
 'optimizer': 'adam',
 'weight_decay': 0.0008468918088756364}