In [61]:
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt

In [62]:
# Set random seeds for reproducibility
torch.manual_seed(42)

<torch._C.Generator at 0x7fe942739610>

In [63]:
# Check for GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Using device: cuda


In [64]:
df = pd.read_csv('/content/fashion-mnist_train.csv')
df.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,5,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,0,1,2,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [65]:
df.shape

(60000, 785)

In [66]:
# train test split
X = df.iloc[:, 1:].values
y = df.iloc[:, 0].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [67]:
X_train = X_train/255.0
X_test = X_test/255.0

In [68]:
class CustomData(Dataset):
  def __init__ (self, features, labels):
    self.features = torch.tensor(features, dtype=torch.float32)
    self.labels = torch.tensor(labels, dtype=torch.long)

  def __len__ (self):
    return len(self.features)

  def __getitem__(self, index):
    return self.features[index], self.labels[index]

In [69]:
train_dataset = CustomData(X_train, y_train)
test_dataset = CustomData(X_test, y_test)
print(f'Length of Train Data is {len(train_dataset)} and Test data length is {len(test_dataset)}')


Length of Train Data is 48000 and Test data length is 12000


In [70]:
class MyANN(nn.Module):
  def __init__(self, input_dim, output_dim, num_hidden_layers, num_of_neurons, dropout):
    super().__init__()

    layers = []

    for i in range(num_hidden_layers):
      layers.append(nn.Linear(input_dim, num_of_neurons))
      layers.append(nn.BatchNorm1d(num_of_neurons))
      layers.append(nn.ReLU())
      layers.append(nn.Dropout(dropout))
      input_dim = num_of_neurons
    layers.append(nn.Linear(num_of_neurons, output_dim))

    self.model = nn.Sequential(*layers)

  def forward(self, x):
    return self.model(x)

In [71]:
def objective(trial):
  num_hidden_layers = trial.suggest_int('num_hidden_layer', 1, 5)
  num_of_neurons = trial.suggest_int('num_of_neurons', 8, 128, step=8)
  epochs = trial.suggest_int('epochs', 10, 70, step=10)
  learning_rate = trial.suggest_float('learning_rate', 0.1, 0.5, log=True)
  weight_decay = trial.suggest_float('weight_decay', 1e-5, 1e-3, log=True)
  dropout = trial.suggest_float('dropout', 0.3, 0.5, step=0.1)
  batch_size = trial.suggest_categorical('batch_size', [16,32,64,128])
  optimizer_name = trial.suggest_categorical('optimizer_name', ['SGD', 'Adam', 'RMSProp'])

  input_dim = 784
  output_dim = 10

  train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)
  test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, pin_memory=True)


  model = MyANN(input_dim, output_dim, num_hidden_layers, num_of_neurons, dropout)
  model.to(device)

  criterion = nn.CrossEntropyLoss()
  optimizer = optim.SGD(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

  if optimizer_name == 'Adam':
    optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
  elif optimizer_name == 'SGD':
    optim.SGD(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
  else:
    optim.RMSprop(model.parameters(), lr=learning_rate, weight_decay=weight_decay)


  for epoch in range(epochs):
    for batch_features, batch_labels in train_loader:
      batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)
      #Forward Pass
      output = model(batch_features)
      #Calculate loss
      loss = criterion(output, batch_labels)
      #Backward Pass
      optimizer.zero_grad()
      loss.backward()
      #Update Grads
      optimizer.step()

  #Evaluation Mode
  model.eval()

  # evaluation on test data
  total = 0
  correct = 0

  with torch.no_grad():

    for batch_features, batch_labels in test_loader:

      # move data to gpu
      batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)

      outputs = model(batch_features)

      _, predicted = torch.max(outputs, 1)

      total = total + batch_labels.shape[0]

      correct = correct + (predicted == batch_labels).sum().item()

    accuracy = correct/total

  return accuracy

In [72]:
!pip install optuna



In [73]:
import optuna
study = optuna.create_study(direction='maximize')

[I 2026-02-20 05:49:03,163] A new study created in memory with name: no-name-4316463d-91e0-4500-a727-f2d21795616b


In [74]:
study.optimize(objective, n_trials=10)

[I 2026-02-20 05:50:53,791] Trial 0 finished with value: 0.88825 and parameters: {'num_hidden_layer': 5, 'num_of_neurons': 112, 'epochs': 70, 'learning_rate': 0.20411718801159587, 'weight_decay': 1.4477137875947034e-05, 'dropout': 0.3, 'batch_size': 128, 'optimizer_name': 'RMSProp'}. Best is trial 0 with value: 0.88825.
[I 2026-02-20 05:51:08,928] Trial 1 finished with value: 0.7360833333333333 and parameters: {'num_hidden_layer': 5, 'num_of_neurons': 48, 'epochs': 10, 'learning_rate': 0.4245685633100167, 'weight_decay': 0.00019371777611158232, 'dropout': 0.5, 'batch_size': 128, 'optimizer_name': 'Adam'}. Best is trial 0 with value: 0.88825.
[I 2026-02-20 05:52:50,173] Trial 2 finished with value: 0.73025 and parameters: {'num_hidden_layer': 5, 'num_of_neurons': 40, 'epochs': 20, 'learning_rate': 0.18202205941833835, 'weight_decay': 2.5879247808752235e-05, 'dropout': 0.5, 'batch_size': 32, 'optimizer_name': 'Adam'}. Best is trial 0 with value: 0.88825.
[I 2026-02-20 05:53:36,968] Trial

In [75]:
study.best_value

0.88825

In [76]:
study.best_params

{'num_hidden_layer': 5,
 'num_of_neurons': 112,
 'epochs': 70,
 'learning_rate': 0.20411718801159587,
 'weight_decay': 1.4477137875947034e-05,
 'dropout': 0.3,
 'batch_size': 128,
 'optimizer_name': 'RMSProp'}