In [1]:
import torch
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print("Using Device: ", device )

Using Device:  mps


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import os
import numpy as np

In [3]:
torch.manual_seed(42)

<torch._C.Generator at 0x111342c90>

In [4]:
#df = pd.read_csv("fmnist_small.csv")
df = pd.read_csv("fashion-mnist_train.csv")
df.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,5,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,0,1,2,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [5]:
# Train and test split
X = df.iloc[:, 1:].values
y = df.iloc[:, 0].values

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
# Scaling the features
X_train = X_train/255.0
X_test = X_test/255.0

# Create the CustomDataset class

In [8]:
class CustoomDataset(Dataset):
    def __init__(self, features, labels):
        self.features = torch.tensor(features, dtype=torch.float32) 
        self.labels = torch.tensor(labels, dtype=torch.long)

    def __len__(self):
        return len(self.features)
    
    def __getitem__(self, index):
        return self.features[index], self.labels[index]
      

# Create Train Dataset obejct

In [9]:
train_dataset = CustoomDataset(X_train, y_train)

# Creat Test Dataset

In [10]:
test_dataset = CustoomDataset(X_test, y_test )

In [11]:
test_dataset

<__main__.CustoomDataset at 0x140bfaf70>

# Create the DataLoader

# Define my Model

In [12]:
class MyNN(nn.Module):
    def __init__(self, input_dim, output_dim, num_hidden_layers, neurons_per_layer, dropout_rate):
      super().__init__()

      layers = [] # we want to store our hidden layers
      for i in range(num_hidden_layers):
         layers.append(nn.Linear(input_dim, neurons_per_layer))
         layers.append(nn.BatchNorm1d(neurons_per_layer))
         layers.append(nn.ReLU())
         layers.append(nn.Dropout(dropout_rate))
         input_dim = neurons_per_layer

      layers.append(nn.Linear(neurons_per_layer, output_dim))

      self.model = nn.Sequential(*layers)

    def forward(self, x):
       return self.model(x)


# Objective Function

In [13]:
import torch
import torch.nn as nn
import torch.optim as optim

def objective(trial):
    # 1) Hyperparameters from search space
    num_hidden_layers = trial.suggest_int("num_hidden_layers", 1, 5)
    neurons_per_layer = trial.suggest_int("neurons_per_layer", 8, 128, step=8)
    epochs = trial.suggest_int("epochs",10,50,step=10)
    learning_rate = trial.suggest_float("lr", 1e-5, 1e-4, log=True)
    dropout_rate = trial.suggest_float("dropout_rate", 0.1, 0.5, step=0.1)
    batch_size = trial.suggest_categorical("batch_size", [16, 32, 64, 128])
    optimizer_name = trial.suggest_categorical("optimizer", ['Adam', 'SGD', 'RMSprop'])
    weight_decay = trial.suggest_float("weight_decay", 1e-5, 1e-3, log=True)


    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, pin_memory=True)

    # 2) Model
    input_dim = 784
    output_dim = 10
    model = MyNN(input_dim, output_dim, num_hidden_layers, neurons_per_layer, dropout_rate)
    model.to(device) # run on my GPU

    # 3) Loss + Optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters())
    if optimizer_name == 'Adam':
        optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    elif optimizer_name=='SGD':
        optim.SGD(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    else:
        optim.RMSprop(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # 4) Training
    model.train()
    for epoch in range(epochs):
        for batch_features, batch_labels in train_dataloader:  # make sure name is train_loader
            # move data to gpu
            batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)

           # forward pass
            outputs = model(batch_features)

           # calculate loss
            loss = criterion(outputs, batch_labels)

           # back pass
            optimizer.zero_grad()
            loss.backward()

           # update grads
            optimizer.step()


    # 5) Evaluation (accuracy)
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_features, batch_labels in test_dataloader:
            # move to data to gpu
            batch_features = batch_features.to(device)
            batch_labels = batch_labels.to(device)

            outputs = model(batch_features)
            preds = outputs.argmax(dim=1)

            total += batch_labels.size(0)
            correct += (preds == batch_labels).sum().item()

    accuracy = correct / total
    return accuracy


In [14]:
!pip3 install optuna

Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.[0m


In [15]:
import optuna
study = optuna.create_study(direction='maximize')

[I 2026-01-18 14:59:15,495] A new study created in memory with name: no-name-f6d92b35-8338-4c84-8f92-5613097bba39


In [16]:
study.optimize(objective, n_trials=10)

[I 2026-01-18 15:00:15,865] Trial 0 finished with value: 0.5603333333333333 and parameters: {'num_hidden_layers': 4, 'neurons_per_layer': 8, 'epochs': 40, 'lr': 6.674997303158447e-05, 'dropout_rate': 0.30000000000000004, 'batch_size': 128, 'optimizer': 'Adam', 'weight_decay': 1.5821771835417224e-05}. Best is trial 0 with value: 0.5603333333333333.
[I 2026-01-18 15:02:49,614] Trial 1 finished with value: 0.8245 and parameters: {'num_hidden_layers': 1, 'neurons_per_layer': 8, 'epochs': 30, 'lr': 9.36263936135975e-05, 'dropout_rate': 0.30000000000000004, 'batch_size': 16, 'optimizer': 'RMSprop', 'weight_decay': 0.0009189231707351996}. Best is trial 1 with value: 0.8245.
[I 2026-01-18 15:03:25,944] Trial 2 finished with value: 0.8514166666666667 and parameters: {'num_hidden_layers': 1, 'neurons_per_layer': 72, 'epochs': 50, 'lr': 1.785725772381248e-05, 'dropout_rate': 0.2, 'batch_size': 128, 'optimizer': 'SGD', 'weight_decay': 0.00020767605365623815}. Best is trial 2 with value: 0.85141666

In [17]:
study.best_value

0.86025

In [18]:
study.best_params

{'num_hidden_layers': 4,
 'neurons_per_layer': 88,
 'epochs': 40,
 'lr': 4.199089860848469e-05,
 'dropout_rate': 0.4,
 'batch_size': 16,
 'optimizer': 'Adam',
 'weight_decay': 4.291686208501945e-05}