In [1]:
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from torch import nn
from torch.utils.data import DataLoader, Dataset
import torch.optim as optim
import matplotlib.pyplot as plt
import optuna

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [3]:
torch.manual_seed(42)  # for reproducibility

<torch._C.Generator at 0x796d323fbfb0>

In [4]:
df1 = pd.read_csv('/home/darshan39/Downloads/fashionmnist/fashion-mnist_train.csv')
df2 = pd.read_csv('/home/darshan39/Downloads/fashionmnist/fashion-mnist_test.csv')
df = pd.concat([df1, df2], ignore_index=True)

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 70000 entries, 0 to 69999
Columns: 785 entries, label to pixel784
dtypes: int64(785)
memory usage: 419.2 MB


In [6]:
x = df.iloc[:, 1:].values
y = df.iloc[:, 0].values

In [7]:
x_train, x_test, y_train, y_test = train_test_split(
    x, y,
    test_size=0.2,
    random_state=42,
)

In [8]:
x_train = x_train.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0

In [9]:
x_train

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], shape=(56000, 784), dtype=float32)

In [10]:
class FashionMNISTDataset(Dataset):

    def __init__(self, features, labels):
        self.features = torch.tensor(features, dtype=torch.float32)
        self.labels = torch.tensor(labels, dtype=torch.long)

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

In [11]:
train_dataset = FashionMNISTDataset(x_train, y_train)

In [12]:
train_dataset[0]

(tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0039, 0.0000,
         0.0510, 0.4627, 0.1843, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0

In [13]:
test_dataset = FashionMNISTDataset(x_test, y_test)

In [14]:
class newNN(nn.Module):
    def __init__(self, input_size, output_size, num_layers, neurons_per_layer, dropout_rate):
        super(newNN, self).__init__()
        
        layer = []
        for i in range(num_layers):
            layer.append(nn.Linear(input_size, neurons_per_layer))
            layer.append(nn.BatchNorm1d(neurons_per_layer))
            layer.append(nn.ReLU())
            layer.append(nn.Dropout(dropout_rate))
            input_size = neurons_per_layer

        layer.append(nn.Linear(neurons_per_layer, output_size))
        self.model = nn.Sequential(*layer)

    def forward(self, x):
        return self.model(x)

In [15]:
#objective function
def objective(trial):
    num_layers = trial.suggest_int("num_layers", 1, 5)
    neurons_per_layer = trial.suggest_int("neurons_per_layer", 8, 128, step=8)
    num_epochs = trial.suggest_int("num_epochs", 10, 50, step=10)
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-1, log=True)
    dropout_rate = trial.suggest_float("dropout_rate", 0.1, 0.5, step=0.1)
    optimizer_name = trial.suggest_categorical("optimizer", ["SGD", "Adam", "RMSprop"])
    weight_decay = trial.suggest_float("weight_decay", 1e-5, 1e-1, log=True)
    batch_size = trial.suggest_categorical("batch_size", [16, 32, 64, 128]) 

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, pin_memory=True)

    input_size = 784
    output_size = 10

    model = newNN(input_size, output_size, num_layers, neurons_per_layer, dropout_rate).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    if optimizer_name == "Adam":
        optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    elif optimizer_name == "RMSprop":
        optimizer = optim.RMSprop(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    else:
        optimizer = optim.SGD(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # Ensure model is in training mode
    model.train()
    for epoch in range(num_epochs):
        # initialize epoch loss before accumulating
        total_epoch_loss = 0.0
        for batch_features, batch_labels in train_loader:
            batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)
            # Forward pass
            outputs = model(batch_features)
            loss = criterion(outputs, batch_labels)
            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_epoch_loss += loss.item()
        avg_epoch_loss = total_epoch_loss / len(train_loader)

    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for batch_features, batch_labels in test_loader:
            batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)
            outputs = model(batch_features)
            _, predicted = torch.max(outputs, 1)
            total += batch_labels.shape[0]
            correct += (predicted == batch_labels).sum().item()

        accuracy = correct / total
    return accuracy


In [16]:
study = optuna.create_study(direction="maximize")

[I 2025-11-23 11:33:44,538] A new study created in memory with name: no-name-67d5b2dd-b935-4c65-b781-51198fb8b4cc


In [17]:
study.optimize(objective, n_trials=10)

[W 2025-11-23 11:33:56,254] Trial 0 failed with parameters: {'num_layers': 1, 'neurons_per_layer': 104, 'num_epochs': 50, 'learning_rate': 2.5019011935363443e-05, 'dropout_rate': 0.30000000000000004, 'optimizer': 'SGD', 'weight_decay': 0.002022412719900334, 'batch_size': 32} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/home/darshan39/miniconda3/lib/python3.13/site-packages/optuna/study/_optimize.py", line 201, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_142438/4029181646.py", line 38, in objective
    outputs = model(batch_features)
  File "/home/darshan39/miniconda3/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^
  File "/home/darshan39/miniconda3/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl
    return forward_call(*args, **kwargs)
  File "/

KeyboardInterrupt: 

In [None]:
study.best_params

In [None]:
study.best_value