In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset,DataLoader
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [None]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

device(type='cpu')

In [None]:
!pip install optuna
import optuna

Collecting optuna
  Downloading optuna-4.4.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.16.2-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.4.0-py3-none-any.whl (395 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m395.9/395.9 kB[0m [31m11.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.16.2-py3-none-any.whl (242 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m242.7/242.7 kB[0m [31m24.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, alembic, optuna
Successfully installed alembic-1.16.2 colorlog-6.9.0 optuna-4.4.0


In [None]:

import kagglehub

# Fetch the latest published version of the dataset; `path` is the local
# directory that kagglehub reports for the downloaded files.
DATASET_HANDLE = "zalando-research/fashionmnist"
path = kagglehub.dataset_download(DATASET_HANDLE)

print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/fashionmnist


In [None]:
# Read the CSVs from the directory returned by kagglehub (`path`, set in the
# download cell) instead of a hard-coded Kaggle-only location, so the notebook
# also runs where the dataset is cached somewhere else.
test = pd.read_csv(f"{path}/fashion-mnist_test.csv")
train = pd.read_csv(f"{path}/fashion-mnist_train.csv")
train.head()


Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,5,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,0,1,2,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
# Separate the target column from the pixel columns for both splits.
y_train, y_test = train["label"], test["label"]
x_train = train.drop("label", axis=1)
x_test = test.drop("label", axis=1)

In [None]:
# Sanity-check the dimensions of every split, one shape per line.
for split in (x_train, y_train, x_test, y_test):
    print(split.shape)

(60000, 784)
(60000,)
(10000, 784)
(10000,)


In [None]:

# Scale pixel values by 1/255.
# The scaled frames are derived from the raw `train`/`test` frames rather than
# from x_train/x_test themselves, so re-running this cell never divides
# already-scaled data a second time.
x_train = train.drop(columns=["label"]) / 255.0
x_test = test.drop(columns=["label"]) / 255.0

In [None]:
class MyDataset(Dataset):
  """In-memory dataset that pairs a feature table with its labels.

  Args:
    features: object exposing ``.values`` (e.g. a pandas DataFrame); converted
      to a ``float32`` tensor.
    labels: object exposing ``.values`` (e.g. a pandas Series); converted to a
      ``long`` tensor.
  """

  def __init__(self,features,labels):
    # `dtype` is keyword-only in torch.tensor; passing it positionally raises
    # a TypeError, so it must be spelled out.
    self.features=torch.tensor(features.values,dtype=torch.float32)
    self.labels=torch.tensor(labels.values,dtype=torch.long)

  def __len__(self):
    """Return the number of samples (length of the feature tensor)."""
    return len(self.features)

  def __getitem__(self,index):
    """Return the ``(features, label)`` pair stored at ``index``."""
    return self.features[index],self.labels[index]

In [None]:
class Model(nn.Module):
  """Fully connected classifier with a configurable stack of hidden blocks.

  Each hidden block is Linear -> ReLU -> BatchNorm1d -> Dropout; the stack is
  followed by a single Linear output layer.

  Args:
    input_features: width of the input vectors.
    neurons_per_layer: width of every hidden layer.
    output_features: width of the output layer.
    num_hidden_layers: number of hidden blocks; ``0`` yields a single Linear
      layer mapping ``input_features`` to ``output_features``.
    dropout_rate: probability passed to each ``nn.Dropout``.
  """

  def __init__(self,input_features,neurons_per_layer,output_features,num_hidden_layers,dropout_rate):

    super().__init__()
    layers=[]
    # Width of whatever feeds the next Linear layer; starts at the input width.
    in_features=input_features
    for _ in range(num_hidden_layers):
      layers.append(nn.Linear(in_features,neurons_per_layer))
      layers.append(nn.ReLU())
      layers.append(nn.BatchNorm1d(neurons_per_layer))
      layers.append(nn.Dropout(dropout_rate))
      in_features=neurons_per_layer
    # Size the output layer from the running width (not neurons_per_layer) so
    # the network stays shape-consistent when there are no hidden blocks.
    layers.append(nn.Linear(in_features,output_features))
    self.network=nn.Sequential(*layers)

  def forward(self,x):
    """Apply the sequential network to ``x`` and return its output."""
    return self.network(x)

In [None]:
class MyDataset(Dataset):
  """Tensor-backed dataset built from a feature table and a label column.

  Both inputs must expose ``.values``; features are stored as ``float32`` and
  labels as ``long`` tensors.
  """

  def __init__(self,features,labels):
    # Convert each input to a tensor once, at construction time.
    feature_array=features.values
    self.features=torch.tensor(feature_array,dtype=torch.float32)
    label_array=labels.values
    self.labels=torch.tensor(label_array,dtype=torch.long)

  def __len__(self):
    # One sample per entry along the first axis of the feature tensor.
    return self.features.shape[0]

  def __getitem__(self,index):
    sample=self.features[index]
    target=self.labels[index]
    return sample,target

# Wrap each split's pixel table and label column in a MyDataset.
train_dataset = MyDataset(features=x_train, labels=y_train)
test_dataset = MyDataset(features=x_test, labels=y_test)

In [None]:
def objective(trial):
    """Optuna objective: train one sampled MLP configuration and score it.

    Draws the architecture (depth, width, dropout), the optimizer (type,
    learning rate, weight decay) and the schedule (epochs, batch size) from
    ``trial``, trains a ``Model`` on ``train_dataset`` and evaluates it on
    ``test_dataset``.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        float: fraction of ``test_dataset`` samples classified correctly.

    NOTE(review): the score is computed on the test set, so hyperparameters
    are selected against the same data that would later be used to report
    performance. Consider scoring on a held-out validation split instead;
    confirm the intent before relying on ``study.best_value``.
    """
    num_hidden_layers = trial.suggest_int("num_hidden_layers", 1, 6)
    neurons_per_layer = trial.suggest_int("neurons_per_layer", 8, 128, step=8)
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-4, log=True)
    dropout_rate = trial.suggest_float("dropout_rate", 0.1, 0.5, step=0.1)
    epochs = trial.suggest_int("epochs", 10, 50, step=10)
    batch_size = trial.suggest_categorical("batch_size", [64, 128, 256, 512])
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"])
    weight_decay = trial.suggest_float("weight_decay", 1e-5, 1e-3, log=True)

    input_dim = 784
    output_dim = 10

    # Page-locked host memory only speeds up host-to-GPU copies; requesting it
    # on a CPU-only run is useless and makes DataLoader emit a warning.
    use_pinned_memory = device.type == "cuda"
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                              num_workers=2, pin_memory=use_pinned_memory)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False,
                             num_workers=2, pin_memory=use_pinned_memory)

    model = Model(input_features=input_dim,
                  neurons_per_layer=neurons_per_layer,
                  output_features=output_dim,
                  num_hidden_layers=num_hidden_layers, dropout_rate=dropout_rate)
    model.to(device)

    loss_fn = nn.CrossEntropyLoss()

    # Map the sampled optimizer name to its class; every choice takes the same
    # lr / weight_decay keyword arguments.
    optimizer_classes = {"Adam": optim.Adam, "RMSprop": optim.RMSprop, "SGD": optim.SGD}
    optimizer = optimizer_classes[optimizer_name](
        model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    model.train()
    for epoch in range(epochs):
        for batch_features, batch_labels in train_loader:
            batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)
            batch_features = batch_features.view(batch_features.size(0), -1)  # flatten
            output = model(batch_features)
            loss = loss_fn(output, batch_labels)
            # The running loss was never read, so it is no longer accumulated;
            # this also avoids a host/device sync (`loss.item()`) on every step.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_features, batch_labels in test_loader:
            batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)
            batch_features = batch_features.view(batch_features.size(0), -1)  # flatten
            output = model(batch_features)
            # Predicted class = index of the largest logit in each row.
            predicted = output.argmax(dim=1)
            total += batch_labels.size(0)
            correct += (predicted == batch_labels).sum().item()
    accuracy = correct / total
    return accuracy


In [None]:
# Seed the sampler so the hyperparameters suggested by the sampler's random
# start-up trials are repeatable across Restart & Run All. TPE is Optuna's
# default sampler, so only the seeding changes. Model training itself is
# still unseeded, so objective values can vary between runs.
study=optuna.create_study(direction="maximize",sampler=optuna.samplers.TPESampler(seed=42))


[I 2025-07-08 07:08:35,719] A new study created in memory with name: no-name-5a496a6b-b80f-4492-b170-44175f8ea284


In [None]:
# Number of hyperparameter configurations to evaluate in this search.
N_TRIALS = 10
study.optimize(objective, n_trials=N_TRIALS)

[I 2025-07-08 07:13:03,429] Trial 0 finished with value: 0.873 and parameters: {'num_hidden_layers': 6, 'neurons_per_layer': 72, 'learning_rate': 1.11012850926668e-05, 'dropout_rate': 0.2, 'epochs': 40, 'batch_size': 64, 'optimizer': 'RMSprop', 'weight_decay': 0.0003492585879605788}. Best is trial 0 with value: 0.873.
[I 2025-07-08 07:15:44,859] Trial 1 finished with value: 0.8841 and parameters: {'num_hidden_layers': 1, 'neurons_per_layer': 80, 'learning_rate': 8.835923764793007e-05, 'dropout_rate': 0.5, 'epochs': 50, 'batch_size': 64, 'optimizer': 'RMSprop', 'weight_decay': 0.000650002153826622}. Best is trial 1 with value: 0.8841.
[I 2025-07-08 07:17:12,999] Trial 2 finished with value: 0.8602 and parameters: {'num_hidden_layers': 5, 'neurons_per_layer': 24, 'learning_rate': 4.442849955358078e-05, 'dropout_rate': 0.2, 'epochs': 20, 'batch_size': 256, 'optimizer': 'RMSprop', 'weight_decay': 4.047790031378479e-05}. Best is trial 1 with value: 0.8841.
[I 2025-07-08 07:18:43,005] Trial 

In [None]:
# Best objective value found by the study (accuracy returned by `objective`).
study.best_value

0.8883

In [None]:
# Hyperparameters of the trial that achieved the best objective value.
study.best_params

{'num_hidden_layers': 5,
 'neurons_per_layer': 96,
 'learning_rate': 3.516547204285174e-05,
 'dropout_rate': 0.2,
 'epochs': 30,
 'batch_size': 128,
 'optimizer': 'Adam',
 'weight_decay': 3.5123186844896437e-05}