In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset,DataLoader
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [2]:
torch.manual_seed(42)

<torch._C.Generator at 0x10d613450>

In [3]:
df=pd.read_csv('./datasets/fashion-mnist_train.csv')
df.head(1)

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [4]:
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print(device)

mps


In [5]:
x=df.iloc[:,1:].values
y=df.iloc[:,0].values

In [6]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)

In [7]:
x_train=x_train/225.0
x_test=x_test/225.0

In [8]:
class customDataset(Dataset):
    def __init__(self,features,labels):
        self.features=torch.tensor(features,dtype=torch.float32)
        self.labels=torch.tensor(labels,dtype=torch.long)
    def __len__(self):
        return self.features.shape[0]
    def __getitem__(self,ind):
        return self.features[ind],self.labels[ind]
    

In [9]:
train_dataset = customDataset(x_train,y_train)
test_dataset = customDataset(x_test,y_test)

In [10]:
test_dataset[0]

(tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0

In [11]:
train_dataloader = DataLoader(train_dataset,batch_size=32,shuffle=True,pin_memory=True)
test_dataloader= DataLoader(test_dataset,batch_size=32,shuffle=False,pin_memory=True)

In [12]:
class MyNN(nn.Module):
    def __init__(self,input_dim,output_dim,num_of_hidden_layer,num_of_neuron):
        super().__init__()
        layer=[]
        for i in range(num_of_hidden_layer):
            layer.append(nn.Linear(input_dim,num_of_neuron))
            layer.append(nn.BatchNorm1d(num_of_neuron))
            layer.append(nn.ReLU())
            layer.append(nn.Dropout(p=0.3))
            input_dim=num_of_neuron
        layer.append(nn.Linear(num_of_neuron,output_dim))
        self.model=nn.Sequential(*layer)
    def forward(self,x):
        return self.model(x)

In [13]:
def objective(trial):
    num_of_hidden_layer = trial.suggest_int("num_of_hidden_layer",1,5)
    num_of_neuron = trial.suggest_int("num_of_neuron",8,128,step=8)
    #model initializattion
    input_dim=784
    out_dim = 10
    
    model= MyNN(input_dim,out_dim,num_of_hidden_layer,num_of_neuron)
    model.to(device)
    #hyper_param_inialization
    epochs=50
    lr=0.1
    #optimizer selection
    citeration = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(),lr=lr,weight_decay=1e-4) 

    #training loop
    for epoch in range(epochs):
        for batch_features,batch_labels in train_dataloader:
            batch_features,batch_labels = batch_features.to(device),batch_labels.to(device)
            pred=model(batch_features)
            loss = citeration(pred,batch_labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
    #evaluation
    correct=0
    total=0
    with torch.no_grad():
        for batch_features,batch_labels in test_dataloader : 
            batch_features,batch_labels= batch_features.to(device),batch_labels.to(device)
            pred= model(batch_features)
            _,predicted=torch.max(pred,1)
            total+=batch_labels.shape[0]
            correct+=(predicted==batch_labels).sum().item()
        accuracy = correct/total
    return accuracy

In [15]:
import optuna

In [16]:
study = optuna.create_study(direction='maximize')

[I 2025-12-18 20:24:15,712] A new study created in memory with name: no-name-529d345f-961f-4322-ae57-e6c1019cdc08


In [None]:
study.optimize(objective,n_trials=10)

[I 2025-12-18 20:31:31,001] Trial 0 finished with value: 0.8384166666666667 and parameters: {'num_of_hidden_layer': 4, 'num_of_neuron': 64}. Best is trial 0 with value: 0.8384166666666667.
[I 2025-12-18 20:47:20,285] Trial 1 finished with value: 0.8566666666666667 and parameters: {'num_of_hidden_layer': 4, 'num_of_neuron': 96}. Best is trial 1 with value: 0.8566666666666667.
[I 2025-12-18 20:49:57,391] Trial 2 finished with value: 0.8683333333333333 and parameters: {'num_of_hidden_layer': 3, 'num_of_neuron': 128}. Best is trial 2 with value: 0.8683333333333333.
[I 2025-12-18 20:55:15,991] Trial 3 finished with value: 0.8454166666666667 and parameters: {'num_of_hidden_layer': 4, 'num_of_neuron': 72}. Best is trial 2 with value: 0.8683333333333333.


In [None]:
print(study.best_value)
print(study.best_params)