<a href="https://colab.research.google.com/github/DARKINOO/Pytorch_practice/blob/main/ann_fashion_mnist_gpu.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt

In [None]:
# Set the PyTorch random seed for reproducibility (weight init, dropout, DataLoader shuffling)
torch.manual_seed(42)

In [None]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device : {device}")

In [None]:
# Load the training CSV; later cells use column 0 as the class label and
# reshape the remaining columns of each row into a 28x28 image
df = pd.read_csv('fashion-mnist_train.csv')

In [None]:
# Preview the first rows to sanity-check the column layout
df.head()

In [None]:
# (rows, columns) of the loaded frame
df.shape

In [None]:
# Create a 4x4 grid of axes to preview the dataset
fig, axes = plt.subplots(4, 4, figsize=(10, 10))
fig.suptitle("First 16 images", fontsize=16)

# Draw row k of the frame (label column dropped) on the k-th axis as a 28x28 image
for row_idx, axis in enumerate(axes.ravel()):
    pixels = df.iloc[row_idx, 1:].to_numpy().reshape(28, 28)
    axis.imshow(pixels, cmap='gray')
    axis.axis('off')
plt.show()

In [None]:
# Column 0 holds the class label; every remaining column is a feature
y = df.iloc[:, 0].to_numpy()
X = df.iloc[:, 1:].to_numpy()

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
# Scale features by 1/255 (assumes raw pixel values are in 0-255 -- TODO confirm).
# NOTE: this cell rebinds its own inputs, so re-running it without re-running the
# split cell first divides by 255 a second time.
X_train, X_test = X_train / 255.0, X_test / 255.0

In [None]:
class CustomDataset(Dataset):
  """In-memory dataset that pairs a feature matrix with integer class labels."""

  def __init__(self, features, labels):
    """Copy `features` into a float32 tensor and `labels` into an int64 tensor."""
    feature_tensor = torch.tensor(features, dtype=torch.float32)
    label_tensor = torch.tensor(labels, dtype=torch.long)
    self.features = feature_tensor
    self.labels = label_tensor

  def __len__(self):
    """Number of samples, taken from the feature tensor."""
    n_samples = len(self.features)
    return n_samples

  def __getitem__(self, idx):
    """Return the `(features, label)` pair stored at position `idx`."""
    sample = self.features[idx]
    target = self.labels[idx]
    return sample, target


In [None]:
# Wrap the scaled train / hold-out arrays as tensor-backed datasets
train_dataset = CustomDataset(X_train,y_train)
test_dataset = CustomDataset(X_test,y_test)

In [None]:
# Number of samples in each split. (The DataLoaders are built per trial inside
# `objective`, so no module-level `train_loader` exists to take the length of.)
len(train_dataset), len(test_dataset)

In [None]:
class MyNN(nn.Module):
  """Fully connected classifier.

  The network is `num_hidden_layers` repetitions of
  Linear -> BatchNorm1d -> ReLU -> Dropout, followed by a final Linear layer
  that produces `output_dim` raw scores (no softmax is applied).
  """

  def __init__(self,input_dim, output_dim, num_hidden_layers,neurons_per_layer,dropout_rate
               ):
    """Build the layer stack.

    Args:
      input_dim: Number of input features per sample.
      output_dim: Number of outputs produced by the final Linear layer.
      num_hidden_layers: How many hidden blocks to stack; 0 yields a single
        Linear layer mapping `input_dim` directly to `output_dim`.
      neurons_per_layer: Width of every hidden Linear layer.
      dropout_rate: Probability passed to each `nn.Dropout`.
    """
    super().__init__()

    layers = []

    # `in_features` tracks the width feeding the next Linear layer
    in_features = input_dim
    for _ in range(num_hidden_layers):
      layers.append(nn.Linear(in_features, neurons_per_layer))
      layers.append(nn.BatchNorm1d(neurons_per_layer))
      layers.append(nn.ReLU())
      layers.append(nn.Dropout(dropout_rate))
      in_features = neurons_per_layer

    # Use the tracked width (not `neurons_per_layer`) so the output layer still
    # matches the input when there are no hidden blocks
    layers.append(nn.Linear(in_features, output_dim))

    self.model = nn.Sequential(*layers)

  def forward(self,x):
    """Run `x` through the sequential stack and return the raw output scores."""
    return self.model(x)

In [None]:
# Objective function
def objective(trial):
  """Train and evaluate one hyperparameter configuration for an Optuna study.

  Samples network depth/width, epoch count, learning rate, dropout rate, batch
  size, optimizer type and weight decay from `trial`, trains a fresh `MyNN` on
  `train_dataset`, and scores it on `test_dataset`.

  Args:
    trial: Object exposing `suggest_int`, `suggest_float` and
      `suggest_categorical` (an Optuna trial when driven by `study.optimize`).

  Returns:
    Fraction of `test_dataset` samples classified correctly, or 0.0 if the
    loader yields no samples.
  """

  # Sample the next hyperparameter values from the search space
  num_hidden_layers = trial.suggest_int("num_hidden_layers",1,5)
  neurons_per_layer = trial.suggest_int("neurons_per_Layer",8,128,step=8)
  epochs = trial.suggest_int("epochs",10,50, step=10)
  learning_rate = trial.suggest_float("learning_rate",1e-5,1e-1,log=True)
  dropout_rate = trial.suggest_float("dropout_rate",0.1,0.5,step=0.1)
  batch_size = trial.suggest_categorical("batch_size",[16,32,64,128])
  optimizer_name = trial.suggest_categorical("optimizer",["Adam","SGD","RMSprop"])
  weight_decay = trial.suggest_float("weight_decay",1e-5,1e-3,log=True)

  # Data loaders: pinned host memory only helps when batches are copied to a GPU
  use_pinned_memory = device.type == "cuda"
  train_loader = DataLoader(train_dataset,batch_size=batch_size,shuffle=True,pin_memory=use_pinned_memory)
  test_loader = DataLoader(test_dataset,batch_size=batch_size,shuffle=False, pin_memory=use_pinned_memory)

  # Model init
  input_dim = 784
  output_dim= 10

  model = MyNN(input_dim,output_dim,num_hidden_layers,neurons_per_layer,dropout_rate)
  model.to(device)

  criterion = nn.CrossEntropyLoss()

  # Optimizer selection: bind the chosen optimizer so the sampled `optimizer`
  # and `weight_decay` values actually drive training
  if optimizer_name == "Adam":
    optimizer = optim.Adam(model.parameters(),lr=learning_rate, weight_decay=weight_decay)
  elif optimizer_name == "SGD":
    optimizer = optim.SGD(model.parameters(),lr=learning_rate, weight_decay=weight_decay)
  else:
    optimizer = optim.RMSprop(model.parameters(),lr=learning_rate, weight_decay=weight_decay)

  # Training loop
  model.train()
  for _ in range(epochs):

    for batch_features, batch_labels in train_loader:

      # Move data to the selected device
      batch_features = batch_features.to(device)
      batch_labels = batch_labels.to(device)

      # Forward pass
      outputs = model(batch_features)
      loss = criterion(outputs,batch_labels)

      # Backward pass
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

  # Evaluation: switch BatchNorm/Dropout to inference behaviour
  model.eval()

  total = 0
  correct = 0

  with torch.no_grad():

    for batch_features, batch_labels in test_loader:

      batch_features = batch_features.to(device)
      batch_labels = batch_labels.to(device)

      outputs = model(batch_features)

      # Predicted class = index of the largest output score
      _, predicted = torch.max(outputs,1)

      total = total + batch_labels.shape[0]

      correct = correct + (predicted == batch_labels).sum().item()

  # Compute once after the loop; guard against an empty evaluation set
  accuracy = correct/total if total else 0.0

  return accuracy


In [None]:
!pip install optuna

In [None]:
import optuna

# Seed the sampler so the sequence of suggested hyperparameters is repeatable across runs
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)

In [None]:
# Run 10 trials of `objective`; each trial trains a fresh model from scratch, so this is the slow cell
study.optimize(objective, n_trials=10)

[I 2025-10-19 06:56:30,170] Trial 0 finished with value: 0.7205 and parameters: {'num_hidden_layers': 2, 'neurons_per_Layer': 72, 'epochs': 40, 'learning_rate': 1.0469904434116568e-05, 'dropout_rate': 0.4, 'batch_size': 16, 'optimizer': 'Adam', 'weight_decay': 6.132935566855203e-05}. Best is trial 0 with value: 0.7205.
[I 2025-10-19 06:56:51,093] Trial 1 finished with value: 0.8586666666666667 and parameters: {'num_hidden_layers': 3, 'neurons_per_Layer': 40, 'epochs': 10, 'learning_rate': 0.013153308353067434, 'dropout_rate': 0.2, 'batch_size': 64, 'optimizer': 'SGD', 'weight_decay': 9.34044903949032e-05}. Best is trial 1 with value: 0.8586666666666667.
[I 2025-10-19 06:57:51,346] Trial 2 finished with value: 0.5721666666666667 and parameters: {'num_hidden_layers': 3, 'neurons_per_Layer': 80, 'epochs': 50, 'learning_rate': 3.4003514734911134e-05, 'dropout_rate': 0.5, 'batch_size': 128, 'optimizer': 'Adam', 'weight_decay': 3.5256483617280306e-05}. Best is trial 1 with value: 0.858666666

In [None]:
# Best objective value found (the study was created with direction='maximize')
study.best_value

In [None]:
# Hyperparameter values of the best trial, keyed by the names passed to trial.suggest_*
study.best_params