<a href="https://colab.research.google.com/github/Rehman047/FashionMNIST-Classification-Project/blob/main/fashion_mnist_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import torch.nn as nn
import pandas as pd
import torch
import matplotlib.pyplot as plt
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

In [3]:
torch.manual_seed(42)

<torch._C.Generator at 0x79261d18bfd0>

In [4]:
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [5]:
df=pd.read_csv('fashion-mnist_train.csv')
df.shape

(60000, 785)

In [6]:
x=df.iloc[:,1:].values

In [7]:
y=df['label'].values

In [8]:
xtr,xte,ytr,yte=train_test_split(x,y,test_size=0.2)

In [9]:
xtr=xtr/255.0
xte=xte/255.0

In [10]:
class CDataset(Dataset):
  def __init__(self, x,y):
    self.x=torch.tensor(x,dtype=torch.float32)
    self.y=torch.tensor(y,dtype=torch.long)
  def __len__(self):
    return self.x.shape[0]
  def __getitem__(self, index):
    return self.x[index],self.y[index]

In [11]:
traind=CDataset(xtr,ytr)
testd=CDataset(xte,yte)

In [12]:
print(torch.__version__)

2.8.0+cu126


In [13]:
class CNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.extractor=nn.Sequential(
            nn.Conv2d(1,32,kernel_size=3,padding='same'),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(kernel_size=2,stride=2),

            nn.Conv2d(32,64,kernel_size=3,padding='same'),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(kernel_size=2,stride=2)
        )
        self.classifier = nn.Sequential(
            nn.Linear(64*7*7, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(p=0.3),
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(p=0.3),
            nn.Linear(64, 10)
        )

    def forward(self, x):
      # reshape only if input is flattened
      if x.dim() == 2:
          x = x.view(-1, 1, 28, 28)
      x = self.extractor(x)
      x = torch.flatten(x, 1)
      return self.classifier(x)


In [14]:
train_loader=DataLoader(traind,32,True,pin_memory=True)
test_loader=DataLoader(testd,32,False,pin_memory=True)

In [15]:
model = CNN()

model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1, weight_decay=1e-4)
learning_rate = 0.01
epochs = 100

In [16]:
# training loop

for epoch in range(epochs):

  total_epoch_loss = 0

  for batch_features, batch_labels in test_loader:

    # move data to gpu
    batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)

    # forward pass
    outputs = model(batch_features)

    # calculate loss
    loss = criterion(outputs, batch_labels)

    # back pass
    optimizer.zero_grad()
    loss.backward()

    # update grads
    optimizer.step()

    total_epoch_loss = total_epoch_loss + loss.item()

  avg_loss = total_epoch_loss/len(train_loader)
  if epoch%10==0:
    print(f'Epoch: {epoch + 1} , Loss: {avg_loss}')


Epoch: 1 , Loss: 0.46734406948586305
Epoch: 11 , Loss: 0.15631249870546163
Epoch: 21 , Loss: 0.10158562458663557
Epoch: 31 , Loss: 0.07363534550842209
Epoch: 41 , Loss: 0.058758640388065636
Epoch: 51 , Loss: 0.05366493678104598
Epoch: 61 , Loss: 0.04824248360656202
Epoch: 71 , Loss: 0.046408210881481254
Epoch: 81 , Loss: 0.04638439540650385
Epoch: 91 , Loss: 0.04294805972146181


In [23]:
# evaluation on test data
total = 0
correct = 0

with torch.no_grad():

  for batch_features, batch_labels in train_loader:

    # move data to gpu
    batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)

    outputs = model(batch_features)

    _, predicted = torch.max(outputs, 1)

    total = total + batch_labels.shape[0]

    correct = correct + (predicted == batch_labels).sum().item()

print(correct/total)

0.9857916666666666


In [None]:
class Model(nn.Module):
  def __init__(self,inps,outs, num_layers, num_neurons,dropout_rate):
    super().__init__()
    layers=[]
    for i in range(num_layers):
      layers.append(nn.Linear(inps,num_neurons))
      layers.append(nn.BatchNorm1d(num_neurons))
      layers.append(nn.ReLU())
      layers.append(nn.Dropout(dropout_rate))
      inps=num_neurons
    layers.append(nn.Linear(num_neurons,outs))
    self.everything=nn.Sequential(*layers)
  def forward(self,x):
    return self.everything(x)

In [None]:
def objective(trial):

  num_layers=trial.suggest_int('num_layers',1,5)
  num_neurons=trial.suggest_int('num_neurons',64,321,step=64)
  epochs=trial.suggest_int('epochs',10,50,step=10)
  lr=trial.suggest_float('lr',1e-5,1e-1,log=True)
  dropout_rate=trial.suggest_float('dropout_rate',0.1,0.5,step=0.1)
  batch_size=trial.suggest_categorical('batch_size',[32,64,96,128])
  optim_naam=trial.suggest_categorical('optim_naam',['Adam','SGD','RMSProp'])
  wd=trial.suggest_float('wd',1e-5,1e-3,log=True)

  train=DataLoader(traind,batch_size,True,pin_memory=True)
  test=DataLoader(testd,batch_size,False,pin_memory=True)

  model=Model(784,10,num_layers,num_neurons,dropout_rate)
  model=model.to(device)

  if optim_naam=='Adam':
    optimizer=optim.Adam(model.parameters(),lr=lr,weight_decay=wd)
  elif optim_naam=='SGD':
    optimizer=optim.SGD(model.parameters(),lr=lr,weight_decay=wd)
  else:
    optimizer=optim.RMSprop(model.parameters(),lr=lr,weight_decay=wd)
  criterion=nn.CrossEntropyLoss()

  for epoch in range(epochs):

    for xt,yt in train:
      xt,yt=xt.to(device),yt.to(device)
      y_pred=model(xt)
      optimizer.zero_grad()
      loss=criterion(y_pred,yt)
      loss.backward()
      optimizer.step()

  correct=0
  model.eval()
  with torch.no_grad():
    for xt,yt in test:
      xt,yt=xt.to(device),yt.to(device)
      y_pred=model(xt)
      _,y_pred=torch.max(y_pred,dim=1)
      correct+=(y_pred==yt).sum().item()
  accuracy=correct/12000
  return accuracy

In [None]:
!pip install optuna
import optuna
study=optuna.create_study(direction='maximize')



[I 2025-08-23 08:00:54,175] A new study created in memory with name: no-name-826423e7-9e1c-4ef5-8224-70e168e6bb23


In [None]:
study.optimize(objective,n_trials=10)

[I 2025-08-23 08:01:33,516] Trial 0 finished with value: 0.7516666666666667 and parameters: {'num_layers': 2, 'num_neurons': 128, 'epochs': 20, 'lr': 0.00013044617366798854, 'dropout_rate': 0.4, 'batch_size': 64, 'optim_naam': 'SGD', 'wd': 0.0003440797123222834}. Best is trial 0 with value: 0.7516666666666667.
[I 2025-08-23 08:02:10,425] Trial 1 finished with value: 0.8905 and parameters: {'num_layers': 3, 'num_neurons': 128, 'epochs': 30, 'lr': 0.00023601659632454958, 'dropout_rate': 0.1, 'batch_size': 128, 'optim_naam': 'RMSProp', 'wd': 0.00016846400781229278}. Best is trial 1 with value: 0.8905.
[I 2025-08-23 08:02:47,507] Trial 2 finished with value: 0.78225 and parameters: {'num_layers': 4, 'num_neurons': 128, 'epochs': 20, 'lr': 0.05391696263695694, 'dropout_rate': 0.1, 'batch_size': 96, 'optim_naam': 'Adam', 'wd': 0.00023688779853168522}. Best is trial 1 with value: 0.8905.
[I 2025-08-23 08:03:45,092] Trial 3 finished with value: 0.6708333333333333 and parameters: {'num_layers':

In [None]:
study.best_params

{'num_layers': 3,
 'num_neurons': 128,
 'epochs': 30,
 'lr': 0.00023601659632454958,
 'dropout_rate': 0.1,
 'batch_size': 128,
 'optim_naam': 'RMSProp',
 'wd': 0.00016846400781229278}