In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader

In [None]:
# Fix PyTorch's global RNG so weight init and DataLoader shuffling repeat across runs.
torch.manual_seed(seed=42)

<torch._C.Generator at 0x7ff057e678f0>

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')
print(f'Using Device: {device}')

Using Device: cuda


In [None]:
# Load the two CSV splits into DataFrames (paths are Colab-style absolute paths).
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ('/content/fashion-mnist_train.csv', '/content/fashion-mnist_test.csv')
)

In [None]:
# Column 0 is taken as the label; every remaining column is taken as a feature.
X_train, y_train = train.iloc[:, 1:].values, train.iloc[:, 0].values
X_test, y_test = test.iloc[:, 1:].values, test.iloc[:, 0].values

In [None]:
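# Disabled: the train and test splits are loaded from separate CSV files above,
# so no train_test_split is performed here.
# X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=42)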

In [None]:
# Divide both splits by 255.0 (rebinds the names, so re-running this cell scales again).
X_train, X_test = X_train / 255.0, X_test / 255.0

# Artificial Neural Network

In [None]:
class CustomDataset(Dataset):
  """In-memory dataset pairing feature rows with integer class labels.

  Both inputs are copied into tensors once, at construction time:
  features as float32 and labels as int64 (``torch.long``).
  """

  def __init__(self, features, labels):
    self.features, self.labels = (
        torch.tensor(features, dtype=torch.float32),
        torch.tensor(labels, dtype=torch.long),
    )

  def __len__(self):
    # Sample count is the size of the leading feature dimension.
    return self.features.shape[0]

  def __getitem__(self, index):
    sample, target = self.features[index], self.labels[index]
    return sample, target

In [None]:
# Wrap each split in the tensor-backed dataset defined above.
train_dataset, test_dataset = (
    CustomDataset(X_train, y_train),
    CustomDataset(X_test, y_test),
)

In [None]:
# Sanity check: (number of training samples, number of test samples).
tuple(map(len, (train_dataset, test_dataset)))

(60000, 10000)

In [None]:
# Define NN class
class MyNN(nn.Module):
  """Fully connected classifier with a configurable stack of hidden blocks.

  Each hidden block is Linear -> BatchNorm1d -> ReLU -> Dropout. The stack is
  followed by one Linear layer that emits ``output_dim`` raw logits.

  Args:
    input_dim: number of features per input sample.
    output_dim: number of logits produced per sample.
    num_hidden_layers: how many hidden blocks to stack. With 0 the model is a
      single Linear layer from ``input_dim`` to ``output_dim``.
    neurons_per_layer: width of every hidden Linear layer.
    dropout_rate: drop probability passed to each ``nn.Dropout``.
  """

  def __init__(self, input_dim, output_dim, num_hidden_layers, neurons_per_layer, dropout_rate):

    super().__init__()

    layers = []

    # Width of the tensor entering the next Linear layer.
    in_features = input_dim

    for _ in range(num_hidden_layers):
      layers.extend([
          nn.Linear(in_features, neurons_per_layer),
          nn.BatchNorm1d(neurons_per_layer),
          nn.ReLU(),
          nn.Dropout(dropout_rate),
      ])
      in_features = neurons_per_layer

    # Use the tracked width (not neurons_per_layer) so the output layer is
    # still correctly sized when there are no hidden blocks at all.
    layers.append(nn.Linear(in_features, output_dim))

    self.model = nn.Sequential(*layers)

  def forward(self, X):
    """Return the raw class logits for the batch ``X``."""
    return self.model(X)


In [None]:
def objective(trial):
  """Optuna objective: train one ``MyNN`` configuration and return its accuracy.

  Hyperparameters (depth, width, epochs, learning rate, weight decay, dropout,
  batch size, optimizer) are sampled from ``trial``. The model is trained on
  the module-level ``train_dataset`` and scored on the module-level
  ``test_dataset``, both on the module-level ``device``.

  NOTE(review): the returned score is computed on ``test_dataset``, so the
  hyperparameters are selected against the test set. Consider carving a
  validation split out of the training data for tuning.

  Args:
    trial: the Optuna trial used to sample hyperparameters.

  Returns:
    Classification accuracy on ``test_dataset`` as a percentage (0-100).
  """

  # next hyperparameter values from the search space
  num_hidden_layers = trial.suggest_int('num_hidden_layers', 1, 5)
  neurons_per_layer = trial.suggest_int('neurons_per_layer', 8, 128, step=8)
  epochs = trial.suggest_int('epochs', 10, 50, step=10)
  learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)
  weight_decay = trial.suggest_float('weight_decay', 1e-5, 1e-3, log=True)
  dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5, step=0.1)
  batch_size = trial.suggest_categorical('batch_size', [16,32,64,128])
  optimizer_name = trial.suggest_categorical('optimizer', ['Adam', 'SGD', 'RMSprop'])

  train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)
  # Accuracy does not depend on sample order, so the evaluation loader is not shuffled.
  test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, pin_memory=True)

  # model initialization
  input_dim = 784
  output_dim = 10

  model = MyNN(input_dim, output_dim, num_hidden_layers, neurons_per_layer, dropout_rate)
  model.to(device)

  # optimizer selection (anything other than Adam/RMSprop falls back to SGD)
  criterion = nn.CrossEntropyLoss()
  optimizer_cls = {'Adam': optim.Adam, 'RMSprop': optim.RMSprop}.get(optimizer_name, optim.SGD)
  optimizer = optimizer_cls(model.parameters(), lr = learning_rate, weight_decay=weight_decay)

  # Make training mode explicit so dropout and batch-norm behave as intended.
  model.train()

  # training loop
  for epoch in range(epochs):

    for batch_features, batch_labels in train_loader:

      batch_features = batch_features.to(device)
      batch_labels = batch_labels.to(device)

      # forward pass
      outputs = model(batch_features)

      # loss
      loss = criterion(outputs, batch_labels)

      # back pass
      optimizer.zero_grad()
      loss.backward()

      # update grads
      optimizer.step()

  # set model for evaluation
  model.eval()

  # evaluation code
  total = 0
  correct = 0

  with torch.no_grad():

    for batch_features, batch_labels in test_loader:

      batch_features = batch_features.to(device)
      batch_labels = batch_labels.to(device)

      outputs = model(batch_features)

      # index of the largest logit is the predicted class
      _, predicted = torch.max(outputs, 1)

      total += batch_labels.shape[0]

      correct += (predicted == batch_labels).sum().item()

  return correct/total*100

In [None]:
# %pip installs into the running kernel's environment; the version is pinned for reproducibility.
%pip install -q optuna==4.7.0

Collecting optuna
  Downloading optuna-4.7.0-py3-none-any.whl.metadata (17 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.10.1-py3-none-any.whl.metadata (11 kB)
Downloading optuna-4.7.0-py3-none-any.whl (413 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/413.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m413.9/413.9 kB[0m [31m13.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.10.1-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, optuna
Successfully installed colorlog-6.10.1 optuna-4.7.0


In [None]:
import optuna

# Seed the sampler so the sequence of suggested hyperparameters is repeatable;
# an unseeded sampler proposes different trials on every run.
study = optuna.create_study(
    direction = 'maximize',
    sampler = optuna.samplers.TPESampler(seed=42),
)

[I 2026-01-30 14:05:57,565] A new study created in memory with name: no-name-eca450a3-03b0-4c91-8992-23c354dcd6c5


In [None]:
# Run 10 trials; each one trains a fresh model inside `objective`.
study.optimize(func=objective, n_trials=10)

[I 2026-01-30 14:07:05,194] Trial 0 finished with value: 41.22 and parameters: {'num_hidden_layers': 4, 'neurons_per_layer': 128, 'epochs': 10, 'learning_rate': 2.116938332483784e-05, 'weight_decay': 1.0608295313646721e-05, 'dropout_rate': 0.5, 'batch_size': 32, 'optimizer': 'SGD'}. Best is trial 0 with value: 41.22.
[I 2026-01-30 14:09:58,968] Trial 1 finished with value: 85.74000000000001 and parameters: {'num_hidden_layers': 3, 'neurons_per_layer': 24, 'epochs': 50, 'learning_rate': 0.0008574255473442948, 'weight_decay': 0.0008747936315415994, 'dropout_rate': 0.2, 'batch_size': 64, 'optimizer': 'SGD'}. Best is trial 1 with value: 85.74000000000001.
[I 2026-01-30 14:16:08,923] Trial 2 finished with value: 83.19 and parameters: {'num_hidden_layers': 4, 'neurons_per_layer': 56, 'epochs': 30, 'learning_rate': 0.010260501221891143, 'weight_decay': 3.2980953091156784e-05, 'dropout_rate': 0.1, 'batch_size': 16, 'optimizer': 'RMSprop'}. Best is trial 1 with value: 85.74000000000001.
[I 2026

# Convolutional Neural Network

In [None]:
class CustomDataset(Dataset):
  """In-memory image dataset for the CNN.

  Features are converted to a float32 tensor and reshaped to
  (N, 1, 28, 28); labels are stored as an int64 (``torch.long``) tensor.
  """

  def __init__(self, features, labels):
    flat_pixels = torch.tensor(features, dtype=torch.float32)
    # -1 lets the sample count be inferred; each sample becomes 1 x 28 x 28.
    self.features = flat_pixels.reshape(-1, 1, 28, 28)
    self.labels = torch.tensor(labels, dtype=torch.long)

  def __len__(self):
    return self.features.shape[0]

  def __getitem__(self, index):
    image, target = self.features[index], self.labels[index]
    return image, target

In [None]:
train_dataset = CustomDataset(X_train, y_train)
test_dataset = CustomDataset(X_test, y_test)

# Shuffle only the training batches; evaluation does not depend on sample
# order, so the test loader iterates in a fixed order.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, pin_memory=True)

In [None]:
from torch.nn.modules import MaxPool2d
class MyNN(nn.Module):
  """Small CNN classifier: two conv blocks followed by a three-layer MLP head.

  Each conv block is Conv2d(3x3, 'same' padding) -> BatchNorm2d -> ReLU ->
  MaxPool2d(2), so every block halves the spatial size. The first Linear layer
  expects 7*7*64 inputs, i.e. the head is sized for 28x28 images.

  Args:
    input_features: number of channels in the input images (1 for grayscale).
  """

  def __init__(self, input_features):
    super().__init__()
    self.features = nn.Sequential(
        # Honour the constructor argument instead of hard-coding one channel.
        nn.Conv2d(input_features, 32, kernel_size=3, padding='same'),
        nn.BatchNorm2d(32),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2),

        nn.Conv2d(32, 64, kernel_size=3, padding='same'),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2),
    )
    self.classifier = nn.Sequential(
        nn.Flatten(),
        nn.Linear(7*7*64, 128),
        nn.ReLU(),
        nn.Dropout(0.4),

        nn.Linear(128, 64),
        nn.ReLU(),
        nn.Dropout(0.4),

        nn.Linear(64, 10)
    )

  def forward(self, X):
    """Return the 10 raw class logits for each image in the batch ``X``."""
    X = self.features(X)
    X = self.classifier(X)
    return X

In [None]:
# Training hyperparameters for the CNN.
learning_rate, epochs = 0.001, 100

# Build the network and place its parameters on the selected device.
model = MyNN(1).to(device)

# Cross-entropy on raw logits, optimised with plain SGD plus weight decay.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=model.parameters(), lr=learning_rate, weight_decay=0.001)

In [None]:
# training loop

# Put the model in training mode explicitly: if the evaluation cell
# (model.eval()) has already run, re-running this cell would otherwise train
# with dropout disabled and batch-norm using its running statistics.
model.train()

for epoch in range(epochs):

  total_epoch_loss = 0

  for batch_features, batch_labels in train_loader:

    batch_features = batch_features.to(device)
    batch_labels = batch_labels.to(device)

    # forward pass
    outputs = model(batch_features)

    # loss
    loss = criterion(outputs, batch_labels)

    # back pass (clear stale gradients first)
    optimizer.zero_grad()
    loss.backward()

    # update weights
    optimizer.step()

    total_epoch_loss += loss.item()

  # mean per-batch loss over the epoch
  avg_loss = total_epoch_loss/len(train_loader)
  print(f'Epoch: {epoch+1}, Loss: {avg_loss}')



Epoch: 1, Loss: 1.5295624668121337
Epoch: 2, Loss: 0.8667861475308736
Epoch: 3, Loss: 0.7010966714064281
Epoch: 4, Loss: 0.6259633712848027
Epoch: 5, Loss: 0.5769741468588511
Epoch: 6, Loss: 0.5386706933895747
Epoch: 7, Loss: 0.5092511166731517
Epoch: 8, Loss: 0.48370834958950676
Epoch: 9, Loss: 0.46539886145591736
Epoch: 10, Loss: 0.4453888331055641
Epoch: 11, Loss: 0.4265836374004682
Epoch: 12, Loss: 0.4105974615573883
Epoch: 13, Loss: 0.3992057658195496
Epoch: 14, Loss: 0.3851772902488709
Epoch: 15, Loss: 0.3768309440414111
Epoch: 16, Loss: 0.369766180284818
Epoch: 17, Loss: 0.3557282606581847
Epoch: 18, Loss: 0.34772603200872737
Epoch: 19, Loss: 0.34096493636369707
Epoch: 20, Loss: 0.33548321678241094
Epoch: 21, Loss: 0.32839829950332644
Epoch: 22, Loss: 0.3229687779088815
Epoch: 23, Loss: 0.3178904294490814
Epoch: 24, Loss: 0.3126113896270593
Epoch: 25, Loss: 0.3041664656539758
Epoch: 26, Loss: 0.3036380447367827
Epoch: 27, Loss: 0.29830633973081905
Epoch: 28, Loss: 0.294272478729

In [None]:
# Switch the model to inference mode before scoring.
model.eval()

# Running tallies over the whole test loader.
correct = 0
total = 0

with torch.no_grad():
  for batch_features, batch_labels in test_loader:
    batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)

    outputs = model(batch_features)

    # Predicted class = position of the largest logit in each row.
    predicted = torch.max(outputs, dim=1).indices

    total += batch_labels.shape[0]
    correct += (predicted == batch_labels).sum().item()

print(f'Accuracy: {correct/total*100}')


Accuracy: 92.25999999999999


# Pre-trained VGG-16 Model


In [None]:
# transformations
from torchvision.transforms import transforms

# Preprocessing pipeline applied to every PIL image before it reaches VGG-16:
# resize, centre-crop to 224x224, convert to a tensor, then normalise per channel.
custom_transform = transforms.Compose(
    [
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [None]:
from PIL import Image
import numpy as np

class CustomDataset(Dataset):
  """Dataset that turns flat 784-value grayscale rows into transformed RGB images.

  Args:
    features: indexable collection of rows; each row is reshaped to 28x28.
      Rows may hold raw 0-255 intensities or floats already scaled to [0, 1].
    labels: indexable collection of integer class labels.
    transform: callable applied to the PIL image built from each row.
  """

  def __init__(self, features, labels, transform):
    self.features = features
    self.labels = labels
    self.transform = transform

    # Features may arrive already normalised to [0, 1]. Casting such values
    # straight to uint8 truncates nearly every pixel to 0, so remember the
    # factor that restores the 0-255 intensity range.
    values = np.asarray(features)
    is_unit_range = (
        values.size > 0
        and np.issubdtype(values.dtype, np.floating)
        and float(values.max()) <= 1.0
    )
    self._pixel_scale = 255.0 if is_unit_range else 1.0

  def __len__(self):
    return len(self.features)

  def _to_uint8(self, row):
    """Return ``row`` as a (28, 28) uint8 array on the 0-255 intensity scale."""
    image = np.asarray(row).reshape(28, 28)
    if self._pixel_scale != 1.0:
      # Round rather than truncate so x/255*255 recovers the original integer.
      image = np.rint(image * self._pixel_scale)
    return image.astype(np.uint8)

  def __getitem__(self, index):
    # 28x28 uint8 grayscale image
    image = self._to_uint8(self.features[index])
    # replicate the single channel three times -> (28, 28, 3)
    image = np.stack([image]*3, axis=-1)
    # convert array to PIL image
    image = Image.fromarray(image)
    # apply transformations
    image = self.transform(image)

    return image, torch.tensor(self.labels[index], dtype=torch.long)



In [None]:
train_dataset = CustomDataset(X_train, y_train, transform = custom_transform)
test_dataset = CustomDataset(X_test, y_test, transform = custom_transform)

# Shuffle only the training batches; evaluation does not depend on sample
# order, so the test loader iterates in a fixed order.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, pin_memory=True)

In [None]:
# fetch the pretrained model

import torchvision.models as models

# `pretrained=True` is deprecated in torchvision; request the same ImageNet
# checkpoint through the explicit weights enum instead.
vgg16 = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)



Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth


100%|██████████| 528M/528M [00:07<00:00, 71.6MB/s]


In [None]:
# Display the model's current classifier head (the cell's last expression is rendered).
vgg16.classifier

Sequential(
  (0): Linear(in_features=25088, out_features=4096, bias=True)
  (1): ReLU(inplace=True)
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=4096, out_features=4096, bias=True)
  (4): ReLU(inplace=True)
  (5): Dropout(p=0.5, inplace=False)
  (6): Linear(in_features=4096, out_features=1000, bias=True)
)

In [None]:
# Freeze the convolutional feature extractor: no gradients are tracked for its weights.
for frozen_param in vgg16.features.parameters():
  frozen_param.requires_grad_(False)

In [None]:
# Replace the classifier head with a smaller MLP ending in 10 output logits.
vgg16.classifier = nn.Sequential(
    nn.Linear(in_features=25088, out_features=1024),
    nn.ReLU(),
    nn.Dropout(p=0.5),
    nn.Linear(in_features=1024, out_features=512),
    nn.ReLU(),
    nn.Dropout(p=0.5),
    nn.Linear(in_features=512, out_features=10),
)

In [None]:
# Move the model (including the newly attached classifier head) to the selected device.
vgg16 = vgg16.to(device)

In [None]:
# Fine-tuning hyperparameters for the VGG-16 classifier head.
learning_rate, epochs = 0.0001, 10

In [None]:
# Cross-entropy on raw logits; only the classifier head's parameters are handed to Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=vgg16.classifier.parameters(), lr=learning_rate)

In [None]:
# training loop

# Put the model in training mode explicitly: if the evaluation cell
# (vgg16.eval()) has already run, re-running this cell would otherwise train
# with the classifier's dropout layers disabled.
vgg16.train()

for epoch in range(epochs):

  total_epoch_loss = 0

  for batch_features, batch_labels in train_loader:

    batch_features = batch_features.to(device)
    batch_labels = batch_labels.to(device)

    # forward pass
    outputs = vgg16(batch_features)

    # loss
    loss = criterion(outputs, batch_labels)

    # back pass (clear stale gradients first)
    optimizer.zero_grad()
    loss.backward()

    # update weights
    optimizer.step()

    total_epoch_loss += loss.item()

  # mean per-batch loss over the epoch
  avg_loss = total_epoch_loss/len(train_loader)
  print(f'Epoch: {epoch+1}, Loss: {avg_loss}')



In [None]:
# Switch the model to inference mode before scoring.
vgg16.eval()

# Running tallies over the whole test loader.
correct = 0
total = 0

with torch.no_grad():
  for batch_features, batch_labels in test_loader:
    batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)

    outputs = vgg16(batch_features)

    # Predicted class = position of the largest logit in each row.
    predicted = torch.max(outputs, dim=1).indices

    total += batch_labels.shape[0]
    correct += (predicted == batch_labels).sum().item()

print(f'Accuracy: {correct/total*100}')


Accuracy: 93.02
