In [2]:
import torch


In [3]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline 
import torchvision
import torchvision.transforms.v2 as T
from torch.utils.data import DataLoader , TensorDataset
import torch.nn as nn
import torchmetrics
import torch.nn.functional as F

In [25]:
import optuna

In [4]:
toTensor = T.Compose([T.ToImage() , T.ToDtype(torch.float32 , scale=True)])

train_and_valid_data = torchvision.datasets.FashionMNIST(
    root="datasets" , train=True , download=True , transform=toTensor
)

test_data = torchvision.datasets.FashionMNIST(
    root="datasets" , train=False , download=True , transform=toTensor
)

torch.manual_seed(42)
train_data , valid_data = torch.utils.data.random_split(
    train_and_valid_data , [55_000 , 5_000]
)

In [35]:
train_loader = DataLoader(train_data , shuffle=True , batch_size = 512 , pin_memory=True , num_workers=4 , prefetch_factor=2)
valid_loader = DataLoader(valid_data , shuffle=True , batch_size = 512 , pin_memory=True , num_workers=4 , prefetch_factor=2)
test_loader = DataLoader(test_data , shuffle=True , batch_size = 512 , pin_memory=True , num_workers=4 , prefetch_factor=2)




In [6]:
x_sample , y_sample = train_data[0]

In [7]:
y_sample

9

In [8]:
x_sample.shape

torch.Size([1, 28, 28])

In [9]:
x_sample.dtype

torch.float32

In [10]:
train_and_valid_data.classes[y_sample]

'Ankle boot'

In [11]:
for i in range(1 , 10):
    print(train_and_valid_data.classes[i] , end=" ")

Trouser Pullover Dress Coat Sandal Shirt Sneaker Bag Ankle boot 

In [12]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(device)

cuda


In [13]:
n_epoch = 20

In [36]:
def eval(model , metric , val_loader):
    model.eval()
    metric.reset()
    
    with torch.no_grad():
        for x_batch , y_batch in val_loader:
            x_batch , y_batch = x_batch.to(device) , y_batch.to(device)
            
            y_pred = model(x_batch)
            
            metric.update(y_pred , y_batch)
            
    return metric.compute()





def train_eval(model , train_loader , criterion , optimizer ,
               val_loader , metric , n_epoch ):
    
    history = {"train_loss" : [] , "train_metric(accuracy)" : [] , "val_metric(accuracy)" : []}

    for epoch in range(n_epoch):
        model.train()
        metric.reset()
        total_loss = 0
        for x_batch , y_batch in train_loader:
            #data
            x_batch , y_batch = x_batch.to(device) , y_batch.to(device)
            
            #forward
            
            y_pred  = model(x_batch)
            
            #loss 
            
            loss = criterion(y_pred , y_batch)
            total_loss += loss.item()
            
            #optimizer
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            metric.update(y_pred , y_batch)
            
        loss_mean = total_loss/len(train_loader)
        
        history["train_loss"].append(loss_mean)
        history["train_metric(accuracy)"].append(metric.compute().item())
        
        history["val_metric(accuracy)"].append(eval(model , metric , val_loader).item())
        
        print(f"Epoch : {epoch + 1}/{n_epoch}, "
              f"Train Loss : {history['train_loss'][-1]}, "
              f"Train Metric ( Accuracy) : {history['train_metric(accuracy)'][-1]}, "
              f"Val Metric (accuracy) : {history['val_metric(accuracy)'][-1]}")
        
        
    return history
            

In [15]:
class ImageClassifier(nn.Module):
    def __init__(self , n_inputs , n_hidden1 , n_hidden2 , n_classes):
        super().__init__()
        self.mlp = nn.Sequential(
            nn.Flatten(),
            nn.Linear(n_inputs , n_hidden1) , 
            nn.ReLU(),
            nn.Linear(n_hidden1 , n_hidden2),
            nn.ReLU(),
            nn.Linear(n_hidden2 , n_classes)
        )
        
        
    def forward(self , x):
        return self.mlp(x)
    
torch.manual_seed(42)
model = ImageClassifier(n_inputs= 28 * 28 , n_hidden1=  300 , 
                        n_hidden2= 100 , n_classes= 10).to(device)

learning_rate = 0.1

sentropy = nn.CrossEntropyLoss()

optimizer = torch.optim.SGD(model.parameters() , lr=learning_rate , momentum=0 )

accuracy_metric = torchmetrics.Accuracy(task='multiclass' , num_classes=10).to(device)

train_eval(model=model , 
           train_loader=train_loader,
           criterion=sentropy,
           optimizer=optimizer,
           val_loader=valid_loader,
           metric=accuracy_metric,
           n_epoch=n_epoch)
    

Epoch : 1/20, Train Loss : 1.406682523312392, Train Metric ( Accuracy) : 0.5595818161964417, Val Metric (accuracy) : 0.6424000263214111
Epoch : 2/20, Train Loss : 0.7617398550113043, Train Metric ( Accuracy) : 0.7130545377731323, Val Metric (accuracy) : 0.7508000135421753
Epoch : 3/20, Train Loss : 0.6311827169524299, Train Metric ( Accuracy) : 0.7703090906143188, Val Metric (accuracy) : 0.7767999768257141
Epoch : 4/20, Train Loss : 0.5653285764985614, Train Metric ( Accuracy) : 0.7989272475242615, Val Metric (accuracy) : 0.8082000017166138
Epoch : 5/20, Train Loss : 0.5227762217874881, Train Metric ( Accuracy) : 0.8143091201782227, Val Metric (accuracy) : 0.8184000253677368
Epoch : 6/20, Train Loss : 0.49167994023473177, Train Metric ( Accuracy) : 0.826090931892395, Val Metric (accuracy) : 0.8270000219345093
Epoch : 7/20, Train Loss : 0.4737724717016573, Train Metric ( Accuracy) : 0.8313272595405579, Val Metric (accuracy) : 0.8277999758720398
Epoch : 8/20, Train Loss : 0.4532129082966

In [16]:
model.eval()

x_new , y_new = next(iter(valid_loader))

print(len(valid_loader))
print(x_new.shape)
x_new = x_new[:3].to(device)
print(x_new.shape)

10
torch.Size([512, 1, 28, 28])
torch.Size([3, 1, 28, 28])


In [17]:
with torch.no_grad():
    y_pred_logits = model(x_new)
    
    
    
print(y_pred_logits)

y_pred = y_pred_logits.argmax(dim=1)

print(y_pred)

tensor([[-1.5756, -2.8279, -2.6576, -2.2836, -3.9924,  2.8587, -3.0943,  8.4436,
          1.9728,  5.3338],
        [ 3.2444, -5.2624,  2.7566, -2.0151,  2.3112, -0.4453,  3.1874, -3.2803,
          9.8131, -6.7189],
        [ 4.5906, -1.1313,  0.8625,  1.3348, -1.0837, -1.0367,  4.4671, -3.7178,
         -0.4634, -2.7325]], device='cuda:0')
tensor([7, 8, 0], device='cuda:0')


In [20]:
y_proba = F.softmax(y_pred_logits , dim=1)
y_proba.round(decimals = 3)

tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0040, 0.0000, 0.9520, 0.0010,
         0.0420],
        [0.0010, 0.0000, 0.0010, 0.0000, 0.0010, 0.0000, 0.0010, 0.0000, 0.9960,
         0.0000],
        [0.5090, 0.0020, 0.0120, 0.0200, 0.0020, 0.0020, 0.4500, 0.0000, 0.0030,
         0.0000]], device='cuda:0')

In [23]:
# getting the top four logits and proba

y_top4_logits , y_top4_indices = torch.topk(y_pred_logits , k=4 , dim=1)
y_top4_proba = F.softmax(y_top4_logits , dim=1)
y_top4_proba.round(decimals = 3)

tensor([[0.9520, 0.0420, 0.0040, 0.0010],
        [0.9960, 0.0010, 0.0010, 0.0010],
        [0.5140, 0.4540, 0.0200, 0.0120]], device='cuda:0')

In [24]:
y_top4_indices

tensor([[7, 9, 5, 8],
        [8, 0, 6, 2],
        [0, 6, 3, 2]], device='cuda:0')

In [26]:
#fine tuning the model using optuna

In [37]:
def objective(trial):
    learning_rate = trial.suggest_float("learning_rate" , 1e-5 , 1e-1 , log=True)
    n_hidden = trial.suggest_int("n_hidden" , 20 , 300)
    model = ImageClassifier(n_inputs= 1 * 28 * 28 , n_hidden1= n_hidden , n_hidden2=n_hidden,
                            n_classes=10 ).to(device)
    
    optimizer = torch.optim.SGD(model.parameters() , momentum=0 , lr = learning_rate)
    
    
    xentropy = nn.CrossEntropyLoss()
    
    accuracy_metric = torchmetrics.Accuracy(task = "multiclass" , num_classes=10).to(device)
    
    best_val_score = 0.0
    
    for epoch in range(n_epoch):
        history = train_eval(model , train_loader , xentropy , optimizer , 
                             valid_loader , accuracy_metric , n_epoch)
        
        val_score = max(history['val_metric(accuracy)'])
        
        if val_score > best_val_score:
            best_val_score = val_score
            
        trial.report(val_score , epoch)
        
        if trial.should_prune():
            raise optuna.TrialPruned()
        
    
    return best_val_score
        
        
    
        
    

In [38]:
torch.manual_seed(42)
sampler = optuna.samplers.TPESampler(seed = 42)
study = optuna.create_study(direction="maximize" , sampler=sampler)
study.optimize(objective , n_trials=5)


[32m[I 2026-01-27 17:32:17,350][0m A new study created in memory with name: no-name-c0f0b504-fae6-4be3-944f-039b27dab279[0m


Epoch : 1/20, Train Loss : 2.306647128529019, Train Metric ( Accuracy) : 0.09843636304140091, Val Metric (accuracy) : 0.10360000282526016
Epoch : 2/20, Train Loss : 2.3025815111619456, Train Metric ( Accuracy) : 0.1111818179488182, Val Metric (accuracy) : 0.11620000004768372
Epoch : 3/20, Train Loss : 2.2984959483146667, Train Metric ( Accuracy) : 0.12303636223077774, Val Metric (accuracy) : 0.12540000677108765
Epoch : 4/20, Train Loss : 2.2944650186432733, Train Metric ( Accuracy) : 0.13207273185253143, Val Metric (accuracy) : 0.13519999384880066
Epoch : 5/20, Train Loss : 2.2904361574738115, Train Metric ( Accuracy) : 0.13752727210521698, Val Metric (accuracy) : 0.1404000073671341
Epoch : 6/20, Train Loss : 2.28646292730614, Train Metric ( Accuracy) : 0.1422181874513626, Val Metric (accuracy) : 0.14319999516010284
Epoch : 7/20, Train Loss : 2.2825059162245855, Train Metric ( Accuracy) : 0.14616364240646362, Val Metric (accuracy) : 0.14339999854564667
Epoch : 8/20, Train Loss : 2.2785

[32m[I 2026-01-27 17:45:47,516][0m Trial 0 finished with value: 0.739799976348877 and parameters: {'learning_rate': 0.00031489116479568613, 'n_hidden': 287}. Best is trial 0 with value: 0.739799976348877.[0m


Epoch : 20/20, Train Loss : 0.6960452033413781, Train Metric ( Accuracy) : 0.7530727386474609, Val Metric (accuracy) : 0.739799976348877
Epoch : 1/20, Train Loss : 2.2618761923578052, Train Metric ( Accuracy) : 0.19089090824127197, Val Metric (accuracy) : 0.3192000091075897
Epoch : 2/20, Train Loss : 2.1320836190824157, Train Metric ( Accuracy) : 0.4427454471588135, Val Metric (accuracy) : 0.4805999994277954
Epoch : 3/20, Train Loss : 1.8703642586867015, Train Metric ( Accuracy) : 0.5469090938568115, Val Metric (accuracy) : 0.5996000170707703
Epoch : 4/20, Train Loss : 1.5340277541566778, Train Metric ( Accuracy) : 0.6189454793930054, Val Metric (accuracy) : 0.6046000123023987
Epoch : 5/20, Train Loss : 1.2788701245078333, Train Metric ( Accuracy) : 0.6247090697288513, Val Metric (accuracy) : 0.6223999857902527
Epoch : 6/20, Train Loss : 1.1152504947450426, Train Metric ( Accuracy) : 0.6395636200904846, Val Metric (accuracy) : 0.6417999863624573
Epoch : 7/20, Train Loss : 1.00631904105

[32m[I 2026-01-27 17:58:32,035][0m Trial 1 finished with value: 0.8841999769210815 and parameters: {'learning_rate': 0.008471801418819975, 'n_hidden': 188}. Best is trial 1 with value: 0.8841999769210815.[0m


Epoch : 20/20, Train Loss : 0.2739830908400041, Train Metric ( Accuracy) : 0.9043272733688354, Val Metric (accuracy) : 0.8733999729156494
Epoch : 1/20, Train Loss : 2.3076454047803527, Train Metric ( Accuracy) : 0.09985454380512238, Val Metric (accuracy) : 0.10199999809265137
Epoch : 2/20, Train Loss : 2.307455270378678, Train Metric ( Accuracy) : 0.09985454380512238, Val Metric (accuracy) : 0.10199999809265137
Epoch : 3/20, Train Loss : 2.3071480415485524, Train Metric ( Accuracy) : 0.09985454380512238, Val Metric (accuracy) : 0.10199999809265137
Epoch : 4/20, Train Loss : 2.3068763613700867, Train Metric ( Accuracy) : 0.09985454380512238, Val Metric (accuracy) : 0.10199999809265137
Epoch : 5/20, Train Loss : 2.3066217568185596, Train Metric ( Accuracy) : 0.09985454380512238, Val Metric (accuracy) : 0.10199999809265137
Epoch : 6/20, Train Loss : 2.3063559156877025, Train Metric ( Accuracy) : 0.09983636438846588, Val Metric (accuracy) : 0.10199999809265137
Epoch : 7/20, Train Loss : 2.

[32m[I 2026-01-27 18:11:15,231][0m Trial 2 finished with value: 0.2460000067949295 and parameters: {'learning_rate': 4.207988669606632e-05, 'n_hidden': 63}. Best is trial 1 with value: 0.8841999769210815.[0m


Epoch : 20/20, Train Loss : 2.1126702472015664, Train Metric ( Accuracy) : 0.23985454440116882, Val Metric (accuracy) : 0.2460000067949295
Epoch : 1/20, Train Loss : 2.3022329145007663, Train Metric ( Accuracy) : 0.10387272387742996, Val Metric (accuracy) : 0.10419999808073044
Epoch : 2/20, Train Loss : 2.302014156624123, Train Metric ( Accuracy) : 0.10418181866407394, Val Metric (accuracy) : 0.10480000078678131
Epoch : 3/20, Train Loss : 2.3017907010184393, Train Metric ( Accuracy) : 0.10447272658348083, Val Metric (accuracy) : 0.10480000078678131
Epoch : 4/20, Train Loss : 2.301570815068704, Train Metric ( Accuracy) : 0.10485454648733139, Val Metric (accuracy) : 0.10499999672174454
Epoch : 5/20, Train Loss : 2.3013732764456005, Train Metric ( Accuracy) : 0.1051090881228447, Val Metric (accuracy) : 0.10580000281333923
Epoch : 6/20, Train Loss : 2.3011996812290616, Train Metric ( Accuracy) : 0.10541818290948868, Val Metric (accuracy) : 0.10599999874830246
Epoch : 7/20, Train Loss : 2.3

Exception in thread Thread-3355 (_pin_memory_loop):
Traceback (most recent call last):
  File "/usr/lib/python3.12/threading.py", line 1073, in _bootstrap_inner
[33m[W 2026-01-27 18:18:46,445][0m Trial 3 failed with parameters: {'learning_rate': 1.7073967431528103e-05, 'n_hidden': 263} because of the following error: KeyboardInterrupt().[0m
Traceback (most recent call last):
  File "/home/ateeksh/mlrepos/mllibraries/lib/python3.12/site-packages/optuna/study/_optimize.py", line 206, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "/tmp/ipykernel_49782/445158313.py", line 17, in objective
    history = train_eval(model , train_loader , xentropy , optimizer ,
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipykernel_49782/316987263.py", line 28, in train_eval
    for x_batch , y_batch in train_loader:
  File "/home/ateeksh/mlrepos/mllibraries/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 73

KeyboardInterrupt: 