In [35]:
import torch
from torch import nn
import torchvision 
from torchvision import datasets 
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor 
import matplotlib.pyplot as plt 

train_data = datasets.FashionMNIST(root="data",download=True,train=True,transform=ToTensor(),target_transform=None)
test_data = datasets.FashionMNIST(root="data",download=True,train=False,transform=ToTensor(),target_transform=None)
train_dataloader = DataLoader(dataset=train_data,shuffle=True,batch_size=32)
test_dataloader = DataLoader(dataset=test_data,shuffle=True,batch_size=32)

In [36]:
batch_size = 32

In [37]:
train_features_batch,train_labels_batch = next(iter(train_dataloader))

In [38]:
class_names = train_data.classes
class_names 

['T-shirt/top',
 'Trouser',
 'Pullover',
 'Dress',
 'Coat',
 'Sandal',
 'Shirt',
 'Sneaker',
 'Bag',
 'Ankle boot']

In [39]:
torch.manual_seed(42)
rand_idx = torch.randint(0,len(train_features_batch),size=[1]).item()
img,label = train_features_batch[rand_idx], train_labels_batch[rand_idx]


In [40]:
class CNNClf(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool2d((2, 2), stride=2),
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool2d((2, 2), stride=3),
            nn.Conv2d(in_channels=128, out_channels=64, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool2d((2, 2), stride=2)
        )
        # Use adaptive average pooling to adapt to variable input size
        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.flatten = nn.Flatten()
        self.clf = nn.Sequential(
            nn.Linear(64, 20, bias=True),
            nn.ReLU(),
            nn.Linear(20, 10, bias=True)
        )
    
    def forward(self, x):
        x = self.net(x)
        x = self.avg_pool(x)  # Apply adaptive average pooling
        x = self.flatten(x)
        x = self.clf(x)
        return x


In [41]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device 

'cuda'

In [42]:
model = CNNClf()
model.to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(),lr = 0.001)

In [43]:
def accuracy_fn(y_true, y_pred):
    correct = torch.eq(y_true, y_pred).sum().item()
    acc = (correct / len(y_pred)) * 100
    return acc


In [44]:
torch.manual_seed(42)
def train_step(model: torch.nn.Module,
               data_loader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               accuracy_fn,
               device: torch.device = device):
    train_loss,train_acc = 0,0
    model.to(device)
    for batch,(X,y) in enumerate(data_loader):
        X,y = X.to(device),y.to(device)
        y_pred = model(X)
        loss = loss_fn(y_pred,y)
        train_acc += accuracy_fn(y,y_pred.argmax(dim=1))
        train_loss += loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    train_loss /= len(data_loader)
    train_acc /= len(data_loader)
    print(f"Train loss : {train_loss:.5f} Train accuracy : {train_acc:.2f}%")


def test_step(model: torch.nn.Module,
               data_loader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               accuracy_fn,
               device: torch.device = device):
    test_loss,test_acc = 0,0
    model.eval()
    model.to(device)
    with torch.inference_mode():
        for X,y in data_loader:
            X,y = X.to(device),y.to(device)
            y_pred = model(X)
            test_loss += loss_fn(y_pred,y)
            test_acc += accuracy_fn(y,y_pred.argmax(dim=1))
        test_acc /= len(data_loader)
        test_loss /= len(data_loader)
        print(f"Test loss: {test_loss:.5f} | Test accuracy: {test_acc:.2f}%\n")
            
            
    

In [45]:
from timeit import default_timer as timer 
def print_train_time(start: float, end: float, device: torch.device = None):
    total_time = end - start
    print(f"Train time on {device}: {total_time:.3f} seconds")
    return total_time

In [46]:
torch.manual_seed(42)
from tqdm import tqdm 
# Measure time
from timeit import default_timer as timer
train_time_start_on_gpu = timer()

epochs = 3
for epoch in tqdm(range(epochs)):
    print(f"Epoch: {epoch}\n---------")
    train_step(data_loader=train_dataloader, 
        model=model, 
        loss_fn=loss_fn,
        optimizer=optimizer,
        accuracy_fn=accuracy_fn
    )
    test_step(data_loader=test_dataloader,
        model=model,
        loss_fn=loss_fn,
              optimizer=optimizer,
        accuracy_fn=accuracy_fn
    )

train_time_end_on_gpu = timer()
total_train_time_model_1 = print_train_time(start=train_time_start_on_gpu,
                                            end=train_time_end_on_gpu,
                                            device=device)

  0%|                                                     | 0/3 [00:00<?, ?it/s]

Epoch: 0
---------
Train loss : 0.53622 Train accuracy : 79.90%


 33%|███████████████                              | 1/3 [00:11<00:23, 11.83s/it]

Test loss: 0.37986 | Test accuracy: 86.10%

Epoch: 1
---------
Train loss : 0.33887 Train accuracy : 87.59%


 67%|██████████████████████████████               | 2/3 [00:24<00:12, 12.33s/it]

Test loss: 0.33261 | Test accuracy: 88.07%

Epoch: 2
---------
Train loss : 0.28584 Train accuracy : 89.61%


100%|█████████████████████████████████████████████| 3/3 [00:36<00:00, 12.14s/it]

Test loss: 0.30943 | Test accuracy: 89.32%

Train time on cuda: 36.430 seconds





In [47]:
model.state_dict()

OrderedDict([('net.0.weight',
              tensor([[[[ 1.6564e-01, -1.4651e-01,  2.4010e-01],
                        [-1.3835e-01,  1.0952e-02, -1.8570e-01],
                        [ 6.7988e-02,  2.2424e-01, -2.6230e-01]]],
              
              
                      [[[ 2.0570e-01,  4.5415e-02,  2.5758e-01],
                        [-2.5413e-02,  1.0164e-01, -2.2402e-02],
                        [ 2.0714e-01,  9.3332e-03, -1.0949e-01]]],
              
              
                      [[[ 2.0417e-01, -1.8298e-01, -2.3449e-02],
                        [-5.2343e-02,  3.6023e-01, -2.6437e-01],
                        [-1.7735e-01, -5.4305e-02, -2.6146e-01]]],
              
              
                      [[[-6.4905e-02, -3.1969e-01,  3.6601e-01],
                        [-4.9772e-01,  3.2273e-01,  6.8622e-02],
                        [-2.5455e-01,  2.4262e-01, -3.1649e-02]]],
              
              
                      [[[ 2.5433e-01, -2.7082e-02, -1.6530e-01

In [48]:
for param_tensor in model.state_dict().keys():
    print(param_tensor,"\t",model.state_dict()[param_tensor].size())
print()

net.0.weight 	 torch.Size([64, 1, 3, 3])
net.0.bias 	 torch.Size([64])
net.3.weight 	 torch.Size([128, 64, 3, 3])
net.3.bias 	 torch.Size([128])
net.6.weight 	 torch.Size([64, 128, 3, 3])
net.6.bias 	 torch.Size([64])
clf.0.weight 	 torch.Size([20, 64])
clf.0.bias 	 torch.Size([20])
clf.2.weight 	 torch.Size([10, 20])
clf.2.bias 	 torch.Size([10])



In [50]:
print("optimizer state dict")
for var_name in optimizer.state_dict():
    print(var_name,'\t',optimizer.state_dict()[var_name])

optimizer state dict
state 	 {0: {'step': tensor(5625.), 'exp_avg': tensor([[[[ 2.6159e-03,  3.5027e-03,  1.6212e-03],
          [ 1.8461e-03,  1.5831e-03,  8.7534e-04],
          [ 6.6792e-04,  8.3154e-05, -5.8122e-04]]],


        [[[-3.7389e-04,  5.8918e-04, -4.8721e-04],
          [-1.7275e-03, -2.5369e-03, -3.9789e-03],
          [-3.0185e-03, -2.4231e-03, -1.3957e-03]]],


        [[[ 1.7556e-03,  6.1058e-04, -2.9438e-04],
          [ 1.1571e-03,  1.0625e-03, -6.3093e-04],
          [ 1.6813e-03,  1.7566e-03,  2.3782e-04]]],


        [[[ 1.9641e-03,  3.7030e-03,  2.8791e-03],
          [ 3.1373e-03,  3.6420e-03,  3.1853e-03],
          [ 2.2550e-03,  2.6642e-03,  3.6071e-03]]],


        [[[-3.1996e-03, -2.8160e-03, -7.5540e-04],
          [-2.5073e-03, -2.1229e-03, -1.3988e-03],
          [-2.4116e-03, -1.2868e-03, -1.7610e-03]]],


        [[[-4.4312e-03, -3.2815e-03, -5.4608e-04],
          [-2.9897e-03, -6.1114e-04,  1.0200e-03],
          [-2.0658e-03,  3.6865e-04,  2.3849e

In [51]:
EPOCHS = 2 
for epochs in range(EPOCHS):
    print(f"EPOCH {epoch + 1}")

EPOCH 3
EPOCH 3


In [53]:
torch.save(model,"./ModelFiles/model.pt")

In [54]:
model = torch.load("./ModelFiles/model.pt")
model.to(device)

CNNClf(
  (net): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=(2, 2), stride=3, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1))
    (7): ReLU()
    (8): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avg_pool): AdaptiveAvgPool2d(output_size=(1, 1))
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (clf): Sequential(
    (0): Linear(in_features=64, out_features=20, bias=True)
    (1): ReLU()
    (2): Linear(in_features=20, out_features=10, bias=True)
  )
)

In [56]:
import PIL.Image
from torch.utils.data import Dataset
import glob
from torchvision.models import AlexNet_Weights
model = torch.hub.load('pytorch/vision:v0.10.0',model='alexnet',weights=AlexNet_Weights.DEFAULT)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(),lr=0.001)


Downloading: "https://github.com/pytorch/vision/zipball/v0.10.0" to /home/vi-b-05/.cache/torch/hub/v0.10.0.zip
Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to /home/vi-b-05/.cache/torch/hub/checkpoints/alexnet-owt-7be5be79.pth
100%|████████████████████████████████████████| 233M/233M [00:05<00:00, 48.0MB/s]


In [57]:
print(f"Model state dict\n{model.state_dict()}")
for param_tensor in model.state_dict().keys():
    print(param_tensor,"\t",model.state_dict()[param_tensor].size())
print()

print("Optimizer state dict")
for var_name in optimizer.state_dict():
    print(var_name,'\t',optimizer.state_dict()[var_name])

EPOCHS = 2 
for epoch in range(EPOCHS):
    print("EPOCH {}".format(epoch + 1))

Model state dict
OrderedDict([('features.0.weight', tensor([[[[ 1.1864e-01,  9.4069e-02,  9.5435e-02,  ...,  5.5822e-02,
            2.1575e-02,  4.9963e-02],
          [ 7.4882e-02,  3.8940e-02,  5.2979e-02,  ...,  2.5709e-02,
           -1.1299e-02,  4.1590e-03],
          [ 7.5425e-02,  3.8779e-02,  5.4930e-02,  ...,  4.3596e-02,
            1.0225e-02,  1.3251e-02],
          ...,
          [ 9.3155e-02,  1.0374e-01,  6.7547e-02,  ..., -2.0277e-01,
           -1.2839e-01, -1.1220e-01],
          [ 4.3544e-02,  6.4916e-02,  3.6164e-02,  ..., -2.0248e-01,
           -1.1376e-01, -1.0719e-01],
          [ 4.7369e-02,  6.2543e-02,  2.4758e-02,  ..., -1.1844e-01,
           -9.5567e-02, -8.3890e-02]],

         [[-7.2634e-02, -5.7996e-02, -8.0661e-02,  ..., -6.0304e-04,
           -2.5309e-02,  2.5471e-02],
          [-6.9042e-02, -6.7562e-02, -7.6367e-02,  ..., -3.9616e-03,
           -3.0402e-02,  1.0477e-02],
          [-9.9517e-02, -8.5592e-02, -1.0521e-01,  ..., -2.6587e-02,
      