In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from datasets import load_data
from model import model_summary,S8_Model_LN
import matplotlib.pyplot as plt
import numpy as np

import sys

# sys.path entries must be directories (or zip archives); a path that points at
# a .py file is never searched. Add the folder that holds util.py instead, and
# guard the append so re-running this cell does not pile up duplicates.
if 'S8' not in sys.path:
    sys.path.append('S8')
from util import test, train, plot_acc_loss

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

  _torch_pytree._register_pytree_node(


cuda


In [2]:
# load_data (imported from the project's datasets module) returns a pair that is
# unpacked here as the training loader and the test loader.
train_loader, test_loader = load_data()


Files already downloaded and verified
Files already downloaded and verified


In [31]:
class S8_Model_LN(nn.Module):
    """Small CNN for 3x32x32 inputs that applies LayerNorm after each conv block.

    Layout (spatial size in brackets):
        conv1 -> conv2 -> 1x1 conv -> max-pool            [32 -> 16]
        conv4 -> conv5 -> conv6 -> 1x1 conv -> max-pool   [16 -> 8]
        conv8 -> conv9 -> conv10 (1x1)                    [8]
        global average pool -> 1x1 conv to 10 outputs -> log_softmax

    Every normalised block uses the same order: Conv2d -> LayerNorm -> ReLU ->
    Dropout. The LayerNorm shapes are fixed to the feature-map sizes above, so
    the network only accepts 32x32 inputs.

    Note: defining this class in the notebook shadows the ``S8_Model_LN`` name
    imported from ``model`` in the first cell.

    Args:
        dropout: drop probability used by every Dropout layer (default 0.10).
    """

    def __init__(self, dropout=0.10):
        super().__init__()

        # --- 32x32 stage -------------------------------------------------
        self.conv1 = self._conv_ln_block(3, 8, 32, kernel_size=7, padding=3,
                                         bias=False, dropout=dropout)
        self.conv2 = self._conv_ln_block(8, 4, 32, dropout=dropout)
        self.conv1x1_3 = nn.Sequential(
            nn.Conv2d(4, 8, kernel_size=1),
        )
        self.pool_1 = nn.MaxPool2d(2, 2)

        # --- 16x16 stage (after the first pooling) -----------------------
        self.conv4 = self._conv_ln_block(8, 4, 16, dropout=dropout)
        self.conv5 = self._conv_ln_block(4, 10, 16, dropout=dropout)
        self.conv6 = self._conv_ln_block(10, 8, 16, dropout=dropout)
        self.conv1x1_7 = nn.Sequential(
            nn.Conv2d(8, 8, kernel_size=1),
        )
        self.pool_2 = nn.MaxPool2d(2, 2)

        # --- 8x8 stage (after the second pooling) ------------------------
        # ReLU and Dropout commute (dropout only zeroes or rescales by a
        # positive factor), so using ReLU -> Dropout here instead of the
        # previous Dropout -> ReLU does not change what conv8/conv9 compute.
        self.conv8 = self._conv_ln_block(8, 10, 8, dropout=dropout)
        self.conv9 = self._conv_ln_block(10, 16, 8, dropout=dropout)
        # A 1x1 convolution needs no padding. The earlier padding=1 grew the map
        # from 8x8 to 10x10 by adding a ring of bias-only pixels, which then
        # leaked into the global average pool. Dropout is also moved after the
        # normalisation so LayerNorm sees the same statistics in train and eval.
        self.conv10 = self._conv_ln_block(16, 4, 8, kernel_size=1, padding=0,
                                          dropout=dropout)

        # --- classifier head ---------------------------------------------
        self.gap = nn.AdaptiveAvgPool2d((1, 1))
        self.conv1x1_11 = nn.Sequential(
            nn.Conv2d(4, 10, kernel_size=1),
        )

    @staticmethod
    def _conv_ln_block(in_ch, out_ch, size, kernel_size=3, padding=1,
                       bias=True, dropout=0.10):
        """Build Conv2d -> LayerNorm([out_ch, size, size]) -> ReLU -> Dropout."""
        return nn.Sequential(
            nn.Conv2d(in_ch, out_ch, kernel_size=kernel_size, padding=padding, bias=bias),
            nn.LayerNorm([out_ch, size, size]),
            nn.ReLU(),
            nn.Dropout(dropout),
        )

    def forward(self, x):
        """Return log-probabilities of shape (batch, 10) for a (batch, 3, 32, 32) input."""
        # 32x32 stage
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv1x1_3(x)
        x = self.pool_1(x)
        # 16x16 stage
        x = self.conv4(x)
        x = self.conv5(x)
        x = self.conv6(x)
        x = self.conv1x1_7(x)
        x = self.pool_2(x)
        # 8x8 stage
        x = self.conv8(x)
        x = self.conv9(x)
        x = self.conv10(x)
        # head: (batch, 4, 1, 1) -> (batch, 10, 1, 1) -> (batch, 10)
        x = self.gap(x)
        x = self.conv1x1_11(x)
        x = x.view(x.size(0), -1)
        return F.log_softmax(x, dim=1)


In [32]:
# Instantiate the network on the selected device and print a per-layer summary
# for a single 3x32x32 input.
model = S8_Model_LN().to(device)
model_summary(model, input_size=(3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 32, 32]           1,176
         LayerNorm-2            [-1, 8, 32, 32]          16,384
              ReLU-3            [-1, 8, 32, 32]               0
           Dropout-4            [-1, 8, 32, 32]               0
            Conv2d-5            [-1, 4, 32, 32]             292
         LayerNorm-6            [-1, 4, 32, 32]           8,192
              ReLU-7            [-1, 4, 32, 32]               0
           Dropout-8            [-1, 4, 32, 32]               0
            Conv2d-9            [-1, 8, 32, 32]              40
        MaxPool2d-10            [-1, 8, 16, 16]               0
           Conv2d-11            [-1, 4, 16, 16]             292
        LayerNorm-12            [-1, 4, 16, 16]           2,048
             ReLU-13            [-1, 4, 16, 16]               0
          Dropout-14            [-1, 4,

In [33]:
# Negative log-likelihood loss, applied to the log_softmax output of the model.
criterion = F.nll_loss
num_epochs = 20

# SGD with momentum; StepLR multiplies the learning rate by `gamma` every
# `step_size` calls to scheduler.step() (called once per epoch below).
optimizer_sgd = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
scheduler = optim.lr_scheduler.StepLR(optimizer_sgd, step_size=5, gamma=0.99)

for epoch in range(1, num_epochs + 1):
    print(f'Epoch {epoch}')
    train(model, device, train_loader, optimizer_sgd, criterion)
    test(model, device, test_loader, criterion)
    scheduler.step()



Epoch 1


Train: Loss=2.3042 Batch_id=97 Accuracy=10.19: 100%|██████████| 98/98 [00:15<00:00,  6.26it/s]


Test set: Average loss: 0.0046, Accuracy: 1063/10000 (10.63%)

Epoch 2


Train: Loss=2.2921 Batch_id=97 Accuracy=12.02: 100%|██████████| 98/98 [00:14<00:00,  6.63it/s]


Test set: Average loss: 0.0046, Accuracy: 1192/10000 (11.92%)

Epoch 3


Train: Loss=2.2729 Batch_id=40 Accuracy=13.88:  42%|████▏     | 41/98 [00:11<00:15,  3.59it/s]


KeyboardInterrupt: 

In [None]:
# plot_acc_loss is imported from util; presumably it draws the accuracy/loss
# history recorded by train()/test() during the run above — confirm in util.py.
plot_acc_loss()

In [None]:
# NOTE(review): `model` is reused as-is, so this run starts from whatever weights
# the previous training cell left behind. Re-create the model first if the goal
# is an independent SGD-vs-Adam comparison — confirm intent.
optimizer_adam = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-5)
# The `verbose` flag of the LR schedulers is deprecated in recent PyTorch
# releases; the learning rate is reported explicitly via get_last_lr() instead.
scheduler = optim.lr_scheduler.StepLR(optimizer_adam, step_size=5, gamma=0.1)
criterion = F.nll_loss
num_epochs = 20

for epoch in range(1, num_epochs + 1):
    print(f'Epoch {epoch}')
    train(model, device, train_loader, optimizer_adam, criterion)
    test(model, device, test_loader, criterion)
    scheduler.step()
    # Learning rate that will be used for the next epoch.
    print(f'Learning rate: {scheduler.get_last_lr()[0]:.4e}')




In [None]:
# Same util helper as after the SGD run. NOTE(review): if util keeps its history
# in module-level state, this plot may also contain the earlier run — confirm.
plot_acc_loss()

In [None]:
# Human-readable label for each of the 10 class indices, in index order.
# Presumably the CIFAR-10 ordering — confirm against the dataset in load_data.
classes = (
    'plane',
    'car',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
)

In [None]:
def find_and_visualize_misclassified_images(model, device, test_loader, criterion, classes, num_images=10):
    """Collect the first misclassified test samples and plot them.

    Runs ``model`` in eval mode over ``test_loader`` without gradients, gathers
    up to ``num_images`` samples whose arg-max prediction differs from the
    target, and passes them to ``plot_misclassified_images``. Iteration stops
    as soon as enough samples have been collected.

    Args:
        model: module mapping a batch of inputs to per-class scores.
        device: device the batches are moved to before the forward pass.
        test_loader: iterable yielding ``(data, target)`` batches.
        criterion: unused; kept so existing call sites keep working.
        classes: sequence mapping a class index to its display name.
        num_images: maximum number of samples to show (default 10).

    Returns:
        None. Nothing is plotted when ``num_images <= 0`` or when no sample is
        misclassified.
    """
    model.eval()  # disable dropout etc. for inference
    if num_images <= 0:
        return

    misclassified_images = []
    misclassified_true = []
    misclassified_pred = []

    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            _, preds = torch.max(output, 1)

            # as_tuple=True always yields a 1-D index tensor. The previous
            # `.nonzero().squeeze()` collapsed to a 0-d tensor when exactly one
            # sample was wrong, and iterating a 0-d tensor raises TypeError.
            wrong_idxs = (preds != target).nonzero(as_tuple=True)[0]

            remaining = num_images - len(misclassified_images)
            for idx in wrong_idxs[:remaining]:
                misclassified_images.append(data[idx].cpu())
                misclassified_true.append(target[idx].cpu())
                misclassified_pred.append(preds[idx].cpu())

            # Stop scanning the loader once enough examples have been found.
            if len(misclassified_images) >= num_images:
                break

    if misclassified_images:
        plot_misclassified_images(misclassified_images, misclassified_true, misclassified_pred, classes)
import matplotlib.pyplot as plt

def plot_misclassified_images(images, true_labels, predicted_labels, classes):
    """Show images in a two-column grid, titled with true and predicted class names.

    Args:
        images: sequence of image tensors; each is converted with ``.numpy()``
            and transposed with ``(1, 2, 0)`` before display (channel-first
            layout assumed — confirm against the data loader).
        true_labels: per-image class indices (0-d tensors or plain ints).
        predicted_labels: per-image predicted class indices, same form.
        classes: sequence mapping a class index to its display name.

    Returns:
        None. Does nothing when ``images`` is empty.
    """
    if len(images) == 0:
        # Avoid asking matplotlib for a grid with zero rows.
        return

    n_rows = (len(images) + 1) // 2
    # Four inches per row: identical to the former fixed 10x20 figure for ten
    # images, but no longer stretched when fewer images are shown.
    fig, axes = plt.subplots(n_rows, 2, figsize=(10, 4 * n_rows))
    for i, ax in enumerate(axes.flat):
        if i < len(images):
            img = images[i].numpy().transpose((1, 2, 0))
            lo, hi = img.min(), img.max()
            # Min-max scale to [0, 1]; a constant image has zero range, so only
            # shift it (all zeros) instead of dividing by zero and showing NaNs.
            img = img - lo
            if hi > lo:
                img = img / (hi - lo)
            ax.imshow(img)
            ax.set_title(f"True: {classes[int(true_labels[i])]}, Pred: {classes[int(predicted_labels[i])]}")
        # Hide ticks and frame on used cells and unused trailing cells alike.
        ax.axis('off')
    plt.tight_layout()
    plt.show()


# Relies on notebook state created by earlier cells: 'model', 'device',
# 'test_loader', 'criterion', and 'classes' must already be defined.
find_and_visualize_misclassified_images(model, device, test_loader, criterion, classes)
