In [1]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import numpy as np
import os
from tqdm import tqdm
import albumentations as A
from albumentations.pytorch import ToTensorV2
import multiprocessing

  check_for_updates()


**Target:**

1. Works on CIFAR10 Dataset
2. Has the architecture to C1C2C3C40 (No MaxPooling, but convolutions, where the last one has a stride of 2 instead) (NO restriction on using 1x1)
3. Total RF must be more than 44
4. One of the layers must use Depthwise Separable Convolution
5. One of the layers must use Dilated Convolution
use GAP (compulsory):- add FC after GAP to target #of classes (optional)
6. uses albumentation library and apply:
    a. horizontal flip
    b. shiftScaleRotate
    c. coarseDropout (max_holes = 1, max_height=16px,   max_width=16, min_holes = 1, min_height=16px, min_width=16px, fill_value=(mean of your dataset), mask_fill_value = None)
7. achieve 85% accuracy, as many epochs as you want. 
8. Total Params to be less than 200k.

In [2]:
class CIFAR10Dataset(datasets.CIFAR10):
    def __init__(self, root, train=True, transform=None):
        super(CIFAR10Dataset, self).__init__(
            root=root, train=train, download=True, transform=None
        )
        self.albumentations_transform = transform

    def __getitem__(self, index):
        image, label = super(CIFAR10Dataset, self).__getitem__(index)
        image = np.array(image)  # Convert PIL Image to numpy array

        if self.albumentations_transform:
            transformed = self.albumentations_transform(image=image)
            image = transformed["image"]

        return image, label

In [3]:
def load_cifar10_with_albumentations(device, use_cuda, config_dict, albumentations_transform):
    torch.manual_seed(config_dict["seed"])
    if device == "cuda":
        torch.cuda.manual_seed(config_dict["seed"])
    batch_size = config_dict["batch_size"]
    kwargs = (
        {"num_workers": multiprocessing.cpu_count(), "pin_memory": True}
        if use_cuda
        else {}
    )

    train_dataset = CIFAR10Dataset(root="./data", train=True, transform=albumentations_transform)
    test_dataset = CIFAR10Dataset(root="./data", train=False, transform=albumentations_transform)

    train_dataset, test_dataset

    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False, **kwargs)

    return train_loader, test_loader   

In [4]:
# For reproducibility

SEED = 1

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print(device)

# train & test dataloader

config_dict = {"seed": SEED, "batch_size": 64}
albumentations_transform = A.Compose(
        [
            A.HorizontalFlip(p=0.5),
            A.ShiftScaleRotate(p=0.05),
            A.CoarseDropout(p=0.1, fill=(0.49139968, 0.48215827, 0.44653124)),
            A.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
            ToTensorV2(),
        ]
    )

train_loader, test_loader = load_cifar10_with_albumentations(
        device=device,
        use_cuda=use_cuda,
        config_dict=config_dict,
        albumentations_transform=albumentations_transform#,
    )

  A.CoarseDropout(p=0.1, fill=(0.49139968, 0.48215827, 0.44653124)),


cuda
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:01<00:00, 96134763.45it/s] 


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [5]:
train_losses = []
test_losses = []
train_acc = []
test_acc = []

def train(model, device, train_loader, optimizer, epoch):
  model.train()
  
  correct = 0
  processed = 0
  pbar = tqdm(train_loader)
  for batch_idx, (data, target) in enumerate(pbar):
    # get samples
    data, target = data.to(device), target.to(device)

    # Init
    optimizer.zero_grad()
    # In PyTorch, we need to set the gradients to zero before starting to do backpropragation because PyTorch accumulates the gradients on subsequent backward passes. 
    # Because of this, when you start your training loop, ideally you should zero out the gradients so that you do the parameter update correctly.

    # Predict
    y_pred = model(data)

    # Calculate loss
    loss = F.nll_loss(y_pred, target)
    train_losses.append(loss)

    # Backpropagation
    loss.backward()
    optimizer.step()

    # Update pbar-tqdm
    
    pred = y_pred.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
    correct += pred.eq(target.view_as(pred)).sum().item()
    processed += len(data)

    pbar.set_description(desc= f'Loss={loss.item()} Batch_id={batch_idx} Accuracy={100*correct/processed:0.2f}')
    train_acc.append(100*correct/processed)

def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    test_losses.append(test_loss)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    
    test_acc.append(100. * correct / len(test_loader.dataset))

In [6]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        
        # C1 Block - Regular Convolution
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 48, kernel_size=5, padding=2, bias=False),  # Increased kernel size
            nn.BatchNorm2d(48),
            nn.ReLU(),
        )
        
        # C2 Block - Depthwise Separable Convolution
        self.conv2 = nn.Sequential(
            nn.Conv2d(48, 48, kernel_size=3, padding=1, groups=24, bias=False),
            nn.Conv2d(48, 64, kernel_size=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, padding=1, bias=False),  # Added layer
            nn.BatchNorm2d(64),
            nn.ReLU(),
        )
        
        # C3 Block - Dilated Convolution
        self.conv3 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=3, padding=4, dilation=4, bias=False),  # Increased dilation
            nn.BatchNorm2d(64),
            nn.ReLU(),
        )
        
        # C4 Block - Strided Convolution
        self.conv4 = nn.Sequential(
            nn.Conv2d(64, 96, kernel_size=3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(96),
            nn.ReLU(),
        )
        
        self.gap = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(96, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.gap(x)
        x = x.view(-1, 96)
        x = self.fc(x)
        return F.log_softmax(x, dim=1)

In [7]:
!pip install torchsummary
from torchsummary import summary
model = Net().to(device)
summary(model, input_size=(3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 32, 32]             864
       BatchNorm2d-2           [-1, 32, 32, 32]              64
              ReLU-3           [-1, 32, 32, 32]               0
            Conv2d-4           [-1, 48, 32, 32]          38,400
       BatchNorm2d-5           [-1, 48, 32, 32]              96
              ReLU-6           [-1, 48, 32, 32]               0
            Conv2d-7           [-1, 48, 32, 32]             864
            Conv2d-8           [-1, 64, 32, 32]           3,072
       BatchNorm2d-9           [-1, 64, 32, 32]             128
             ReLU-10           [-1, 64, 32, 32]               0
           Conv2d-11           [-1, 64, 32, 32]          36,864
      BatchNorm2d-12           [-1, 64, 32, 32]             128
             ReLU-13           [-1, 64, 32, 32]               0
           Conv2d-14           [-1, 64,

In [None]:
from torch.optim.lr_scheduler import StepLR
import math

# model =  Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.015, momentum=0.9,nesterov=True,
                         weight_decay=1e-4)

def warmup_cosine_schedule(epoch):
        warmup_epochs = 3  # Reduced warmup period
        if epoch < warmup_epochs:
            return (epoch + 1) / warmup_epochs
        return 0.5 * (1 + math.cos(math.pi * (epoch - warmup_epochs) / (30 - warmup_epochs)))
    
# scheduler = StepLR(optimizer, step_size=5, gamma=0.1)
scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=warmup_cosine_schedule)


EPOCHS = 35
for epoch in range(EPOCHS):
    print("EPOCH:", epoch)
    train(model, device, train_loader, optimizer, epoch)
    scheduler.step()
    test(model, device, test_loader)

EPOCH: 0


Loss=1.284513235092163 Batch_id=781 Accuracy=44.60: 100%|██████████| 782/782 [00:32<00:00, 23.75it/s] 



Test set: Average loss: 1.4615, Accuracy: 4684/10000 (46.84%)

EPOCH: 1


Loss=1.0822097063064575 Batch_id=781 Accuracy=59.50: 100%|██████████| 782/782 [00:32<00:00, 24.19it/s]



Test set: Average loss: 1.1063, Accuracy: 6046/10000 (60.46%)

EPOCH: 2


Loss=0.7968603372573853 Batch_id=781 Accuracy=66.30: 100%|██████████| 782/782 [00:32<00:00, 24.25it/s]



Test set: Average loss: 0.9710, Accuracy: 6619/10000 (66.19%)

EPOCH: 3


Loss=0.6355998516082764 Batch_id=781 Accuracy=71.27: 100%|██████████| 782/782 [00:30<00:00, 25.24it/s]



Test set: Average loss: 0.8058, Accuracy: 7130/10000 (71.30%)

EPOCH: 4


Loss=0.20071467757225037 Batch_id=781 Accuracy=74.64: 100%|██████████| 782/782 [00:31<00:00, 24.48it/s]



Test set: Average loss: 0.8880, Accuracy: 6897/10000 (68.97%)

EPOCH: 5


Loss=0.5494881272315979 Batch_id=781 Accuracy=76.77: 100%|██████████| 782/782 [00:30<00:00, 25.94it/s] 



Test set: Average loss: 0.6941, Accuracy: 7552/10000 (75.52%)

EPOCH: 6


Loss=0.8475427627563477 Batch_id=781 Accuracy=78.67: 100%|██████████| 782/782 [00:31<00:00, 24.61it/s] 



Test set: Average loss: 0.6794, Accuracy: 7661/10000 (76.61%)

EPOCH: 7


Loss=0.3551082909107208 Batch_id=781 Accuracy=80.31: 100%|██████████| 782/782 [00:30<00:00, 25.68it/s] 



Test set: Average loss: 0.6345, Accuracy: 7818/10000 (78.18%)

EPOCH: 8


Loss=0.3669706881046295 Batch_id=781 Accuracy=81.26: 100%|██████████| 782/782 [00:31<00:00, 24.77it/s] 



Test set: Average loss: 0.6320, Accuracy: 7805/10000 (78.05%)

EPOCH: 9


Loss=0.8171730041503906 Batch_id=781 Accuracy=82.36: 100%|██████████| 782/782 [00:31<00:00, 25.01it/s] 



Test set: Average loss: 0.6290, Accuracy: 7823/10000 (78.23%)

EPOCH: 10


Loss=0.8088398575782776 Batch_id=781 Accuracy=83.26: 100%|██████████| 782/782 [00:31<00:00, 25.07it/s] 



Test set: Average loss: 0.6948, Accuracy: 7628/10000 (76.28%)

EPOCH: 11


Loss=0.1935376524925232 Batch_id=781 Accuracy=84.09: 100%|██████████| 782/782 [00:30<00:00, 25.37it/s] 



Test set: Average loss: 0.6405, Accuracy: 7837/10000 (78.37%)

EPOCH: 12


Loss=0.3873443603515625 Batch_id=781 Accuracy=84.56: 100%|██████████| 782/782 [00:32<00:00, 24.34it/s] 



Test set: Average loss: 0.5226, Accuracy: 8191/10000 (81.91%)

EPOCH: 13


Loss=0.3189619183540344 Batch_id=781 Accuracy=85.63: 100%|██████████| 782/782 [00:32<00:00, 24.35it/s] 



Test set: Average loss: 0.5432, Accuracy: 8132/10000 (81.32%)

EPOCH: 14


Loss=0.3648768961429596 Batch_id=781 Accuracy=86.18: 100%|██████████| 782/782 [00:31<00:00, 24.81it/s] 



Test set: Average loss: 0.5477, Accuracy: 8153/10000 (81.53%)

EPOCH: 15


Loss=0.4654165506362915 Batch_id=781 Accuracy=86.80: 100%|██████████| 782/782 [00:32<00:00, 24.23it/s] 



Test set: Average loss: 0.4959, Accuracy: 8293/10000 (82.93%)

EPOCH: 16


Loss=0.3261142373085022 Batch_id=781 Accuracy=87.27: 100%|██████████| 782/782 [00:30<00:00, 25.64it/s] 



Test set: Average loss: 0.5052, Accuracy: 8293/10000 (82.93%)

EPOCH: 17


Loss=0.3853275179862976 Batch_id=781 Accuracy=88.03: 100%|██████████| 782/782 [00:32<00:00, 24.07it/s] 



Test set: Average loss: 0.5013, Accuracy: 8287/10000 (82.87%)

EPOCH: 18


Loss=0.883782148361206 Batch_id=781 Accuracy=88.78: 100%|██████████| 782/782 [00:32<00:00, 24.24it/s]  



Test set: Average loss: 0.4781, Accuracy: 8385/10000 (83.85%)

EPOCH: 19


Loss=0.39649349451065063 Batch_id=781 Accuracy=89.29: 100%|██████████| 782/782 [00:30<00:00, 25.85it/s]



Test set: Average loss: 0.4884, Accuracy: 8307/10000 (83.07%)

EPOCH: 20


Loss=0.46497803926467896 Batch_id=781 Accuracy=89.70: 100%|██████████| 782/782 [00:31<00:00, 25.08it/s]



Test set: Average loss: 0.4371, Accuracy: 8495/10000 (84.95%)

EPOCH: 21


Loss=0.29612860083580017 Batch_id=781 Accuracy=90.18: 100%|██████████| 782/782 [00:31<00:00, 24.50it/s]



Test set: Average loss: 0.4492, Accuracy: 8434/10000 (84.34%)

EPOCH: 22


Loss=0.7159926295280457 Batch_id=781 Accuracy=90.84: 100%|██████████| 782/782 [00:31<00:00, 24.59it/s] 



Test set: Average loss: 0.4494, Accuracy: 8487/10000 (84.87%)

EPOCH: 23


Loss=0.23242917656898499 Batch_id=781 Accuracy=91.10: 100%|██████████| 782/782 [00:32<00:00, 24.19it/s]



Test set: Average loss: 0.4281, Accuracy: 8521/10000 (85.21%)

EPOCH: 24


Loss=0.1462191492319107 Batch_id=781 Accuracy=91.61: 100%|██████████| 782/782 [00:31<00:00, 25.14it/s] 



Test set: Average loss: 0.4240, Accuracy: 8563/10000 (85.63%)

EPOCH: 25


Loss=0.4644079804420471 Batch_id=781 Accuracy=92.16: 100%|██████████| 782/782 [00:30<00:00, 25.42it/s] 



Test set: Average loss: 0.4173, Accuracy: 8592/10000 (85.92%)

EPOCH: 26


Loss=0.4637536108493805 Batch_id=781 Accuracy=92.28: 100%|██████████| 782/782 [00:31<00:00, 24.48it/s] 



Test set: Average loss: 0.4191, Accuracy: 8562/10000 (85.62%)

EPOCH: 27


Loss=0.09505604952573776 Batch_id=781 Accuracy=92.80: 100%|██████████| 782/782 [00:32<00:00, 24.27it/s]



Test set: Average loss: 0.4176, Accuracy: 8594/10000 (85.94%)

EPOCH: 28


Loss=0.2846538722515106 Batch_id=781 Accuracy=92.78: 100%|██████████| 782/782 [00:31<00:00, 24.86it/s]  



Test set: Average loss: 0.4126, Accuracy: 8603/10000 (86.03%)

EPOCH: 29


Loss=0.14075541496276855 Batch_id=781 Accuracy=92.95: 100%|██████████| 782/782 [00:31<00:00, 24.65it/s] 



Test set: Average loss: 0.4172, Accuracy: 8596/10000 (85.96%)

EPOCH: 30


Loss=0.28987351059913635 Batch_id=781 Accuracy=92.90: 100%|██████████| 782/782 [00:31<00:00, 24.62it/s]



Test set: Average loss: 0.4250, Accuracy: 8595/10000 (85.95%)

EPOCH: 31


Loss=0.6214544773101807 Batch_id=781 Accuracy=92.90: 100%|██████████| 782/782 [00:31<00:00, 25.07it/s] 



Test set: Average loss: 0.4237, Accuracy: 8573/10000 (85.73%)

EPOCH: 32


Loss=0.21403680741786957 Batch_id=658 Accuracy=93.09:  84%|████████▍ | 658/782 [00:26<00:05, 23.73it/s]