<a href="https://colab.research.google.com/github/11kartheek/ERA-v2/blob/main/KartheekB_s10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import Libraries

In [111]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import numpy

## Data Transformations

We start by defining our data transformations. We need to think about what our data looks like and how we can augment it so that it correctly represents images the model might not otherwise see.


In [112]:
class CutoutAfterToTensor(object):
    """Cutout augmentation for an image tensor indexed as ``(C, H, W)``.

    For each of ``n_holes`` randomly chosen centres, a square reaching
    ``length // 2`` pixels to either side of the centre is overwritten with
    ``fill_color``. Squares that cross the image border are clipped, so a hole
    can be smaller than ``length`` x ``length``.

    Args:
        n_holes: number of square patches cut out of every image.
        length: nominal side length of each patch, in pixels.
        fill_color: per-channel fill value, given as a tensor or any sequence
            with one entry per image channel. ``None`` (the default) fills with
            zeros for however many channels the image has.
    """

    def __init__(self, n_holes, length, fill_color=None):
        self.n_holes = n_holes
        self.length = length
        self.fill_color = fill_color

    def __call__(self, img):
        """Return a new tensor equal to ``img`` with the random patches filled in."""
        channels, h, w = img.shape[0], img.shape[1], img.shape[2]
        half = self.length // 2

        mask = torch.ones((h, w), dtype=torch.float32, device=img.device)
        for _ in range(self.n_holes):
            # Draw from torch's RNG, not numpy's global one: DataLoader workers
            # each get their own torch seed, whereas numpy's state can be
            # duplicated across workers and make them cut identical holes.
            y = int(torch.randint(h, (1,)))
            x = int(torch.randint(w, (1,)))
            y1 = min(max(y - half, 0), h)
            y2 = min(max(y + half, 0), h)
            x1 = min(max(x - half, 0), w)
            x2 = min(max(x + half, 0), w)
            mask[y1:y2, x1:x2] = 0.

        if self.fill_color is None:
            fill = torch.zeros(channels, dtype=mask.dtype, device=img.device)
        else:
            # as_tensor accepts tensors, lists and tuples alike.
            fill = torch.as_tensor(self.fill_color, device=img.device).to(mask.dtype)

        mask = mask.expand_as(img)
        # Keep the image where mask == 1, use the fill colour where mask == 0.
        return img * mask + (1 - mask) * fill[:, None, None]

In [113]:
# Per-channel mean and std handed to Normalize by both pipelines.
NORM_MEAN = (0.4914, 0.4822, 0.4465)
NORM_STD = (0.2023, 0.1994, 0.2010)

# Training pipeline: padded random crop and horizontal flip on the input image,
# then tensor conversion, normalisation and a single 8-pixel cutout patch.
train_transforms = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
    CutoutAfterToTensor(1, 8),
])

# Test pipeline: no augmentation, only tensor conversion and the same normalisation.
test_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
])


# Dataset and Creating Train/Test Split

In [114]:
# CIFAR-10 train and test splits, kept under ./data (fetched when download is
# needed), each bound to its own transform pipeline.
train = datasets.CIFAR10(root='./data', train=True, download=True, transform=train_transforms)
test = datasets.CIFAR10(root='./data', train=False, download=True, transform=test_transforms)

Files already downloaded and verified
Files already downloaded and verified


# Dataloader Arguments & Test/Train Dataloaders


In [115]:
SEED = 1

# CUDA?
cuda = torch.cuda.is_available()
print("CUDA Available?", cuda)

# For reproducibility: seed torch and numpy's global RNG, so that any
# numpy-based randomness in the transforms is repeatable as well.
torch.manual_seed(SEED)
numpy.random.seed(SEED)

if cuda:
    torch.cuda.manual_seed(SEED)

# Dataloader arguments: large batches with worker processes on GPU, small
# single-process batches on CPU.
dataloader_args = dict(shuffle=True, batch_size=512, num_workers=4, pin_memory=False) if cuda else dict(shuffle=True, batch_size=64)

# train dataloader
train_loader = torch.utils.data.DataLoader(train, **dataloader_args)

# test dataloader: same settings, but evaluation gains nothing from shuffling,
# so the sample order is left fixed.
test_loader = torch.utils.data.DataLoader(test, **{**dataloader_args, "shuffle": False})

CUDA Available? True




# The model
Let's start with the model we first saw

In [116]:
import torch.nn.functional as F

class Net(nn.Module):
    """Residual CNN mapping 3-channel 32x32 images to log-probabilities over 10 classes.

    Spatial size for a 32x32 input:
    prep 32 -> X1/R1 16 -> L2 8 -> X2/R2 4 -> pool1 1 -> fc1.
    """

    @staticmethod
    def _conv_unit(in_channels, out_channels, pool=False):
        """Layers of one 3x3 conv unit: Conv -> (optional 2x2 MaxPool) -> BatchNorm -> ReLU."""
        layers = [nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                            kernel_size=(3, 3), padding=1, bias=False)]
        if pool:
            layers.append(nn.MaxPool2d(2, 2))
        layers.append(nn.BatchNorm2d(out_channels))
        layers.append(nn.ReLU())
        return layers

    def __init__(self):
        super().__init__()
        unit = self._conv_unit

        # 1. prep layer: 3 -> 64 channels, spatial size 32
        self.prep = nn.Sequential(*unit(3, 64))

        # 2. layer 1: pooled conv (size 16) followed by a two-conv residual branch
        self.X1 = nn.Sequential(*unit(64, 128, pool=True))
        self.R1 = nn.Sequential(*unit(128, 128), *unit(128, 128))

        # 3. layer 2: pooled conv, 128 -> 256 channels, size 8
        self.L2 = nn.Sequential(*unit(128, 256, pool=True))

        # 4. layer 3: pooled conv (size 4) followed by a two-conv residual branch
        self.X2 = nn.Sequential(*unit(256, 512, pool=True))
        self.R2 = nn.Sequential(*unit(512, 512), *unit(512, 512))

        # 5. 4x4 max pool collapses the 4x4 map to 1x1
        self.pool1 = nn.MaxPool2d(4, 4)

        # 6. classifier over the 512 pooled features
        self.fc1 = nn.Linear(512, 10)

    def forward(self, x):
        """Return per-class log-probabilities for a batch ``x``."""
        x = self.prep(x)

        # layer 1 with its skip connection
        x = self.X1(x)
        x = x + self.R1(x)

        x = self.L2(x)

        # layer 3 with its skip connection
        x = self.X2(x)
        x = x + self.R2(x)

        x = self.pool1(x)
        x = self.fc1(torch.flatten(x, 1))
        return F.log_softmax(x, 1)

# Model Params
It is hard to overstate how important it is to view the model summary.
Unfortunately, there is no built-in model visualizer, so we rely on an external package.

In [117]:
!pip install torchsummary
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print(device)
model = Net().to(device)
summary(model, input_size=(3, 32, 32))

# Training and Testing

Looking at logs can be boring, so we'll use the **tqdm** progress bar to get nicer logs.

Let's write train and test functions

In [118]:
from tqdm import tqdm

# Metric histories: train() appends to the train_* lists once per batch,
# test() appends to the test_* lists once per call.
train_losses, train_acc = [], []
test_losses, test_acc = [], []

def train(model, device, train_loader, optimizer, epoch):
    """Run one pass over ``train_loader``, updating ``model`` after every batch.

    Appends each batch's loss to the module-level ``train_losses`` and the
    running accuracy (in percent) to ``train_acc``, and shows both on a tqdm
    progress bar.

    Args:
        model: network to optimise; put into training mode here.
        device: device each batch is moved to before the forward pass.
        train_loader: iterable yielding ``(data, target)`` batches.
        optimizer: optimizer stepped once per batch.
        epoch: index of the current epoch; accepted but not used in the body.
    """
    model.train()
    pbar = tqdm(train_loader)
    correct = 0
    processed = 0
    for batch_idx, (data, target) in enumerate(pbar):
        # get samples
        data, target = data.to(device), target.to(device)

        # PyTorch accumulates gradients across backward() calls, so clear them
        # before computing this batch's gradients.
        optimizer.zero_grad()

        # Predict
        y_pred = model(data)

        # Calculate loss
        loss = F.cross_entropy(y_pred, target)
        # Record a detached tensor: keeping the live loss in the history would
        # hold on to its autograd graph for every batch of the run.
        train_losses.append(loss.detach())

        # Backpropagation
        loss.backward()
        optimizer.step()

        # Running accuracy over the batches seen so far in this pass
        pred = y_pred.argmax(dim=1, keepdim=True)  # index of the highest score
        correct += pred.eq(target.view_as(pred)).sum().item()
        processed += len(data)

        pbar.set_description(desc= f'Loss={loss.item()} Batch_id={batch_idx} Accuracy={100*correct/processed:0.2f}')
        train_acc.append(100*correct/processed)

def test(model, device, test_loader):
    """Evaluate ``model`` on ``test_loader`` and report average loss and accuracy.

    The model is switched to eval mode and gradients are disabled. The mean
    per-sample loss is appended to the module-level ``test_losses`` and the
    accuracy (in percent) to ``test_acc``; both are also printed.

    Args:
        model: network to evaluate.
        device: device each batch is moved to before the forward pass.
        test_loader: loader yielding ``(data, target)`` batches; its
            ``dataset`` length is used as the sample count.
    """
    model.eval()
    loss_sum = 0
    n_correct = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            scores = model(inputs)
            # Summed (not averaged) batch loss, so dividing by the dataset size
            # afterwards gives the per-sample mean.
            loss_sum += F.cross_entropy(scores, labels, reduction='sum').item()
            predicted = scores.argmax(dim=1, keepdim=True)
            n_correct += predicted.eq(labels.view_as(predicted)).sum().item()

    n_samples = len(test_loader.dataset)
    mean_loss = loss_sum / n_samples
    accuracy = 100. * n_correct / n_samples
    test_losses.append(mean_loss)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        mean_loss, n_correct, n_samples, accuracy))

    test_acc.append(accuracy)

In [119]:
from torch.optim.lr_scheduler import StepLR
# Fresh model and default-configured Adam optimiser for the main training run.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")
model = Net().to(device)
optimizer = optim.Adam(model.parameters())
# The StepLR schedule is constructed here, but scheduler.step() is not called
# in the loop below, so it never changes the learning rate.
scheduler = StepLR(optimizer, step_size=6, gamma=0.1)

EPOCHS = 24
# One training pass followed by one evaluation pass per epoch.
for epoch in range(EPOCHS):
    print("EPOCH:", epoch)
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

EPOCH: 0


Loss=1.1221354007720947 Batch_id=97 Accuracy=46.88: 100%|██████████| 98/98 [00:32<00:00,  2.98it/s]



Test set: Average loss: 1.0831, Accuracy: 6165/10000 (61.65%)

EPOCH: 1


Loss=0.7839524149894714 Batch_id=97 Accuracy=68.29: 100%|██████████| 98/98 [00:34<00:00,  2.87it/s]



Test set: Average loss: 0.7750, Accuracy: 7262/10000 (72.62%)

EPOCH: 2


Loss=0.5789008140563965 Batch_id=97 Accuracy=76.46: 100%|██████████| 98/98 [00:32<00:00,  3.02it/s]



Test set: Average loss: 0.6266, Accuracy: 7829/10000 (78.29%)

EPOCH: 3


Loss=0.5942178964614868 Batch_id=97 Accuracy=80.52: 100%|██████████| 98/98 [00:33<00:00,  2.95it/s]



Test set: Average loss: 0.5974, Accuracy: 8007/10000 (80.07%)

EPOCH: 4


Loss=0.5049737095832825 Batch_id=97 Accuracy=83.43: 100%|██████████| 98/98 [00:32<00:00,  3.00it/s]



Test set: Average loss: 0.5151, Accuracy: 8279/10000 (82.79%)

EPOCH: 5


Loss=0.48201823234558105 Batch_id=97 Accuracy=85.72: 100%|██████████| 98/98 [00:32<00:00,  3.02it/s]



Test set: Average loss: 0.5004, Accuracy: 8366/10000 (83.66%)

EPOCH: 6


Loss=0.2931826114654541 Batch_id=97 Accuracy=86.89: 100%|██████████| 98/98 [00:32<00:00,  3.06it/s]



Test set: Average loss: 0.4662, Accuracy: 8463/10000 (84.63%)

EPOCH: 7


Loss=0.29207298159599304 Batch_id=97 Accuracy=87.95: 100%|██████████| 98/98 [00:30<00:00,  3.18it/s]



Test set: Average loss: 0.4265, Accuracy: 8596/10000 (85.96%)

EPOCH: 8


Loss=0.3337302505970001 Batch_id=97 Accuracy=89.15: 100%|██████████| 98/98 [00:30<00:00,  3.19it/s]



Test set: Average loss: 0.4339, Accuracy: 8510/10000 (85.10%)

EPOCH: 9


Loss=0.305001825094223 Batch_id=97 Accuracy=90.24: 100%|██████████| 98/98 [00:32<00:00,  3.03it/s]



Test set: Average loss: 0.4576, Accuracy: 8431/10000 (84.31%)

EPOCH: 10


Loss=0.264148086309433 Batch_id=97 Accuracy=91.20: 100%|██████████| 98/98 [00:31<00:00,  3.14it/s]



Test set: Average loss: 0.3809, Accuracy: 8770/10000 (87.70%)

EPOCH: 11


Loss=0.30879586935043335 Batch_id=97 Accuracy=91.67: 100%|██████████| 98/98 [00:31<00:00,  3.14it/s]



Test set: Average loss: 0.3897, Accuracy: 8732/10000 (87.32%)

EPOCH: 12


Loss=0.22562000155448914 Batch_id=97 Accuracy=92.32: 100%|██████████| 98/98 [00:31<00:00,  3.07it/s]



Test set: Average loss: 0.3621, Accuracy: 8828/10000 (88.28%)

EPOCH: 13


Loss=0.19226408004760742 Batch_id=97 Accuracy=93.02: 100%|██████████| 98/98 [00:30<00:00,  3.19it/s]



Test set: Average loss: 0.3353, Accuracy: 8948/10000 (89.48%)

EPOCH: 14


Loss=0.22809289395809174 Batch_id=97 Accuracy=93.32: 100%|██████████| 98/98 [00:30<00:00,  3.19it/s]



Test set: Average loss: 0.3249, Accuracy: 8973/10000 (89.73%)

EPOCH: 15


Loss=0.17371554672718048 Batch_id=97 Accuracy=93.86: 100%|██████████| 98/98 [00:33<00:00,  2.95it/s]



Test set: Average loss: 0.3248, Accuracy: 8966/10000 (89.66%)

EPOCH: 16


Loss=0.16807758808135986 Batch_id=97 Accuracy=94.20: 100%|██████████| 98/98 [00:30<00:00,  3.19it/s]



Test set: Average loss: 0.3302, Accuracy: 9007/10000 (90.07%)

EPOCH: 17


Loss=0.130621999502182 Batch_id=97 Accuracy=94.75: 100%|██████████| 98/98 [00:31<00:00,  3.15it/s]



Test set: Average loss: 0.3539, Accuracy: 8927/10000 (89.27%)

EPOCH: 18


Loss=0.13826535642147064 Batch_id=97 Accuracy=94.84: 100%|██████████| 98/98 [00:32<00:00,  3.05it/s]



Test set: Average loss: 0.4079, Accuracy: 8780/10000 (87.80%)

EPOCH: 19


Loss=0.14525873959064484 Batch_id=97 Accuracy=95.20: 100%|██████████| 98/98 [00:30<00:00,  3.17it/s]



Test set: Average loss: 0.3327, Accuracy: 9006/10000 (90.06%)

EPOCH: 20


Loss=0.17459861934185028 Batch_id=97 Accuracy=95.33: 100%|██████████| 98/98 [00:31<00:00,  3.14it/s]



Test set: Average loss: 0.3565, Accuracy: 8952/10000 (89.52%)

EPOCH: 21


Loss=0.10983748733997345 Batch_id=97 Accuracy=95.81: 100%|██████████| 98/98 [00:32<00:00,  3.03it/s]



Test set: Average loss: 0.3217, Accuracy: 9013/10000 (90.13%)

EPOCH: 22


Loss=0.09951087087392807 Batch_id=97 Accuracy=95.88: 100%|██████████| 98/98 [00:31<00:00,  3.12it/s]



Test set: Average loss: 0.3343, Accuracy: 9036/10000 (90.36%)

EPOCH: 23


Loss=0.10788752138614655 Batch_id=97 Accuracy=96.25: 100%|██████████| 98/98 [00:31<00:00,  3.15it/s]



Test set: Average loss: 0.3967, Accuracy: 8882/10000 (88.82%)



In [None]:
!pip install torch-lr-finder

Collecting torch-lr-finder
  Downloading torch_lr_finder-0.2.1-py3-none-any.whl (11 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=0.4.1->torch-lr-finder)
  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m43.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=0.4.1->torch-lr-finder)
  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m823.6/823.6 kB[0m [31m58.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=0.4.1->torch-lr-finder)
  Downloading nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.1/14.1 MB[0m [31m42.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cudn

In [None]:
from torch_lr_finder import LRFinder
# Learning-rate range test: sweep the learning rate from 1e-7 up to 100 over
# 100 iterations of train_loader and plot loss against learning rate.
# NOTE(review): `model` is whatever the earlier cells left bound to that name
# (the training cell above trains it for EPOCHS epochs) - confirm whether the
# sweep was meant to run on a freshly initialised Net() instead.
criterion = nn.CrossEntropyLoss()
# This rebinds the module-level `optimizer` used by the training cell.
optimizer = optim.Adam(model.parameters(), lr=1e-7, weight_decay=1e-2)
# Use the notebook's `device` rather than a hard-coded "cuda", so the cell
# also runs on machines without a GPU.
lr_finder = LRFinder(model, optimizer, criterion, device=device)
lr_finder.range_test(train_loader, end_lr=100, num_iter=100)
lr_finder.plot() # to inspect the loss-learning rate graph
lr_finder.reset()