In [1]:
%matplotlib inline
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
import torchvision 
from torchvision import datasets, transforms
import numpy as np
from torchsummary import summary
from tqdm import tqdm
import time

In [2]:
# classes 
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

In [3]:
m = [125.30691805, 122.95039414, 113.86538318]
m = tuple([x/255 for x in m])
m

(0.49139967862745093, 0.48215840839215685, 0.44653091443137255)

In [4]:
s = [62.99321928, 62.08870764, 66.70489964]
s = tuple([x/255 for x in s])
s

(0.24703223247058823, 0.24348512800000002, 0.26158784172549016)

In [5]:
!pip install torchtoolbox
from torchtoolbox.transform import Cutout



In [6]:
# Train Phase transformations
train_transforms = transforms.Compose([
                                      
                                      #  transforms.ColorJitter(brightness=0.10, contrast=0.1, saturation=0.10, hue=0.1),
                                       transforms.RandomHorizontalFlip(),
                                       transforms.RandomCrop(size=32,padding=1),
                                       Cutout(p=0.25,scale=(0.02, 0.10)),
                                       transforms.ToTensor(),
                                       transforms.Normalize(mean=m, std=s)
                                       
                                       ])

# Test Phase transformations
test_transforms = transforms.Compose([
                                      
                                       transforms.ToTensor(),
                                       transforms.Normalize(mean=m, std=s)
                                       ])


In [7]:
trainset = torchvision.datasets.CIFAR10(root='../cifar10_data', train=True,
                                        download=True, transform=train_transforms)


testset = torchvision.datasets.CIFAR10(root='../cifar10_data', train=False,
                                       download=True, transform=test_transforms)



Files already downloaded and verified
Files already downloaded and verified


In [8]:
SEED = 101

cuda = torch.cuda.is_available()
print("CUDA Available?", cuda)
device = "cuda"
if cuda:
    device = "cuda"
    torch.cuda.manual_seed(SEED)
else:
  device = "cpu"
# dataloader arguments
dataloader_args = dict(shuffle=True, batch_size=128, num_workers=2, pin_memory=True) if cuda else dict(shuffle=True, batch_size=4)


CUDA Available? True


In [9]:
trainloader = torch.utils.data.DataLoader(trainset, **dataloader_args)

testloader = torch.utils.data.DataLoader(testset, **dataloader_args)

In [10]:
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(
            in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion *
                               planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


In [11]:
def ResNet18():
    return ResNet(BasicBlock, [2, 2, 2, 2])


def ResNet34():
    return ResNet(BasicBlock, [3, 4, 6, 3])


def ResNet50():
    return ResNet(Bottleneck, [3, 4, 6, 3])


def ResNet101():
    return ResNet(Bottleneck, [3, 4, 23, 3])


def ResNet152():
    return ResNet(Bottleneck, [3, 8, 36, 3])

In [12]:
from copy import deepcopy

d = {
        'misclassified': [],
        'train_losses': [],
        'test_losses': [],
        'train_accuracy': [],
        'test_accuracy': []
    }

total = ['L2+BN'] # , 'GBN', 'L2+GBN'
tracker = {}
for item in total:
    tracker[item] = deepcopy(d)

# tracker['L1+BN']['test_accuracy'].append(100)
for k, v in tracker.items():
    print(k, ">>>>", v)

L2+BN >>>> {'misclassified': [], 'train_losses': [], 'test_losses': [], 'train_accuracy': [], 'test_accuracy': []}


In [13]:
model2 = ResNet18().to(device)

In [14]:
summary(model2, input_size=(3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 32, 32]           1,728
       BatchNorm2d-2           [-1, 64, 32, 32]             128
            Conv2d-3           [-1, 64, 32, 32]          36,864
       BatchNorm2d-4           [-1, 64, 32, 32]             128
            Conv2d-5           [-1, 64, 32, 32]          36,864
       BatchNorm2d-6           [-1, 64, 32, 32]             128
        BasicBlock-7           [-1, 64, 32, 32]               0
            Conv2d-8           [-1, 64, 32, 32]          36,864
       BatchNorm2d-9           [-1, 64, 32, 32]             128
           Conv2d-10           [-1, 64, 32, 32]          36,864
      BatchNorm2d-11           [-1, 64, 32, 32]             128
       BasicBlock-12           [-1, 64, 32, 32]               0
           Conv2d-13          [-1, 128, 16, 16]          73,728
      BatchNorm2d-14          [-1, 128,

In [15]:
criterion = nn.CrossEntropyLoss()

In [16]:
def train_test(model, device, train_loader, optimizer, epochs, scheduler, test, test_loader, type_, tracker, l1_lambda=None, l2_lambda=None):
    if test and not test_loader:
        raise ValueError("`test`= True but `test_loader` not provided")


    for epoch in range(epochs):
        model.train()
        l1 = torch.tensor(0, requires_grad=False)
        correct = 0
        processed = 0
        train_loss = 0

        print(f"\n\nepoch: {epoch + 1}")
        # pbar = tqdm(train_loader)

        if "l2" in type_.lower():
            optimizer.param_groups[0]['weight_decay'] = l2_lambda
            
        for i, data in enumerate(train_loader, 0):
            inputs, target = data
            inputs, target = inputs.to(device), target.to(device)

            optimizer.zero_grad()  # set the gradients top zero to avoid accumulatin them over the epochs

            output = model(inputs)  # model's output

            loss = criterion(output, target)

            # if "l1" in type_.lower():
            #     for param in model.parameters():
            #         l1 = l1 + param.abs().sum()
            #     loss = loss + l1_lambda * l1.item()

            loss.backward()
            optimizer.step()

            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
            processed += len(data)

            train_loss += loss.item()
            if i%100 == 0:
              print(f'epoch:{epoch+1}.... batch:{i+1}...loss:{train_loss:.4f}')

        train_loss /= len(train_loader.dataset)
        acc = 100 * correct / processed
        tracker[type_]['train_losses'].append(train_loss)
        tracker[type_]['train_accuracy'].append(acc)

        # pbar.set_description(desc=f'loss={loss.item()} batch_id={batch_idx}')
        if scheduler:
            print(f'\n>>>lr: {scheduler.get_last_lr()[0]}')
            scheduler.step()
        print('\nTrain set: \t\t Accuracy: {}/{} ({:.6f}%)'.format(correct, len(train_loader.dataset),
                                                                   100.0 * correct / len(train_loader.dataset)))

        if test:
            model.eval()
            test_loss = 0
            correct = 0
            with torch.no_grad():
                for data, target in test_loader:
                    data, target = data.to(device), target.to(device)
                    output = model(data)
                    test_loss +=criterion(output, target).sum().item()  # sum up batch loss
                    pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
                    correct += pred.eq(target.view_as(pred)).sum().item()
                    for i in range(len(pred)):
                        if pred[i] != target[i]:
                            tracker[type_]['misclassified'].append((data[i], pred[i], target[i]))

            test_loss /= len(test_loader.dataset)
            t_acc = 100.0 * correct / len(test_loader.dataset)
            tracker[type_]['test_losses'].append(test_loss)
            tracker[type_]['test_accuracy'].append(t_acc)

            print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.6f}%)\n'.format(
                test_loss, correct, len(test_loader.dataset), t_acc))

In [17]:
l1 = None
l2 = 8e-4
for combo in total:
  start = time.perf_counter()
  model2 = ResNet18().to(device)

  optimizer = optim.SGD(model2.parameters(), lr=0.007, momentum=0.9, nesterov=True)
  scheduler = StepLR(optimizer=optimizer, step_size=2, gamma=0.579)

  train_test(model2, device, trainloader, optimizer, epochs=20, scheduler=scheduler, test=True, test_loader=testloader,
            type_=combo, tracker=tracker, l1_lambda=l1, l2_lambda=l2)
  end = time.perf_counter()
  print(f"\n\nTime taken for:{combo} = {(end-start)/60:.3f}")



epoch: 1
epoch:1.... batch:1...loss:2.3969
epoch:1.... batch:101...loss:167.6963
epoch:1.... batch:201...loss:297.1771
epoch:1.... batch:301...loss:408.5744

>>>lr: 0.007

Train set: 		 Accuracy: 26944/50000 (53.888000%)

Test set: Average loss: 0.0080, Accuracy: 6565/10000 (65.650000%)



epoch: 2
epoch:2.... batch:1...loss:0.9065
epoch:2.... batch:101...loss:85.7947
epoch:2.... batch:201...loss:163.8732
epoch:2.... batch:301...loss:239.0713

>>>lr: 0.007

Train set: 		 Accuracy: 36339/50000 (72.678000%)

Test set: Average loss: 0.0056, Accuracy: 7444/10000 (74.440000%)



epoch: 3
epoch:3.... batch:1...loss:0.5742
epoch:3.... batch:101...loss:58.7365
epoch:3.... batch:201...loss:115.2808
epoch:3.... batch:301...loss:170.8308

>>>lr: 0.004053

Train set: 		 Accuracy: 40310/50000 (80.620000%)

Test set: Average loss: 0.0046, Accuracy: 8012/10000 (80.120000%)



epoch: 4
epoch:4.... batch:1...loss:0.3196
epoch:4.... batch:101...loss:48.9960
epoch:4.... batch:201...loss:98.0295
epoch:4