In [1]:
!pwd

/home/studio-lab-user/Mini-Project-DL


In [2]:
!ls

CIFAResnet.ipynb  checkpoint		  data	      submission.ipynb
README.md	  cifar10_resnet18.ipynb  resnet.png


In [3]:
!python --version

Python 3.9.13


In [4]:
!pip install torch



In [5]:
!pip install torchvision



In [6]:
!pip install torch-summary



In [7]:
!pip install tqdm



In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
import torch.utils.data as data
import torch.nn.init as init

import torchvision
import torchvision.transforms as transforms

import argparse
import os
import sys
import time
import math
from tqdm import tqdm

In [9]:
def format_time(seconds):
    days = int(seconds / 3600/24)
    seconds = seconds - days*3600*24
    hours = int(seconds / 3600)
    seconds = seconds - hours*3600
    minutes = int(seconds / 60)
    seconds = seconds - minutes*60
    secondsf = int(seconds)
    seconds = seconds - secondsf
    millis = int(seconds*1000)

    f = ''
    i = 1
    if days > 0:
        f += str(days) + 'D'
        i += 1
    if hours > 0 and i <= 2:
        f += str(hours) + 'h'
        i += 1
    if minutes > 0 and i <= 2:
        f += str(minutes) + 'm'
        i += 1
    if secondsf > 0 and i <= 2:
        f += str(secondsf) + 's'
        i += 1
    if millis > 0 and i <= 2:
        f += str(millis) + 'ms'
        i += 1
    if f == '':
        f = '0ms'
    return f

In [10]:
TOTAL_BAR_LENGTH = 65.
last_time = time.time()
begin_time = last_time
def progress_bar(current, total, msg=None):
    global last_time, begin_time
    if current == 0:
        begin_time = time.time()  # Reset for new bar.

    cur_len = int(TOTAL_BAR_LENGTH*current/total)
    rest_len = int(TOTAL_BAR_LENGTH - cur_len) - 1

    sys.stdout.write(' [')
    for i in range(cur_len):
        sys.stdout.write('=')
    sys.stdout.write('>')
    for i in range(rest_len):
        sys.stdout.write('.')
    sys.stdout.write(']')

    cur_time = time.time()
    step_time = cur_time - last_time
    last_time = cur_time
    tot_time = cur_time - begin_time

    L = []
    L.append('  Step: %s' % format_time(step_time))
    L.append(' | Tot: %s' % format_time(tot_time))
    if msg:
        L.append(' | ' + msg)

    msg = ''.join(L)
    sys.stdout.write(msg)
    for i in range(term_width-int(TOTAL_BAR_LENGTH)-len(msg)-3):
        sys.stdout.write(' ')

    # Go back to the center of the bar.
    for i in range(term_width-int(TOTAL_BAR_LENGTH/2)+2):
        sys.stdout.write('\b')
    sys.stdout.write(' %d/%d ' % (current+1, total))

    if current < total-1:
        sys.stdout.write('\r')
    else:
        sys.stdout.write('\n')
    sys.stdout.flush()

In [11]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [12]:
start_epoch = 0
best_acc = 0

In [13]:
# Data transforms for train and test sets
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])


In [14]:
print('==> Preparing data..')
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform_train)

testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform_test)

==> Preparing data..
Files already downloaded and verified
Files already downloaded and verified


In [15]:
trainset

Dataset CIFAR10
    Number of datapoints: 50000
    Root location: ./data
    Split: Train
    StandardTransform
Transform: Compose(
               RandomCrop(size=(32, 32), padding=4)
               RandomHorizontalFlip(p=0.5)
               ToTensor()
               Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.201))
           )

In [16]:
testset

Dataset CIFAR10
    Number of datapoints: 10000
    Root location: ./data
    Split: Test
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.201))
           )

In [17]:
# Splitting valid set from training data
VALID_RATIO = 0.9

n_train_examples = int(len(trainset) * VALID_RATIO)
n_valid_examples = len(trainset) - n_train_examples

train_data, valid_data = data.random_split(trainset, 
                                           [n_train_examples, n_valid_examples])

In [18]:
train_data

<torch.utils.data.dataset.Subset at 0x7fcbe745bf70>

In [19]:
valid_data

<torch.utils.data.dataset.Subset at 0x7fcbe745bc70>

In [20]:
trainset=train_data
valset = valid_data

In [21]:
#Data Loaders

trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=128, shuffle=True, num_workers=2)

valloader = torch.utils.data.DataLoader(
    valset, batch_size=128, shuffle=True, num_workers=2)

testloader = torch.utils.data.DataLoader(
    testset, batch_size=100, shuffle=False, num_workers=2)

In [22]:
classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')

In [23]:
# Model defination
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        """
        in_planes: C_in for conv1 and Sequential.Conv2d
        planes: F_i
        """
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(
            in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion *
                               planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class ResNet(nn.Module):
    def __init__(self, block, num_blocks,  num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 32
        """
        in_planes: C_out of layer1
        """
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.layer1 = self._make_layer(block, 32, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 64, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 128, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 256, num_blocks[3], stride=2)
        self.linear = nn.Linear(256*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
            """
            in_planes becomes the output of the current layer
            """
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4) # to be understood
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def ResNet18():
    return ResNet(BasicBlock, [2, 2, 2, 2])

In [24]:
print('==> Building model..')
net = ResNet18()
net = net.to(device)

==> Building model..


In [25]:
from torchsummary import summary
summary(net, input_size=(3, 32, 32))

Layer (type:depth-idx)                   Param #
├─Conv2d: 1-1                            864
├─BatchNorm2d: 1-2                       64
├─Sequential: 1-3                        --
|    └─BasicBlock: 2-1                   --
|    |    └─Conv2d: 3-1                  9,216
|    |    └─BatchNorm2d: 3-2             64
|    |    └─Conv2d: 3-3                  9,216
|    |    └─BatchNorm2d: 3-4             64
|    |    └─Sequential: 3-5              --
|    └─BasicBlock: 2-2                   --
|    |    └─Conv2d: 3-6                  9,216
|    |    └─BatchNorm2d: 3-7             64
|    |    └─Conv2d: 3-8                  9,216
|    |    └─BatchNorm2d: 3-9             64
|    |    └─Sequential: 3-10             --
├─Sequential: 1-4                        --
|    └─BasicBlock: 2-3                   --
|    |    └─Conv2d: 3-11                 18,432
|    |    └─BatchNorm2d: 3-12            128
|    |    └─Conv2d: 3-13                 36,864
|    |    └─BatchNorm2d: 3-14            128
|   

Layer (type:depth-idx)                   Param #
├─Conv2d: 1-1                            864
├─BatchNorm2d: 1-2                       64
├─Sequential: 1-3                        --
|    └─BasicBlock: 2-1                   --
|    |    └─Conv2d: 3-1                  9,216
|    |    └─BatchNorm2d: 3-2             64
|    |    └─Conv2d: 3-3                  9,216
|    |    └─BatchNorm2d: 3-4             64
|    |    └─Sequential: 3-5              --
|    └─BasicBlock: 2-2                   --
|    |    └─Conv2d: 3-6                  9,216
|    |    └─BatchNorm2d: 3-7             64
|    |    └─Conv2d: 3-8                  9,216
|    |    └─BatchNorm2d: 3-9             64
|    |    └─Sequential: 3-10             --
├─Sequential: 1-4                        --
|    └─BasicBlock: 2-3                   --
|    |    └─Conv2d: 3-11                 18,432
|    |    └─BatchNorm2d: 3-12            128
|    |    └─Conv2d: 3-13                 36,864
|    |    └─BatchNorm2d: 3-14            128
|   

In [26]:
device

'cuda'

In [27]:
if device == 'cuda':
    net = torch.nn.DataParallel(net)
    cudnn.benchmark = True

In [28]:
resume = False
if resume == True:
    # Load checkpoint.
    print('==> Resuming from checkpoint..')
    assert os.path.isdir('checkpoint'), 'Error: no checkpoint directory found!'
    checkpoint = torch.load('./checkpoint/ckpt.pth')
    net.load_state_dict(checkpoint['net'])
    best_acc = checkpoint['acc']
    start_epoch = checkpoint['epoch']

In [29]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.0001,
                      #momentum=0.9, 
                       weight_decay=5e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

In [30]:
len(trainloader)

352

In [31]:
# Training
def train(epoch):
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    steps = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        steps = steps+1
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

        #progress_bar(batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
        #             % (train_loss/(batch_idx+1), 100.*correct/total, correct, total))
        if steps%87==0:
            print(f"Epoch:{epoch} --Progress(%):{batch_idx/len(trainloader)*100.:.2f}-- Loss:{train_loss/(batch_idx+1):.2f} -- Acc:{100.*correct/total:.2f}")



In [32]:
# Validation function
def val(epoch):
    global best_acc
    net.eval()
    val_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(valloader):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            val_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

            #progress_bar(batch_idx, len(testloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
            #             % (test_loss/(batch_idx+1), 100.*correct/total, correct, total))
    print(f"Epoch:{epoch} --Val Loss:{val_loss/(batch_idx+1):.2f} -- Acc:{100.*correct/total:.2f}")
    
    # Save checkpoint.
    acc = 100.*correct/total
    if acc > best_acc:
        print('Saving the best model.')
        if not os.path.isdir('checkpoint'):
            os.mkdir('checkpoint')
        
        torch.save(net.state_dict(), './checkpoint/ckpt.pth')
        best_acc = acc

In [33]:
# Testing Function
def test(epoch):
    print("***** Begin testing *****")
    net.load_state_dict(torch.load('./checkpoint/ckpt.pth'))
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

            #progress_bar(batch_idx, len(testloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
            #             % (test_loss/(batch_idx+1), 100.*correct/total, correct, total))
    print(f"Epoch:{epoch} --Test Loss:{test_loss/(batch_idx+1):.2f} -- Acc:{100.*correct/total:.2f}")

In [34]:
# Training and testing the model
for epoch in range(start_epoch, start_epoch+80):
    train(epoch)
    scheduler.step()
    val(epoch)
    
test(epoch)


Epoch: 0
Epoch:0 --Progress(%):24.43-- Loss:1.88 -- Acc:29.99
Epoch:0 --Progress(%):49.15-- Loss:1.73 -- Acc:35.87
Epoch:0 --Progress(%):73.86-- Loss:1.63 -- Acc:39.48
Epoch:0 --Progress(%):98.58-- Loss:1.56 -- Acc:42.20
Epoch:0 --Val Loss:1.31 -- Acc:52.28
Saving the best model.

Epoch: 1
Epoch:1 --Progress(%):24.43-- Loss:1.27 -- Acc:54.67
Epoch:1 --Progress(%):49.15-- Loss:1.24 -- Acc:55.59
Epoch:1 --Progress(%):73.86-- Loss:1.21 -- Acc:56.57
Epoch:1 --Progress(%):98.58-- Loss:1.19 -- Acc:57.27
Epoch:1 --Val Loss:1.15 -- Acc:59.66
Saving the best model.

Epoch: 2
Epoch:2 --Progress(%):24.43-- Loss:1.05 -- Acc:62.11
Epoch:2 --Progress(%):49.15-- Loss:1.04 -- Acc:62.71
Epoch:2 --Progress(%):73.86-- Loss:1.03 -- Acc:63.19
Epoch:2 --Progress(%):98.58-- Loss:1.02 -- Acc:63.49
Epoch:2 --Val Loss:1.09 -- Acc:61.32
Saving the best model.

Epoch: 3
Epoch:3 --Progress(%):24.43-- Loss:0.92 -- Acc:67.35
Epoch:3 --Progress(%):49.15-- Loss:0.93 -- Acc:66.95
Epoch:3 --Progress(%):73.86-- Loss:0.9