In [3]:
import os
import numpy as np 
import random
import time
import datetime
from PIL import Image
import collections
from types import SimpleNamespace

In [4]:
import matplotlib.pyplot as plt
%matplotlib inline 
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('svg', 'pdf') # For export
import matplotlib
matplotlib.rcParams['lines.linewidth'] = 2.0
import seaborn as sns
sns.reset_orig()

  set_matplotlib_formats('svg', 'pdf') # For export


In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data
import torch.optim as optim

In [6]:
import torchvision


  warn(f"Failed to load image Python extension: {e}")


In [7]:
from torchvision.datasets import CIFAR10

In [8]:
from torchvision import transforms


In [9]:
from tqdm.notebook import tqdm

In [10]:
import ssl
ssl._create_default_https_context = ssl._create_unverified_context

In [11]:
torch.manual_seed(42) # Setting the seed

<torch._C.Generator at 0x1875d9eb910>

In [12]:
#detecting the GPU 
gpu_avail = torch.cuda.is_available()
print(f"Is the GPU available? {gpu_avail}")
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Device:", device)
if gpu_avail:
    print("Device name: " + torch.cuda.get_device_name(0))

Is the GPU available? True
Device: cuda
Device name: NVIDIA GeForce GTX 1060 6GB


In [13]:
# Path to the folder
DATASET_PATH = "../Desktop/ML"
#setting the seed
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
set_seed(42)

if torch.cuda.is_available():
    torch.backends.cudnn.determinstic = True
    torch.backends.cudnn.benchmark = False

In [14]:
train_dataset = CIFAR10(root=DATASET_PATH, train=True, download=True)
DATA_MEANS = (train_dataset.data / 255.0).mean(axis=(0,1,2))
DATA_STD = (train_dataset.data / 255.0).std(axis=(0,1,2))
print("Data mean", DATA_MEANS)
print("Data std", DATA_STD)

Files already downloaded and verified
Data mean [0.49139968 0.48215841 0.44653091]
Data std [0.24703223 0.24348513 0.26158784]


In [15]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 8, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(8 * 8 * 8, 32)
        self.fc2 = nn.Linear(32, 10)

    def forward(self, x):
        out = F.max_pool2d(torch.relu(self.conv1(x)), 2)
        out = F.max_pool2d(torch.relu(self.conv2(out)), 2)
        out = out.view(-1, 8 * 8 * 8)
        out = torch.relu(self.fc1(out))
        out = self.fc2(out)
        return out

In [16]:
def training_loop(n_epochs, optimizer, model, loss_fn, train_loader):
    for epoch in tqdm(range(1, n_epochs + 1)):
        loss_train = 0.0
        for imgs, labels in train_loader:
            
            imgs = imgs.to(device=device)
            labels = labels.to(device=device)
            outputs = model(imgs)
            loss = loss_fn(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_train += loss.item()

        if epoch == 1 or epoch % 10 == 0:
            print('{} Epoch {}, Training loss {}'.format(
                datetime.datetime.now(), epoch,
                loss_train / len(train_loader)))
            
def validate(model, train_loader, val_loader):
    for name, loader in [("train", train_loader), ("val", val_loader)]:
        correct = 0
        total = 0

        with torch.no_grad():  # <1>
            for imgs, labels in loader:
                imgs = imgs.to(device=device)
                labels = labels.to(device=device)
                outputs = model(imgs)
                _, predicted = torch.max(outputs, dim=1) # <2>
                total += labels.shape[0]  # <3>
                correct += int((predicted == labels).sum())  # <4>

        print("Accuracy {}: {:.2f}".format(name , correct / total))

In [17]:
test_transform = transforms.Compose([transforms.ToTensor(),
                                     transforms.Normalize(DATA_MEANS, DATA_STD)
                                     ])
train_transform = transforms.Compose([transforms.RandomHorizontalFlip(),
                                      transforms.RandomResizedCrop((32,32),scale=(0.8,1.0),ratio=(0.9,1.1)),
                                      transforms.ToTensor(),
                                      transforms.Normalize(DATA_MEANS, DATA_STD)
                                     ])

train_dataset = CIFAR10(root=DATASET_PATH, train=True, transform=train_transform, download=True)
val_dataset = CIFAR10(root=DATASET_PATH, train=True, transform=test_transform, download=True)
set_seed(42)
train_set, _ = torch.utils.data.random_split(train_dataset, [45000, 5000])
set_seed(42)
_, val_set = torch.utils.data.random_split(val_dataset, [45000, 5000])

train_loader = data.DataLoader(train_set, batch_size=128, shuffle=True, drop_last=True, pin_memory=True, num_workers=4)
val_loader = data.DataLoader(val_set, batch_size=128, shuffle=False, drop_last=False, num_workers=4)

Files already downloaded and verified
Files already downloaded and verified


In [18]:
train_loader = data.DataLoader(train_set, batch_size=128, shuffle=True, drop_last=True, pin_memory=True, num_workers=4)
val_loader = data.DataLoader(val_set, batch_size=128, shuffle=False, drop_last=False, num_workers=4)
model = Net()

numel_list = [p.numel() for p in model.parameters()]


model = Net().to(device=device)
optimizer = optim.SGD(model.parameters(), lr=1e-2) 
loss_fn = nn.CrossEntropyLoss()  



In [23]:
#Finally, training for 300 Epochs
training_loop(  
    n_epochs = 300,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

validate(model, train_loader, val_loader)

  0%|          | 0/300 [00:00<?, ?it/s]

2022-03-29 13:08:41.471312 Epoch 1, Training loss 2.273516862141101
2022-03-29 13:11:25.055877 Epoch 10, Training loss 1.4325055325472797
2022-03-29 13:15:05.856786 Epoch 20, Training loss 1.2439932571856724
2022-03-29 13:19:06.880713 Epoch 30, Training loss 1.142515715540644
2022-03-29 13:22:39.765654 Epoch 40, Training loss 1.0807891426942287
2022-03-29 13:26:19.875471 Epoch 50, Training loss 1.0384035915391059
2022-03-29 13:29:29.048677 Epoch 60, Training loss 1.0102278948509456
2022-03-29 13:32:42.893782 Epoch 70, Training loss 0.9824230529983499
2022-03-29 13:36:29.495652 Epoch 80, Training loss 0.963858820773937
2022-03-29 13:39:50.431559 Epoch 90, Training loss 0.9437842117755162
2022-03-29 13:42:49.623984 Epoch 100, Training loss 0.9308946175113363
2022-03-29 13:45:47.963887 Epoch 110, Training loss 0.9162210081717228
2022-03-29 13:48:48.668771 Epoch 120, Training loss 0.9048870745887104
2022-03-29 13:51:53.833245 Epoch 130, Training loss 0.897735546626936
2022-03-29 13:55:12.7

In [24]:
#Part B, adding more layers
class Net2Layers(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 8, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(8, 4, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(4 * 4 * 4, 32)
        self.fc2 = nn.Linear(32, 10)

    def forward(self, x):
        out = F.max_pool2d(torch.relu(self.conv1(x)), 2)
        out = F.max_pool2d(torch.relu(self.conv2(out)), 2)
        out = F.max_pool2d(torch.relu(self.conv3(out)), 2)
        out = out.view(-1, 4 * 4 * 4)
        out = torch.relu(self.fc1(out))
        out = self.fc2(out)
        return out

In [26]:
model = Net2Layers()

numel_list = [p.numel() for p in model.parameters()]


model = Net2Layers().to(device=device)
optimizer = optim.SGD(model.parameters(), lr=1e-2) 
loss_fn = nn.CrossEntropyLoss()  


In [28]:
#training 
training_loop(  
    n_epochs = 300,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

validate(model, train_loader, val_loader)

  0%|          | 0/300 [00:00<?, ?it/s]

2022-03-29 15:19:55.406099 Epoch 1, Training loss 2.2480272119201485
2022-03-29 15:23:29.764595 Epoch 10, Training loss 1.4279922274103192
2022-03-29 15:27:46.827025 Epoch 20, Training loss 1.2805870404610267
2022-03-29 15:31:17.499342 Epoch 30, Training loss 1.1850827212347264
2022-03-29 15:35:22.516037 Epoch 40, Training loss 1.1196661858137515
2022-03-29 15:40:02.597008 Epoch 50, Training loss 1.0601725919633849
2022-03-29 15:44:25.015122 Epoch 60, Training loss 1.017804021169657
2022-03-29 15:48:24.022534 Epoch 70, Training loss 0.9918310010535085
2022-03-29 15:52:22.826827 Epoch 80, Training loss 0.9684683979406655
2022-03-29 15:56:43.597989 Epoch 90, Training loss 0.9422862385412907
2022-03-29 16:01:02.186348 Epoch 100, Training loss 0.9197289113305572
2022-03-29 16:05:02.505344 Epoch 110, Training loss 0.8988188792158056
2022-03-29 16:09:13.763919 Epoch 120, Training loss 0.8855711062069972
2022-03-29 16:13:40.838800 Epoch 130, Training loss 0.8754912889920748
2022-03-29 16:17:3

In [19]:
#Problem 2
class ResBlock(nn.Module):
    def __init__(self, n_chans):
        super(ResBlock, self).__init__()
        self.conv = nn.Conv2d(n_chans, n_chans, kernel_size=3, padding=1, bias=False)
        #torch.nn.init.kaiming_normal_(self.conv.weight, nonlinearity='relu')

    def forward(self, x):
        out = self.conv(x)
        out = torch.relu(out)
        return out + x #Skip connection

In [20]:
class ResNet_10(nn.Module):
    def __init__(self, n_chans1=32, n_blocks=10):
        super().__init__()
        self.n_chans1 = n_chans1
        self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
        self.resblocks = nn.Sequential(
            *(n_blocks * [ResBlock(n_chans=n_chans1)]))
        self.fc1 = nn.Linear(8 * 8 * n_chans1, 32)
        self.fc2 = nn.Linear(32, 10)
        
    def forward(self, x):
        out = F.max_pool2d(torch.relu(self.conv1(x)), 2)
        out = self.resblocks(out)
        out = F.max_pool2d(out, 2)
        out = out.view(-1, 8 * 8 * self.n_chans1)
        out = torch.relu(self.fc1(out))
        out = self.fc2(out)
        return out

In [21]:
model = ResNet_10()

numel_list = [p.numel() for p in model.parameters()]
print(sum(numel_list), numel_list)

model = ResNet_10().to(device=device)
optimizer = optim.SGD(model.parameters(), lr=3e-3)
loss_fn = nn.CrossEntropyLoss()


76010 [864, 32, 9216, 65536, 32, 320, 10]


In [None]:

#Part B
#Weight Decay:
def training_loop_l2reg(n_epochs, optimizer, model, loss_fn, train_loader):
    for epoch in tqdm(range(1, n_epochs + 1)):
        loss_train = 0.0
        for imgs, labels in train_loader:
            imgs = imgs.to(device=device)
            labels = labels.to(device=device)
            outputs = model(imgs)
            loss = loss_fn(outputs, labels)

            l2_lambda = 0.001
            l2_norm = sum(p.pow(2.0).sum()
                          for p in model.parameters())  # <1>
            loss = loss + l2_lambda * l2_norm

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            loss_train += loss.item()
        if epoch == 1 or epoch % 10 == 0:
            print('{} Epoch {}, Training loss {}'.format(
                datetime.datetime.now(), epoch,
                loss_train / len(train_loader)))


In [43]:
class ResBlock_DO(nn.Module):
    def __init__(self, n_chans, p):
        super(ResBlock_DO, self).__init__()
        self.conv = nn.Conv2d(n_chans, n_chans, kernel_size=3,
                              padding=1, bias=False)
        self.dropout = nn.Dropout2d(p = p)
        
    def forward(self, x):
        out = self.conv(x)
        out = self.dropout(out)
        out = torch.relu(out)
        return out + x

In [44]:
class ResNet10_DO(nn.Module):
    def __init__(self, n_chans1=32, n_blocks=10, p=0.3):
        super().__init__()
        self.n_chans1 = n_chans1
        self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
        self.resblocks = nn.Sequential(
            *(n_blocks * [ResBlock_DO(n_chans=n_chans1, p=p)]))
        self.fc1 = nn.Linear(8 * 8 * n_chans1, 32)
        self.fc2 = nn.Linear(32, 10)
        
    def forward(self, x):
        out = F.max_pool2d(torch.relu(self.conv1(x)), 2)
        out = self.resblocks(out)
        out = F.max_pool2d(out, 2)
        out = out.view(-1, 8 * 8 * self.n_chans1)
        out = torch.relu(self.fc1(out))
        out = self.fc2(out)
        return out



In [45]:

class ResBlock_BN(nn.Module):
    def __init__(self, n_chans):
        super(ResBlock_BN, self).__init__()
        self.conv = nn.Conv2d(n_chans, n_chans, kernel_size=3,
                              padding=1, bias=False)
        self.batch_norm = nn.BatchNorm2d(num_features=n_chans)
        torch.nn.init.kaiming_normal_(self.conv.weight,
                                      nonlinearity='relu')
        torch.nn.init.constant_(self.batch_norm.weight, 0.5)
        torch.nn.init.zeros_(self.batch_norm.bias)

    def forward(self, x):
        out = self.conv(x)
        out = self.batch_norm(out)
        out = torch.relu(out)
        return out + x



In [46]:

model = ResNet_10()

numel_list = [p.numel() for p in model.parameters()]
print(sum(numel_list), numel_list)

model = ResNet_10().to(device=device)
optimizer = optim.SGD(model.parameters(), lr=3e-3)
loss_fn = nn.CrossEntropyLoss()


76010 [864, 32, 9216, 65536, 32, 320, 10]


In [None]:
training_loop_l2reg(
    n_epochs = 150,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

validate(model, train_loader, val_loader)



In [47]:
#Dropout Model:
model = ResNet10_DO()

numel_list = [p.numel() for p in model.parameters()]
print(sum(numel_list), numel_list)

model = ResNet10_DO().to(device=device)
optimizer = optim.SGD(model.parameters(), lr=3e-3)
loss_fn = nn.CrossEntropyLoss()



76010 [864, 32, 9216, 65536, 32, 320, 10]


In [None]:
training_loop(
    n_epochs = 150,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

validate(model, train_loader, val_loader)


In [48]:
model = ResNet10_BN()

numel_list = [p.numel() for p in model.parameters()]
print(sum(numel_list), numel_list)

model = ResNet10_BN().to(device=device)
optimizer = optim.SGD(model.parameters(), lr=3e-3)
loss_fn = nn.CrossEntropyLoss()


76074 [864, 32, 9216, 32, 32, 65536, 32, 320, 10]


In [None]:

training_loop(
    n_epochs = 50,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

validate(model, train_loader, val_loader)