# Deep learning baseline project

## Data setup

In [1]:
from resnet import ResNet18, ResNet50
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from torchvision.datasets import MNIST, CIFAR100, CIFAR10
import torch.nn as nn
import torch.optim as optim
import torch
from typing import Tuple
from sklearn.metrics import accuracy_score
import numpy as np
import matplotlib.pyplot as plt
import os
import logging
import time
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]=f'{0}'


# Logging
filename = "ResNet50CIFAR100"
logging.basicConfig(filename = f"logs/{filename}", filemode = 'w', format='%(asctime)s - %(message)s', level=logging.DEBUG)
%matplotlib inline
%load_ext autoreload
%autoreload 2

## Cuda

In [2]:
!nvidia-smi

Wed Dec 22 19:30:46 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.44       Driver Version: 495.44       CUDA Version: 11.5     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100-PCI...  On   | 00000000:37:00.0 Off |                    0 |
| N/A   39C    P0    33W / 250W |      0MiB / 40536MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
|   1  NVIDIA A100-PCI...  On   | 00000000:86:00.0 Off |                    0 |
| N/A   72C    P0    78W / 250W |   9526MiB / 40536MiB |      0%      Defaul

In [3]:
if torch.cuda.is_available():
    print("cuda")
    device = torch.device("cuda")
    print(torch.cuda.current_device())
else:
    print("cpu")
    device = torch.device("cpu")

cuda
0


## Data

In [4]:
load_mnist = False
load_cifar = False
load_cifar100 = True

if load_mnist:
    transform = transforms.Compose([transforms.ToTensor()])
    train_data_set = MNIST("./temp/", train=True, download=True, transform=transform)
    test_data_set = MNIST("./temp/", train=False, download=True, transform=transform)

if load_cifar:
    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5),(0.5, 0.5, 0.5))
        ]
    )
    train_data_set = CIFAR10("./temp/", train=True, download=True, transform=transform)
    test_data_set = CIFAR10("./temp/", train=False, download=True, transform=transform)
if load_cifar100:
    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5),(0.5, 0.5, 0.5))
        ]
    )
    train_data_set = CIFAR100("./temp/", train=True, download=True, transform=transform)
    test_data_set = CIFAR100("./temp/", train=False, download=True, transform=transform)
trainloader = DataLoader(train_data_set, batch_size=16, shuffle=True, num_workers=2)
testloader = DataLoader(test_data_set, batch_size=16, shuffle=True, num_workers=2)

Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./temp/cifar-100-python.tar.gz


1.8%IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

4.8%IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

7.9%IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

10.9%IOPub me

In [5]:
x, y = next(iter(trainloader))
batch_size, channels, img_x, img_y = x.shape
data_points = len(trainloader)*batch_size
n_classes = len(set(train_data_set.targets))

## ResNet model

In [6]:
net = ResNet50(
    channels = channels,
    num_classes = n_classes
).cuda()
print(net)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU()
      (shortcut): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False

In [None]:
# Init
losses = []
test_acc = []
train_acc = []
# Save time
train_time = []

# Init network
net = ResNet50(
    channels = channels,
    num_classes = n_classes
).cuda()

# setting hyperparameters and gettings epoch sizes
num_epochs = 100

# Data subsets
get_slice = lambda i, size: range(i * size, (i + 1) * size)

# Optimizer
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.8)
criterion = nn.CrossEntropyLoss()

logging.debug("Started training")
for epoch in range(num_epochs):
    start_time = time.perf_counter()
    # Forward -> Backprob -> Update params
    
    ## Train
    cur_loss = 0
    net.train()
    for data in trainloader:
        # Get data
        x_train, y_train = data
        
        # Optimizer and batch
        optimizer.zero_grad()
        output = net(x_train.cuda())
        
        # compute gradients given loss
        batch_loss = criterion(output, y_train.cuda())
        batch_loss.backward()
        optimizer.step()
        
        cur_loss += batch_loss
    
    end_time = time.perf_counter()
    train_time.append(end_time - start_time)
    # Save losses
    losses.append(cur_loss / batch_size) 
    
    ### Evaluate training
    net.eval()
    train_preds, train_targs = [], []
    for data in trainloader:
        # Get data
        x_train, y_train = data
        output = net(x_train.cuda())
        
        preds = torch.max(output, 1)[1]
        
        train_targs += list(y_train.numpy())
        train_preds += list(preds.data.cpu().numpy())
    
    ### Evaluate testing
    test_preds, test_targs = [], []
    for data in testloader:
        # Get data
        x_test, y_test = data
        output = net(x_test.cuda())
        preds = torch.max(output, 1)[1]
        test_targs += list(y_test.numpy())
        test_preds += list(preds.data.cpu().numpy())
        

    train_acc_cur = accuracy_score(train_targs, train_preds)
    test_acc_cur = accuracy_score(test_targs, test_preds)
    
    train_acc.append(train_acc_cur)
    test_acc.append(test_acc_cur)
    
    if epoch % 1 == 0:
        string = "Epoch %2i : Train Loss %f, Train acc %f, Test acc %f, Epoch train time %.2f min" % (
                epoch+1, losses[-1], train_acc_cur, test_acc_cur, train_time[-1]/60)
        logging.debug(string)
        print(string)

logging.debug(f"Total time: {sum(train_time)/(60*60)} hours")

Epoch  1 : Train Loss 795.305542, Train acc 0.156920, Test acc 0.149000, Epoch train time 1.79 min
Epoch  2 : Train Loss 676.465515, Train acc 0.261200, Test acc 0.229900, Epoch train time 1.78 min
Epoch  3 : Train Loss 601.816833, Train acc 0.348080, Test acc 0.283800, Epoch train time 1.90 min
Epoch  4 : Train Loss 535.959534, Train acc 0.420540, Test acc 0.320200, Epoch train time 1.75 min
Epoch  5 : Train Loss 476.758087, Train acc 0.487120, Test acc 0.338200, Epoch train time 1.69 min
Epoch  6 : Train Loss 418.424286, Train acc 0.588360, Test acc 0.362000, Epoch train time 1.77 min
Epoch  7 : Train Loss 357.025238, Train acc 0.641120, Test acc 0.364000, Epoch train time 1.77 min
Epoch  8 : Train Loss 300.169983, Train acc 0.743220, Test acc 0.386500, Epoch train time 2.00 min
Epoch  9 : Train Loss 241.306793, Train acc 0.790960, Test acc 0.373800, Epoch train time 1.80 min
Epoch 10 : Train Loss 188.607956, Train acc 0.855620, Test acc 0.393700, Epoch train time 1.91 min
Epoch 11 :

In [None]:
epoch = np.arange(len(train_acc))
plt.figure()
plt.plot(epoch, train_acc, 'r', epoch, test_acc, 'b')
plt.legend(['Train Accucary','Testing Accuracy'])
plt.xlabel('Updates')
plt.ylabel('Acc')
plt.show()

In [None]:
### Evaluate test set
net.eval()
print(f'Training accuracy: {train_acc[-1]*100:.2f}%\Testing accuracy: {test_acc[-1]*100:.2f}%')

In [None]:
logging.debug(f"Total parameters: {sum([par.numel() for par in net.parameters()])}")

In [None]:
sum([par.numel() for par in net.parameters()])