In [1]:
import torch
from torch import nn
from torch.optim import SGD, Adam, lr_scheduler
from torch.nn.utils import clip_grad_value_
from torch.utils.data import random_split, DataLoader

from torchvision.datasets import CIFAR10
from torchvision import transforms

import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm

In [2]:
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
    
device

device(type='cuda', index=0)

In [3]:
train_transform = transforms.Compose([
    transforms.RandomCrop((32, 32), padding=4, padding_mode="reflect"),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010], inplace=True)
])

test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010], inplace=True)
])

In [4]:
train_dataset = CIFAR10("../5. CNN/cifar-10", download=True, train=True, transform=train_transform)
test_dataset = CIFAR10("../5. CNN/cifar-10", train=False, transform=test_transform)

Files already downloaded and verified


In [5]:
train_loader = DataLoader(train_dataset, batch_size=400, shuffle=True, num_workers=12, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=800, num_workers=6, pin_memory=True)

In [6]:
class ResNet9(nn.Module):
    
    def __init__(self, c=3):
        super().__init__()
        
        self.conv1 = self.conv_layer(c, 64)
        self.conv2 = self.conv_layer(64, 128, pool=True)
        self.conv3_res1 = self.conv_layer(128, 128)
        self.conv4_res1 = self.conv_layer(128, 128, activate=False)
        self.conv5 = self.conv_layer(128, 256, pool=True)
        self.conv6 = self.conv_layer(256, 512, pool=True)
        self.conv7_res2 = self.conv_layer(512, 512)
        self.conv8_res2 = self.conv_layer(512, 512, activate=False)
        self.pool8 = nn.MaxPool2d(kernel_size=4, stride=4)
        
        self.classifier9 = nn.Sequential(
            nn.Flatten(),
            nn.Dropout(p=0.2),
            nn.Linear(in_features=512, out_features=10)
        )
    
    def conv_layer(self, c_in, c_out, activate=True, pool=False):
        layers = [
                nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=3, padding=1),
                nn.BatchNorm2d(num_features=c_out) 
        ]
        if activate:
            layers.append(nn.ReLU())
        else:
            assert pool is False
            
        if pool:
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        
        return nn.Sequential(*layers)
    
    def forward(self, xb):
        a1 = self.conv1(xb)
        a2 = self.conv2(a1)
        a3 = self.conv3_res1(a2)
        
        z4 = self.conv4_res1(a3)
        a4 = nn.ReLU()(z4 + a2)
        
        a5 = self.conv5(a4)
        a6 = self.conv6(a5)
        a7 = self.conv7_res2(a6)
        
        z8 = self.conv8_res2(a7)
        a8 = nn.ReLU()(z8 + a6)
        a8 = self.pool8(a8)
        
        out = self.classifier9(a8)
        return out        

In [7]:
def fit(model, train_loader, val_loader, alpha, num_epochs, weight_decay=0, grad_clip=None):
    
    loss_function = nn.CrossEntropyLoss()
    optimizer = Adam(model.parameters(), lr=alpha, weight_decay=weight_decay)
    # Learning rate scheduling
    scheduler = lr_scheduler.OneCycleLR(optimizer, max_lr=alpha, 
                                        epochs=num_epochs, steps_per_epoch=len(train_loader))
    
    for e in range(num_epochs):
        model.train()  # Set back to training mode
        
        batch_losses = []
        for xb, yb in tqdm(train_loader):
            xb, yb = xb.to(device), yb.to(device)  # Copies batch to gpu
            
            zb = model(xb)
            loss = loss_function(zb, yb)
            loss.backward()
            if grad_clip:
                clip_grad_value_(model.parameters(), grad_clip)
            optimizer.step()
            optimizer.zero_grad()
        
            last_lr = optimizer.param_groups[0]["lr"]  # Last learning rate used in the current epoch
            scheduler.step()
            
            batch_losses.append(loss.item())
        
        epoch_loss = np.mean(batch_losses)
        val_loss, val_acc = evaluate(model, val_loader, loss_function)
        print(f"Epoch: {e+1}, Last lr: {last_lr}, Train Loss: {epoch_loss}, Val Loss: {val_loss}, Val Acc: {val_acc}") 
        
        
def evaluate(model, val_loader, loss_function=None):
    model.eval()  # Set to evaluation mode, so BatchNorm/Dropout will behave correctly 
    
    with torch.no_grad():  # No computation graph    
        
        batch_losses = []
        batch_accs = []
        for xb, yb in val_loader:
            xb, yb = xb.to(device), yb.to(device)  # Copies batch to gpu

            zb = model(xb)
            if loss_function is not None:
                loss = loss_function(zb, yb)
                batch_losses.append(loss.item())

            batch_accs.append(accuracy(zb, yb))

    if loss_function is None:
        return np.mean(batch_accs)
    else:
        return [np.mean(batch_losses), np.mean(batch_accs)]
    
def accuracy(zb, yb):
    _, predicted = torch.max(zb, dim=1)
    acc = torch.sum(predicted==yb) / yb.shape[0]
    return acc.item()

In [8]:
model = ResNet9()
model = model.to(device)

In [9]:
max_lr = 0.01
num_epochs = 8
grad_clip = 0.1
weight_decay = 1e-4

In [10]:
%%time
fit(model, train_loader, test_loader, alpha=max_lr, num_epochs=num_epochs, weight_decay=weight_decay, grad_clip=grad_clip)

100%|██████████| 125/125 [00:23<00:00,  5.33it/s]
  0%|          | 0/125 [00:00<?, ?it/s]

Epoch: 1, Last lr: 0.003929274947543202 Train Loss: 1.4836134548187256, Val Loss: 1.358323574066162, Val Acc: 0.5743269140903766


100%|██████████| 125/125 [00:23<00:00,  5.35it/s]
  0%|          | 0/125 [00:00<?, ?it/s]

Epoch: 2, Last lr: 0.009352712767003208 Train Loss: 1.0447519469261168, Val Loss: 1.1338582222278302, Val Acc: 0.6505769124397864


100%|██████████| 125/125 [00:22<00:00,  5.53it/s]
  0%|          | 0/125 [00:00<?, ?it/s]

Epoch: 3, Last lr: 0.009719417773875232 Train Loss: 0.7896337532997131, Val Loss: 0.7748635136164151, Val Acc: 0.7301922944875864


100%|██████████| 125/125 [00:23<00:00,  5.24it/s]
  0%|          | 0/125 [00:00<?, ?it/s]

Epoch: 4, Last lr: 0.008117456539497631 Train Loss: 0.6112672920227051, Val Loss: 0.6978815885690542, Val Acc: 0.7440384442989643


100%|██████████| 125/125 [00:23<00:00,  5.32it/s]
  0%|          | 0/125 [00:00<?, ?it/s]

Epoch: 5, Last lr: 0.005559840141227017 Train Loss: 0.486379576921463, Val Loss: 0.48605384276463437, Val Acc: 0.8332692017922034


100%|██████████| 125/125 [00:22<00:00,  5.47it/s]
  0%|          | 0/125 [00:00<?, ?it/s]

Epoch: 6, Last lr: 0.0028306099820869924 Train Loss: 0.3876775047779083, Val Loss: 0.39436071194135225, Val Acc: 0.8651922941207886


100%|██████████| 125/125 [00:23<00:00,  5.41it/s]
  0%|          | 0/125 [00:00<?, ?it/s]

Epoch: 7, Last lr: 0.0007664159383425639 Train Loss: 0.28283466649055483, Val Loss: 0.31125691303840053, Val Acc: 0.8956730595001807


100%|██████████| 125/125 [00:22<00:00,  5.48it/s]


Epoch: 8, Last lr: 4e-08 Train Loss: 0.21663150990009308, Val Loss: 0.28656160143705517, Val Acc: 0.9048076776357797
CPU times: user 3min 13s, sys: 4.41 s, total: 3min 18s
Wall time: 3min 20s


In [11]:
print(evaluate(model, test_loader))

0.9048076776357797


In [12]:
torch.save(model.state_dict(), "./saved_model.pth")

model2 = ResNet9().to(device)
model2.load_state_dict(torch.load("./saved_model.pth"))

evaluate(model2, test_loader)

0.9048076776357797