# COMP34212 Summative Lab
## Resnet Architecture on CIFAR-10 Dataset

# Imports

In [1]:
#import the necessary libraries
from math import sqrt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
import torch.optim as optim
import torch.utils.data as data
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torchsummary import summary
#The following imports are used for Bayesian Optimization
from torch.utils.data.sampler import SubsetRandomSampler
from sklearn.model_selection import GridSearchCV
from skopt import gp_minimize
from skopt.space import Real, Integer
from skopt.utils import use_named_args
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern
from skopt import gp_minimize
from skopt.space import Real
from skopt.utils import use_named_args
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Fetch Data (Load CIFAR 10 and apply data augementation)

In [2]:
def fetch_data():
    # Load the CIFAR-10 dataset and apply data augmentation
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010])
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010])
    ])

    # Load the CIFAR-10 dataset
    train_set = datasets.CIFAR10(root='./dataset', train=True, download=True, transform=transform_train)
    test_set = datasets.CIFAR10(root='./dataset', train=False, download=True, transform=transform_test)

    # Split train set into train and validation sets
    train_size = int(0.8 * len(train_set))
    val_size = len(train_set) - train_size
    train_dataset, val_dataset = torch.utils.data.random_split(train_set, [train_size, val_size])

    # Create data loaders for train, validation, and test sets
    train_data_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_data_loader = torch.utils.data.DataLoader(val_dataset, batch_size=32, shuffle=True)
    test_data_loader = torch.utils.data.DataLoader(test_set, batch_size=32, shuffle=False)

    return train_data_loader, val_data_loader, test_data_loader


# DEFINE RESNET Architecture

In [3]:
# Define the ResNet architecture
class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear1 = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride, kernel_size=3):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride, kernel_size))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear1(out)
        return out

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1, kernel_size=3):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(
            in_planes, planes, kernel_size=kernel_size, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=kernel_size,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes),
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

# DEFINE UTILITY FUNCTIONS

In [4]:
def train(model, optimizer, criterion, trainloader):
    model.train()
    train_loss = 0.0
    train_correct = 0
    train_total = 0

    for batch_idx, (inputs, targets) in enumerate(trainloader):
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = outputs.max(1)
        train_total += targets.size(0)
        train_correct += predicted.eq(targets).sum().item()

    train_accuracy = 100. * train_correct / train_total
    train_loss /= len(trainloader)
    print('Training - Loss: {:.4f}, Accuracy: {:.4f}%'.format(train_loss, train_accuracy))

    return train_loss, train_accuracy


def evaluate(model, criterion, testloader):
    model.eval()
    total = 0
    correct = 0
    test_loss = 0.0

    with torch.no_grad():
        for inputs, labels in testloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            test_loss += loss.item() * labels.size(0)

    test_accuracy = 100 * correct / total
    test_loss /= len(testloader.dataset)
    
    print('Validation - Loss: {:.4f}, Accuracy: {:.4f}%'.format(test_loss , test_accuracy))
   
    
    # scheduler.step(test_accuracy)

    return test_loss, test_accuracy


def train_and_validate(model, optimizer, criterion, trainloader, valloader, num_epochs):
    model.to(device)

    train_loss = 0.0
    train_accuracy=0.0
    
    val_total = 0
    val_accuracy=0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        print('-' * 10)
        
        train_loss,train_accuracy = train(model,optimizer,criterion,trainloader)
        val_loss,val_accuracy = evaluate(model,criterion,valloader)
        
       

    return train_loss, train_accuracy, val_loss, val_accuracy

def test(model,criterion,testloader):
    
    model.eval()
    total = 0
    correct = 0
    test_loss = 0.0

    with torch.no_grad():
        for inputs, labels in testloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            test_loss += loss.item() * labels.size(0)

    test_accuracy = 100 * correct / total
    test_loss /= len(testloader.dataset)
    
    print('Testing - Loss: {:.4f}, Accuracy: {:.4f}%'.format(test_loss , test_accuracy))
    
    

    return test_loss, test_accuracy


In [5]:
# Load the CIFAR-10 dataset 
trainloader,valloader,testloader = fetch_data()
criterion = nn.CrossEntropyLoss()

Files already downloaded and verified
Files already downloaded and verified


# 

In [7]:
#Second RUN with changes in data splitting and train function

# Define the search space for the hyperparameters
search_space = [Real(1e-6, 1e+0, prior='log-uniform', name='lr'),
                Real(0.1, 0.999, name='beta1'),
                Real(0.1, 0.999, name='beta2'),
                Real(1e-6, 1e-3, prior='log-uniform', name='weight_decay')]

n_epoch = 20

# Define the objective function for the Bayesian optimization
@use_named_args(search_space)
def objective_ADAM(lr, beta1, beta2, weight_decay):

    # Define the model
    model = ResNet(BasicBlock, [2, 1, 1, 1])

    print(f'\n Testing hyperparameters: lr={lr}, beta1={beta1}, beta2={beta2}, weight_decay={weight_decay}')


    # Define the optimizer with the given hyperparameters
    optimizer = optim.Adam(model.parameters(), lr=lr, betas=(beta1, beta2), weight_decay=weight_decay)

    # Train the model and return the validation accuracy
    train_losses, train_accuracies, val_losses, val_accuracies = train_and_validate(model, optimizer, criterion, trainloader, valloader, n_epoch)


    return -val_accuracies  # minimize negative accuracy

# Perform the Bayesian optimization
result = gp_minimize(objective_ADAM, search_space, n_calls=10, random_state=0)

# Print the best hyperparameters and the corresponding validation accuracy
print('Best hyperparameters:', result.x)
print('Best validation accuracy:', -result.fun)



 Testing hyperparameters: lr=0.0036063486397284665, beta1=0.8589949079743348, beta2=0.8712931102428585, weight_decay=0.0003481410351917474
Epoch 1/20
----------
Training - Loss: 1.7548, Accuracy: 33.9575%
Validation - Loss: 1.6401, Accuracy: 39.6600%
Epoch 2/20
----------
Training - Loss: 1.3544, Accuracy: 50.4150%
Validation - Loss: 1.2186, Accuracy: 55.1100%
Epoch 3/20
----------
Training - Loss: 1.1552, Accuracy: 58.6425%
Validation - Loss: 1.1627, Accuracy: 58.5900%
Epoch 4/20
----------
Training - Loss: 1.0485, Accuracy: 62.8150%
Validation - Loss: 1.2691, Accuracy: 56.1500%
Epoch 5/20
----------
Training - Loss: 0.9862, Accuracy: 65.0125%
Validation - Loss: 1.1857, Accuracy: 58.5000%
Epoch 6/20
----------
Training - Loss: 0.9325, Accuracy: 67.0675%
Validation - Loss: 0.9947, Accuracy: 64.7800%
Epoch 7/20
----------
Training - Loss: 0.8957, Accuracy: 68.4350%
Validation - Loss: 0.8782, Accuracy: 69.3300%
Epoch 8/20
----------
Training - Loss: 0.8576, Accuracy: 70.1525%
Validation

In [8]:
# Define the best hyperparameters found by Bayesian optimization
best_lr, best_beta1, best_beta2, best_weight_decay = result.x

# Define the model and dataset
model = ResNet(BasicBlock, [2, 1, 1, 1])
summary(model.to(device), input_size=(3, 32, 32))
# Define the Adam optimizer with the best hyperparameters
optimizer = optim.Adam(model.parameters(), lr=best_lr, betas=(best_beta1, best_beta2), weight_decay=best_weight_decay)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max',patience=3)


# Train the model with the best hyperparameters and validate
train_and_validate(model,optimizer,criterion,trainloader,valloader,100)

Layer (type:depth-idx)                   Param #
├─Conv2d: 1-1                            1,728
├─BatchNorm2d: 1-2                       128
├─Sequential: 1-3                        --
|    └─BasicBlock: 2-1                   --
|    |    └─Conv2d: 3-1                  36,864
|    |    └─BatchNorm2d: 3-2             128
|    |    └─Conv2d: 3-3                  36,864
|    |    └─BatchNorm2d: 3-4             128
|    |    └─Sequential: 3-5              --
|    └─BasicBlock: 2-2                   --
|    |    └─Conv2d: 3-6                  36,864
|    |    └─BatchNorm2d: 3-7             128
|    |    └─Conv2d: 3-8                  36,864
|    |    └─BatchNorm2d: 3-9             128
|    |    └─Sequential: 3-10             --
├─Sequential: 1-4                        --
|    └─BasicBlock: 2-3                   --
|    |    └─Conv2d: 3-11                 73,728
|    |    └─BatchNorm2d: 3-12            256
|    |    └─Conv2d: 3-13                 147,456
|    |    └─BatchNorm2d: 3-14        

(0.1583023210145533, 94.595, 0.3111201276183128, 90.0)

In [9]:
test(model,criterion,testloader)

Testing - Loss: 0.3483, Accuracy: 89.7700%


(0.348280015411973, 89.77)

# Perform Bayesian Optimization with SGD

In [10]:
# Define the search space for the hyperparameters
search_space = [Real(1e-6, 0.4, prior='log-uniform', name='lr'),
                Real(0.1, 0.999, name='momentum'),
                Real(1e-6, 1e-3, prior='log-uniform', name='weight_decay')]

n_epoch = 40

# Define the objective function for the Bayesian optimization
@use_named_args(search_space)
def objective_SGD(lr, momentum, weight_decay):

    # Define the model
    model = ResNet(BasicBlock, [2, 1, 1, 1])

    print(f'\n Testing hyperparameters: lr={lr}, momentum={momentum}, weight_decay={weight_decay}')
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max',patience=3)
    # Define the optimizer with the given hyperparameters
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay)
    # Train the model and return the validation accuracy
    train_losses, train_accuracies, val_losses, val_accuracies = train_and_validate(model, optimizer, criterion, trainloader, valloader, n_epoch)


    return -val_accuracies  # minimize negative accuracy

# Perform the Bayesian optimization
result = gp_minimize(objective_SGD, search_space, n_calls=10, random_state=0)

# Print the best hyperparameters and the corresponding validation accuracy
print('Best hyperparameters:', result.x)
print('Best validation accuracy:', -result.fun)


 Testing hyperparameters: lr=0.0020948412486806525, momentum=0.8589949079743348, weight_decay=0.0003748321662847933


UnboundLocalError: cannot access local variable 'optimizer' where it is not associated with a value

In [None]:
# Define the best hyperparameters found by Bayesian optimization
best_lr, best_momentum, best_weight_decay = [0.0015078369731868298, 0.8516348084201026, 0.001028462547983764]
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define the model and dataset
model = ResNet(BasicBlock, [2, 1, 1, 1])
# Define the SGD optimizer with the best hyperparameters
optimizer = optim.SGD(model.parameters(), lr=best_lr, momentum=best_momentum, weight_decay=best_weight_decay)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max')
criterion = nn.CrossEntropyLoss()

# Train the model with the best hyperparameters and validate
train_and_validate(model,optimizer,criterion,trainloader,valloader,100)