## CIFAR10 Classification using PreResNet110
## Comparing results of SGD and SGD with landscape modification

Firstly, we import the necessary libraries.

In [27]:
import numpy
import pandas as pd
import random
import matplotlib.pyplot as plt

import torch
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import torch.nn as nn

import os
from google.colab import files

# import modified optimizer
from SGD_IKSA import SGD_IKSA
# import model
from preresnet import PreResNet

In [20]:
# We set the seed at the beginning of each experiment to ensure reproducibility.
# We also make sure no non-deterministic methods are used.
def setup_seed(s):
    torch.manual_seed(s)
    random.seed(s)  
    numpy.random.seed(s)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    torch.use_deterministic_algorithms(True)

Next, we define a function that will perform an experiment, by training PreResNet on CIFAR10, using both SGD and SGD with landscape modification, and report the results.

Training process inspired by: https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html, https://pytorch.org/docs/stable/notes/randomness.html

In [21]:
def compare_SGD_SGD_LM(device, 
                       seed, 
                       model, 
                       trainset, 
                       testset, 
                       batch_size, 
                       no_epochs, 
                       lr, 
                       momentum, 
                       weight_decay, 
                       optimizer_type, 
                       LM_f, 
                       LM_c, 
                       LM_c_run_min):
    """This function trains a given model on a given train dataset, using both SGD 
    and SGD with landscape modification. Subsequently, it computes the testing accuracy on the given test data.
    

    Args:
        device : CPU/GPU used for training process;
        seed : seed to be used in the random processes involved;
        model : classification model to be used;
        trainset : training data;
        testset : testing data;
        batch_size: batch-size to be used in training;
        no_epochs : number of epochs that the model will be trained for;
        lr : learninf rate of SGD optimizer;
        momentum : momentum value of the SGD optimizer;
        weight_decay : weight decay value of the SGD optimizer;
        optimizer_type : bool indicating whether landscape modification will be used
        LM_f : function to be used when performing landscape modification
        LM_c : c value to be used when performing landscape modification
        LM_c_run_min : bool indicating whether c will be be taken as the running minimum of 
                       U(x), with LM_c as initial value.

    Returns:
        [dict]: a dictionary that contains: a dictionary of parameters, a list of loss values,
        a list of c values, a list of average loss values, a list of testing accuracy values,
        and the final testing accuracy value.
    """
    # We want to save the hyperparameters used for this experiment in a dictionary.
    param_dict = {"no_epochs": no_epochs,
                  "batch_size": batch_size,
                  "lr": lr,
                  "momentum": momentum,
                  "wd": weight_decay,
                  "optimizer_type": optimizer_type,
                  "LM_f": LM_f.__name__,
                  "LM_c": LM_c,
                  "LM_c_run_min": LM_c_run_min}
    
    # We keep the loss values.
    loss_list = []
    # We keep the c values.
    c_list = []
    # We save a list of loss values averaged every 20 minibatches.
    average_loss_list = []
    # We save a list of accuracy scores for each epoch.
    accuracy_list = []

    # We set up a seed.
    setup_seed(seed)

    # We preserve reproducibility in the data loading process
    def seed_worker(worker_id):
        worker_seed = torch.initial_seed() % 2**32

    g = torch.Generator()
    g.manual_seed(seed)
    
    # We prepare our data for training.
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                        shuffle=True, num_workers=4, worker_init_fn = seed_worker, generator = g)
    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                        shuffle=False, num_workers=4, worker_init_fn = seed_worker, generator = g)

    # Move model to device.
    model.to(device)

    # We define the loss function.
    criterion = nn.CrossEntropyLoss()

    # We define an optimizer, depending on the optimizer type given.
    if optimizer_type == "Original":
        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay)
    elif optimizer_type == "LM":
        optimizer = SGD_IKSA(model.parameters(), LM_f, lr=lr, momentum=momentum, weight_decay= weight_decay)
        
    # We train the model for the given number of epochs.
    for epoch in range(no_epochs):

        running_loss = 0.0
        # We go through all minibatches in the trainloader:
        for i, data in enumerate(trainloader, 0):
            
            # We move data to device.
            inputs = data[0].to(device)
            labels = data[1].to(device)

            # We empty the gradients of the parameters.
            optimizer.zero_grad()
            
            # We compute predictions.
            outputs = model(inputs)
            
            # We compute the loss.
            loss = criterion(outputs, labels)
            loss_list.append(loss)
            # We backpropagate.
            loss.backward()

            # If we are using landscape modification, check if we are taking 
            # the running loss as c.
            if optimizer_type == "LM":
                if LM_c_run_min:
                    # Update c as the running minimum of the loss.
                    if loss < LM_c:
                      LM_c = loss.item()
                # Let the optimizer take a step.
                optimizer.step(LM_c, loss)
            else:
                optimizer.step()
            
            c_list.append(LM_c)
            running_loss += loss.item()
            
            # Compute the average loss every 20 mini-batches.
            if i % 20 == 19:
                print(epoch + 1, i + 1, running_loss / 20)
                average_loss_list.append(running_loss / 20)
                running_loss = 0.0

        # For each epoch, compute testing accuracy.
        correct = 0
        total = 0
        with torch.no_grad():
            # Iterate through the testing data.
            for data in testloader:
                images, labels = data
                images = images.to(device)
                labels = labels.to(device)
                # Compute output.
                outputs = model(images)
                # Compute prediction, by taking the max output.
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        # Compute accuracy.
        accuracy = 100 * correct / total
        accuracy_list.append(accuracy)
        print('Accuracy:', (100 * correct / total))


    print('End of training.')

    # Compute final testing accuracy:
    correct = 0
    total = 0
    with torch.no_grad():
        # Iterate through the testing data.
        for data in testloader:
            images, labels = data
            images = images.to(device)
            labels = labels.to(device)
            # Compute output.
            outputs = model(images)
            # Compute prediction, by taking the max output.
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    # Compute accuracy.
    accuracy = 100 * correct / total
    accuracy_list.append(accuracy)
    print('Accuracy:', (100 * correct / total))

    # Return a dictionary with all necessary information.
    results_dict = {"param_dict": param_dict,
                    "loss_list": loss_list,
                    "c_list": c_list,
                    "average_loss_list": average_loss_list,
                    "accuracy": accuracy,
                    "accuracy_list": accuracy_list}
    
    return results_dict

Next, we define some functions that create useful reports for our analysis.

In [22]:
def create_report_accuracy_list(list_of_results_dicts):
    """This function takes a list of dictionaries as resulted from 
    the function defined above, and produces a dataframe with 
    accuracy values for the two models (SGD with/without landscape modification).
    """
    accuracy_dict = {}
    # Iterate through all dictionaries of results:
    for d in list_of_results_dicts:
        optimizer = d["param_dict"]["optimizer_type"]
        epochs = d["param_dict"]["no_epochs"]
        lr = d["param_dict"]["lr"]
        momentum = d["param_dict"]["momentum"]
        wd = d["param_dict"]["wd"]
        f = d["param_dict"]["LM_f"] # will need to be formatted to string
        c = d["param_dict"]["LM_c"]
        c_running_loss = str(d["param_dict"]["LM_c_run_min"])
        
        # We create a unique key for each experiment.
        key = f"{optimizer}_{epochs}_{lr}_{momentum}_{wd}_{f}_{c}_{c_running_loss}"
        # We assign the corresponding loss values to the key.
        value = d["accuracy_list"]
        # We create a dictionary that will be turned to a dataframe.
        accuracy_dict[key] = value

    accuracy_df = pd.DataFrame(accuracy_dict)

    return accuracy_df


def create_report(list_of_results_dicts):

    columns = ["Optimizer", "Epochs", "Learning rate", "Momentum", "Weight Decay", "LM_f", "LM_C", "C_running_min",\
                "Min Loss", "Last Loss", "Last Average Loss", "Test Accuracy"]

    rows = []

    for d in list_of_results_dicts:

        optimizer = d["param_dict"]["optimizer_type"]
        epochs = d["param_dict"]["no_epochs"]
        lr = d["param_dict"]["lr"]
        momentum = d["param_dict"]["momentum"]
        wd = d["param_dict"]["wd"]
        f = d["param_dict"]["LM_f"] # will need to be formatted to string
        c = d["param_dict"]["LM_c"]
        c_running_loss = str(d["param_dict"]["LM_c_run_min"])
        min_loss = min(d["loss_list"]).item()
        last_loss = d["loss_list"][-1].item()
        last_average_loss = d["average_loss_list"][-1]
        test_accuracy = d["accuracy"]
        rows.append([optimizer, epochs, lr, momentum, wd,\
            f, c, c_running_loss, min_loss, last_loss, last_average_loss, test_accuracy])


    report_df = pd.DataFrame(columns = columns, data = rows)

    return report_df

def create_loss_sheet(list_of_results_dicts):

    loss_dict = {}
    for d in list_of_results_dicts:

        optimizer = d["param_dict"]["optimizer_type"]
        epochs = d["param_dict"]["no_epochs"]
        lr = d["param_dict"]["lr"]
        momentum = d["param_dict"]["momentum"]
        wd = d["param_dict"]["wd"]
        f = d["param_dict"]["LM_f"] # will need to be formatted to string
        c = d["param_dict"]["LM_c"]
        c_running_loss = str(d["param_dict"]["LM_c_run_min"])

        key = f"{optimizer}_{epochs}_{lr}_{momentum}_{wd}_{f}_{c}_{c_running_loss}"
        value = d["loss_list"]

        # turn into list of floats
        map_obj = map(torch.Tensor.item, value)
        value = list(map_obj)

        loss_dict[key] = value

    loss_df = pd.DataFrame(loss_dict)

    return loss_df

def create_c_sheet(list_of_results_dicts):

    c_dict = {}
    for d in list_of_results_dicts:

        optimizer = d["param_dict"]["optimizer_type"]
        epochs = d["param_dict"]["no_epochs"]
        lr = d["param_dict"]["lr"]
        momentum = d["param_dict"]["momentum"]
        wd = d["param_dict"]["wd"]
        f = d["param_dict"]["LM_f"] # will need to be formatted to string
        c = d["param_dict"]["LM_c"]
        c_running_loss = str(d["param_dict"]["LM_c_run_min"])

        key = f"{optimizer}_{epochs}_{lr}_{momentum}_{wd}_{f}_{c}_{c_running_loss}"
        value = d["c_list"]
        
        c_dict[key] = value

    c_df = pd.DataFrame(c_dict)

    return c_df


def create_average_loss_sheet(list_of_results_dicts):

    average_loss_dict = {}
    for d in list_of_results_dicts:

        optimizer = d["param_dict"]["optimizer_type"]
        epochs = d["param_dict"]["no_epochs"]
        lr = d["param_dict"]["lr"]
        momentum = d["param_dict"]["momentum"]
        wd = d["param_dict"]["wd"]
        f = d["param_dict"]["LM_f"] # will need to be formatted to string
        c = d["param_dict"]["LM_c"]
        c_running_loss = str(d["param_dict"]["LM_c_run_min"])

        key = f"{optimizer}_{epochs}_{lr}_{momentum}_{wd}_{f}_{c}_{c_running_loss}"
        value = d["average_loss_list"]

        average_loss_dict[key] = value

    average_loss_df = pd.DataFrame(average_loss_dict)

    return average_loss_df

Next, we load our CIFAR10 data, applying transformations as per https://github.com/timgaripov/swa.

In [23]:
seed = 0
setup_seed(seed)

transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

batch_size = 128


trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform_train)
testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform_test)


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


We define the parameters:

In [30]:
# Use a GPU if available.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

# SGD Parameters
lr = 0.1
momentum = 0.9
wd = 0

# LM parameters
def x(x):
    return x

c = 10**5

# seeds 
seed_list = [0, 10, 100]

# number of epochs
epochs = 150

# model
model = PreResNet()


cpu


In [31]:
final_results = []

for seed in seed_list:
    results_dict_SGD =  compare_SGD_SGD_LM(device, seed, model, trainset, testset, batch_size, epochs, lr, momentum, wd, "Original", x, c, True)
    final_results.append(results_dict_SGD)
    results_dict_SGD_LM = compare_SGD_SGD_LM(device, seed, model, trainset, testset, batch_size, epochs, lr, momentum, wd, "LM", x, c, True)
    final_results.append(results_dict_SGD_LM)


AttributeError: Can't pickle local object 'compare_SGD_SGD_LM.<locals>.seed_worker'

In [None]:
report_df = create_report(final_results)
report_df.to_csv("report.csv")
files.download("report.csv")
loss_df = create_loss_sheet(final_results)
loss_df.to_csv("report1.csv")
files.download("report1.csv")
avg_loss_df = create_average_loss_sheet(final_results)
avg_loss_df.to_csv("report2.csv")
files.download("report2.csv")
c_list = create_c_sheet(final_results)
c_list.to_csv("report3.csv")
files.download("report3.csv")
accuracy_list = create_report_accuracy_list(final_results)
accuracy_list.to_csv("report4.csv")
files.download("report4.csv")