In [1]:
#@title Imports
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils, models
import matplotlib.pyplot as plt
import glob
import math
from pathlib import *
import re
import random
import copy
from tqdm.notebook import tqdm, trange
import gc
import datetime
from torch_lr_finder import LRFinder
from torchvision import datasets, transforms

  from tqdm.autonotebook import tqdm


In [2]:
seed_value=42
np.random.seed(seed_value) # cpu vars
random.seed(seed_value) # Python
os.environ['PYTHONHASHSEED'] = str(seed_value)
os.environ['CUBLAS_WORKSPACE_CONFIG']=":4096:8"
torch.manual_seed(seed_value) # cpu  vars
torch.use_deterministic_algorithms(True,warn_only=True)
torch.cuda.manual_seed_all(seed_value) # gpu vars
torch.backends.cudnn.deterministic = True  #needed
torch.backends.cudnn.benchmark = False

## Standalone VGG11 accuracy

This code was employed to check the accuracy of VGG11 in the CIFAR-10 dataset. In order to initialize the weights at random or inherit the weights pretrained on ImageNet, please change the following variable correspondingly.

In [3]:
pretrained = False
n_tests = 32

In [4]:
#@title Free GPU Cache function
def free_gpu_cache():
# This function is used to clear the GPU cache and avoid memory problems when dealing with large populations and big models
    gc.collect()
    torch.cuda.ipc_collect()
    torch.cuda.empty_cache()
    torch.cuda.synchronize()

def trainloop(model, epochs = 10000, lr = 1e-4, patience = 8):
    model.to(device)
    optimizer = optim.AdamW(model.parameters(), lr)
    
    model.train()
    training_loss = []
    validation_loss = []
    overfit = 0
    best_loss = np.inf
    with tqdm(total = epochs, leave = False, desc='Training model') as pbar1:
        for epoch in range(epochs):
            running_loss = 0.0
            with tqdm(total=len(train_loader), leave = False, desc='Epoch progress') as pbar2:
                for inputs, labels in train_loader:
                    inputs, labels = inputs.to(device), labels.to(device)
                    # zero the parameter gradients
                    optimizer.zero_grad()
                    # forward + backward + optimize
                    outputs = model(inputs).squeeze()
                    loss = criterion(outputs, labels)
                    #print(loss)
                    loss.backward()
                    optimizer.step()
                    
                    running_loss += loss.item()
                    pbar2.update(1)
                pbar2.update(np.inf)
            training_loss.append(running_loss/len(train_loader))

            if val_loader:
                model.eval()
                running_loss = 0.
                with tqdm(total=len(val_loader), leave = False, desc='Calculating validation loss') as pbar3:
                    with torch.no_grad():
                        for inputs, labels in val_loader:
                            inputs, labels = inputs.to(device), labels.to(device)
                            outputs = model.forward(inputs)
                            loss = criterion(outputs, labels)
                            running_loss += loss.detach().cpu().numpy()
                            pbar3.update(1)
                        pbar3.update(np.inf)
                validation_loss.append(running_loss/len(val_loader))
        
                if patience:
                    if validation_loss[-1] < best_loss:
                        best_loss = validation_loss[-1]
                        overfit = 0
                        torch.save(model.state_dict(), "training_model.pt")
                    else:
                        overfit += 1

                    if overfit >= patience/2:
                        optimizer = optim.AdamW(model.parameters(), optimizer.defaults["lr"]/2)
                    
                    if overfit >= patience: # If the nectar decreases for an entire cycle we assume overfitting
                        pbar1.update(epochs)
                        break
            pbar1.update(1)
        pbar1.update(np.inf)
    model.to("cpu")
    if patience:
        model.load_state_dict(torch.load("training_model.pt"))
    
    del optimizer, loss, inputs, labels, outputs
    free_gpu_cache()

def accmetric(model, dataloader):
    model.to(device)
    model.eval()
    
    accuracy = 0
    # Turn off gradients for validation, saves memory and computations
    with tqdm(total=len(dataloader), leave = False, desc='Calculating accuracy') as pbar:
        with torch.no_grad():
            for inputs, labels in dataloader:
                inputs, labels = inputs.to(device), labels.to(device)
                logprobs = model.forward(inputs)
                top_p, top_class = logprobs.topk(1, dim=1)
                equals = (top_class == labels.view(inputs.shape[0], 1))
                accuracy += torch.mean(equals.type(torch.FloatTensor))
                pbar.update(1)
    model.to("cpu")
    free_gpu_cache()
    return accuracy.detach().numpy()/len(dataloader)

In [5]:
criterion = nn.NLLLoss()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Define a transform to normalize the data
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.45,), (0.25,))])

# Fetch the test data
testset = datasets.CIFAR10(root='./data', train=False, download=False, transform=transform)
test_loader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)

train_accs = []
val_accs = []
test_accs = []
for test in range(n_tests):    
    # Load the training data
    trainset = datasets.CIFAR10(root='./data', train=True, download=False, transform=transform)
    train_loader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)
    val_loader = copy.deepcopy(train_loader)  # Creates a copy of the object 

    idx = [i for i in range(len(train_loader.dataset.targets))]
    random.shuffle(idx)
    
    #We take the first 45k images for training
    train_loader.dataset.data = train_loader.dataset.data[idx[:45000],:,:]
    train_loader.dataset.targets = [train_loader.dataset.targets[lbl] for lbl in idx[:45000]]
    
    #And the rest for validation
    val_loader.dataset.data = val_loader.dataset.data[idx[45000:],:,:]
    val_loader.dataset.targets = [val_loader.dataset.targets[lbl] for lbl in idx[45000:]]
    
    model = torch.hub.load('pytorch/vision:v0.10.0', "vgg11", pretrained=True)
    model.classifier = nn.Sequential(*[*model.classifier.children()][:-1], nn.Linear(in_features=4096, out_features=10, bias=True), nn.LogSoftmax(dim=1))

    if not pretrained:
        for index in range(len(model.features)):
            if type(model.features[index]) == torch.nn.modules.conv.Conv2d:
                nn.init.xavier_uniform_(model.features[index].weight.data, gain=nn.init.calculate_gain('relu'))
        for index in range(len(model.classifier)):
            if type(model.features[index]) == torch.nn.modules.linear.Linear:
                nn.init.xavier_uniform_(model.classifier[index].weight.data, gain=nn.init.calculate_gain('relu'))
    
    trainloop(model, epochs = 10000, lr = 1e-4, patience = 8)

    train_accs = train_accs + [accmetric(model, train_loader)]
    val_accs = val_accs + [accmetric(model, val_loader)]
    test_accs = test_accs + [accmetric(model, test_loader)]


In [6]:
print("train_accs", 100*np.mean(train_accs), "±", np.std(100*np.array(train_accs)), "( max", 100*np.max(train_accs),")")
print("val_accs  ", 100*np.mean(val_accs), "±", np.std(100*np.array(val_accs)), " ( max", 100*np.max(val_accs),")")
print("test_accs ", 100*np.mean(test_accs), "±", np.std(100*np.array(test_accs)), "( max", 100*np.max(test_accs),")")

train_accs 85.42730158025567 ± 3.4289264229665015 ( max 91.77689985795455 )
val_accs   70.75751582278481 ± 0.975356895819105  ( max 72.46835443037975 )
test_accs  70.56006170382165 ± 0.8466334835371856 ( max 71.984474522293 )
