In [10]:
import matplotlib as mpl

mpl.use('Agg')
import matplotlib.pyplot as plt
from torch import nn
import numpy as np
import torch
import os
import random
from tqdm import tqdm as tqdm
from IPython import display

from models.vgg import VGG_A
from models.vgg import VGG_A_BatchNorm
from data.loaders import get_cifar_loader

### Constants (parameters) initialization

In [11]:
batch_size = 128

# add our package dir to path
module_path = os.path.dirname(os.getcwd())
home_path = module_path
figures_path = os.path.join(home_path, 'reports', 'figures')
models_path = os.path.join(home_path, 'reports', 'models')
# Make sure you are using the right device.
# os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
print(torch.cuda.get_device_name(0))

cuda
GeForce GTX 1050 Ti


In [12]:
# Initialize your data loader and
# make sure that dataloader works
# as expected by observing one
# sample from it.
train_loader = get_cifar_loader(train=True)
val_loader = get_cifar_loader(train=False)

Files already downloaded and verified
Files already downloaded and verified


In [13]:
# This function is used to calculate the accuracy of model classification
def get_accuracy(dataloader, model, loss_fn):
    ## --------------------
    # Add code as needed
    size = len(dataloader.dataset)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X.to(device)).to(device)
            test_loss += loss_fn(pred, y.to(device)).item()
            correct += (pred.argmax(1) == y.to(device)).type(torch.float).sum().item()

    test_loss /= size
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    return test_loss, correct

In [14]:
# Set a random seed to ensure reproducible results
def set_random_seeds(seed_value=0, device='cpu'):
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    random.seed(seed_value)
    if device != 'cpu':
        torch.cuda.manual_seed(seed_value)
        torch.cuda.manual_seed_all(seed_value)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

In [15]:
# We use this function to complete the entire
# training process. In order to plot the loss landscape,
# you need to record the loss value of each step.
# Of course, as before, you can test your model
# after drawing a training round and save the curve
# to observe the training
def train(model, optimizer, criterion, train_loader, val_loader, scheduler=None, epochs_n=100, best_model_path=None):
    model.to(device)
    learning_curve = [np.nan] * epochs_n
    train_accuracy_curve = [np.nan] * epochs_n
    val_accuracy_curve = [np.nan] * epochs_n
    max_val_accuracy = 0
    max_val_accuracy_epoch = 0

    batches_n = len(train_loader)
    losses_list = []
    for epoch in tqdm(range(epochs_n), unit='epoch'):
        if scheduler is not None:
            scheduler.step()
        model.train()

        loss_list = []  # use this to record the loss value of each step
        grad = []  # use this to record the loss gradient of each step
        learning_curve[epoch] = 0  # maintain this to plot the training curve

        for data in train_loader:
            x, y = data
            x = x.to(device)
            y = y.to(device)
            optimizer.zero_grad()
            prediction = model(x)
            loss = criterion(prediction, y)
            # You may need to record some variable values here
            # if you want to get loss gradient, use
            # grad = model.classifier[4].weight.grad.clone()
            ## --------------------
            # Add your code
            loss_list.append(loss.item())
            # if model.classifier[4].weight.grad is not None:
            #     loss_grad = model.classifier[4].weight.grad.clone().cpu()
            #     grad.append(loss_grad)

            ## --------------------

            loss.backward()
            optimizer.step()

        losses_list.append(loss_list)
        display.clear_output(wait=True)
        f, axes = plt.subplots(1, 2, figsize=(15, 3))

        learning_curve[epoch] /= batches_n
        axes[0].plot(learning_curve)

        # Test your model and save figure here (not required)
        # remember to use model.eval()
        ## --------------------
        # Add code as needed
        model.eval()
        cur_val_loss, cur_val_accuracy = get_accuracy(dataloader=val_loader, model=model, loss_fn=criterion)
        max_val_accuracy = max(max_val_accuracy, cur_val_accuracy)
        if max_val_accuracy == cur_val_accuracy:
            max_val_accuracy_epoch = epoch

        ## --------------------

    losses_list = losses_list + loss_list
    return losses_list


In [16]:
# Train your model
# feel free to modify
epo = 5
loss_save_path = ''
grad_save_path = ''

set_random_seeds(seed_value=2020, device=device)
lr_list = [0.001, 0.002, 0.0001, 0.0005]
criterion = nn.CrossEntropyLoss()
loss = []
grads = []
for idx in range(len(lr_list)):
    model = VGG_A_BatchNorm()
    model._init_weights()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr_list[idx])
    loss_single_lr = train(model, optimizer, criterion, train_loader, val_loader, epochs_n=epo)
    np.savetxt(os.path.join(loss_save_path, 'loss' + str(idx) +'withBN.txt'), loss_single_lr, fmt='%s', delimiter=' ')

for idx in range(len(lr_list)):
    model = VGG_A()
    model._init_weights()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr_list[idx])
    loss_single_lr = train(model, optimizer, criterion, train_loader, val_loader, epochs_n=epo)
    np.savetxt(os.path.join(loss_save_path, 'loss' + str(idx) +'.txt'), loss_single_lr, fmt='%s', delimiter=' ')




100%|██████████| 5/5 [03:35<00:00, 43.10s/epoch]

Test Error: 
 Accuracy: 74.8%, Avg loss: 0.006106 






In [24]:
loss_lr0 = np.loadtxt(f'loss0.txt', delimiter=',')
loss_lr1 = np.loadtxt(f'loss1.txt', delimiter=',')
loss_lr2 = np.loadtxt(f'loss2.txt', delimiter=',')
loss_lr3 = np.loadtxt(f'loss3.txt', delimiter=',')

In [25]:
loss_lr0_bn = np.loadtxt(f'loss0withBN.txt', delimiter=',')
loss_lr1_bn = np.loadtxt(f'loss1withBN.txt', delimiter=',')
loss_lr2_bn = np.loadtxt(f'loss2withBN.txt', delimiter=',')
loss_lr3_bn = np.loadtxt(f'loss3withBN.txt', delimiter=',')

In [32]:
mpl.use('tkagg')

In [34]:
loss_lr0.reshape(-1)

(1955,)

In [42]:
loss_lr0 = loss_lr0.reshape(-1)
loss_lr1 = loss_lr1.reshape(-1)
loss_lr2 = loss_lr2.reshape(-1)
loss_lr3 = loss_lr3.reshape(-1)
loss_lr0_bn = loss_lr0_bn.reshape(-1)
loss_lr1_bn = loss_lr1_bn.reshape(-1)
loss_lr2_bn = loss_lr2_bn.reshape(-1)
loss_lr3_bn = loss_lr3_bn.reshape(-1)

In [43]:
max_curve, min_curve = [], []
max_curve_bn, min_curve_bn = [], []
for i in range(len(loss_lr0)):
    max_curve.append(max(loss_lr0[i], loss_lr1[i], loss_lr2[i], loss_lr3[i]))
    min_curve.append(min(loss_lr0[i], loss_lr1[i], loss_lr2[i], loss_lr3[i]))
    max_curve_bn.append(max(loss_lr0_bn[i], loss_lr1_bn[i], loss_lr2_bn[i], loss_lr3_bn[i]))
    min_curve_bn.append(min(loss_lr0_bn[i], loss_lr1_bn[i], loss_lr2_bn[i], loss_lr3_bn[i]))


In [52]:
plt.fill_between(x=np.arange(0, 1955), y1=max_curve, y2=min_curve, label='without BN')
plt.fill_between(x=np.arange(0, 1955), y1=max_curve_bn, y2=min_curve_bn, label='with BN')
plt.legend()
plt.show()