# VGG_beta_smooth

In [1]:
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
from torch import nn
import numpy as np
import torch
import os
import random
from tqdm import tqdm as tqdm
from IPython import display

from model.vgg import VGG_A
from model.vgg_batchnorm import VGG_A_BatchNorm
from torchvision import datasets, transforms

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import os

# Expose only the GPUs with indices 1 and 2 to this process.
# NOTE(review): this is only honoured if it runs before CUDA is first
# initialised in the process - confirm the cell order when re-running.
# Setting CUDA_DEVICE_ORDER="PCI_BUS_ID" as well is left disabled here.
os.environ.update({"CUDA_VISIBLE_DEVICES": "1,2"})

In [3]:
# Shared configuration, device selection and CIFAR-10 data loaders used by
# the training and evaluation code below.

# Constants (parameters) initialization
# device_id = [0,1,2,3]
num_workers = 4
batch_size = 64

# Output directories, resolved relative to the parent of the current
# working directory (nothing is added to sys.path here).
module_path = os.path.dirname(os.getcwd())
home_path = module_path
figures_path = os.path.join(home_path, 'reports', 'figures')
models_path = os.path.join(home_path, 'reports', 'models')

# Make sure you are using the right device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

# Convert images to tensors, then normalize every channel with mean 0.5 and
# standard deviation 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Load CIFAR-10. Every loader takes its batch size and worker count from the
# constants above so that changing them in one place affects all three.
trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(
    trainset, batch_size=batch_size, shuffle=True, num_workers=num_workers)

# NOTE: the validation and the test loader both read the held-out split
# (train=False); there is no separate validation split in this notebook.
validset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
val_loader = torch.utils.data.DataLoader(
    validset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
test_loader = torch.utils.data.DataLoader(
    testset, batch_size=batch_size, shuffle=False, num_workers=num_workers)



# This function is used to calculate the accuracy of model classification
def get_accuracy(pred,y):
    """Return how many entries of ``pred`` equal the matching entry of ``y``.

    ``y`` is first viewed with the shape of ``pred``. Despite the name, the
    result is a raw match count (a Python number), not a ratio.
    """
    targets = y.view_as(pred)
    matches = pred == targets
    return matches.sum().item()
    
# train function
def train(model, optimizer, criterion, train_loader, val_loader, scheduler=None, epochs_n=100, best_model_path=None):
    """Train ``model`` and record per-step losses and last-layer gradients.

    Args:
        model: network to train. It must expose ``model.classifier[4].weight``;
            the gradient of that weight is recorded after every backward pass.
        optimizer: optimizer used for the parameter updates.
        criterion: loss callable invoked as ``criterion(prediction, y)``.
        train_loader: iterable yielding ``(x, y)`` training batches.
        val_loader: iterable yielding ``(x, y)`` validation batches. The length
            of ``val_loader.dataset`` is the denominator of the accuracy.
        scheduler: optional learning-rate scheduler. It is stepped once per
            epoch, after that epoch's optimizer updates.
        epochs_n: number of epochs to run.
        best_model_path: accepted for interface compatibility; not used.

    Returns:
        A tuple ``(losses_list, grads, val_accuracy_curve)`` where
        ``losses_list[e][s]`` is the loss value of step ``s`` in epoch ``e``,
        ``grads[e][s]`` is a clone of ``model.classifier[4].weight.grad`` taken
        at that same step, and ``val_accuracy_curve[e]`` is the fraction of
        correctly classified validation samples after epoch ``e``.
    """
    model.to(device)
    val_accuracy_curve = [0] * epochs_n
    max_val_accuracy = 0
    max_val_accuracy_epoch = 0

    losses_list = []
    grads = []
    for epoch in tqdm(range(epochs_n), unit='epoch'):
        model.train()

        loss_list = []  # loss value of each step in this epoch
        grad = []  # last-layer weight gradient of each step in this epoch

        for x, y in train_loader:
            x = x.to(device)
            y = y.to(device)
            optimizer.zero_grad()

            prediction = model(x)
            loss = criterion(prediction, y)
            loss_list.append(loss.item())
            loss.backward()

            # Clone so that the next zero_grad()/backward() cannot overwrite
            # the recorded gradient in place.
            grad.append(model.classifier[4].weight.grad.clone())

            optimizer.step()

        # Step the scheduler only after the optimizer has been stepped;
        # stepping it first would skip the first value of the schedule.
        if scheduler is not None:
            scheduler.step()

        losses_list.append(loss_list)
        grads.append(grad)
        display.clear_output(wait=True)

        # Validation: inference only, so no autograd graph is needed.
        model.eval()
        val_samples_n = len(val_loader.dataset)
        correct = 0
        with torch.no_grad():
            for x, y in val_loader:
                x = x.to(device)
                y = y.to(device)
                pred = model(x).argmax(dim=1)
                correct += get_accuracy(pred, y)

        val_accuracy_curve[epoch] = correct / val_samples_n
        if max_val_accuracy < val_accuracy_curve[epoch]:
            max_val_accuracy = val_accuracy_curve[epoch]
            max_val_accuracy_epoch = epoch

        print("epoch:{}, valid accuracy:{}, max valid accuracy:{}, max valid accuracy epoch:{}".format(epoch, val_accuracy_curve[epoch], max_val_accuracy,max_val_accuracy_epoch))

    return losses_list, grads, val_accuracy_curve

# change file save path
# Number of epochs passed as ``epochs_n`` to every training run below.
epo = 20
# Placeholder output paths; both are left empty here.
loss_save_path = grad_save_path = ''

cuda
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [4]:
def l2_dist(grad,lr):
    """Return the distances between consecutive gradients, each divided by ``lr``.

    ``grad`` is a sequence of tensors. For every adjacent pair the norm of
    their difference (``np.linalg.norm`` with default arguments) is divided by
    ``lr``. The result has ``len(grad) - 1`` entries, or none when ``grad``
    holds fewer than two tensors.
    """
    distances = []
    for current, following in zip(grad, grad[1:]):
        current_np = current.cpu().numpy()
        following_np = following.cpu().numpy()
        distances.append(np.linalg.norm(following_np - current_np) / lr)
    return distances

def VGG_Grad_Pred(VGG_A_grads,lr):
    """Apply ``l2_dist`` to every per-epoch gradient list.

    ``VGG_A_grads`` holds one sequence of gradient tensors per epoch. The
    result holds, in the same order, the list ``l2_dist`` returns for each of
    them with the given ``lr``.
    """
    return [l2_dist(epoch_grads, lr) for epoch_grads in VGG_A_grads]

In [6]:
print('----First model----'+'\n')

torch.cuda.empty_cache()

# Train one VGG_A per learning rate. For each run keep the recorded losses
# and, per epoch, the lr-scaled distances between consecutive gradients.
lr_list = [1e-3, 2e-3, 1e-4, 5e-4]
loss_list = []
grad_list = []
for lr in lr_list:
    model = VGG_A()
    optimizer = torch.optim.Adam(model.parameters(), lr = lr)
    criterion = nn.CrossEntropyLoss()
    VGG_A_loss, VGG_A_grads, val_accuracy_curve = train(model, optimizer, criterion, train_loader, val_loader, epochs_n=epo)
    loss_list.append(VGG_A_loss)

    grads_l2_dist = VGG_Grad_Pred(VGG_A_grads,lr)
    grad_list.append(grads_l2_dist)

# Envelope across runs: for every step (epochs concatenated in order) take
# the largest and smallest value over all learning rates. Iterating the runs
# with zip keeps this correct for any number of entries in lr_list.
min_curve = []
max_curve = []

for epoch in range(epo):
    runs_at_epoch = [run[epoch] for run in grad_list]
    for step_values in zip(*runs_at_epoch):
        max_curve.append(max(step_values))
        min_curve.append(min(step_values))



100%|██████████| 20/20 [05:25<00:00, 16.25s/epoch]

epoch:19, valid accuracy:0.7754, max valid accuracy:0.7761, max valid accuracy epoch:10





In [7]:
print('----Next model----'+'\n')

# Train one VGG_A_BatchNorm per learning rate. For each run keep the recorded
# losses and, per epoch, the lr-scaled distances between consecutive gradients.
lr_list = [1e-3, 2e-3, 1e-4, 5e-4]
loss_list = []
grad_list = []
for lr in lr_list:
    model = VGG_A_BatchNorm()
    optimizer = torch.optim.Adam(model.parameters(), lr = lr)
    criterion = nn.CrossEntropyLoss()
    VGG_A_loss, VGG_A_grads, val_accuracy_curve = train(model, optimizer, criterion, train_loader, val_loader, epochs_n=epo)
    loss_list.append(VGG_A_loss)

    grads_l2_dist = VGG_Grad_Pred(VGG_A_grads,lr)
    grad_list.append(grads_l2_dist)


# Envelope across runs: for every step (epochs concatenated in order) take
# the largest and smallest value over all learning rates. Iterating the runs
# with zip keeps this correct for any number of entries in lr_list.
min_curve_BN = []
max_curve_BN = []
for epoch in range(epo):
    runs_at_epoch = [run[epoch] for run in grad_list]
    for step_values in zip(*runs_at_epoch):
        max_curve_BN.append(max(step_values))
        min_curve_BN.append(min(step_values))

100%|██████████| 20/20 [05:43<00:00, 17.15s/epoch]

epoch:19, valid accuracy:0.8203, max valid accuracy:0.8325, max valid accuracy epoch:16





In [8]:
def write_file(ls,fname):
    """Write the items of ``ls`` to the text file ``fname``.

    Every item is written as ``str(item)`` followed by a tab, and a newline is
    inserted after every 100th item. An existing file is overwritten.

    Args:
        ls: iterable of values to write.
        fname: path of the UTF-8 text file to create.
    """
    # The context manager closes the file even if str() or write() raises.
    with open(fname, "w", encoding='UTF-8') as f:
        for count, ele in enumerate(ls, start=1):
            f.write(str(ele)+'\t')
            if count % 100 == 0:
                f.write('\n')
    
# Persist the four envelope curves, BatchNorm curves first.
write_file(ls=min_curve_BN, fname='min_curve2_BN.txt')
write_file(ls=max_curve_BN, fname='max_curve2_BN.txt')
write_file(ls=min_curve, fname='min_curve2.txt')
write_file(ls=max_curve, fname='max_curve2.txt')

## plot function

In [11]:
def plot_loss_landscape(min_curve_BN, max_curve_BN, min_curve, max_curve):
    """Plot the min/max envelopes of both models and save the figure.

    The standard-model envelope is drawn in blue and the BatchNorm envelope in
    red, each as two lines with the area between them filled. The figure is
    written to ``beta-smooth.jpg`` in the current working directory.

    Args:
        min_curve_BN: per-step minimum values of the BatchNorm model.
        max_curve_BN: per-step maximum values of the BatchNorm model.
        min_curve: per-step minimum values of the standard model.
        max_curve: per-step maximum values of the standard model.
    """
    plt.clf()

    # Every curve is plotted against the same x axis, so cut all of them to
    # the shortest length; otherwise a shorter curve raises a shape mismatch.
    steps_n = min(len(min_curve_BN), len(max_curve_BN), len(min_curve), len(max_curve))
    x = np.arange(steps_n)
    min_curve_BN = np.asarray(min_curve_BN)[:steps_n]
    max_curve_BN = np.asarray(max_curve_BN)[:steps_n]
    min_curve = np.asarray(min_curve)[:steps_n]
    max_curve = np.asarray(max_curve)[:steps_n]

    # Standard VGG envelope.
    plt.plot(x, min_curve, color='blue', alpha=0.7)
    plt.plot(x, max_curve, color='blue', alpha=0.7)
    p1 = plt.fill_between(x, min_curve, max_curve, facecolor="blue", alpha=0.3)
    plt.ylabel('beta-smoothness')
    plt.xlabel('Step')

    # BatchNorm VGG envelope.
    plt.plot(x, min_curve_BN, color='red', alpha=0.7)
    plt.plot(x, max_curve_BN, color='red', alpha=0.7)
    p2 = plt.fill_between(x, min_curve_BN, max_curve_BN, facecolor="red", alpha=0.3)

    plt.legend([p1, p2], ["VGG_A", "VGG_A_BatchNorm"], loc='upper right')
    plt.title('beta-smoothness')

    # plt.ylim((0, 6))
    plt.savefig("beta-smooth.jpg")

def ReadFile(address):
    """Read a tab-separated text file of numbers into a flat list of floats.

    Each line is split on tabs. An empty field after the last tab of a line is
    ignored, and every other field is converted with ``float``. Values are
    returned in file order, with all lines concatenated.

    Args:
        address: path of the UTF-8 text file to read.

    Returns:
        A list of floats; empty when the file is empty.

    Raises:
        ValueError: if a field cannot be converted to ``float``.
    """
    ls = []
    # The context manager closes the file even if a field fails to parse.
    with open(address, encoding='UTF-8') as f:
        for line in f:
            fields = line.replace('\n','').split('\t')
            # Drop only an empty trailing field; a line that does not end
            # with a tab keeps its last value.
            if fields[-1] == '':
                fields.pop()
            ls.extend(map(float, fields))
    return ls


# Reload the saved envelope curves from disk and draw the comparison figure.
min_curve_BN = ReadFile(address='min_curve2_BN.txt')
max_curve_BN = ReadFile(address='max_curve2_BN.txt')
min_curve = ReadFile(address='min_curve2.txt')
max_curve = ReadFile(address='max_curve2.txt')
plot_loss_landscape(
    min_curve_BN=min_curve_BN,
    max_curve_BN=max_curve_BN,
    min_curve=min_curve,
    max_curve=max_curve,
)