In [None]:
from six.moves import cPickle
import os
import matplotlib.pyplot as plt
import numpy as np
import importlib
import utils as ut
import functions as fu
from tqdm import tqdm

In [None]:
# Problem sizes: W is drawn with shape (K, d) further down; 3072 = 32 * 32 * 3.
# n is not referenced by any of the visible cells.
K = 10
d = 3072
n = 10000

# Seed the global NumPy RNG so the parameter draws below are repeatable.
np.random.seed(42)

# Mean and standard deviation of the Gaussian used to draw W and b.
mu = 0
sigma = 0.01

# Column range [batch_start, batch_end) used for the small gradient-check batch.
batch_start = 0
batch_end = 20

# Explore the CIFAR-10 dataset

In [None]:
# Pick up any edits made to the utils module since it was imported.
importlib.reload(ut)

# Load the first batch without clipping and with reshape=True (presumably
# image-shaped for display — TODO confirm in ut.loadData); the third return
# value is discarded.
first_batch = ut.loadData('data_batch_1', reshape=True, clipping=False)
X, y, _ = first_batch
ut.plotCifar(X, y)

# Exercise 1: Training a multi-linear classifier

## Data Preprocessing and Loading 

In [None]:
importlib.reload(ut)


def _standardize_inplace(X, mean, std):
    """Standardize ``X`` row-wise, in place.

    Args:
        X: 2-D float array with one feature per row; modified in place.
        mean: 1-D array with one mean per row of ``X``.
        std: 1-D array with one standard deviation per row of ``X``.

    Returns:
        The same array object ``X``.
    """
    # Broadcasting a column vector gives the same values as the former
    # np.outer(mean, ones) matrices without allocating a full-size temporary.
    X -= mean[:, np.newaxis]
    X /= std[:, np.newaxis]
    return X


# Batches 1 / 2 / 3 serve as train / validation / test splits.
X_train, y_train, Y_train = ut.loadData('data_batch_1', clipping=True)
X_val, y_val, Y_val = ut.loadData('data_batch_2', clipping=True)
X_test, y_test, Y_test = ut.loadData('data_batch_3', clipping=True)

# Statistics come from the training split only and are reused for the other
# two splits, so no validation/test information leaks into preprocessing.
mean_X = np.mean(X_train, axis=1)
std_X = np.std(X_train, axis=1)

_standardize_inplace(X_train, mean_X, std_X)
_standardize_inplace(X_val, mean_X, std_X)
_standardize_inplace(X_test, mean_X, std_X)

In [None]:
# Sanity check: shape of the normalized training matrix (expected (d, n_samples) — TODO confirm).
X_train.shape

In [None]:
# Sanity check: shape of the third value returned by ut.loadData for the
# training batch (presumably one-hot labels, (K, n_samples) — TODO confirm).
Y_train.shape

In [None]:
# Sanity check: shape of the training mean taken over axis=1 (one value per row of X_train).
mean_X.shape

## Initialize parameters

In [None]:
# Draw the weight matrix (K x d) and bias column (K x 1) from N(mu, sigma^2).
W = np.random.normal(loc=mu, scale=sigma, size=(K, d))
b = np.random.normal(loc=mu, scale=sigma, size=(K, 1))

In [None]:
importlib.reload(ut)

# Forward pass on the first 100 training columns with the freshly drawn parameters.
probe_inputs = X_train[:, :100]
P = ut.EvaluateClassifier(probe_inputs, W, b)
print(P.shape)
# Row index of the largest entry in every column of P.
print(np.argmax(P, axis=0))

In [None]:
# Column-wise sum of the element-wise product of the first 100 label columns
# with P; only the resulting shape is displayed.
np.multiply(Y_train[:, :100], P).sum(axis=0).shape

In [None]:
importlib.reload(ut)

# Cost on the first 100 training columns. The trailing 0.01 is presumably the
# regularisation strength — TODO confirm against ut.ComputeCost.
cost = ut.ComputeCost(X_train[:, :100], Y_train[:, :100], W, b, 0.01)
cost

In [None]:
# Accuracy of the current (untrained) W, b on the first 100 training samples,
# scored against the labels in y_train.
acc = ut.ComputeAccuracy(X_train[:, 0:100], y_train[0:100], W, b)
acc

In [None]:
importlib.reload(ut)
_lambda = 1e-2

# Columns [batch_start, batch_end) form the small batch used for the gradient check.
check_cols = slice(batch_start, batch_end)
P = ut.EvaluateClassifier(X_train[:, check_cols], W, b)

# Analytic gradients with respect to W and b on that batch.
ga_w, ga_b = ut.ComputeGradients(X_train[:, check_cols], Y_train[:, check_cols], P, W, _lambda)
print(ga_w.shape, ga_b.shape)

In [None]:
importlib.reload(fu)

# Numerical gradients on the same batch as the analytic ones. The trailing 1e-6
# is presumably the finite-difference step — TODO confirm in fu.ComputeGradsNumSlow.
gn_w, gn_b = fu.ComputeGradsNumSlow(
    X_train[:, batch_start:batch_end],
    Y_train[:, batch_start:batch_end],
    W,
    b,
    _lambda,
    1e-6,
)

In [None]:
# Mean absolute difference between numerical and analytic weight gradients.
# The absolute value matters: with a signed mean, errors of opposite sign
# cancel and a near-zero result would not show that the gradients agree.
np.mean(np.abs(gn_w - ga_w))

In [None]:
importlib.reload(ut)

# Average of ut.compare_gradients over the weight gradients (presumably an
# element-wise relative error — TODO confirm in utils).
w_grad_error = ut.compare_gradients(ga_w, gn_w, eps=1e-4)
np.mean(w_grad_error)

In [None]:
# Same comparison for the bias gradients; both are reshaped to column vectors
# first so their shapes match.
b_grad_error = ut.compare_gradients(ga_b.reshape(-1, 1), gn_b.reshape(-1, 1), eps=1e-4)
np.mean(b_grad_error)

## MiniBatch Train

In [None]:
importlib.reload(ut)

# Fresh parameters for this run (drawn from the current global RNG state).
W = np.random.normal(loc=mu, scale=sigma, size=(K, d))
b = np.random.normal(loc=mu, scale=sigma, size=(K, 1))

# Hyper-parameters handed to ut.minibatchGD.
GDparams = {
    "n_batch": 100,
    "n_epochs": 40,
    "eta": 1e-3,
    "lambda": 0,
}

# Returns the trained parameters followed by four histories.
W, b, train_loss, val_loss, train_acc, val_acc = ut.minibatchGD(
    X_train, Y_train, y_train,
    X_val, Y_val, y_val,
    GDparams, W, b,
    verbose=True,
)

In [None]:
importlib.reload(ut)

# Montage of W first, then the train/validation curves for loss and accuracy.
ut.montage(W, GDparams)
for curves, label in (((train_loss, val_loss), "loss"), ((train_acc, val_acc), "accuracy")):
    ut.plot_metric(*curves, GDparams, type=label)

## Experiments

In [None]:
importlib.reload(ut)

# Four (lambda, eta) configurations; each is trained from several random
# initialisations so that mean / std of the final metrics can be reported.
GDparams = [
    {"lambda": 0, "n_epochs": 40, "n_batch": 100, "eta": .1},
    {"lambda": 0, "n_epochs": 40, "n_batch": 100, "eta": .001},
    {"lambda": .1, "n_epochs": 40, "n_batch": 100, "eta": .001},
    {"lambda": 1, "n_epochs": 40, "n_batch": 100, "eta": .001},
]

np.random.seed(42)
seeds = np.random.randint(0, 100, 5)

# One record per configuration, sized from the list rather than a hard-coded 4.
stats = {
    i: {"val_loss": [], "train_loss": [], "val_acc": [], "train_acc": []}
    for i in range(len(GDparams))
}

for i, GDparam in enumerate(GDparams):
    for seed in seeds:
        np.random.seed(seed)
        W = np.random.normal(mu, sigma, (K, d))
        b = np.random.normal(mu, sigma, (K, 1))
        W, b, train_loss, val_loss, train_acc, val_acc = ut.minibatchGD(
            X_train, Y_train, y_train, X_val, Y_val, y_val, GDparam, W, b, verbose=False
        )

        # Keep only the final-epoch value of every history.
        stats[i]["train_loss"].append(train_loss[-1])
        stats[i]["val_loss"].append(val_loss[-1])
        stats[i]["train_acc"].append(train_acc[-1])
        stats[i]["val_acc"].append(val_acc[-1])

    # The figures show the run of the last seed for this configuration.
    # Pass the current configuration dict (GDparam), not the whole list, to
    # match how the plotting helpers are called everywhere else.
    ut.montage(W, GDparam)
    ut.plot_metric(train_loss, val_loss, GDparam, type="loss")
    ut.plot_metric(train_acc, val_acc, GDparam, type="accuracy")

# np.save pickles the dict into a 0-d object array; reload it with
# np.load(path, allow_pickle=True).item(). Create the folder first so a fresh
# checkout does not fail after all the training has finished.
os.makedirs("History", exist_ok=True)
np.save("History/stats.npy", stats)

In [None]:
# For every configuration, store [mean, std] (rounded to 5 decimals) of each
# final-epoch metric under a "stats_<metric>" key, then print one summary line.
for i in range(4):
    for metric in ("val_acc", "train_acc", "val_loss", "train_loss"):
        finals = stats[i][metric]
        stats[i]["stats_" + metric] = [round(np.mean(finals), 5), round(np.std(finals), 5)]

    print(f"Conf {i} -> val_acc: {stats[i]['stats_val_acc']} train_acc: {stats[i]['stats_train_acc']} val_loss: {stats[i]['stats_val_loss']}train_loss: {stats[i]['stats_train_loss']}")

# Overwrite the saved record so it also contains the summary entries.
np.save("History/stats.npy", stats)

# Bonus
## Early Stopping 

In [None]:
importlib.reload(ut)

# Long schedule (200 epochs); patience=5 is forwarded to ut.minibatchGD, which
# presumably stops once validation stops improving — TODO confirm in utils.
setting = {
    "lambda": 0.1,
    "n_epochs": 200,
    "n_batch": 100,
    "eta": .01,
}

np.random.seed(42)
mu, sigma = 0, 0.01
W = np.random.normal(loc=mu, scale=sigma, size=(K, d))
b = np.random.normal(loc=mu, scale=sigma, size=(K, 1))

W, b, train_loss, val_loss, train_acc, val_acc = ut.minibatchGD(
    X_train, Y_train, y_train,
    X_val, Y_val, y_val,
    setting, W, b,
    verbose=True,
    patience=5,
)

In [None]:
# Montage of W, then the train/validation curves for loss and accuracy.
ut.montage(W, setting)
for curves, label in (((train_loss, val_loss), "loss"), ((train_acc, val_acc), "accuracy")):
    ut.plot_metric(*curves, setting, type=label)

In [None]:
# Last recorded validation accuracy of the most recent training run.
val_acc[-1]

## Xavier Initialization

In [None]:
importlib.reload(ut)
setting = {"lambda": .1, "n_epochs": 40, "n_batch": 50, "eta": .001}

np.random.seed(42)
seeds = np.random.randint(0, 100, 5)
stats_xavier = {name: [] for name in ("val_loss", "train_loss", "val_acc", "train_acc")}

# Initialisation scale for this section: std = 1 / sqrt(d). It does not depend
# on the seed, so it is set once before the loop.
mu, sigma = 0, 1/np.sqrt(d)

for seed in seeds:
    np.random.seed(seed)
    W = np.random.normal(loc=mu, scale=sigma, size=(K, d))
    b = np.random.normal(loc=mu, scale=sigma, size=(K, 1))
    W, b, train_loss, val_loss, train_acc, val_acc = ut.minibatchGD(
        X_train, Y_train, y_train, X_val, Y_val, y_val, setting, W, b, verbose=False
    )

    # Record the final-epoch value of every history for this seed.
    for name, history in (
        ("train_loss", train_loss),
        ("val_loss", val_loss),
        ("train_acc", train_acc),
        ("val_acc", val_acc),
    ):
        stats_xavier[name].append(history[-1])

# [mean, std] across seeds, rounded to 5 decimals, stored as "stats_<metric>".
for name in ("val_acc", "train_acc", "val_loss", "train_loss"):
    finals = stats_xavier[name]
    stats_xavier["stats_" + name] = [round(np.mean(finals), 5), round(np.std(finals), 5)]

In [None]:

# One-line summary of the [mean, std] pairs, then persist the whole record.
# np.save stores the dict as a pickled object array.
xavier_summary = f"Conf -> val_acc: {stats_xavier['stats_val_acc']} train_acc: {stats_xavier['stats_train_acc']} val_loss: {stats_xavier['stats_val_loss']}train_loss: {stats_xavier['stats_train_loss']}"
print(xavier_summary)

np.save('History/stats_xavier.npy', stats_xavier)

In [None]:
# Montage of W, then loss and accuracy curves; W and the histories are
# whatever the last training call left in scope.
ut.montage(W, setting)
for curves, label in (((train_loss, val_loss), "loss"), ((train_acc, val_acc), "accuracy")):
    ut.plot_metric(*curves, setting, type=label)

## Learning rate Annealing 

In [None]:
importlib.reload(ut)

# "eta_decay" and "eta_decay_freq" are read by ut.minibatchGD when
# annealing=True (presumably eta is multiplied by 0.9 every 10 epochs —
# TODO confirm in utils).
setting = {
    "lambda": .1,
    "n_epochs": 100,
    "n_batch": 50,
    "eta": .01,
    "eta_decay": 0.9,
    'eta_decay_freq': 10,
}

# NOTE(review): no seed is set in this cell, so the draw depends on whatever
# state the global RNG was left in by previously executed cells.
mu, sigma = 0, 0.01
W = np.random.normal(loc=mu, scale=sigma, size=(K, d))
b = np.random.normal(loc=mu, scale=sigma, size=(K, 1))

W, b, train_loss, val_loss, train_acc, val_acc = ut.minibatchGD(
    X_train, Y_train, y_train,
    X_val, Y_val, y_val,
    setting, W, b,
    verbose=True,
    annealing=True,
)

In [None]:
# Montage of W, then the train/validation curves for loss and accuracy.
ut.montage(W, setting)
for curves, label in (((train_loss, val_loss), "loss"), ((train_acc, val_acc), "accuracy")):
    ut.plot_metric(*curves, setting, type=label)

## Shuffle the order 

In [None]:
importlib.reload(ut)
setting = {"lambda": .1, "n_epochs": 40, "n_batch": 50, "eta": .001}

np.random.seed(42)
seeds = np.random.randint(0, 100, 5)
stats_shuffle = {"val_loss": [], "train_loss": [], "val_acc": [], "train_acc": []}

# Same initialisation scale for every seed.
mu, sigma = 0, 0.01

for seed in seeds:
    np.random.seed(seed)
    W = np.random.normal(mu, sigma, (K, d))
    b = np.random.normal(mu, sigma, (K, 1))
    # ut.minibatchGD is unpacked into six values everywhere else in this
    # notebook. Unpacking all six here keeps train_acc / val_acc tied to THIS
    # run instead of silently reusing the histories of an earlier cell.
    W, b, train_loss, val_loss, train_acc, val_acc = ut.minibatchGD(
        X_train, Y_train, y_train, X_val, Y_val, y_val, setting, W, b,
        verbose=True, patience=2, reorder=True
    )

    # Record the final-epoch value of every history for this seed.
    stats_shuffle["train_loss"].append(train_loss[-1])
    stats_shuffle["val_loss"].append(val_loss[-1])
    stats_shuffle["train_acc"].append(train_acc[-1])
    stats_shuffle["val_acc"].append(val_acc[-1])

# [mean, std] across seeds, rounded to 5 decimals, stored as "stats_<metric>".
for metric in ("val_acc", "train_acc", "val_loss", "train_loss"):
    finals = stats_shuffle[metric]
    stats_shuffle["stats_" + metric] = [round(np.mean(finals), 5), round(np.std(finals), 5)]

# There is a single configuration here, so no configuration index is printed
# (the previous message interpolated a leftover loop variable from another cell).
print(f"Conf -> val_acc: {stats_shuffle['stats_val_acc']} train_acc: {stats_shuffle['stats_train_acc']} val_loss: {stats_shuffle['stats_val_loss']}train_loss: {stats_shuffle['stats_train_loss']}")

# np.save needs the object to store as its second argument. Create the output
# folder first so the save cannot fail after training has finished.
os.makedirs("History", exist_ok=True)
np.save('History/shuffle_stats.npy', stats_shuffle)

In [None]:
# Montage of W, then loss and accuracy curves, using the names currently in scope.
ut.montage(W, setting)
for curves, label in (((train_loss, val_loss), "loss"), ((train_acc, val_acc), "accuracy")):
    ut.plot_metric(*curves, setting, type=label)

## TODOs

* Use all the available training data for training (all five batches minus a small
subset of the training images for a validation set). Decrease the size of the
validation set down to around 1000.
* Train for a longer time and use your validation set to make sure you don't
overfit or to keep a record of the best model before you begin to overfit.
* Play around with decaying the learning rate by a factor of around 0.9 after each epoch.
Or you can decay the learning rate by a factor of 10 after every nth epoch.
* Shuffle the order of your training examples at the beginning of every epoch.

## Bonus SVM