In [None]:
# Various torch packages
import torch
import torch.nn as nn
import torch.nn.functional as F

# torchvision
from torchvision import datasets, transforms

# ------------------------
# get up one directory 
import sys, os
sys.path.append(os.path.abspath('../'))
# ------------------------

# custom packages
import models.aux_funs as maf
import optimizers as op
import regularizers as reg
import train
import math
import utils.configuration as cf
import utils.datasets as ud
from models.fully_connected import fully_connected

# Fix the random seed

In [None]:
# Base seed of the experiment; it is also stored in the configuration as 'random_seed'.
random_seed = 2
# Seed the random number generators through the project helper
# (presumably torch and related RNGs -- see utils.configuration to confirm).
cf.seed_torch(random_seed)

# Configure the experiment

In [None]:
# Value passed to maf.sparsify_ as `conf.sparse_init` during weight initialization.
sparse_init = 0.01
# r[0] is handed to maf.sparse_bias_uniform_ and r[1] to maf.sparse_weight_normal_
# when the weights are initialized.
r = [1,0.7/math.sqrt(sparse_init)]

# NOTE: every key must appear only once; a repeated key in a dict literal silently
# overrides the earlier value ('train_split' used to be listed twice).
conf_args = {#
    # data specification
    'data_file':"../../Data", 'train_split':0.95, 'data_set':"MNIST", 'download':False,
    # cuda
    'use_cuda':True, 'num_workers':4, 'cuda_device':0, 'pin_memory':True,
    #
    'epochs':30,
    # optimizer
    'delta':1.0, 'lr':0.1, 'lamda':1e-3, 'optim':"LinBreg",'beta':0.0,
    # initialization
    'sparse_init':sparse_init, 'r':r,
    # misc
    'random_seed':random_seed, 'eval_acc':True,
}

conf = cf.Conf(**conf_args)

# Initialize the model

In [None]:
# Layer widths of the fully connected network
# (784 inputs -- presumably flattened 28x28 MNIST images -- and 10 outputs).
sizes = [784, 200, 80, 10]
act_fun = nn.ReLU()

# Normalization statistics provided by the configuration.
model_kwargs = dict(mean=conf.data_set_mean, std=conf.data_set_std)

# The network that is trained ...
model = fully_connected(sizes, act_fun, **model_kwargs)
# ... and a second network of the same architecture, moved to the configured
# device and wrapped by the best-model tracker.
best_model = train.best_model(
    fully_connected(sizes, act_fun, **model_kwargs).to(conf.device)
)

# Weight initialization

In [None]:
def init_weights(conf, model):
    """(Re-)initialize the parameters of ``model`` in place and move it to the device.

    The steps are, in this order:

    1. ``maf.sparse_bias_uniform_(model, 0, conf.r[0])``
    2. ``maf.sparse_weight_normal_(model, conf.r[1])``
    3. ``maf.sparsify_(model, conf.sparse_init)``
    4. ``model.to(conf.device)``

    Judging by their names, the helpers draw the biases from a uniform and the
    weights from a normal distribution before sparsifying the weights -- see
    ``models.aux_funs`` for the exact semantics.

    Args:
        conf: experiment configuration providing ``r``, ``sparse_init`` and ``device``.
        model: the network whose parameters are re-initialized.

    Returns:
        The result of ``model.to(conf.device)``.
    """
    # initialize biases and weights
    maf.sparse_bias_uniform_(model, 0,conf.r[0])
    maf.sparse_weight_normal_(model, conf.r[1])

    # sparsify
    maf.sparsify_(model, conf.sparse_init)
    model = model.to(conf.device)
    return model

model = init_weights(conf,model)

# Optimizer

In [None]:
def init_opt(conf, model):
    """Create the optimizer selected by ``conf.optim`` and its learning rate scheduler.

    Supported values of ``conf.optim``:

    * ``"SGD"``     -> ``torch.optim.SGD`` on all parameters of the model
    * ``"LinBreg"`` -> ``op.LinBreg`` (l1 regularizer on the linear weights)
    * ``"adam"``    -> ``op.AdaBreg`` (l1 regularizer on the linear weights)
    * ``"ProxSGD"`` -> ``op.ProxSGD`` (l1 regularizer on the linear weights)

    For the three custom optimizers the biases form a second parameter group
    without a regularizer.

    Args:
        conf: experiment configuration (``optim``, ``lr``, ``lamda``, ``beta``, ``delta``).
        model: the network to be optimized.

    Returns:
        Tuple ``(opt, scheduler)``; the scheduler is a ``ReduceLROnPlateau``
        with factor 0.5, patience 5 and threshold 0.01.

    Raises:
        ValueError: if ``conf.optim`` is none of the names listed above.
    """
    linear_params = maf.get_weights_linear(model)
    bias_params = maf.get_bias(model)
    choice = conf.optim

    if choice == "SGD":
        opt = torch.optim.SGD(model.parameters(), lr=conf.lr, momentum=conf.beta)
    elif choice == "LinBreg":
        weight_group = {
            'params': linear_params,
            'lr': conf.lr,
            'reg': reg.reg_l1(lamda=conf.lamda),
            'momentum': conf.beta,
            'delta': conf.delta,
        }
        bias_group = {'params': bias_params, 'lr': conf.lr, 'momentum': conf.beta}
        opt = op.LinBreg([weight_group, bias_group])
    elif choice == "adam":
        weight_group = {'params': linear_params, 'lr': conf.lr, 'reg': reg.reg_l1(lamda=conf.lamda)}
        bias_group = {'params': bias_params, 'lr': conf.lr}
        opt = op.AdaBreg([weight_group, bias_group])
    elif choice == "ProxSGD":
        weight_group = {'params': linear_params, 'lr': conf.lr, 'reg': reg.reg_l1(lamda=conf.lamda)}
        bias_group = {'params': bias_params, 'lr': conf.lr}
        opt = op.ProxSGD([weight_group, bias_group])
    else:
        raise ValueError("Unknown Optimizer specified")

    # halve the learning rate once the monitored quantity stops improving
    lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        opt, factor=0.5, patience=5, threshold=0.01
    )
    return opt, lr_scheduler

# Dataset

In [None]:
# Get the train / validation / test loaders from the experiment configuration.
train_loader, valid_loader, test_loader = ud.get_data_set(conf)

# History and Runs

In [None]:
# Quantities that are recorded during training and validation
tracked = ['acc', 'loss', 'linear_sparse', 'reg_vals']

def reset_hist(tracked):
    """Return two fresh, empty history dictionaries ``(train_hist, val_hist)``.

    ``tracked`` is accepted for interface reasons but not used: the entries of
    the histories are created lazily while the training loop fills them.
    """
    return {}, {}

# Initialize runs
# Every entry describes one parameter configuration; 'reps' is the number of
# repetitions and 'label' the legend entry used in the final plot.
# The labels are raw strings so that the LaTeX command `\lambda` is not parsed
# as an (invalid) Python escape sequence.
params = [
    # LinBreg Runs
    {'optim': 'LinBreg','reps':2, 'lamda': 1e-3, 'random_seed':0, 'label':r'LinBreg ($\lambda=1$e-3)'}, # LinBreg, lamda:1e-3
    {'optim': 'LinBreg','reps':2, 'lamda': 1e-1, 'random_seed':0, 'label':r'LinBreg ($\lambda=1$e-1)'}, # LinBreg, lamda:1e-1
    # SGD Runs (Equivalent to LinBreg with lamda = 0.0)
    {'optim': 'LinBreg','reps':2, 'lamda': 0.0, 'random_seed':0, 'label':'SGD'}, # SGD
    # ProxSGD Runs
    {'optim': 'ProxSGD','reps':2, 'lamda': 1e-4, 'random_seed':0, 'label':r'ProxSGD ($\lambda=1$e-4)'}, # ProxSGD, lamda:1e-4
]

# Run manager built from the parameter list; `runs.step(conf)` drives the
# training loop and `runs.add_history` / `runs.history` collect the results.
runs = cf.run(params)

# Training

In [None]:
# One iteration of the outer loop corresponds to one run; `runs.step(conf)`
# decides whether there is another run to execute.
while runs.step(conf):
    # -----------------------------------------------------------------------------------
    # Start every run with empty histories, re-initialized weights and a new optimizer
    # -----------------------------------------------------------------------------------
    train_hist, val_hist = reset_hist(tracked)
    model = init_weights(conf, model)
    opt, scheduler = init_opt(conf, model)

    # -----------------------------------------------------------------------------------
    # Train the model
    # -----------------------------------------------------------------------------------
    for epoch in range(conf.epochs):
        banner = 25 * "<>"
        print(banner)
        print(50 * "|")
        print(banner)
        print('Epoch:', epoch)

        # ------------------------------------------------------------------------
        # Train step: record every tracked quantity it reports
        # ------------------------------------------------------------------------
        train_data = train.train_step(conf, model, opt, train_loader)
        for name in tracked:
            if name not in train_data:
                continue
            train_hist.setdefault(name, []).append(train_data[name])

        # ------------------------------------------------------------------------
        # Validation step: list-valued quantities are split into one history
        # per entry, stored under "<name>_<index>"
        # ------------------------------------------------------------------------
        val_data = train.validation_step(conf, model, opt, valid_loader)
        for name in tracked:
            if name not in val_data:
                continue
            value = val_data[name]
            if not isinstance(value, list):
                val_hist.setdefault(name, []).append(value)
                continue
            for pos, item in enumerate(value):
                val_hist.setdefault(name + "_" + str(pos), []).append(item)

        # The scheduler monitors the training loss
        scheduler.step(train_data['loss'])
        print("Learning rate:", opt.param_groups[0]['lr'])

        # Update the best model
        best_model(train_data['acc'], val_data['acc'], model=model)

    # Add the collected values to the run history
    runs.add_history(train_hist, "train")
    runs.add_history(val_hist, "val")

    # Update the random seed
    cf.seed_torch(conf.random_seed)

# Prepare Data
In this step we average over different runs of the same parameter configuration.

In [None]:
import numpy as np

hist = runs.history
keys = ['train_acc','val_acc','val_reg_vals_0','val_linear_sparse']

# `hist` holds one entry per executed run. The repetitions of one parameter
# configuration occupy `reps` consecutive entries, starting at `hist_idx`.
hist_idx = 0
for param in params:
    # Number of repetitions of this configuration. It is determined before the
    # key loop so that the history index advances correctly even if none of
    # the keys is present in the history.
    m = param.get('reps',1)
    data = {}
    for key in keys:
        if key not in hist[hist_idx]:
            continue

        # accuracies and sparsity are rescaled by 100 (plotted in percent); the
        # remaining key (regularizer values) is divided by lamda, or set to zero
        # if lamda is zero
        if key in ('train_acc', 'val_acc', 'val_linear_sparse'):
            rescale = 100
        else:
            rescale = 1/param['lamda'] if param['lamda'] > 0.0 else 0.0

        n = len(hist[hist_idx][key])
        data_loc = np.zeros(shape=(n,m))

        # store each repetition on its own and in a local array for mean and std
        for i in range(m):
            var = np.array(hist[hist_idx + i][key])
            data_loc[:,i] = rescale*var
            data[key+"_run_" + str(i)] = rescale*var

        # mean and std over the repetitions
        data[key+"_mean"] = np.mean(data_loc,axis=1)
        data[key+"_std"] = np.std(data_loc,axis=1)

    # always attach the (possibly empty) result so that the plotting cell can rely on it
    param['result'] = data

    # update the history index
    hist_idx += m

# Setup plots and appearance

In [None]:
import matplotlib
import matplotlib.pyplot as plt

# Reset to the matplotlib defaults first, then apply the ggplot style on top.
plt.style.use(['default', 'ggplot'])

# Fonts, line sizes and text color shared by all figures of this notebook.
matplotlib.rcParams.update({
    'mathtext.fontset': 'cm',
    'font.family': 'STIXGeneral',
    'font.size': 8,
    'lines.linewidth': 1,
    'lines.markersize': 2,
    'text.color': 'black',
})

In [None]:
def plot_training_stats(ax, keys, data, label='', color='k',alpha=1.0, alpha_fill=0.2):
    """Plot mean curves with a +/- one standard deviation band, one key per axis.

    The i-th entry of ``keys`` is drawn on ``ax[i]``. Keys without a
    ``<key>_mean`` entry in ``data`` are skipped and leave their axis untouched.

    Args:
        ax: indexable collection of axes, at least as long as ``keys``.
        keys: names of the quantities to plot.
        data: mapping that contains ``<key>_mean`` and ``<key>_std`` arrays.
        label: legend label of the mean curve.
        color: color of the curve and of the band.
        alpha: opacity of the mean curve.
        alpha_fill: opacity of the band.
    """
    for idx, key in enumerate(keys):
        mean_key = key + '_mean'
        if mean_key in data:
            center = data[mean_key]
            spread = data[key + '_std']
            # the x-axis counts the epochs, starting at zero
            xs = np.arange(len(center))
            panel = ax[idx]
            panel.plot(xs, center, label=label, color=color, alpha=alpha)
            panel.fill_between(xs, center - spread, center + spread, color=color, alpha=alpha_fill)

# Colors

In [None]:
# `matplotlib.cm.get_cmap` was deprecated in Matplotlib 3.7 and removed in 3.9;
# `plt.get_cmap` is available in old and new versions alike.
cmp = plt.get_cmap('Accent')

# Positions in the colormap, one per parameter configuration
# (note that 0.8 appears twice, i.e. the fifth and sixth color are identical).
colors = [cmp(pos) for pos in (0.7, 0.4, 0.0, 0.2, 0.8, 0.8, 0.3)]

for i, param in enumerate(params):
    # cycle through the palette instead of failing when there are more runs than colors
    param['color'] = colors[i % len(colors)]
    # fall back to the optimizer name if no label was given
    param.setdefault('label', param['optim'])

# Final Plot

In [None]:
# 2x2 grid, flattened so that ax[i] shows the i-th entry of `keys`
fig, ax = plt.subplots(2,2)
ax = np.ravel(ax)

for param in params:
    plot_training_stats(ax, keys, param['result'], color = param['color'], label = param['label'])

# Specify axes
## Train Acc
ax[0].set_ylabel('Train Accuracy [%]')
ax[0].set_xlabel('Epoch')
ax[0].set_ylim(85, 101)
## Validation Acc
ax[1].set_ylabel('Validation Accuracy [%]')
ax[1].set_xlabel('Epoch')
## L1-Norm
# raw string: `\ell` is a LaTeX command, not a Python escape sequence
ax[2].set_ylabel(r'$\ell_1$-Norm')
ax[2].set_xlabel('Epoch')
## Sparsity
ax[3].set_ylabel('Non-Zero Entries [%]')
ax[3].set_xlabel('Epoch')

# Legend (shown in the first panel only; all panels share the same entries)
handles, labels = ax[0].get_legend_handles_labels()
ax[0].legend(handles, labels, loc='best', frameon=True, prop={'size': 7}, ncol=1)

# Adjust size (in inches)
width = 5.50107/0.8
height = 8.02778/(2.0)
fig.set_size_inches(width, height)