In [None]:
# Various torch packages
import torch
import torch.nn as nn
import torch.nn.functional as F

# torchvision
from torchvision import datasets, transforms

# ------------------------
# add the parent directory to the module search path
import sys, os
sys.path.append(os.path.abspath('../'))
# ------------------------

# custom packages
import models.aux_funs as maf
import optimizers as op
import regularizers as reg
import train
import math
import utils.configuration as cf
import utils.datasets as ud
from models.fully_connected import fully_connected

# Fix the random seed

In [None]:
# Seed for this experiment; it is also stored in the configuration
# ('random_seed') and incremented after every run of the training loop.
random_seed = 2
# Project helper from utils.configuration (presumably seeds the torch RNGs — confirm).
cf.seed_torch(random_seed)

# Configure the experiment

In [None]:
# Sparsity parameter handed to `maf.sparsify_` during weight initialization.
sparse_init = 0.01
# Initialization parameters: r[0] is passed to `maf.sparse_bias_uniform_`,
# r[1] (scaled by 1/sqrt(sparse_init)) to `maf.sparse_weight_normal_`.
r = [1, 0.7 / math.sqrt(sparse_init)]

# Keyword arguments for cf.Conf. Every key must appear exactly once: a repeated
# key in a dict literal is not an error in Python, the last value silently wins.
conf_args = {
    # data specification
    'data_file': "../../Data", 'train_split': 0.95, 'data_set': "MNIST", 'download': False,
    # cuda
    'use_cuda': True, 'num_workers': 4, 'cuda_device': 0, 'pin_memory': True,
    # number of training epochs per run
    'epochs': 100,
    # optimizer
    'delta': 1.0, 'lr': 0.1, 'mu': 1e-3, 'optim': "LinBreg", 'beta': 0.0,
    # initialization
    'sparse_init': sparse_init, 'r': r,
    # misc
    'random_seed': random_seed, 'eval_acc': True,
}

conf = cf.Conf(**conf_args)

# Initialize the model

In [None]:
# Data-set mean / std taken from the configuration and passed on to the
# model constructor as keyword arguments.
model_kwargs = dict(mean=conf.data_set_mean, std=conf.data_set_std)

# Constructor arguments of the fully connected network
# (presumably the layer widths from input to output — confirm in models.fully_connected).
sizes = [784, 200, 80, 10]
act_fun = nn.ReLU()

# The network that is trained below.
model = fully_connected(sizes, act_fun, **model_kwargs)

# A second, separately constructed network, moved to conf.device and wrapped
# by train.best_model; it is called once per epoch in the training loop.
best_model = train.best_model(
    fully_connected(sizes, act_fun, **model_kwargs).to(conf.device)
)

# Weight initialization

In [None]:
def init_weights(conf, model):
    """Initialize the parameters of ``model`` and move it to ``conf.device``.

    Args:
        conf: Configuration object; ``conf.r``, ``conf.sparse_init`` and
            ``conf.device`` are read.
        model: Network passed to the initialization helpers.

    Returns:
        The result of ``model.to(conf.device)``.
    """
    # Bias and weight initialization via the project helpers. The trailing
    # underscore suggests they modify the model in place — confirm in models.aux_funs.
    maf.sparse_bias_uniform_(model, 0, conf.r[0])
    maf.sparse_weight_normal_(model, conf.r[1])

    # sparsify
    maf.sparsify_(model, conf.sparse_init)

    return model.to(conf.device)


model = init_weights(conf, model)

# Optimizer

In [None]:
def init_opt(conf, model):
    """Create the optimizer selected by ``conf.optim`` and its LR scheduler.

    Args:
        conf: Configuration object; reads ``optim``, ``lr``, ``beta``, ``mu``
            and ``delta``.
        model: Network whose parameters are optimized.

    Returns:
        Tuple ``(opt, scheduler)`` where ``scheduler`` is a
        ``ReduceLROnPlateau`` instance attached to ``opt``.

    Raises:
        ValueError: If ``conf.optim`` is none of "SGD", "LinBreg", "adam", "PSGD".
    """
    # Parameter collections are fetched up front, for every optimizer choice.
    linear_weights = maf.get_weights_linear(model)
    bias_params = maf.get_bias(model)

    optim_name = conf.optim

    if optim_name == "SGD":
        # plain SGD works on all model parameters at once
        opt = torch.optim.SGD(model.parameters(), lr=conf.lr, momentum=conf.beta)
    elif optim_name == "LinBreg":
        weight_group = {'params': linear_weights, 'lr': conf.lr,
                        'reg': reg.reg_l1(mu=conf.mu),
                        'momentum': conf.beta, 'delta': conf.delta}
        bias_group = {'params': bias_params, 'lr': conf.lr, 'momentum': conf.beta}
        opt = op.LinBreg([weight_group, bias_group])
    elif optim_name == "adam":
        weight_group = {'params': linear_weights, 'lr': conf.lr,
                        'reg': reg.reg_l1(mu=conf.mu)}
        bias_group = {'params': bias_params, 'lr': conf.lr}
        opt = op.AdamBreg([weight_group, bias_group])
    elif optim_name == "PSGD":
        weight_group = {'params': linear_weights, 'lr': conf.lr,
                        'reg': reg.reg_l1(mu=conf.mu)}
        bias_group = {'params': bias_params, 'lr': conf.lr}
        opt = op.ProxSGD([weight_group, bias_group])
    else:
        raise ValueError("Unknown Optimizer specified")

    # learning rate scheduler: halve the LR when the monitored value plateaus
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        opt, factor=0.5, patience=5, threshold=0.01
    )

    return opt, scheduler

# Dataset

In [None]:
# The project helper returns three objects built from the configuration; they are
# used below as the train / validation loaders (test_loader is not used in this part).
train_loader, valid_loader, test_loader = ud.get_data_set(conf)

# History and Runs

In [None]:
# Initialize history
tracked = ['loss', 'node_sparse']
train_history = {key: [] for key in tracked}
val_history = {key: [] for key in tracked}

# Initialize runs
runs = cf.run(**{'num_runs':0})

# Training

In [None]:
# Outer loop: one iteration per run, as long as runs.step() returns a truthy value.
while runs.step():
    # -----------------------------------------------------------------------------------
    # Reinit weights and the corresponding optimizer
    # -----------------------------------------------------------------------------------
    model = init_weights(conf, model)
    opt, scheduler = init_opt(conf, model)

    # -----------------------------------------------------------------------------------
    # Start every run with fresh histories. Reusing dicts created once outside the
    # loop would append this run's epochs to those of earlier runs (or of an earlier
    # execution of this cell) before they are handed to runs.add_history.
    # -----------------------------------------------------------------------------------
    train_history = {key: [] for key in tracked}
    val_history = {key: [] for key in tracked}

    # -----------------------------------------------------------------------------------
    # train the model
    # -----------------------------------------------------------------------------------
    for epoch in range(conf.epochs):
        print(25*"<>")
        print(50*"|")
        print(25*"<>")
        print('Epoch:', epoch)

        # ------------------------------------------------------------------------
        # train step, log the accuracy and loss
        # ------------------------------------------------------------------------
        train_data = train.train_step(conf, model, opt, train_loader)

        # update history (only the tracked keys that the step actually reported)
        for key in tracked:
            if key in train_data:
                train_history[key].append(train_data[key])

        # ------------------------------------------------------------------------
        # validation step
        # ------------------------------------------------------------------------
        val_data = train.validation_step(conf, model, opt, valid_loader, opt_reg_eval=False)

        # update history
        for key in tracked:
            if key in val_data:
                val_history[key].append(val_data[key])

        # additionally keep one series per entry of 'node_sparse'
        # ("node_sparse0", "node_sparse1", ...), created on first use
        for i, reg_val in enumerate(val_data['node_sparse']):
            val_history.setdefault("node_sparse" + str(i), []).append(reg_val)

        # the scheduler monitors the training loss of this epoch
        scheduler.step(train_data['loss'])
        print("Learning rate:", opt.param_groups[0]['lr'])
        # hand the current accuracies and model to the best-model tracker
        best_model(train_data['acc'], val_data['acc'], model=model)

    # add values to the run history
    runs.add_history(train_history, "train")
    runs.add_history(val_history, "val")

    # update random seed so the next run starts from a different seed
    conf.random_seed += 1
    cf.seed_torch(conf.random_seed)