In [10]:
# Various torch packages
import torch
import torch.nn as nn
import torch.nn.functional as F

# torchvision
from torchvision import datasets, transforms

# ------------------------
# get up one directory 
import sys, os
sys.path.append(os.path.abspath('../'))
# ------------------------

# custom packages
import models.aux_funs as maf
import optimizers as op
import regularizers as reg
import train
import math
import utils.configuration as cf
import utils.datasets as ud
from train import best_model
from utils.datasets import get_data_set, GaussianSmoothing
from models.fully_connected import fully_connected

In [11]:
# -----------------------------------------------------------------------------------
# Fix random seed
# -----------------------------------------------------------------------------------
# The same value is passed to cf.seed_torch here and stored in the configuration
# dictionary under 'random_seed'.
random_seed = 2
cf.seed_torch(random_seed)

# Parameters

In [12]:
# Keyword arguments for cf.Conf, one entry per line and grouped by purpose.
conf_args = {
    # data specification
    'data_file': "../../Data",
    'train_split': 0.95,
    'data_set': "Fashion-MNIST",
    'download': False,
    # cuda
    'use_cuda': False,
    'num_workers': 0,
    'cuda_device': 0,
    'pin_memory': True,
    # training length
    'epochs': 100,
    # optimizer
    'delta': 1.0,
    'lr': 0.001,
    'lamda_0': 0.05,
    'lamda_1': 0.05,
    'optim': "AdaBreg",
    'row_group': True,
    'reg': reg.reg_l1_l2,
    'beta': 0.0,
    # model
    'model_size': 7*[28*28],
    'act_fun': torch.nn.ReLU(),
    # initialization
    'sparse_init': 0.03,
    'r': [1,5,1],
    # misc
    'random_seed': random_seed,
    'eval_acc': True,
    'name': '---',
    'super_type': '---',
}
conf = cf.Conf(**conf_args)

# Define DenseNet model

In [13]:
class BasicBlock(nn.Module):
    """Dense-connectivity unit: 3x3 convolution followed by ReLU, concatenated to its input.

    Args:
        in_planes: number of channels of the incoming feature map.
        out_planes: number of channels produced by the convolution.

    ``forward`` returns a tensor with ``in_planes + out_planes`` channels along
    dim 1: the untouched input first, the new features last. The convolution
    uses kernel 3, stride 1 and padding 1, so the spatial size is preserved.
    """

    def __init__(self, in_planes, out_planes):
        super(BasicBlock, self).__init__()
        self.relu = nn.ReLU(inplace=True)

        self.out_planes = out_planes
        self.in_planes = in_planes

        # NOTE(review): bn1 is registered but never applied in forward(). It is
        # kept so the parameter list and state_dict layout stay unchanged;
        # confirm whether batch normalisation was meant to be used.
        self.bn1 = nn.BatchNorm2d(out_planes)
        self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=1, padding=1)

    def forward(self, x):
        # The in-place ReLU only touches the fresh conv output, never x itself.
        out = self.relu(self.conv1(x))

        return torch.cat([x, out], 1)
    
class DenseBlock(nn.Module):
    """Chain of ``num_layers`` BasicBlock units applied one after another.

    Args:
        num_layers: how many BasicBlock units to stack.
        in_planes: channel count of the tensor entering the first unit.
        out_planes: channels added by every unit.
    """

    def __init__(self, num_layers, in_planes, out_planes):
        super().__init__()

        # Every unit concatenates out_planes new channels onto its input, so
        # unit number idx receives in_planes + idx*out_planes channels.
        stages = [
            BasicBlock(in_planes + idx*out_planes, out_planes)
            for idx in range(num_layers)
        ]
        self.layer = nn.Sequential(*stages)

    def forward(self, x):
        stacked = self.layer(x)
        return stacked

class LinearBlock(nn.Module):
    """Linear layer plus ReLU applied to the last slice along dim 1 of the input.

    Args:
        in_size: number of features after flattening the selected slice.
        out_size: number of output features.
    """

    def __init__(self, in_size, out_size):
        super().__init__()
        self.linear = nn.Linear(in_size, out_size)
        self.act = nn.ReLU()

    def forward(self, x):
        # Keep only the final entry along dim 1, then collapse everything
        # behind the batch dimension into a single feature axis.
        last_slice = x[:, -1, :]
        flat = torch.flatten(last_slice, start_dim=1)
        projected = self.linear(flat)
        return self.act(projected)

class DenseNet(nn.Module):
    """Single-dense-block network: stem conv, DenseBlock, 1-channel transition, linear read-out.

    Args:
        depth: number of units in the dense block.
        planes: channels of the stem convolution and growth per dense unit.
        num_classes: size of the output of the final linear layer.
        mean: value subtracted from the input in ``forward``.
        std: value the centred input is divided by in ``forward``.
        im_channels: channel count of the input images.
        im_size: side length used to size the linear read-out (``im_size*im_size`` inputs).
    """

    def __init__(self, depth, planes, num_classes, mean = 0.0, std = 1.0, im_channels = 3, im_size=32):
        super().__init__()

        # input normalisation constants, used in forward()
        self.mean, self.std = mean, std

        # hyper-parameters kept on the instance
        self.depth, self.planes, self.num_classes = depth, planes, num_classes

        # Sub-modules are created in the same order as before so that the
        # registration order (and parameter initialisation order) is unchanged.
        self.conv1 = nn.Conv2d(im_channels, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.block1 = DenseBlock(depth, planes, planes)

        # The dense block emits (depth + 1) * planes channels; the transition
        # unit appends one extra channel, which the read-out then selects.
        self.trans1 = BasicBlock((depth + 1) * planes, 1)
        self.fc = LinearBlock(im_size*im_size, num_classes)

    def forward(self, x):
        normalised = (x - self.mean) / self.std
        features = self.block1(self.conv1(normalised))
        return self.fc(self.trans1(features))

In [17]:
# -----------------------------------------------------------------------------------
# build the network and wrap it in an instance of the best-model class
# -----------------------------------------------------------------------------------
# normalisation constants and input geometry are taken from the configuration
model_kwargs = dict(
    mean=conf.data_set_mean,
    std=conf.data_set_std,
    im_size=conf.im_shape[1],
    im_channels=conf.im_shape[0],
)

# positional arguments: depth=5, planes=12, num_classes=10
model = DenseNet(5, 12, 10, **model_kwargs)

# NOTE: this rebinds the name `best_model` (imported from `train` in the import
# cell) to an instance; the class itself stays reachable as train.best_model.
best_model = train.best_model(model.to(conf.device))

In [18]:
# Show the module tree of the freshly built network (layers and channel counts).
print(model)

DenseNet(
  (conv1): Conv2d(1, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (block1): DenseBlock(
    (layer): Sequential(
      (0): BasicBlock(
        (relu): ReLU(inplace=True)
        (bn1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv1): Conv2d(12, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      )
      (1): BasicBlock(
        (relu): ReLU(inplace=True)
        (bn1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv1): Conv2d(24, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      )
      (2): BasicBlock(
        (relu): ReLU(inplace=True)
        (bn1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv1): Conv2d(36, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      )
      (3): BasicBlock(
        (relu): ReLU(inplace=True)
        (bn1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=

In [19]:
# -----------------------------------------------------------------------------------
# weight initialization
# -----------------------------------------------------------------------------------
# NOTE(review): this rebinds model_kwargs without the 'im_size' / 'im_channels'
# entries that were used when the model was constructed; nothing in this cell
# reads the new value — confirm whether the assignment is still needed.
model_kwargs = {'mean':conf.data_set_mean, 'std':conf.data_set_std}    

def init_weights(conf, model):
    """Run the sparse initialisers from ``models.aux_funs`` on ``model`` and move it to ``conf.device``.

    Args:
        conf: configuration object; reads ``r``, ``sparse_init``, ``row_group`` and ``device``.
        model: network passed to the initialisers (presumably modified in place,
            as the trailing underscores suggest — TODO confirm).

    Returns:
        The result of ``model.to(conf.device)``.
    """
    r_bias, r_weight = conf.r[0], conf.r[1]

    # The call order is kept exactly as before.
    maf.sparse_bias_uniform_(model, 0, r_bias)
    maf.sparse_weight_normal_(model, r_weight)
    maf.sparse_weight_normal_(model, r_weight,ltype=nn.Conv2d)

    # sparsify the convolutional layers
    maf.sparsify_(model, conf.sparse_init, ltype = nn.Conv2d, row_group = conf.row_group)

    return model.to(conf.device)

In [20]:
# -----------------------------------------------------------------------------------
# Optimizer
# -----------------------------------------------------------------------------------
def get_skips(model):
    """Lazily yield the ``skips`` attribute of every sub-module of ``model`` that has one.

    Modules are visited in the order produced by ``model.modules()``; modules
    without a ``skips`` attribute are ignored.
    """
    yield from (mod.skips for mod in model.modules() if hasattr(mod, 'skips'))

def print_skips(model):
    """Print the ``skips`` values of every sub-module that has them, rounded to three decimals."""
    for mod in model.modules():
        if not hasattr(mod, 'skips'):
            continue
        # scale up, round, move to the CPU, then scale back down
        rounded = torch.round(1000*mod.skips.data).cpu()
        print((0.001*rounded))
            
def skips_to_list(model):
    """Return the ``skips`` values of all sub-modules that define them, as nested Python lists."""
    return [
        mod.skips.data.tolist()
        for mod in model.modules()
        if hasattr(mod, 'skips')
    ]
    

def init_opt(conf, model):
    """Build the optimizer selected by ``conf.optim`` and a plateau LR scheduler for it.

    Args:
        conf: configuration object; reads ``optim``, ``lr``, ``beta``,
            ``lamda_0``, ``lamda_1`` and ``reg``.
        model: network whose parameters are optimized.

    Returns:
        Tuple ``(opt, scheduler)``, where ``scheduler`` is a
        ``torch.optim.lr_scheduler.ReduceLROnPlateau`` wrapping ``opt``.

    Raises:
        ValueError: if ``conf.optim`` is neither ``"SGD"`` nor ``"AdaBreg"``.
    """
    # Fail fast on an unknown optimizer name. Previously this left `opt`
    # unbound and surfaced later as an UnboundLocalError when the scheduler
    # was constructed.
    supported_optims = ("SGD", "AdaBreg")
    if conf.optim not in supported_optims:
        raise ValueError(
            f"Unsupported optimizer {conf.optim!r}; expected one of {supported_optims}"
        )

    # Get access to different model parameters
    weights_linear = maf.get_weights_linear(model)
    weights_conv = maf.get_weights_conv(model)
    biases = maf.get_bias(model)
    skips = get_skips(model)

    # -----------------------------------------------------------------------------------
    # Initialize optimizer
    # -----------------------------------------------------------------------------------
    # NOTE(review): reg1 is constructed but not attached to any parameter group
    # below — confirm whether the linear weights were meant to use it.
    reg1 = conf.reg(lamda=conf.lamda_0)
    reg2 = reg.reg_l1(lamda=conf.lamda_1)
    reg3 = reg.reg_l1_l2_conv(lamda=conf.lamda_0)

    if conf.optim == "SGD":
        opt = torch.optim.SGD(model.parameters(), lr=conf.lr, momentum=conf.beta)
    elif conf.optim == "AdaBreg":
        # one parameter group per parameter kind; only the convolution weights
        # and the skips carry a regularizer
        opt = op.AdaBreg([{'params': weights_linear, 'lr' : conf.lr},
                           {'params': weights_conv, 'lr' : conf.lr, 'reg' : reg3},
                           {'params': biases, 'lr': conf.lr},
                           {'params': skips, 'lr':conf.lr, 'reg':reg2}])

    # learning rate scheduler
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(opt, factor=0.5, patience=5,threshold=0.01)
    
    return opt, scheduler

In [21]:
# Set to True to call conf.write_to_csv() when this cell runs.
save_params = False
if save_params:
    conf.write_to_csv()

# Dataset

In [22]:
# Build the train / validation / test loaders from the configuration.
train_loader, valid_loader, test_loader = ud.get_data_set(conf)

# History and Run Specification

In [23]:
# -----------------------------------------------------------------------------------
# initialize history
# -----------------------------------------------------------------------------------
# keys copied from the per-epoch train / validation results into the histories
tracked = ['loss', 'node_sparse']
# key -> list of per-epoch values, filled by the training loop
train_hist = {}
val_hist = {}

# Training

In [None]:
# -----------------------------------------------------------------------------------
# Re-initialize the weights and build the matching optimizer / scheduler
# -----------------------------------------------------------------------------------
model = init_weights(conf, model)
opt, scheduler = init_opt(conf, model)

# -----------------------------------------------------------------------------------
# training loop: one train step and one validation step per epoch
# -----------------------------------------------------------------------------------
for epoch in range(conf.epochs):
    # epoch banner
    print(25*"<>", 50*"|", 25*"<>", sep="\n")
    print('Epoch:', epoch)

    # ------------------------------------------------------------------------
    # train step; record the tracked quantities it reports
    # ------------------------------------------------------------------------
    train_data = train.train_step(conf, model, opt, train_loader)

    for key in tracked:
        if key not in train_data:
            continue
        train_hist.setdefault(key, []).append(train_data[key])

    # ------------------------------------------------------------------------
    # validation step
    # ------------------------------------------------------------------------
    val_data = train.validation_step(conf, model, opt, valid_loader)

    print_skips(model)

    # record validation quantities; a list-valued entry gets one history per
    # index, stored under "<key>_<index>"
    for key in tracked:
        if key not in val_data:
            continue
        var = val_data[key]
        if not isinstance(var, list):
            val_hist.setdefault(key, []).append(var)
            continue
        for i, var_loc in enumerate(var):
            val_hist.setdefault(key+"_" + str(i), []).append(var_loc)

    # the plateau scheduler is driven by the training loss
    scheduler.step(train_data['loss'])
    print("Learning rate:",opt.param_groups[0]['lr'])
    best_model(train_data['acc'], val_data['acc'], model=model)

<><><><><><><><><><><><><><><><><><><><><><><><><>
||||||||||||||||||||||||||||||||||||||||||||||||||
<><><><><><><><><><><><><><><><><><><><><><><><><>
Epoch: 0
