## This section illustrates training and extrapolation testing of Task Subset Selection

In [1]:
import torch
import torch.nn.functional as F

import numpy as np

from tqdm import tqdm_notebook as tqdm

from data_generator_helper import generate_synthetic_selection_dataset
from models_new.nalu_b import NALU
from models_new.nac import NAC

from torchvision import datasets
import torchvision.models as models
import torchvision.utils as vutils
from tensorboardX import SummaryWriter

import datetime
import os

import matplotlib
import matplotlib.pyplot as plt
#from mpl_toolkits.mplot3d import Axes3D as plt3

from ipywidgets import interactive
from ipywidgets import widgets

In [2]:
def reportLoss(loss):
    print(loss)
    
def train(model, optimizer, x_train, y_train, epochs, batch_size, model_param):
    for epoch in range(epochs):
        for batch in range(len(x_train) // batch_size):

            model.train()
            optimizer.zero_grad()

            x_batch_train = x_train[batch:(batch+batch_size),:]
            y_batch_train = y_train[batch:(batch+batch_size),:]
            out = model(x_batch_train)
            
            loss = F.mse_loss(out, y_batch_train)
            
            if loss != loss:
                print("nan detected")
            #losses[epoch,batch] = loss
            loss.backward()
            optimizer.step()
    return test(model,x_train,y_train)

     
    
def test(model, x_test, y_test):
    model.eval()
    output_test = model(x_test)
    loss = F.mse_loss(output_test, y_test)
    return loss

In [7]:
# Training Parameters
lr = 0.02
epochs = 1000
batch_size = 1
sample_size = 100
set_size = 200
init = 'Kai_Uni'

# Model Parameters
in_dim = sample_size
hidden_dim = 1
out_dim = 2
num_layers = 1
model_param = "NALU"

# Interpolation range
value = [0,1]

# Extrapolation: Scale and range
extr_scale = [5,10,100]
extr_values = [[0,1],[-1,1]]

# Defining model and optimizer
model = NALU(num_layers, in_dim, hidden_dim, out_dim, init)
optimizer = torch.optim.RMSprop(model.parameters(),lr=lr)


# Generating dataset
x_train, y_train, boundaries = generate_synthetic_selection_dataset(value[0], 
                                                                    value[1], sample_size, 
                                                                    set_size, boundaries = None)

x_test, y_test, _ = generate_synthetic_selection_dataset(value[0], value[1],
                                                         sample_size, set_size, boundaries = boundaries)
x_train = x_train.type(torch.DoubleTensor)
y_train = y_train.type(torch.DoubleTensor)
x_test = x_test.type(torch.DoubleTensor)
y_test = y_test.type(torch.DoubleTensor)

# Training model
loss = train(model, optimizer, x_train, y_train, epochs, batch_size, model_param)
out = loss.data.numpy()
print("Interpolation Loss: ",'{:.2e}'.format(out))

# Testing extrapolation capabilites
print("Extrapolation Loss: ", end='')
for e_s in extr_scale:
    for e_val in extr_values:
        x_test, y_test, _ = generate_synthetic_selection_dataset(e_s*e_val[0]*value[1], e_s*e_val[1]*value[1],
                                                         sample_size, set_size, boundaries = boundaries)

        x_test = x_test.type(torch.DoubleTensor)
        y_test = y_test.type(torch.DoubleTensor)               
        test_loss  = test(model, x_test, y_test)
        out = test_loss.data.numpy() / np.max(x_test.data.numpy())
        print('{:.2e}'.format(out), end=' ')

Interpolation Loss:  8.35e-11
Extrapolation Loss: 6.96e-03 6.38e-03 2.60e+04 4.41e+04 1.09e+25 1.65e+25 

We see that by training on [0,1], the model extrapolates to [0,5] and [0,10] but seemingly not to [0,100], [-5,5],[-10,10],[-100,100].

## This section illustrates the movement of weights across the optimization surface

In [4]:
from wmps import WeightMov

In [5]:
_ = WeightMov("NALU_init_Xav_norm_set_size_100_cut.npy",20)

In [6]:
_ = WeightMov("NALU_init_Kai_norm_set_size_100_cut.npy",20)