### Imports

In [1]:
# Mount Google Drive at /content/drive so files stored under MyDrive are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!pwd

/content


In [3]:
# Project folder on the mounted Drive; the notebook changes into it before importing from `models`.
PATH_TO_FOLDER = '/content/drive/MyDrive/DL_project/EXPERIMENTS'

In [4]:
%cd {PATH_TO_FOLDER}

/content/drive/MyDrive/DL_project/EXPERIMENTS


In [5]:
%load_ext autoreload
%autoreload 2

In [6]:
from models import MLP, NAC, NALU

In [7]:
import math
import random
import numpy as np
from tqdm.notebook import tqdm
from copy import deepcopy

import torch
import torch.nn as nn
import torch.nn.functional as F

# Ensure deterministic behavior.
# A fixed integer seed is used on purpose: Python salts `hash()` of strings per
# process (see PYTHONHASHSEED), so seeds derived from hash("...") differ after
# every kernel restart, and `hash(...) % 2**32 - 1` can even evaluate to -1,
# which np.random.seed rejects.
SEED = 42

torch.backends.cudnn.deterministic = True
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

# Device configuration: first CUDA device when available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
%%capture
!pip install wandb --upgrade

In [8]:
import wandb
# Authenticate this session with Weights & Biases before any run is started.
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mgalmitr[0m (use `wandb login --relogin` to force relogin)


True

### INTERPOLATION Parameters

In [10]:
# W&B bookkeeping: project name and the tag attached to every run.
NAMES = {
    'project': 'INTERPOLATION',
    'tags': 'preparation',
}

# Architecture arguments handed to the model constructors.
MODEL_PARAMETERS = {
    'in_dim': 2,
    'out_dim': 1,
    'NUM_LAYERS': 2,
    'HIDDEN_DIM': 2,
}

# Arguments for the synthetic arithmetic dataset (see generate_data).
ARITHMETIC_PARAMETERS = {
    'num_train': 500,
    'num_test': 50,
    'dim': 100,
    'num_sum': 5,
    'RANGE': [5, 10],
}

# Optimisation settings.
# NUM_ITERS was int(1e5) in an earlier setup.
TRAIN_PARAMETERS = {
    'LEARNING_RATE': 1e-2,
    'NUM_ITERS': 50000,
    'activation': 'relu6',
}

# Target functions. Every entry takes two operands so they can be called
# uniformly; the unary ones ('squared', 'root') ignore the second operand.
ARITHMETIC_FUNCTIONS = dict(
    add=lambda x, y: x + y,
    sub=lambda x, y: x - y,
    mul=lambda x, y: x * y,
    div=lambda x, y: x / y,
    squared=lambda x, y: torch.pow(x, 2),
    root=lambda x, y: torch.sqrt(x),
)

# Lookup from the architecture name stored in a run config to its class.
MODELS = dict(MLP=MLP, NAC=NAC, NALU=NALU)

In [None]:
# def get_id():
#     id_dct = {}
#     for i, key in enumerate(ARITHMETIC_FUNCTIONS.keys()):
#         id_dct[key] = i
#     return id_dct

# FUNC_id = get_id()

### Config for Wandb

In [12]:
def create_config(model, function):
    """Assemble the flat run configuration for one (model, function) pair.

    Starts from the two identifying keys and then merges, in this order,
    MODEL_PARAMETERS, ARITHMETIC_PARAMETERS, TRAIN_PARAMETERS and NAMES.
    A key that occurs more than once takes the value merged last.

    Args:
        model: value stored under 'model'.
        function: value stored under 'function'.

    Returns:
        A new dict; the module-level parameter dicts are not modified.
    """
    config = {'model': model, 'function': function}
    for section in (MODEL_PARAMETERS, ARITHMETIC_PARAMETERS, TRAIN_PARAMETERS, NAMES):
        config.update(section)
    return config

### Example of config

In [13]:
config = create_config('MLP', 'add')

In [14]:
config

{'HIDDEN_DIM': 2,
 'LEARNING_RATE': 0.01,
 'NUM_ITERS': 50000,
 'NUM_LAYERS': 2,
 'RANGE': [5, 10],
 'activation': 'relu6',
 'dim': 100,
 'function': 'add',
 'in_dim': 2,
 'model': 'MLP',
 'num_sum': 5,
 'num_test': 50,
 'num_train': 500,
 'out_dim': 1,
 'project': 'INTERPOLATION',
 'tags': 'preparation'}

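### Iteration on models and arithmetic functions

**Note:** run the "Wandb pipeline" cell below first — it defines `model_pipeline`, which this loop calls.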

In [17]:
# Sweep every (arithmetic function, architecture) pair; functions vary slowest.
mdls = MODELS.keys()
fncts = ARITHMETIC_FUNCTIONS.keys()

for function, arch in [(f, a) for f in fncts for a in mdls]:
    config = create_config(arch, function)
    # Build, train and analyze the model with the pipeline
    model = model_pipeline(config)

	1/50000: loss: 5403.0131836 - mea: 73.4043427
	1001/50000: loss: 0.9293643 - mea: 0.9614011
	2001/50000: loss: 0.6107851 - mea: 0.7814663
	3001/50000: loss: 0.6091944 - mea: 0.7804537
	4001/50000: loss: 0.6065084 - mea: 0.7787365
	5001/50000: loss: 0.6039546 - mea: 0.7770998


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

[34m[1mwandb[0m: [32m[41mERROR[0m Control-C detected -- Run data was not synced


KeyboardInterrupt: ignored

### Wandb pipeline

In [16]:
"""
pipeline for training model and tracking weights, gradients and metrics via Wandb
"""

def model_pipeline(hyperparameters):
    """Run one experiment inside a W&B run: build, train, evaluate, log.

    Args:
        hyperparameters: mapping with at least the keys 'project', 'function',
            'model' and 'tags', plus everything `make` and `train` read from
            `wandb.config` (see `create_config`). 'tags' may be a single
            string or a sequence of strings.

    Returns:
        The trained model object produced by `make`.
    """
    # wandb.init documents `tags` as a list of strings, so wrap a bare string.
    tags = hyperparameters['tags']
    tags = [tags] if isinstance(tags, str) else list(tags)

    with wandb.init(project=str(hyperparameters['project']),
                    group=str(hyperparameters['function']),
                    job_type=str(hyperparameters['model']),
                    name=f"{hyperparameters['function']}_{hyperparameters['model']}",
                    tags=tags,
                    config=hyperparameters):
        config = wandb.config

        random_model, model, data, criterion, optimizer = make(config)
        train_data, test_data = data

        # Baseline: mean absolute error of the untrained reference model.
        # NOTE(review): `random_model` is a single fixed network, so these 100
        # passes presumably return the same value each time unless the model
        # has stochastic layers; averaging over random initialisations would
        # need a fresh model per pass -- confirm the intent.
        random_mae = [test(random_model, test_data).mean().item()
                      for _ in range(100)]
        random_result = np.mean(random_mae)

        train(model,
              train_data,
              criterion,
              optimizer,
              config.NUM_ITERS)

        # Named `abs_errors` rather than `abs` to keep the builtin usable.
        abs_errors = test(model, test_data)
        result = abs_errors.mean().item()
        # Test error as a percentage of the baseline error.
        normalized_result = 100.0 * result / random_result

        print('model_type:', config.model)
        print('function:', config.function)
        print('test_result:', result)
        print('normalized_test_result:', normalized_result)

        # W&B plots: one single-bar chart per metric, keyed by function name.
        table_res = wandb.Table(data=[[config.function, result]],
                                columns=["FUNCTION", "Mean_of_MAE"])

        table_norm_res = wandb.Table(data=[[config.function, normalized_result]],
                                     columns=["FUNCTION", "Scaled_Mean_of_MAE"])

        wandb.log({
            'Results': wandb.plot.bar(table_res,
                                      "FUNCTION",
                                      "Mean_of_MAE",
                                      title="Results"),

            'Normalized_Results': wandb.plot.bar(table_norm_res,
                                                 "FUNCTION",
                                                 "Scaled_Mean_of_MAE",
                                                 title="Normalized_Results")
        })

    return model


def make(config):
    """Create everything one run needs from its configuration.

    Args:
        config: object exposing the run settings as attributes (model,
            NUM_LAYERS, in_dim, HIDDEN_DIM, out_dim, function, num_train,
            num_test, dim, num_sum, RANGE, LEARNING_RATE).

    Returns:
        (random_model, model, data, criterion, optimizer) where `model` is
        the `.model` attribute of the architecture selected by `config.model`,
        `random_model` is the `.model` of an MLP built with the same
        arguments, `data` is the result of `generate_data`, `criterion` is
        `F.mse_loss` and `optimizer` is an RMSprop over `model`'s parameters.
    """
    architecture = MODELS[config.model]
    layer_spec = (config.NUM_LAYERS, config.in_dim, config.HIDDEN_DIM, config.out_dim)

    # The model under study is built first, then the MLP reference.
    model = architecture(*layer_spec).model
    random_model = MLP(*layer_spec).model

    data = generate_data(
        config.function,
        config.num_train,
        config.num_test,
        config.dim,
        config.num_sum,
        config.RANGE,
    )

    # Only `model` is optimised; `random_model` is never given an optimizer here.
    criterion = F.mse_loss
    optimizer = torch.optim.RMSprop(model.parameters(), lr=config.LEARNING_RATE)

    return random_model, model, data, criterion, optimizer


def generate_data(function, num_train, num_test, dim, num_sum, support):
    """Build a synthetic arithmetic dataset and split it into train and test.

    A pool of `dim` values is drawn uniformly from `support`. Each sample
    picks two disjoint sets of `num_sum` pool indices; the two sums are the
    inputs and `ARITHMETIC_FUNCTIONS[function]` applied to them is the target.

    Args:
        function: key into ARITHMETIC_FUNCTIONS.
        num_train: number of training samples.
        num_test: number of test samples.
        dim: size of the value pool.
        num_sum: how many pool entries are summed per operand.
        support: bounds unpacked into `uniform_`.

    Returns:
        ((X_train, y_train), (X_test, y_test)); each X has two columns and
        each y has one column.
    """
    operation = ARITHMETIC_FUNCTIONS[function]
    pool = torch.FloatTensor(dim).uniform_(*support).unsqueeze_(1)
    total = num_train + num_test

    inputs, targets = [], []
    for _ in range(total):
        first = random.sample(range(dim), num_sum)
        # The second operand only draws from indices the first one left unused.
        remaining = [j for j in range(dim) if j not in first]
        second = random.sample(remaining, num_sum)
        lhs = pool[first].sum()
        rhs = pool[second].sum()
        inputs.append([lhs, rhs])
        targets.append(operation(lhs, rhs))

    X = torch.FloatTensor(inputs)
    y = torch.FloatTensor(targets).unsqueeze_(1)

    # Shuffle sample positions; the first `num_test` go to the test split.
    order = list(range(total))
    np.random.shuffle(order)
    test_idx, train_idx = order[:num_test], order[num_test:]

    train_split = (X[train_idx], y[train_idx])
    test_split = (X[test_idx], y[test_idx])
    return (train_split, test_split)


def train(model, train_data, criterion, optimizer, num_iters, log_every=1000):
    """Full-batch training loop with periodic W&B logging.

    Args:
        model: callable network whose parameters `optimizer` updates.
        train_data: pair (inputs, targets) fed to the model in one batch.
        criterion: loss function called as criterion(output, target).
        optimizer: optimizer stepping `model`'s parameters.
        num_iters: number of optimisation steps.
        log_every: logging period in iterations (must be positive); also used
            as the `log_freq` for `wandb.watch`. Defaults to the previously
            hard-coded 1000.
    """
    wandb.watch(model, criterion, log="all", log_freq=log_every)

    data, target = train_data
    for i in range(num_iters):
        out = model(data)
        loss = criterion(out, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if i % log_every == 0:
            # The MAE is only reported, never back-propagated, so compute it
            # just on logging steps and without autograd tracking. `out` still
            # holds the predictions made before this step's update, matching
            # the logged loss.
            with torch.no_grad():
                mea = torch.mean(torch.abs(target - out))
            train_log(loss, mea, i, num_iters)

def train_log(loss, mea, epoch, num_iters):
    """Report one training checkpoint to W&B and to stdout.

    Args:
        loss: object whose `.item()` gives the current loss value.
        mea: object whose `.item()` gives the current mean absolute error.
        epoch: zero-based iteration index; printed one-based.
        num_iters: total number of iterations, used in the printed progress.
    """
    loss_value = loss.item()
    mae_value = mea.item()

    metrics = {"epoch": epoch, "loss_(mse)": loss_value, 'mean_absolute_error': mae_value}
    wandb.log(metrics)

    line = "\t{}/{}: loss: {:.7f} - mea: {:.7f}".format(epoch + 1, num_iters, loss_value, mae_value)
    print(line)


def test(model, test_data):
    data, target = test_data
    with torch.no_grad():
        out = model(data)
        return torch.abs(target - out)