In [None]:
import sys
import importlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch

%matplotlib inline

# Mount Google Drive at /content/drive; later cells add a project folder under
# this mount point to sys.path and read a CSV from it.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Make the project folder on the mounted drive importable (must run before the
# import below).
sys.path.insert(0,'/content/drive/MyDrive/Projects/UncertaintyExp')

# Import the project's custom modules.
import utils, pipelines, models, train, predict, results

# Reload every module: Google Colab doesn't automatically pick up edits to
# custom modules, so they must be reloaded explicitly after each change.
importlib.reload(utils)
importlib.reload(pipelines)
importlib.reload(models)
importlib.reload(train)
importlib.reload(predict)
importlib.reload(results)

In [None]:
# General settings shared by the experiment cells below.
# NOTE: every experiment loop reassigns `seed` to the experiment index, so this
# initial value is not the one passed to pipelines.from_git.
seed = 1
# number of repeated runs per dataset (outer loop of each experiment cell)
n_experiments = 10
# default mini-batch size; several later dataset cells reassign it
batch_size = 128
# training epochs per model
epochs = 200
# passed as `split` to pipelines.from_git
train_split = 0.8
# passed as `n_hidden` to every model constructor
n_hidden = 100
# passed as `tau` to every model constructor
tau = 0.15
# passed as `test_iters` to net.evaluation_step
test_iters = 10

In [None]:
# Select dataset and index of target
# NOTE: this cell does not set batch_size; it uses whatever value the most
# recently executed cell left behind (the settings cell, or a later dataset cell).
dataset, label_index = 'bostonHousing', 13

# reload models module so edits to it are picked up without restarting the kernel
importlib.reload(models)

# instantiate final logs: one entry per experiment, each holding one entry per model
final_logs = []

# set gpu (falls back to cpu when CUDA is not available)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# model classes to compare; defined once because the list is loop-invariant and
# results.resultdf() below needs it even when the experiment loop does not run
model_list = [models.Baseline, models.MCDropout, models.SDENet, models.IVNet]

# start experiments
for experiment in range(n_experiments):
  experiment_logs = []
  # each run uses its own index as seed (overrides the value from the settings cell)
  seed = experiment
  # get data
  # NOTE(review): `train` rebinds the name that the import cell binds to the
  # project's `train` module.
  train, val, test, target_scale = pipelines.from_git(dataset=dataset,
                                  label_index=label_index,
                                  split=train_split, seed=seed, device=device)
  n_features = train[0].shape[1]
  n_train = train[0].shape[0]
  # builtin int() instead of np.int: the alias was deprecated in NumPy 1.20 and
  # removed in NumPy 1.24, where it raises AttributeError
  steps = int(n_train / batch_size) + 1
  print(f"Experiment {experiment+1}: train sample: {n_train}, batchsize: {batch_size}, steps: {steps}")
  for model in model_list:
    # compile models
    net = model(n_features=n_features, n_hidden=n_hidden, tau=tau, n_train=n_train, layer_depth=6, device=device).to(device)
    train_optim, train_sched = net.custom_compile()
    # instantiate model logs
    train_logs = []
    val_logs_1, val_logs_2 = [], []
    test_logs_1, test_logs_2 = [], []
    # start training
    for epoch in range(epochs):
      # original note: not needed (load_batch is called with shuffle_epoch=False)
      permutation = utils.shuffle(train)
      # start epoch
      for step in range(steps):
        batch = utils.load_batch(step, train, batch_size, permutation, shuffle_epoch=False)
        # get train logs
        train_log = net.train_step(model=net, batch=batch, optim=train_optim,
                                   scheduler=train_sched, train_logs=train_logs, batch_size=batch_size)
        train_logs.append(train_log)
      # get validation logs
      val_logs = net.evaluation_step(model=net, test_tuple=val, eval_func=utils.evaluation,
                                     target_scale=target_scale, test_iters=test_iters)
      # get test logs
      test_logs = net.evaluation_step(model=net, test_tuple=test, eval_func=utils.evaluation,
                                      target_scale=target_scale, test_iters=test_iters)
      val_logs_1.append(val_logs[0])
      val_logs_2.append(val_logs[1])
      test_logs_1.append(test_logs[0])
      test_logs_2.append(test_logs[1])
    # only store the test logs of the epoch with the lowest first validation metric
    best_epoch = np.argmin(val_logs_1)
    experiment_logs.append([test_logs_1[best_epoch], test_logs_2[best_epoch]])
    # log plots
    utils.log_plots(model_name=net.custom_name, dataset_name=dataset,
                    train_logs=train_logs, val_logs_1=val_logs_1,
                    val_logs_2=val_logs_2, test_logs_1=test_logs_1,
                    test_logs_2=test_logs_2, remove_n=0, log_transform=False)

  final_logs.append(experiment_logs)
# show results
results.resultdf(model_list, final_logs)

In [None]:
# Select dataset and index of target
# NOTE: this cell does not set batch_size; it uses whatever value the most
# recently executed cell left behind (the settings cell, or a later dataset cell).
dataset, label_index = 'wine-quality-red', 11

# reload models module so edits to it are picked up without restarting the kernel
importlib.reload(models)

# one entry per experiment, each holding one entry per model
final_logs = []
# not read anywhere in this cell; kept in case other cells rely on it
best_log = 0

# set gpu (falls back to cpu when CUDA is not available)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# model classes to compare; defined once because the list is loop-invariant and
# results.resultdf() below needs it even when the experiment loop does not run
model_list = [models.Baseline, models.MCDropout, models.SDENet, models.IVNet]

for experiment in range(n_experiments):
  experiment_logs = []
  # each run uses its own index as seed (overrides the value from the settings cell)
  seed = experiment
  # NOTE(review): `train` rebinds the name that the import cell binds to the
  # project's `train` module.
  train, val, test, target_scale = pipelines.from_git(dataset=dataset,
                                  label_index=label_index,
                                  split=train_split, seed=seed, device=device)
  n_features = train[0].shape[1]
  n_train = train[0].shape[0]
  # builtin int() instead of np.int: the alias was deprecated in NumPy 1.20 and
  # removed in NumPy 1.24, where it raises AttributeError
  steps = int(n_train / batch_size) + 1
  print(f"Experiment {experiment+1}: train sample: {n_train}, batchsize: {batch_size}, steps: {steps}")
  for model in model_list:
    # build the network and its optimizer / scheduler
    net = model(n_features=n_features, n_hidden=n_hidden, tau=tau, n_train=n_train, layer_depth=6, device=device).to(device)
    train_optim, train_sched = net.custom_compile()
    # per-model logs
    train_logs = []
    val_logs_1, val_logs_2 = [], []
    test_logs_1, test_logs_2 = [], []
    for epoch in range(epochs):
      # permutation is handed to load_batch, which is called with shuffle_epoch=False
      permutation = utils.shuffle(train)
      for step in range(steps):
        batch = utils.load_batch(step, train, batch_size, permutation, shuffle_epoch=False)
        train_log = net.train_step(model=net, batch=batch, optim=train_optim,
                                   scheduler=train_sched, train_logs=train_logs, batch_size=batch_size)
        train_logs.append(train_log)
      # evaluate on validation and test splits after every epoch
      val_logs = net.evaluation_step(model=net, test_tuple=val, eval_func=utils.evaluation,
                                     target_scale=target_scale, test_iters=test_iters)
      test_logs = net.evaluation_step(model=net, test_tuple=test, eval_func=utils.evaluation,
                                      target_scale=target_scale, test_iters=test_iters)
      val_logs_1.append(val_logs[0])
      val_logs_2.append(val_logs[1])
      test_logs_1.append(test_logs[0])
      test_logs_2.append(test_logs[1])
    # only store the test logs of the epoch with the lowest first validation metric
    best_epoch = np.argmin(val_logs_1)
    experiment_logs.append([test_logs_1[best_epoch], test_logs_2[best_epoch]])
    utils.log_plots(model_name=net.custom_name, dataset_name=dataset,
                    train_logs=train_logs, val_logs_1=val_logs_1,
                    val_logs_2=val_logs_2, test_logs_1=test_logs_1,
                    test_logs_2=test_logs_2, remove_n=0, log_transform=False)

  final_logs.append(experiment_logs)
# show results
results.resultdf(model_list, final_logs)

In [None]:
# Batch size for this dataset (replaces the value from the settings cell for
# every cell executed afterwards)
batch_size = 2048*3
# Select dataset and index of target
dataset, label_index = 'naval-propulsion-plant', 16

# reload models module so edits to it are picked up without restarting the kernel
importlib.reload(models)

# one entry per experiment, each holding one entry per model
final_logs = []
# not read anywhere in this cell; kept in case other cells rely on it
best_log = 0

# set gpu (falls back to cpu when CUDA is not available)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# model classes to compare; defined once because the list is loop-invariant and
# results.resultdf() below needs it even when the experiment loop does not run
model_list = [models.Baseline, models.MCDropout, models.SDENet, models.IVNet]

for experiment in range(n_experiments):
  experiment_logs = []
  # each run uses its own index as seed (overrides the value from the settings cell)
  seed = experiment
  # NOTE(review): `train` rebinds the name that the import cell binds to the
  # project's `train` module.
  train, val, test, target_scale = pipelines.from_git(dataset=dataset,
                                  label_index=label_index,
                                  split=train_split, seed=seed, device=device)
  n_features = train[0].shape[1]
  n_train = train[0].shape[0]
  # builtin int() instead of np.int: the alias was deprecated in NumPy 1.20 and
  # removed in NumPy 1.24, where it raises AttributeError
  steps = int(n_train / batch_size) + 1
  print(f"Experiment {experiment+1}: train sample: {n_train}, batchsize: {batch_size}, steps: {steps}")
  for model in model_list:
    # build the network and its optimizer / scheduler
    net = model(n_features=n_features, n_hidden=n_hidden, tau=tau, n_train=n_train, layer_depth=6, device=device).to(device)
    train_optim, train_sched = net.custom_compile()
    # per-model logs
    train_logs = []
    val_logs_1, val_logs_2 = [], []
    test_logs_1, test_logs_2 = [], []
    for epoch in range(epochs):
      # permutation is handed to load_batch, which is called with shuffle_epoch=False
      permutation = utils.shuffle(train)
      for step in range(steps):
        batch = utils.load_batch(step, train, batch_size, permutation, shuffle_epoch=False)
        train_log = net.train_step(model=net, batch=batch, optim=train_optim,
                                   scheduler=train_sched, train_logs=train_logs, batch_size=batch_size)
        train_logs.append(train_log)
      # evaluate on validation and test splits after every epoch
      val_logs = net.evaluation_step(model=net, test_tuple=val, eval_func=utils.evaluation,
                                     target_scale=target_scale, test_iters=test_iters)
      test_logs = net.evaluation_step(model=net, test_tuple=test, eval_func=utils.evaluation,
                                      target_scale=target_scale, test_iters=test_iters)
      val_logs_1.append(val_logs[0])
      val_logs_2.append(val_logs[1])
      test_logs_1.append(test_logs[0])
      test_logs_2.append(test_logs[1])
    # only store the test logs of the epoch with the lowest second validation metric
    best_epoch = np.argmin(val_logs_2)
    experiment_logs.append([test_logs_1[best_epoch], test_logs_2[best_epoch]])
    utils.log_plots(model_name=net.custom_name, dataset_name=dataset,
                    train_logs=train_logs, val_logs_1=val_logs_1,
                    val_logs_2=val_logs_2, test_logs_1=test_logs_1,
                    test_logs_2=test_logs_2, remove_n=0, log_transform=True)

  final_logs.append(experiment_logs)
# show results
results.resultdf(model_list, final_logs)

In [None]:
# Batch size for this dataset (replaces the value from the settings cell for
# every cell executed afterwards)
batch_size = 512
# Select dataset and index of target
dataset, label_index = 'concrete', 8

# reload models module so edits to it are picked up without restarting the kernel
importlib.reload(models)

# one entry per experiment, each holding one entry per model
final_logs = []
# not read anywhere in this cell; kept in case other cells rely on it
best_log = 0

# set gpu (falls back to cpu when CUDA is not available)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# model classes to compare; defined once because the list is loop-invariant and
# results.resultdf() below needs it even when the experiment loop does not run
model_list = [models.Baseline, models.MCDropout, models.SDENet, models.IVNet]

for experiment in range(n_experiments):
  experiment_logs = []
  # each run uses its own index as seed (overrides the value from the settings cell)
  seed = experiment
  # NOTE(review): `train` rebinds the name that the import cell binds to the
  # project's `train` module.
  train, val, test, target_scale = pipelines.from_git(dataset=dataset,
                                  label_index=label_index,
                                  split=train_split, seed=seed, device=device)
  n_features = train[0].shape[1]
  n_train = train[0].shape[0]
  # builtin int() instead of np.int: the alias was deprecated in NumPy 1.20 and
  # removed in NumPy 1.24, where it raises AttributeError
  steps = int(n_train / batch_size) + 1
  print(f"Experiment {experiment+1}: train sample: {n_train}, batchsize: {batch_size}, steps: {steps}")
  for model in model_list:
    # build the network and its optimizer / scheduler
    net = model(n_features=n_features, n_hidden=n_hidden, tau=tau, n_train=n_train, layer_depth=6, device=device).to(device)
    train_optim, train_sched = net.custom_compile()
    # per-model logs
    train_logs = []
    val_logs_1, val_logs_2 = [], []
    test_logs_1, test_logs_2 = [], []
    for epoch in range(epochs):
      # permutation is handed to load_batch, which is called with shuffle_epoch=False
      permutation = utils.shuffle(train)
      for step in range(steps):
        batch = utils.load_batch(step, train, batch_size, permutation, shuffle_epoch=False)
        train_log = net.train_step(model=net, batch=batch, optim=train_optim,
                                   scheduler=train_sched, train_logs=train_logs, batch_size=batch_size)
        train_logs.append(train_log)
      # evaluate on validation and test splits after every epoch
      val_logs = net.evaluation_step(model=net, test_tuple=val, eval_func=utils.evaluation,
                                     target_scale=target_scale, test_iters=test_iters)
      test_logs = net.evaluation_step(model=net, test_tuple=test, eval_func=utils.evaluation,
                                      target_scale=target_scale, test_iters=test_iters)
      val_logs_1.append(val_logs[0])
      val_logs_2.append(val_logs[1])
      test_logs_1.append(test_logs[0])
      test_logs_2.append(test_logs[1])
    # only store the test logs of the epoch with the lowest second validation metric
    best_epoch = np.argmin(val_logs_2)
    experiment_logs.append([test_logs_1[best_epoch], test_logs_2[best_epoch]])
    utils.log_plots(model_name=net.custom_name, dataset_name=dataset,
                    train_logs=train_logs, val_logs_1=val_logs_1,
                    val_logs_2=val_logs_2, test_logs_1=test_logs_1,
                    test_logs_2=test_logs_2, remove_n=0, log_transform=True)

  final_logs.append(experiment_logs)
# show results
results.resultdf(model_list, final_logs)

In [None]:
# Batch size for this dataset (replaces the value from the settings cell for
# every cell executed afterwards)
batch_size = 256
# Select dataset and index of target
dataset, label_index = 'energy', 8

# reload models module so edits to it are picked up without restarting the kernel
importlib.reload(models)

# one entry per experiment, each holding one entry per model
final_logs = []
# not read anywhere in this cell; kept in case other cells rely on it
best_log = 0

# set gpu (falls back to cpu when CUDA is not available)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# model classes to compare; defined once because the list is loop-invariant and
# results.resultdf() below needs it even when the experiment loop does not run
model_list = [models.Baseline, models.MCDropout, models.SDENet, models.IVNet]

for experiment in range(n_experiments):
  experiment_logs = []
  # each run uses its own index as seed (overrides the value from the settings cell)
  seed = experiment
  # NOTE(review): `train` rebinds the name that the import cell binds to the
  # project's `train` module.
  train, val, test, target_scale = pipelines.from_git(dataset=dataset,
                                  label_index=label_index,
                                  split=train_split, seed=seed, device=device)
  n_features = train[0].shape[1]
  n_train = train[0].shape[0]
  # builtin int() instead of np.int: the alias was deprecated in NumPy 1.20 and
  # removed in NumPy 1.24, where it raises AttributeError
  steps = int(n_train / batch_size) + 1
  print(f"Experiment {experiment+1}: train sample: {n_train}, batchsize: {batch_size}, steps: {steps}")
  for model in model_list:
    # build the network and its optimizer / scheduler
    net = model(n_features=n_features, n_hidden=n_hidden, tau=tau, n_train=n_train, layer_depth=6, device=device).to(device)
    train_optim, train_sched = net.custom_compile()
    # per-model logs
    train_logs = []
    val_logs_1, val_logs_2 = [], []
    test_logs_1, test_logs_2 = [], []
    for epoch in range(epochs):
      # permutation is handed to load_batch, which is called with shuffle_epoch=False
      permutation = utils.shuffle(train)
      for step in range(steps):
        batch = utils.load_batch(step, train, batch_size, permutation, shuffle_epoch=False)
        train_log = net.train_step(model=net, batch=batch, optim=train_optim,
                                   scheduler=train_sched, train_logs=train_logs, batch_size=batch_size)
        train_logs.append(train_log)
      # evaluate on validation and test splits after every epoch
      val_logs = net.evaluation_step(model=net, test_tuple=val, eval_func=utils.evaluation,
                                     target_scale=target_scale, test_iters=test_iters)
      test_logs = net.evaluation_step(model=net, test_tuple=test, eval_func=utils.evaluation,
                                      target_scale=target_scale, test_iters=test_iters)
      val_logs_1.append(val_logs[0])
      val_logs_2.append(val_logs[1])
      test_logs_1.append(test_logs[0])
      test_logs_2.append(test_logs[1])
    # only store the test logs of the epoch with the lowest second validation metric
    best_epoch = np.argmin(val_logs_2)
    experiment_logs.append([test_logs_1[best_epoch], test_logs_2[best_epoch]])
    utils.log_plots(model_name=net.custom_name, dataset_name=dataset,
                    train_logs=train_logs, val_logs_1=val_logs_1,
                    val_logs_2=val_logs_2, test_logs_1=test_logs_1,
                    test_logs_2=test_logs_2, remove_n=0, log_transform=True)

  final_logs.append(experiment_logs)
# show results
results.resultdf(model_list, final_logs)

In [None]:
# Batch size for this dataset (replaces the value from the settings cell for
# every cell executed afterwards)
batch_size = 2048*2
# Select dataset and index of target
dataset, label_index = 'kin8nm', 8

# reload models module so edits to it are picked up without restarting the kernel
importlib.reload(models)

# one entry per experiment, each holding one entry per model
final_logs = []
# not read anywhere in this cell; kept in case other cells rely on it
best_log = 0

# set gpu (falls back to cpu when CUDA is not available)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# model classes to compare; defined once because the list is loop-invariant and
# results.resultdf() below needs it even when the experiment loop does not run
model_list = [models.Baseline, models.MCDropout, models.SDENet, models.IVNet]

for experiment in range(n_experiments):
  experiment_logs = []
  # each run uses its own index as seed (overrides the value from the settings cell)
  seed = experiment
  # NOTE(review): `train` rebinds the name that the import cell binds to the
  # project's `train` module.
  train, val, test, target_scale = pipelines.from_git(dataset=dataset,
                                  label_index=label_index,
                                  split=train_split, seed=seed, device=device)
  n_features = train[0].shape[1]
  n_train = train[0].shape[0]
  # builtin int() instead of np.int: the alias was deprecated in NumPy 1.20 and
  # removed in NumPy 1.24, where it raises AttributeError
  steps = int(n_train / batch_size) + 1
  print(f"Experiment {experiment+1}: train sample: {n_train}, batchsize: {batch_size}, steps: {steps}")
  for model in model_list:
    # build the network and its optimizer / scheduler
    net = model(n_features=n_features, n_hidden=n_hidden, tau=tau, n_train=n_train, layer_depth=6, device=device).to(device)
    train_optim, train_sched = net.custom_compile()
    # per-model logs
    train_logs = []
    val_logs_1, val_logs_2 = [], []
    test_logs_1, test_logs_2 = [], []
    for epoch in range(epochs):
      # permutation is handed to load_batch, which is called with shuffle_epoch=False
      permutation = utils.shuffle(train)
      for step in range(steps):
        batch = utils.load_batch(step, train, batch_size, permutation, shuffle_epoch=False)
        train_log = net.train_step(model=net, batch=batch, optim=train_optim,
                                   scheduler=train_sched, train_logs=train_logs, batch_size=batch_size)
        train_logs.append(train_log)
      # evaluate on validation and test splits after every epoch
      val_logs = net.evaluation_step(model=net, test_tuple=val, eval_func=utils.evaluation,
                                     target_scale=target_scale, test_iters=test_iters)
      test_logs = net.evaluation_step(model=net, test_tuple=test, eval_func=utils.evaluation,
                                      target_scale=target_scale, test_iters=test_iters)
      val_logs_1.append(val_logs[0])
      val_logs_2.append(val_logs[1])
      test_logs_1.append(test_logs[0])
      test_logs_2.append(test_logs[1])
    # only store the test logs of the epoch with the lowest second validation metric
    best_epoch = np.argmin(val_logs_2)
    experiment_logs.append([test_logs_1[best_epoch], test_logs_2[best_epoch]])
    utils.log_plots(model_name=net.custom_name, dataset_name=dataset,
                    train_logs=train_logs, val_logs_1=val_logs_1,
                    val_logs_2=val_logs_2, test_logs_1=test_logs_1,
                    test_logs_2=test_logs_2, remove_n=0, log_transform=True)

  final_logs.append(experiment_logs)
# show results
results.resultdf(model_list, final_logs)

In [None]:
# Batch size for this dataset (replaces the value from the settings cell for
# every cell executed afterwards)
batch_size = 2048*2
# Select dataset and index of target
dataset, label_index = 'power-plant', 4

# reload models module so edits to it are picked up without restarting the kernel
importlib.reload(models)

# one entry per experiment, each holding one entry per model
final_logs = []
# not read anywhere in this cell; kept in case other cells rely on it
best_log = 0

# set gpu (falls back to cpu when CUDA is not available)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# model classes to compare; defined once because the list is loop-invariant and
# results.resultdf() below needs it even when the experiment loop does not run
model_list = [models.Baseline, models.MCDropout, models.SDENet, models.IVNet]

for experiment in range(n_experiments):
  experiment_logs = []
  # each run uses its own index as seed (overrides the value from the settings cell)
  seed = experiment
  # NOTE(review): `train` rebinds the name that the import cell binds to the
  # project's `train` module.
  train, val, test, target_scale = pipelines.from_git(dataset=dataset,
                                  label_index=label_index,
                                  split=train_split, seed=seed, device=device)
  n_features = train[0].shape[1]
  n_train = train[0].shape[0]
  # builtin int() instead of np.int: the alias was deprecated in NumPy 1.20 and
  # removed in NumPy 1.24, where it raises AttributeError
  steps = int(n_train / batch_size) + 1
  print(f"Experiment {experiment+1}: train sample: {n_train}, batchsize: {batch_size}, steps: {steps}")
  for model in model_list:
    # build the network and its optimizer / scheduler
    net = model(n_features=n_features, n_hidden=n_hidden, tau=tau, n_train=n_train, layer_depth=6, device=device).to(device)
    train_optim, train_sched = net.custom_compile()
    # per-model logs
    train_logs = []
    val_logs_1, val_logs_2 = [], []
    test_logs_1, test_logs_2 = [], []
    for epoch in range(epochs):
      # permutation is handed to load_batch, which is called with shuffle_epoch=False
      permutation = utils.shuffle(train)
      for step in range(steps):
        batch = utils.load_batch(step, train, batch_size, permutation, shuffle_epoch=False)
        train_log = net.train_step(model=net, batch=batch, optim=train_optim,
                                   scheduler=train_sched, train_logs=train_logs, batch_size=batch_size)
        train_logs.append(train_log)
      # evaluate on validation and test splits after every epoch
      val_logs = net.evaluation_step(model=net, test_tuple=val, eval_func=utils.evaluation,
                                     target_scale=target_scale, test_iters=test_iters)
      test_logs = net.evaluation_step(model=net, test_tuple=test, eval_func=utils.evaluation,
                                      target_scale=target_scale, test_iters=test_iters)
      val_logs_1.append(val_logs[0])
      val_logs_2.append(val_logs[1])
      test_logs_1.append(test_logs[0])
      test_logs_2.append(test_logs[1])
    # only store the test logs of the epoch with the lowest second validation metric
    best_epoch = np.argmin(val_logs_2)
    experiment_logs.append([test_logs_1[best_epoch], test_logs_2[best_epoch]])
    utils.log_plots(model_name=net.custom_name, dataset_name=dataset,
                    train_logs=train_logs, val_logs_1=val_logs_1,
                    val_logs_2=val_logs_2, test_logs_1=test_logs_1,
                    test_logs_2=test_logs_2, remove_n=0, log_transform=True)

  final_logs.append(experiment_logs)
# show results
results.resultdf(model_list, final_logs)

In [None]:
# Batch size for this dataset (replaces the value from the settings cell for
# every cell executed afterwards)
batch_size = 2048*2
# Select dataset and index of target
dataset, label_index = 'protein-tertiary-structure', 9

# reload models module so edits to it are picked up without restarting the kernel
importlib.reload(models)

# one entry per experiment, each holding one entry per model
final_logs = []
# not read anywhere in this cell; kept in case other cells rely on it
best_log = 0

# set gpu (falls back to cpu when CUDA is not available)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# model classes to compare; defined once because the list is loop-invariant and
# results.resultdf() below needs it even when the experiment loop does not run
model_list = [models.Baseline, models.MCDropout, models.SDENet, models.IVNet]

for experiment in range(n_experiments):
  experiment_logs = []
  # each run uses its own index as seed (overrides the value from the settings cell)
  seed = experiment
  # NOTE(review): `train` rebinds the name that the import cell binds to the
  # project's `train` module.
  train, val, test, target_scale = pipelines.from_git(dataset=dataset,
                                  label_index=label_index,
                                  split=train_split, seed=seed, device=device)
  n_features = train[0].shape[1]
  n_train = train[0].shape[0]
  # builtin int() instead of np.int: the alias was deprecated in NumPy 1.20 and
  # removed in NumPy 1.24, where it raises AttributeError
  steps = int(n_train / batch_size) + 1
  print(f"Experiment {experiment+1}: train sample: {n_train}, batchsize: {batch_size}, steps: {steps}")
  for model in model_list:
    # build the network and its optimizer / scheduler
    net = model(n_features=n_features, n_hidden=n_hidden, tau=tau, n_train=n_train, layer_depth=6, device=device).to(device)
    train_optim, train_sched = net.custom_compile()
    # per-model logs
    train_logs = []
    val_logs_1, val_logs_2 = [], []
    test_logs_1, test_logs_2 = [], []
    for epoch in range(epochs):
      # permutation is handed to load_batch, which is called with shuffle_epoch=False
      permutation = utils.shuffle(train)
      for step in range(steps):
        batch = utils.load_batch(step, train, batch_size, permutation, shuffle_epoch=False)
        train_log = net.train_step(model=net, batch=batch, optim=train_optim,
                                   scheduler=train_sched, train_logs=train_logs, batch_size=batch_size)
        train_logs.append(train_log)
      # evaluate on validation and test splits after every epoch
      val_logs = net.evaluation_step(model=net, test_tuple=val, eval_func=utils.evaluation,
                                     target_scale=target_scale, test_iters=test_iters)
      test_logs = net.evaluation_step(model=net, test_tuple=test, eval_func=utils.evaluation,
                                      target_scale=target_scale, test_iters=test_iters)
      val_logs_1.append(val_logs[0])
      val_logs_2.append(val_logs[1])
      test_logs_1.append(test_logs[0])
      test_logs_2.append(test_logs[1])
    # only store the test logs of the epoch with the lowest second validation metric
    best_epoch = np.argmin(val_logs_2)
    experiment_logs.append([test_logs_1[best_epoch], test_logs_2[best_epoch]])
    utils.log_plots(model_name=net.custom_name, dataset_name=dataset,
                    train_logs=train_logs, val_logs_1=val_logs_1,
                    val_logs_2=val_logs_2, test_logs_1=test_logs_1,
                    test_logs_2=test_logs_2, remove_n=0, log_transform=True)

  final_logs.append(experiment_logs)
# show results
results.resultdf(model_list, final_logs)

In [None]:
# Batch size for this dataset (replaces whatever value earlier cells left behind)
batch_size = 128
# Select dataset and index of target
dataset, label_index = 'yacht', 6

# reload models module so edits to it are picked up without restarting the kernel
importlib.reload(models)

# one entry per experiment, each holding one entry per model
final_logs = []
# not read anywhere in this cell; kept in case other cells rely on it
best_log = 0

# set gpu (falls back to cpu when CUDA is not available)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# model classes to compare; defined once because the list is loop-invariant and
# results.resultdf() below needs it even when the experiment loop does not run
model_list = [models.Baseline, models.MCDropout, models.SDENet, models.IVNet]

for experiment in range(n_experiments):
  experiment_logs = []
  # each run uses its own index as seed (overrides the value from the settings cell)
  seed = experiment
  # NOTE(review): `train` rebinds the name that the import cell binds to the
  # project's `train` module.
  train, val, test, target_scale = pipelines.from_git(dataset=dataset,
                                  label_index=label_index,
                                  split=train_split, seed=seed, device=device)
  n_features = train[0].shape[1]
  n_train = train[0].shape[0]
  # builtin int() instead of np.int: the alias was deprecated in NumPy 1.20 and
  # removed in NumPy 1.24, where it raises AttributeError
  steps = int(n_train / batch_size) + 1
  print(f"Experiment {experiment+1}: train sample: {n_train}, batchsize: {batch_size}, steps: {steps}")
  for model in model_list:
    # build the network and its optimizer / scheduler
    net = model(n_features=n_features, n_hidden=n_hidden, tau=tau, n_train=n_train, layer_depth=6, device=device).to(device)
    train_optim, train_sched = net.custom_compile()
    # per-model logs
    train_logs = []
    val_logs_1, val_logs_2 = [], []
    test_logs_1, test_logs_2 = [], []
    for epoch in range(epochs):
      # permutation is handed to load_batch, which is called with shuffle_epoch=False
      permutation = utils.shuffle(train)
      for step in range(steps):
        batch = utils.load_batch(step, train, batch_size, permutation, shuffle_epoch=False)
        train_log = net.train_step(model=net, batch=batch, optim=train_optim,
                                   scheduler=train_sched, train_logs=train_logs, batch_size=batch_size)
        train_logs.append(train_log)
      # evaluate on validation and test splits after every epoch
      val_logs = net.evaluation_step(model=net, test_tuple=val, eval_func=utils.evaluation,
                                     target_scale=target_scale, test_iters=test_iters)
      test_logs = net.evaluation_step(model=net, test_tuple=test, eval_func=utils.evaluation,
                                      target_scale=target_scale, test_iters=test_iters)
      val_logs_1.append(val_logs[0])
      val_logs_2.append(val_logs[1])
      test_logs_1.append(test_logs[0])
      test_logs_2.append(test_logs[1])
    # only store the test logs of the epoch with the lowest second validation metric
    best_epoch = np.argmin(val_logs_2)
    experiment_logs.append([test_logs_1[best_epoch], test_logs_2[best_epoch]])
    utils.log_plots(model_name=net.custom_name, dataset_name=dataset,
                    train_logs=train_logs, val_logs_1=val_logs_1,
                    val_logs_2=val_logs_2, test_logs_1=test_logs_1,
                    test_logs_2=test_logs_2, remove_n=0, log_transform=True)

  final_logs.append(experiment_logs)
# show results
results.resultdf(model_list, final_logs)

In [None]:
# variable-annuity

In [None]:
# claims frequency modelling

url = '/content/drive/My Drive/Research/Data/IP/FrenchMotor/Freq.csv'

data = pd.read_csv(url)

# TODO: remove rows with ClaimNb > 4 and Exposure > 1 (not implemented yet)

# features: every column except the first four
X_raw = data.drop(data.columns[[0,1,2,3]], axis=1)

# alternative target noted by the author: data['ClaimNb'] / data['Exposure']
# `y_df` was referenced below without ever being assigned (NameError on a fresh
# kernel); bind it to the claim-count column the cell was already selecting.
y_df = data['ClaimNb']
# NOTE(review): exponentiating a claim count is unusual for a target - confirm
# this transform is intended.
y_raw = np.exp(np.array(y_df, dtype=np.float32))

In [None]:
# Preview the first five rows of the loaded frame (head() defaults to n=5).
data.head()

In [None]:
# Display the exponentiated claim counts. Reads the column straight from `data`
# so the cell does not depend on `y_df`, which may not exist on a fresh kernel.
np.exp(np.array(data['ClaimNb'], dtype=np.float32))

In [None]:
# Preview the feature frame. The loading cell names it `X_raw`; `X_df` is not
# assigned in any visible cell and raised NameError on a fresh kernel.
# NOTE(review): confirm `X_raw` is the frame this preview was meant to show.
X_raw.head()