In [None]:
import os
import math
import random
import pandas as pd
import numpy as np
import datetime
import time
import datetime
import copy

from src.utility import *
from src.MTH.MultiTaskHypernetwork import *
from src.MTL.MultiTaskLearning import *

### Run Multi-Task Hypernetwork experiment

In [None]:
##### Save results DataFrame to CSV #####
def save_df(results_df):
    """Write one checkpoint CSV per dataset and drop the checkpoint it supersedes.

    Reads the notebook-level globals ``data_params``, ``params``, ``param_loader``
    and ``t0``. For every dataset name the matching rows of ``results_df`` are
    written to ``./results/`` under

        <algo_name>_<data_name>___<t0 timestamp>___seeds<min>-<max>.<tuned cols>.csv

    where the tuned columns are ``param_loader.iter_param_keys`` minus
    ``data_name``, ``lr`` and ``seed``. When more than one seed is covered, the
    file named for the range ending one seed earlier is removed if it exists.

    Parameters
    ----------
    results_df : pandas.DataFrame
        Accumulated results; must contain ``seed`` and ``data_name`` columns.
        An empty frame (or one without a ``seed`` column) is ignored.

    Returns
    -------
    None
    """
    # Nothing recorded yet: there is no seed range to name the file after.
    if len(results_df) == 0 or "seed" not in results_df.columns:
        return

    results_dir = r"./results/"
    os.makedirs(results_dir, exist_ok=True)

    data_names = data_params["data_name"] if isinstance(data_params["data_name"], list) else [data_params["data_name"]]

    # These do not depend on the dataset, so compute them once.
    tune_cols = [col for col in param_loader.iter_param_keys if col not in ["data_name", "lr", "seed"]]
    tune_suffix = ".".join(tune_cols)
    first_seed = int(results_df["seed"].min())
    last_seed = int(results_df["seed"].max())
    timestamp = t0.strftime("%y.%m.%d_%H.%M.%S")
    name_template = r"{}_{}___{}___seeds{:d}-{:d}.{}.csv"

    for data_name in data_names:
        # Create new csv
        filename = name_template.format(
            params["algo_name"], data_name, timestamp, first_seed, last_seed, tune_suffix
        )
        results_df[results_df["data_name"] == data_name].to_csv(results_dir + filename, index=False)
        print(filename)

        # Delete old csv (the checkpoint covering one seed fewer), if there is one.
        if last_seed > first_seed:
            old_filename = name_template.format(
                params["algo_name"], data_name, timestamp, first_seed, last_seed - 1, tune_suffix
            )
            old_path = results_dir + old_filename
            # The previous checkpoint may never have been written (e.g. several
            # seeds completed between two saves), so a missing file is not an error.
            if os.path.exists(old_path):
                os.remove(old_path)

In [None]:
##### Data parameters #####
data_params = {}
data_params["data_name"] = ["Cubic"]
data_params["num_tasks"] = 20
data_params["num_train_per_task"] = 10

# Load the first dataset only to read the feature/target widths used to size
# the target network below, then release it.
data_tmp = get_data(data_params["data_name"][0])
input_dim = data_tmp["X_train"].shape[1]
output_dim = data_tmp["y_train"].shape[1]
del data_tmp

##### Model parameters #####
params = {}

params["algo_name"] = "HyperNet"

# Presumably list-valued entries are the ones swept by param_iterator - verify against src.utility.
params["lr"] = [10**p for p in [-3, -3.5, -4, -4.5]]
params["seed"] = [*range(0, 50)]

params["use_metadata"] = [True, False]

# Hypernetwork architecture parameters
params["embedding_dim"] = [10]
params["hidden_dim"] = [32]
params["hyper_extractor_layers"] = [0, 1, 2]

# Target network architecture
params["target_arch"] = [[
    {"type": "linear", "params": [input_dim, 32]},
    {"type": "relu"},
    {"type": "linear", "params": [32, 32]},
    {"type": "relu"},
    {"type": "linear", "params": [32, 32]},
    {"type": "relu"},
    {"type": "linear", "params": [32, output_dim]}
]]

params["verbose"] = True


##### Run experiments
# t0 is read by save_df to timestamp the checkpoint filenames.
t0 = datetime.datetime.now()
results_df = pd.DataFrame()
# Rows are collected in a list and turned into a DataFrame in one go:
# DataFrame.append was removed in pandas 2.0 and copies the frame on every call.
result_rows = []

param_loader = param_iterator(params, data_params)
# Checkpoint once per "seed's worth" of combinations.
save_every = max(1, param_loader.num_combinations // len(params["seed"]))
for i in range(param_loader.num_combinations):

    # Get next param combination
    p, d_p = param_loader.next()

    # Train model
    model = train_HyperNet(p, d_p)

    # Record results (skipped when the trained model exposes no metrics)
    if hasattr(model, "metrics"):
        results = model.metrics.copy()
        for key in d_p:
            results[key] = d_p[key]
        result_rows.append(results)

    # Save results dataframe; skip while there is nothing to write
    if (i+1) % save_every == 0 and result_rows:
        results_df = pd.DataFrame(result_rows)
        save_df(results_df)

# Make sure results_df reflects every recorded run once the sweep is over.
results_df = pd.DataFrame(result_rows)
    

### Display results

In [None]:
# Read results DataFrame
filename = "HyperNet_Cubic___23.01.26_12.34.44___seeds0-49.use_metadata.hyper_extractor_layers.csv"
results_df_cp = pd.read_csv("./results/" + filename)

# Variables of interest
cols_to_agg = [col for col in results_df_cp.columns if col.startswith("acc_")]
# The last "___" segment of the filename is "seeds<a>-<b>.<tuned col>. ... .csv"
name_parts = filename.split("___")[-1].split(".")
tune_cols = name_parts[1:-1]
groupby_cols = tune_cols + ["lr"]
seeds = name_parts[0][5:].split("-")        # strip the leading "seeds"
num_seeds = int(seeds[1]) - int(seeds[0]) + 1
sf = 3                                      # decimals shown in the table

# Aggregate results: per-configuration mean and standard error across seeds.
results_df_cp[[c + "_mean" for c in cols_to_agg]] = results_df_cp.groupby(groupby_cols)[cols_to_agg].transform("mean")
# Standard error of the mean = sample std (ddof=1) / sqrt(n). "sem" uses the
# number of runs actually present in each group, so a configuration with
# missing seeds is still handled correctly.
results_df_cp[[c + "_se" for c in cols_to_agg]] = results_df_cp.groupby(groupby_cols)[cols_to_agg].transform("sem")

# Display DataFrame
display_cols = [col + agg for col in ["acc_test", "acc_vali", "acc_train"] for agg in ["_mean", "_se"]]
display_cols += ["num_epochs"]
# numeric_only=True: text columns such as data_name cannot be averaged and
# would raise on pandas >= 2.0.
display_df = results_df_cp.groupby(groupby_cols).mean(numeric_only=True)

print(filename)
# For each tuned configuration keep the learning rate with the smallest acc_vali_mean.
optimal_df = display_df[display_df['acc_vali_mean'] == display_df.groupby(tune_cols)['acc_vali_mean'].transform("min")]
display(optimal_df.sort_values("acc_vali_mean").round(sf)[display_cols])

### Run Multi-Task Learning Baseline experiments

In [None]:
 # Data parameters
data_params = {}
data_params["data_name"] = ["Algorithms"]
data_params["STL_metadata"] = [True,False]
data_params["STL_onehot"] = [False]
data_params["num_tasks"] = "all"
data_params["num_train_per_task"] = "all"

# Model parameters
params = {}

# CHANGE algo_name to set algorithm: "STL" "MTL" "MRN" "TF" "CS" "Sluice"
params["algo_name"] = "TF"
params["lr"] = [10**p for p in [-3, -3.5, -4, -4.5]]
params["vali_epoch_freq"] = 5
params["vali_epoch_delay"] = 20

params["batch_size"] = 64
params["batch_shuffle"] = True
params["normalise_task_training"] = False
params["max_epochs"] = 2000

# Network architecture: one [first list, second list] pair per algorithm.
# NOTE(review): the two lists look like shared vs. task-specific layer widths - confirm against train_MTL.
out_dim = get_data(data_params["data_name"][0])["y_train"].shape[1]
arch_by_algo = {
    "STL": [[[32,32,32,out_dim],[]]],
    "MTL": [[[32,32,32],[out_dim]]],
    "MRN": [[[32,32,32],[out_dim]]],
    "TF": [[[],[32,32,32,out_dim]]],
    "CS": [[[],[32,32,32,out_dim]]],
}
# NOTE(review): no architecture is defined for "Sluice" - confirm whether train_MTL needs one.
if params["algo_name"] in arch_by_algo:
    params["arch"] = arch_by_algo[params["algo_name"]]

# Determine which method is being used, from algo name variable
params["is_MRN"] = params["algo_name"] == "MRN"
params["is_TF"] = params["algo_name"] == "TF"

# Specific parameters - Multilinear Network
if params["is_MRN"]:
    params["MRN_weight"] = [10**p for p in [-2, -3, -4, -5]] + [0]
    params["MRN_feat_k"] = [1, 0.1, 0.01]

# Specific parameters - Tensor Factorisation method (DMTRL)
if params["is_TF"]:
    params["TF_method"] = ["Tucker", "TT"]
    params["TF_k"] = [2,4,8,16]

# Specific parameters - Cross-stitch Network, Sluice networks respectively.
# Both flags follow algo_name like is_MRN / is_TF (they used to be hard-coded
# to False while the "CS"/"Sluice" comparisons overwrote is_MRN).
params["is_CS"] = params["algo_name"] == "CS"
params["is_sluice"] = params["algo_name"] == "Sluice"
if params["is_sluice"]:
    params["sluice_num_subspaces"] = 2
    params["sluice_alpha_init"] = ["imbalanced", "balanced"]
    params["sluice_beta_init"] = "imbalanced"
    params["sluice_orthogonal_loss_coef"] = [1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0]

params["is_classification"] = False
params["loss_func_name"] = "mse"
params["record_task_sizes"] = True
params["verbose"] = False
params["plot"] = False

params["seed"] = [*range(0,50)]

t = time.time()
# save_df stamps filenames with the global t0; set it here so this run does not
# reuse the timestamp of an earlier experiment (or fail on a fresh kernel).
t0 = datetime.datetime.now()

results_df = pd.DataFrame()
# Rows are collected in a list and turned into a DataFrame in one go:
# DataFrame.append was removed in pandas 2.0 and copies the frame on every call.
result_rows = []

# NOTE(review): argument order is (data_params, params) here but (params, data_params)
# in the HyperNet cell; next() is unpacked in the matching order in both places.
param_loader = param_iterator(data_params, params)
# Checkpoint once per "seed's worth" of combinations.
save_every = max(1, param_loader.num_combinations // len(params["seed"]))
for i in range(param_loader.num_combinations):
    d_p, p = param_loader.next()

    MTL = train_MTL(p, d_p)

    results = MTL.metrics.copy()
    for key in d_p:
        results[key] = d_p[key]
    result_rows.append(results)

    # Save results dataframe
    if (i+1) % save_every == 0:
        results_df = pd.DataFrame(result_rows)
        save_df(results_df)

# Make sure results_df reflects every recorded run once the sweep is over.
results_df = pd.DataFrame(result_rows)

# Total wall-clock time of the sweep, in seconds.
print(time.time() - t)