In [1]:
import numpy as np
import pickle 
import sys, os
sys.path.append("..")
from logger import Logger
from cva_model import CvaModel, CvaModelAlt
from cavia_model import CaviaModel
from maml_model import MamlModel
import matplotlib.pyplot as plt
import pandas as pd
from tabulate import tabulate

In [2]:
def get_logger(pth):
    with open(pth, 'rb') as f:
        logger = pickle.load(f)
    logger.train_loss = np.asarray(logger.train_loss)
    logger.train_conf = np.asarray(logger.train_conf)

    logger.valid_loss = np.asarray(logger.valid_loss)
    logger.valid_conf = np.asarray(logger.valid_conf)

    logger.test_loss = np.asarray(logger.test_loss)
    logger.test_conf = np.asarray(logger.test_conf)

    logger.elapsed_time = np.asarray(logger.elapsed_time)
    
    return logger

# Performance with a plethora of regularization techniques

In [3]:
root_dir = '.'
col_prefs = [
    'test',
    'train',
    'ratio',
    'nf', 
    'lri', 
    'lrm', 
    'lrs', 
    'lrmd', 
    'lred', 
    'tpu', 
    'neu', 
    'dop', 
    're', 
    'red', 
    'ntc', 
    'npc'
]
col_names = [
    'test_loss',
    'train loss',
    'loss ratio',
    'num funcs',
    'emb lr',
    'model lr',
    'lr schedule',
    'model lr decay',
    'emb lr decay',
    'task/update',
    'num update/eval',
    'dropout',
    'reset freq',
    'reset decay',
    'tasks checked',
    'points checked'
]
col_names_compact = [
    'test_loss',
    'train_loss',
    'loss_diff',
    'nf_lri_lrm_lrs_lrmd_lred_tpu_neu_dop_re_red_ntc_npc'
]
data = []
for directory, subdirectories, files in os.walk('../cva_sine_result_files'):
    if directory is not '../cva_sine_result_files':
        continue
    print(f'Found {len(files)} files')
    for file in files:
        # 1) get the performance of the model
        logger = get_logger(os.path.join(directory, file))
        # get best test performance, best training performance, and ratio of test performance /|train performance - testperformance|
        idx = np.argmin(logger.test_loss)
        test_loss = logger.test_loss[idx]
        train_loss = logger.train_loss[idx]
        loss_diff = np.abs(train_loss - test_loss)
        params = []
        params.append(test_loss)
        params.append(train_loss)
        params.append(loss_diff)
        
        params.append('')
        # get metadata
        f = file.split("_")
        for i in range(14):
            if i > 0:
                if i >= len(f):
                    params[-1] = params[-1] + '_-'
                    #params.append('-')
                else:
                    params[-1] = params[-1] + '_' + ''.join(filter(str.isdigit, f[i]))
        data.append(params)

print(f'finished processing {len(data)} files')
df = pd.DataFrame(data, columns=col_names_compact, dtype=str) 
df = df.astype({'test_loss': 'float', 'train_loss': 'float', 'loss_diff': 'float'})
print(tabulate(df, headers='keys', tablefmt='psql'))

Found 872 files
finished processing 872 files
+-----+-------------+--------------+-------------+-------------------------------------------------------+
|     |   test_loss |   train_loss |   loss_diff | nf_lri_lrm_lrs_lrmd_lred_tpu_neu_dop_re_red_ntc_npc   |
|-----+-------------+--------------+-------------+-------------------------------------------------------|
|   0 |    0.40582  |    0.0265078 | 0.379312    | _100_0001_0001_100_099_09_5_40_0_1_2_-_-              |
|   1 |    0.311457 |    0.300651  | 0.0108056   | _100_0001_0001_5000_099_09_1_100_0_1000_2_-_-         |
|   2 |    0.263857 |    0.286373  | 0.0225163   | _100_0001_0001_100_09_09_10_100_02_1_10_-_-           |
|   3 |    0.380441 |    0.432641  | 0.0521996   | _100_0001_0001_100_099_099_1_100_02_1000_1_-_-        |
|   4 |    0.426715 |    0.0267186 | 0.399997    | _100_0001_0001_100_099_099_5_40_0_1_10_-_-            |
|   5 |    0.173445 |    0.103214  | 0.0702312   | _100_0001_0001_5000_09_099_10_40_02_1000_2_-_- 

In [4]:
df.describe()

Unnamed: 0,test_loss,train_loss,loss_diff
count,872.0,872.0,872.0
mean,0.376191,0.247108,0.156537
std,0.176316,0.210182,0.125804
min,0.154804,0.012255,3e-05
25%,0.266875,0.081897,0.049127
50%,0.346415,0.208334,0.123291
75%,0.414078,0.35117,0.2474
max,1.157221,1.187858,0.547068


In [5]:
performers = df[df['test_loss'] < 0.2]
print(tabulate(performers, headers='keys', tablefmt='psql'))

+-----+-------------+--------------+-------------+-------------------------------------------------------+
|     |   test_loss |   train_loss |   loss_diff | nf_lri_lrm_lrs_lrmd_lred_tpu_neu_dop_re_red_ntc_npc   |
|-----+-------------+--------------+-------------+-------------------------------------------------------|
|   5 |    0.173445 |    0.103214  | 0.0702312   | _100_0001_0001_5000_09_099_10_40_02_1000_2_-_-        |
|  24 |    0.198684 |    0.0946717 | 0.104012    | _100_0001_0001_5000_099_09_10_40_02_1_10_-_-          |
|  32 |    0.168049 |    0.222106  | 0.054057    | _100_0001_0001_5000_09_099_10_100_0_100_1_-_-         |
|  35 |    0.164331 |    0.112283  | 0.0520485   | _100_0001_0001_5000_099_099_10_40_02_100_2_-_-        |
|  45 |    0.170345 |    0.114353  | 0.0559916   | _100_0001_0001_100_099_099_10_40_02_100_2_-_-         |
|  56 |    0.194925 |    0.19249   | 0.00243552  | _100_0001_0001_100_099_099_10_100_02_100_10_-_-       |
|  63 |    0.197792 |    0.0946805 | 

In [6]:
best_performers = performers[performers['loss_diff'] < 0.01]
print(tabulate(best_performers, headers='keys', tablefmt='psql'))

+-----+-------------+--------------+-------------+-------------------------------------------------------+
|     |   test_loss |   train_loss |   loss_diff | nf_lri_lrm_lrs_lrmd_lred_tpu_neu_dop_re_red_ntc_npc   |
|-----+-------------+--------------+-------------+-------------------------------------------------------|
|  56 |    0.194925 |     0.19249  | 0.00243552  | _100_0001_0001_100_099_099_10_100_02_100_10_-_-       |
| 152 |    0.199872 |     0.200004 | 0.000132013 | _100_0001_0001_100_099_09_10_40_02_100_2_-_-          |
| 183 |    0.188739 |     0.190101 | 0.00136214  | _100_0001_0001_5000_099_09_10_100_02_100_10_-_-       |
| 191 |    0.192652 |     0.195041 | 0.00238824  | _100_0001_0001_100_099_099_10_100_02_1000_2_-_-       |
| 249 |    0.19217  |     0.200123 | 0.00795259  | _100_0001_0001_100_099_09_10_100_02_100_10_-_-        |
| 481 |    0.189812 |     0.190112 | 0.00030011  | _100_0001_0001_5000_09_099_10_100_02_100_2_-_-        |
| 551 |    0.181193 |     0.187484 | 

# Performance with embedding inserted into different locations of model

In [7]:
col_prefs = [
    'test',
    'train',
    'ratio',
    'nf', 
    'lri', 
    'lrm', 
    'lrs', 
    'lrmd', 
    'lred', 
    'tpu', 
    'neu', 
    'dop', 
    're', 
    'red', 
    'ntc', 
    'npc'
]
col_names = [
    'test_loss',
    'train loss',
    'loss ratio',
    'num funcs',
    'emb lr',
    'model lr',
    'lr schedule',
    'model lr decay',
    'emb lr decay',
    'task/update',
    'num update/eval',
    'dropout',
    'reset freq',
    'reset decay',
    'tasks checked',
    'points checked'
]
col_names_compact = [
    'test_loss',
    'train_loss',
    'loss_diff',
    'nf_lri_lrm_lrs_lrmd_lred_tpu_neu_dop_re_red_ntc_npc'
]
data = []
for directory, subdirectories, files in os.walk('../cva_sine_result_files/embeddingmoved/'):
#     if directory is not '../cva_sine_result_files':
#         continue
    print(f'Found {len(files)} files')
    for file in files:
        # 1) get the performance of the model
        logger = get_logger(os.path.join(directory, file))
        # get best test performance, best training performance, and ratio of test performance /|train performance - testperformance|
        idx = np.argmin(logger.test_loss)
        test_loss = logger.test_loss[idx]
        train_loss = logger.train_loss[idx]
        loss_diff = np.abs(train_loss - test_loss)
        params = []
        params.append(test_loss)
        params.append(train_loss)
        params.append(loss_diff)
        
        params.append('')
        # get metadata
        f = file.split("_")
        for i in range(14):
            if i > 0:
                if i >= len(f):
                    params[-1] = params[-1] + '_-'
                    #params.append('-')
                else:
                    params[-1] = params[-1] + '_' + ''.join(filter(str.isdigit, f[i]))
        data.append(params)

print(f'finished processing {len(data)} files')
df = pd.DataFrame(data, columns=col_names_compact, dtype=str) 
df = df.astype({'test_loss': 'float', 'train_loss': 'float', 'loss_diff': 'float'})
print(tabulate(df, headers='keys', tablefmt='psql'))

Found 1596 files


RuntimeError: Attempting to deserialize object on CUDA device 2 but torch.cuda.device_count() is 1. Please use torch.load with map_location to map your storages to an existing device.