In [20]:
%pip install anytree

Note: you may need to restart the kernel to use updated packages.


In [21]:
import torch
import pickle
from pathlib import Path
import os
import pprint
from anytree import Node, RenderTree
import numpy as np

For future iterations of this I might change the scheme to a deep directory based approach.    
Then iterations would be named `1.pickle`, `2.pickle` and be stored in deep directories like:    
`"Experiment_name/config1/config2/config3/config4/1.pickle"`    
This way I could just split the name across the folder separator and have all the setting values. The only other thing I would need is a list that gives me the correct setting-value assignment, which sounds a lot easier.

# Helper functions

In [22]:
def unpickle(filepath : str):
    """
    Load and return the object stored in the pickle file at `filepath`.

    filepath : path of the pickle file (anything `open` accepts)

    The file is opened in a `with` block so the handle is closed again even
    when `pickle.load` raises, e.g. for a truncated or corrupted file.

    NOTE: unpickling can execute arbitrary code, so only use this on result
    files that come from a trusted source.
    """
    with open(filepath, "rb") as file_object:
        return pickle.load(file_object)

In [23]:
def extract_setting_from_name(name :str, naming_schema : list):
    """
    Assign the underscore-separated parts of `name` to the setting names in
    `naming_schema`, position by position.

    Returns a dict {setting name: value string}. If `name` and `naming_schema`
    differ in length, the surplus entries of the longer one are ignored.
    """
    # Main assumption for the naming schema: parameters are strictly separated by underscores '_'
    values = name.split("_")
    return {setting: value for value, setting in zip(values, naming_schema)}
    

In [24]:
def load_results(directory, experiment_name, naming_schema=None):
    """
    Load every `*.pickle` file found one level below `directory/experiment_name`.

    Expected layout: `directory/experiment_name/<run directory>/<file>.pickle`

    directory       : folder that contains the experiment folders
    experiment_name : name of the experiment folder inside `directory`
    naming_schema   : optional list of setting names. If given, the name of
                      each run directory is split with `extract_setting_from_name`
                      and the result is stored under 'attributes'.

    Returns a list with one dict per successfully loaded file, ordered by run
    directory and then by file name: `{'results': ...}` without a naming schema,
    `{'attributes': ..., 'results': ...}` with one. The attributes always belong
    to the directory the file was found in, also when a run directory contains
    no or several pickle files.

    Files that cannot be unpickled are reported and skipped.

    Raises FileNotFoundError if `directory` does not exist and KeyError if it
    contains no folder called `experiment_name`.
    """
    root = Path(directory)
    if not root.is_dir():
        raise FileNotFoundError(f"Result directory not found: {root}")

    # Address the experiment folder directly instead of comparing path strings,
    # so equivalent spellings like 'results', 'results/' and './results' all work.
    experiment_dir = root / experiment_name
    if not experiment_dir.is_dir():
        # KeyError is what the former dictionary lookup raised for unknown experiments
        raise KeyError(str(experiment_dir))

    relevant_subdirs = sorted(e for e in experiment_dir.iterdir() if e.is_dir())

    results = []
    for subdir in relevant_subdirs:
        attributes = None
        if naming_schema is not None:
            attributes = extract_setting_from_name(subdir.name, naming_schema)
        for pick in sorted(subdir.glob("*.pickle")):
            try:
                unpickled_stuff = unpickle(pick)
            except Exception as error:
                # Sometimes there were unknown issues with the pickle files, in those instances we re-ran training
                print(f"Catastrophic failure while loading {pick}: {error!r}")
                continue
            if attributes is None:
                results.append({'results': unpickled_stuff})
            else:
                # Copy, so entries from the same run directory don't share one dict
                results.append({'attributes': dict(attributes), 'results': unpickled_stuff})
    return results


In [25]:
def deep_dict_get(data : dict, path : str):
    """
    Follow the slash ('/') separated `path` of keys down into the nested
    dictionary `data` and return the entry found at its end.

    Empty path segments are skipped, e.g. '/a/b' is treated like 'a/b'.
    """
    current = data.copy()
    # Dropping empty segments catches the leading '/' in tree printing
    keys = [segment for segment in path.split("/") if segment != '']
    for key in keys:
        current = current[key]
    return current

In [26]:
def compare_values(d1 : dict, d2 : dict, path : str):
    """
    Look up `path` in both dictionaries and compare the two entries.

    Entries of different type are never equal (returns False); otherwise the
    result of `==` on the two entries is returned.
    """
    first = deep_dict_get(d1, path)
    second = deep_dict_get(d2, path)
    # Sanity check 1: only entries of the same type are compared
    if type(first) == type(second):
        # This can potentially cause errors when comparing lists of lists (or Tensors/Arrays)
        return first == second
    return False

In [27]:
def create_common_root_list(root : str, values : list):
    """
    Return one path string '<root>/<value>' for every entry of `values`.
    """
    prefixed = []
    for val in values:
        prefixed.append(f"{root}/{val}")
    return prefixed

# Main functions    
What features do I need?    
- ~~Show a structure tree of the results (i.e. experiment settings and result values)~~
- ~~Filtering by setting, given a key~~
- ~~Filtering by values, given a key~~
- Create selected statistics (mean/med/std/quartiles/...) for certain values/keys
- Apply a function to certain values/keys and return the results (e.g. Eigendecomposition/Normalization/...)
- ~~Group all entries that share settings/values~~
- 

In [28]:
def generate_tree_structure(data, parent=None):
    """
    Build an anytree `Node` tree that mirrors the keys of the nested
    dictionary `data` and return its top node.

    data   : (nested) dictionary
    parent : node the keys of `data` are attached to. If None, a new top node
             is created: named after the only key if `data` has exactly one
             key (the tree then continues below that key), otherwise "root".
    """
    if parent is None:
        top_level_keys = list(data.keys())
        if len(top_level_keys) == 1:
            only_key = top_level_keys[0]
            parent = Node(only_key)
            data = data[only_key]
        else:
            parent = Node("root")

    # Every key becomes a node; dictionaries are followed recursively,
    # everything else stays a leaf
    for entry in data:
        goes_deeper = type(data[entry]) == dict
        node = Node(entry, parent=parent)
        if goes_deeper:
            generate_tree_structure(data[entry], parent=node)
    return parent
    

In [29]:
def group_by(list_of_data : list, path, value=None):
    """
    Group the entries of `list_of_data` by the value found at `path`.

    list_of_data : list of (nested) dictionaries
    path         : slash separated key path, see `deep_dict_get`
    value        : If None, make subgroups of equal values.
                   Otherwise return a single group where value is matched

    Returns a dict that maps the label "<path> = <value>" to the list of
    matching entries. Without `value`, entries land in the same group when
    their values produce the same label. With `value`, the single group may
    be empty.
    """
    grouped_data = {}

    if value is not None:
        grouped_data[f"{path} = {value}"] = [data for data in list_of_data if deep_dict_get(data, path) == value]
    else:
        for data in list_of_data:
            found_value = deep_dict_get(data, path)
            # setdefault creates the group on first use; a plain
            # `grouped_data[label].append` fails for every new label
            grouped_data.setdefault(f"{path} = {found_value}", []).append(data)
    return grouped_data

In [30]:
def dict_compare(d1, d2):
    """
    Return True if every key/value pair of `d2` is matched by `d1.get(key)`.

    Objects of different type never match, e.g. when `d2` is still an empty
    list (initial value) while `d1` is a dictionary.
    """
    if type(d1) == type(d2):
        for key, expected in d2.items():
            if not d1.get(key) == expected:
                return False
        return True
    return False
        

# Could contain an alternative head as (list_of_data : list, paths : [list, dict], values : None) 
# where paths-values would require a 1-to-1 correspondence. 
# But this could quickly become error prone on the user side...
def group_by_multiple(list_of_data : list, paths):
    """
    Group the entries of `list_of_data` by several paths at once.

    list_of_data : list of (nested) dictionaries
    paths        : either a list/tuple of paths or a dict {path: expected value},
                   paths as understood by `deep_dict_get`

    list/tuple : entries are split into subgroups that share the values at
                 all given paths. Entries land in the same group when their
                 values produce the same label.
    dict       : a single (possibly empty) group is returned that holds the
                 entries where every path has its expected value.

    Returns a dict that maps the label "<path> = <value> ; <path> = <value> ..."
    to the list of entries. Any other type of `paths` gives an empty dict.
    """
    grouped_data = {}
    # Grouping without values
    if isinstance(paths, (list, tuple)):
        for data in list_of_data:
            paths_vals = {path : deep_dict_get(data, path) for path in paths}
            label = " ; ".join([f"{path} = {paths_vals[path]}" for path in paths_vals])
            # Create the group for a new combination, extend it for a known one.
            # (Overwriting the group here would keep only its last entry.)
            grouped_data.setdefault(label, []).append(data)

    # Grouping by path-value combinations
    # Only returns the group where all those pairs are true
    elif isinstance(paths, dict):
        # The keys will contain the paths
        # The values will be the corresponding expected values
        good_data = list()
        for data in list_of_data:
            if all([deep_dict_get(data, path) == paths[path] for path in paths]):
                good_data.append(data)
        grouped_data[" ; ".join([f"{path} = {paths[path]}" for path in paths])] = good_data
    return grouped_data

In [31]:
def filter_by(list_of_data : list, path : str, value):
    """
        returns a list of all data dicts whose entry at `path` equals `value`
    """
    return [data for data in list_of_data if deep_dict_get(data, path) == value]

In [32]:
def filter_value(list_of_data : list, path : str):
    """
        returns a list with the entry found at `path` in each data dict,
        in the order of `list_of_data`
    """
    return [deep_dict_get(data, path) for data in list_of_data]

# Load results

In [36]:
#naming_schema = ["Metric", "Kernel_search", "train_data_ratio", "Data_kernel", "weights", "Variance_list", "eval_START", "eval_END", "eval_COUNT", "optimizer", "train_iterations", "LR", "Noise", "Data_scaling", "BFGS"]
all_results = load_results('results', "hardcoded")
# Fail with a readable message instead of an IndexError on all_results[0]
assert all_results, "No results could be loaded from results/hardcoded"
#pprint.pprint(all_results)
result_tree = generate_tree_structure(all_results[0]).descendants
#pprint.pprint(result_tree)

# All questions below iterate over the same kernels and metrics, so the lists
# are defined once here instead of being repeated inside every loop.
MODEL_KERNELS = ['C*C*RBF','C*RBF','C*SIN','C*SIN + C*SIN','C*SIN + C*SIN + C*SIN','SIN*RBF', 'C*MAT', 'MAT*SIN']
ALL_METRICS = ["Laplace", "MC", "MLL", "AIC", "Laplace_prior"]
# Metrics that are checked in question 3
REDUNDANCY_METRICS = ["Laplace", "AIC", "Laplace_prior"]
# Kernel pairs for question 3: a pair is reported when the loss of its first
# kernel is not greater than the loss of its second kernel
REDUNDANCY_PAIRS = [('C*RBF', 'C*C*RBF'), ('C*SIN', 'C*SIN + C*SIN + C*SIN'), ('C*SIN + C*SIN', 'C*SIN + C*SIN + C*SIN'), ('C*SIN', 'C*SIN + C*SIN')]
# Metrics whose 'Total time' entry is averaged in question 5
TIMED_METRICS = ["Laplace", "MC", "Laplace_prior"]


# data - kernel assignment : RBF_PER = SIN*RBF; 4PER = 4C*SIN; PER = C*SIN
# Perform for each main dictionary: Iterate over kernels -> Look for the lowest(highest?) loss, 

"""
for main_dict in all_results:
    print("############################")
    print(main_dict["results"]["attributes"]["data_gen"])
    for model_kernel in ['C*C*RBF','C*RBF','C*SIN','C*SIN + C*SIN','C*SIN + C*SIN + C*SIN','SIN*RBF']:
        print("\n----")
        print(f"{model_kernel}:\n")
        for metric in ["Laplace", "MC", "MLL", "AIC"]:
            print(f"{metric}\t - \t{main_dict['results'][metric][model_kernel]['loss']}")

"""

# What do I want to know?
# 1.) Does each metric recognize the correct kernels?
# 1.1.) If not: Why?
# 2.) Is the order I was talking about true?
# 2.1.) i.e. MLL > Laplace/AIC >=? MCMC
# 3.) Are models with redundant parameters more punished than others? Especially the 2SIN vs 3SIN case
# 4.) Make a ranking sheet per dataset where the models are compared _within_ a metric to see if there are similarities visible or something

print("1.) Does each metric recognize the correct kernel?")
# Iterate over all datasets
for main_dict in all_results:
    print(main_dict["results"]["attributes"]["data_gen"])
    for metric in ALL_METRICS:
        # Kernel with the highest loss value for this metric
        best = max([(main_dict['results'][metric][model_kernel]['loss'], model_kernel) for model_kernel in MODEL_KERNELS])
        print(f"{metric}\t {best}")

print("===============================")
print("2.) Is the order I was talking about true? i.e. what is the order or the values for MLL, AIC, Laplace and where does MC lie at?")
#     It should be ordered AIC > MLL > Laplace
#     And somewhere in between there should be MC
# Iterate over all datasets
for main_dict in all_results:
    print(main_dict["results"]["attributes"]["data_gen"])
    # Do this comparison for each model kernel
    for model_kernel in MODEL_KERNELS:
        ranking = [(main_dict['results'][metric][model_kernel]['loss'], metric) for metric in ALL_METRICS]
        ranking = sorted(ranking, key=lambda x: x[0], reverse=True)
        # Check if it either is MLL > Laplace > AIC (this happens if MLL is negative) OR AIC > MLL > Laplace 
        #if not (main_dict['results']["MLL"][model_kernel]['loss'] > main_dict['results']["Laplace"][model_kernel]['loss'] > main_dict['results']["AIC"][model_kernel]['loss']) and not (main_dict['results']["AIC"][model_kernel]['loss'] > main_dict['results']["MLL"][model_kernel]['loss'] > main_dict['results']["Laplace"][model_kernel]['loss']) :
        print(f"{model_kernel}\t {ranking}")

print("===============================")
print("3.) Are models with redundant parameters more punished than others? Especially the 2SIN vs 3SIN case")

# Iterate over all datasets
for main_dict in all_results:
    for metric in REDUNDANCY_METRICS:
        print(metric)
        for pair in REDUNDANCY_PAIRS:
            met_1 = main_dict['results'][metric][pair[0]]['loss']
            met_2 = main_dict['results'][metric][pair[1]]['loss']
            # Only the cases that contradict the expectation are listed
            if not met_1 > met_2:
                print(f"{main_dict['results']['attributes']['data_gen']} \t - {pair}")


print("===============================")
print("4.) Make a ranking per dataset how each model kernel was rated")

# Iterate over all datasets
for main_dict in all_results:
    print(main_dict["results"]["attributes"]["data_gen"])
    # Do this ranking for each metric 
    for metric in ALL_METRICS:
        print(metric)
        ranking = [(main_dict['results'][metric][model_kernel]['loss'], model_kernel) for model_kernel in MODEL_KERNELS]
        pprint.pprint(sorted(ranking, key=lambda x: x[0], reverse=True))
        print("===============")

print("===============================")
print("5.) Runtime of each metric")
for main_dict in all_results:
    print(main_dict["results"]["attributes"]["data_gen"])
    # Average the runtime over all model kernels for each metric
    for metric in TIMED_METRICS:
        print(metric)
        runtime_list = [main_dict['results'][metric][model_kernel]['details']['Total time'] for model_kernel in MODEL_KERNELS]
        print(np.average(runtime_list))

 




1.) Does each metric recognize the correct kernel?
3PER
Laplace	 (tensor([[228.2309]], dtype=torch.float64, requires_grad=True), 'C*SIN')
MC	 (tensor(17.0949), 'C*SIN + C*SIN + C*SIN')
MLL	 (tensor(261.6757, requires_grad=True), 'C*SIN + C*SIN + C*SIN')
AIC	 (tensor(514.2029, requires_grad=True), 'MAT*SIN')
Laplace_prior	 (tensor(247.6876, dtype=torch.float64, requires_grad=True), 'C*SIN + C*SIN + C*SIN')
MAT_PER
Laplace	 (tensor([[-140.0627]], dtype=torch.float64, requires_grad=True), 'C*RBF')
MC	 (tensor(-133.4964), 'C*SIN')
MLL	 (tensor(-122.8542, requires_grad=True), 'C*SIN + C*SIN + C*SIN')
AIC	 (tensor(-265.7084, requires_grad=True), 'C*SIN + C*SIN + C*SIN')
Laplace_prior	 (tensor(-132.0182, dtype=torch.float64, requires_grad=True), 'C*SIN')
PER
Laplace	 (tensor([[251.2515]], dtype=torch.float64, requires_grad=True), 'C*SIN + C*SIN')
MC	 (tensor(36.5242), 'C*SIN + C*SIN')
MLL	 (tensor(317.6550, requires_grad=True), 'C*SIN + C*SIN')
AIC	 (tensor(621.3099, requires_grad=True), 'C*S

In [None]:
# Display the nodes currently stored in `result_tree`
result_tree

(Node('/results/attributes'),
 Node('/results/attributes/eval_START'),
 Node('/results/attributes/eval_END'),
 Node('/results/attributes/eval_COUNT'),
 Node('/results/attributes/optimizer'),
 Node('/results/attributes/train_iters'),
 Node('/results/attributes/LR'),
 Node('/results/attributes/BFGS'),
 Node('/results/attributes/data_gen'),
 Node('/results/AIC'),
 Node('/results/AIC/SIN*RBF'),
 Node('/results/AIC/SIN*RBF/loss'),
 Node('/results/AIC/SIN*RBF/details'),
 Node('/results/AIC/SIN*RBF/details/correction term'),
 Node('/results/AIC/SIN*RBF/details/loss term'),
 Node('/results/AIC/SIN*RBF/parameter_punishment'),
 Node('/results/AIC/C*C*RBF'),
 Node('/results/AIC/C*C*RBF/loss'),
 Node('/results/AIC/C*C*RBF/details'),
 Node('/results/AIC/C*C*RBF/details/correction term'),
 Node('/results/AIC/C*C*RBF/details/loss term'),
 Node('/results/AIC/C*C*RBF/parameter_punishment'),
 Node('/results/AIC/C*RBF'),
 Node('/results/AIC/C*RBF/loss'),
 Node('/results/AIC/C*RBF/details'),
 Node('/resul

In [None]:
#naming_schema = ["Metric", "Kernel_search", "train_data_ratio", "Data_kernel", "weights", "Variance_list", "eval_START", "eval_END", "eval_COUNT", "optimizer", "train_iterations", "LR", "Noise", "Data_scaling", "BFGS"]
# For each key, load the results stored under 'results/<key>' and print an overview.
# NOTE: the loop rebinds `all_results` and `result_tree`; after it has finished,
# both hold the data of the last key ("AIC").
for key in ["Laplace", "MLL", "AIC"]:
    print(key)
    all_results = load_results('results', key)
    print(len(all_results))
    # Key structure of the first loaded entry
    result_tree = generate_tree_structure(all_results[0]).descendants
    pprint.pprint(result_tree)

    # One value per loaded entry
    pprint.pprint(filter_value(all_results, '/results/results/final model'))
    pprint.pprint(filter_value(all_results, '/results/results/details'))

# The two string literals below hold disabled code. The second one is the last
# expression of the cell and is therefore shown as the cell's output.
"""
print(filter_value(all_results, '/results/results/final model'))

#########
"""
    #Looking at the final models that were found and the ratio of exactly the correct one
"""
#########
print("\n########## Laplace ##########\n")
all_final_models = filter_value(all_results, '/results/results/final model')
winners = group_by(all_results, 'results/results/final model', value='(c * PER)')['results/results/final model = (c * PER)']
print(f"Percentage of exactly correct model: {len(winners)/len(all_final_models)*100}")
print(f"Percentage of correct component: {np.sum(['PER' in m for m in all_final_models])}")
print("\n##############\n")
Laplace_runtimes = filter_value(all_results, '/results/results/details/Total time')
#training_runtimes = filter_value(all_results, '/results/results/Training time')
#KS_runtimes = filter_value(all_results, '/results/results/Kernel search time')
print(f"Average Laplace runtime: {np.mean(Laplace_runtimes)}")
#print(f"Average Laplace runtime: {np.mean(KS_runtimes)}")

print("\n#############################\n")

"""

Laplace
100
(Node('/results/results'),
 Node('/results/results/Kernel search time'),
 Node('/results/results/details'),
 Node('/results/results/total count'),
 Node('/results/results/explosion count'),
 Node('/results/results/model history'),
 Node('/results/results/performance history'),
 Node('/results/results/loss history'),
 Node('/results/results/final model'),
 Node('/results/results/parameters'),
 Node('/results/results/parameters/likelihood.noise_covar.raw_noise'),
 Node('/results/results/parameters/covar_module.raw_outputscale'),
 Node('/results/results/parameters/covar_module.base_kernel.raw_lengthscale'),
 Node('/results/attributes'),
 Node('/results/attributes/Kernel_search'),
 Node('/results/attributes/train_data_ratio'),
 Node('/results/attributes/Data_kernel'),
 Node('/results/attributes/weights'),
 Node('/results/attributes/Variance_list'),
 Node('/results/attributes/eval_START'),
 Node('/results/attributes/eval_END'),
 Node('/results/attributes/eval_COUNT'),
 Node('/re

'\n#########\nprint("\n########## Laplace ##########\n")\nall_final_models = filter_value(all_results, \'/results/results/final model\')\nwinners = group_by(all_results, \'results/results/final model\', value=\'(c * PER)\')[\'results/results/final model = (c * PER)\']\nprint(f"Percentage of exactly correct model: {len(winners)/len(all_final_models)*100}")\nprint(f"Percentage of correct component: {np.sum([\'PER\' in m for m in all_final_models])}")\nprint("\n##############\n")\nLaplace_runtimes = filter_value(all_results, \'/results/results/details/Total time\')\n#training_runtimes = filter_value(all_results, \'/results/results/Training time\')\n#KS_runtimes = filter_value(all_results, \'/results/results/Kernel search time\')\nprint(f"Average Laplace runtime: {np.mean(Laplace_runtimes)}")\n#print(f"Average Laplace runtime: {np.mean(KS_runtimes)}")\n\nprint("\n#############################\n")\n\n'