In [2]:
## Goals of this experiment:
## - SGHMCHD is more robust with respect to different initial (smaller) stepsizes than SGHMC
## - SGHMCHD maintains reasonable uncertainty bounds of SGHMC 
%matplotlib inline
import numpy as np
import sys
from os.path import dirname, join as path_join, isdir
from glob import glob
import json
import pandas as pd

from collections import defaultdict

# Prepend the directory three levels above the working directory to the
# module search path.
sys.path.insert(0, path_join("..", "..", ".."))

# Directory whose subdirectories hold the stored run results that are loaded
# further below via `load_results`.
RESULT_DIR = path_join("..", "..", "..", "cluster_results", "uci_old")


def format_stepsize(stepsize):
    """Return the text used to display `stepsize` in a LaTeX table.

    The stepsizes 1e-09, 1e-07 and 1e-05 are rendered as powers of ten; any
    other value is returned as its plain `str` representation.

    The braces of the exponents are doubled on purpose -- presumably so that a
    later `str.format` pass over the finished table collapses them into single
    braces.
    """
    # XXX: Pretty formatting of different stepsizes to prettyprint in latex.
    as_text = str(stepsize)
    latex_powers = {
        "1e-09": "10^{{-9}}",
        "1e-07": "10^{{-7}}",
        "1e-05": "10^{{-5}}",
    }
    if as_text in latex_powers:
        return latex_powers[as_text]
    return as_text

def format_performance(mean, stddev):
    """Format a mean and standard deviation as an inline LaTeX math string.

    Both numbers are printed with exactly three decimal places so that every
    cell of a results table has the same precision (trailing zeros are kept,
    e.g. 0.38 is shown as 0.380).

    Parameters
    ----------
    mean : float
        Mean performance.
    stddev : float
        Standard deviation of the performance.

    Returns
    -------
    str
        Text of the form "$<mean> \\pm <stddev>$".
    """
    return "${mean:.3f} \\pm {stddev:.3f}$".format(
        mean=float(mean), stddev=float(stddev)
    )

def format_sampler(sampler, stepsize):
    """Return a LaTeX label naming `sampler` together with its stepsize.

    Parameters
    ----------
    sampler : str
        Sampler name; it is upper-cased for display.
    stepsize
        Stepsize of the sampler; rendered through `format_stepsize`.

    Returns
    -------
    str
        The upper-cased sampler name followed by an inline math expression
        that sets epsilon_0 equal to the formatted stepsize.
    """
    # XXX: Prettyprint sampler with stepsize here.
    # The backslash is escaped explicitly: "\e" is not a valid escape sequence
    # and only yields a literal backslash by accident (with a warning).
    return "{sampler} $\\epsilon_0 = {stepsize}$".format(
        sampler=sampler.upper(), stepsize=format_stepsize(stepsize)
    )

# Formatted method labels (sampler plus stepsize) of all completed runs;
# filled as a side effect of `load_results`.
METHODS = set()

def load_results(directory):
    """Collect the results of all completed runs stored below `directory`.

    Every immediate subdirectory of `directory` whose path does not contain
    "_sources" is treated as one run and must provide a "config.json" (read
    for its "sampler" and "stepsize" entries) and a "run.json" (read for its
    "status" and "result" entries).

    Only runs whose status is "COMPLETED" are kept.  As a side effect the
    formatted label of each kept run is added to the module-level `METHODS`
    set, and every per-dataset result dict gains a "dataset" entry.

    Returns
    -------
    defaultdict
        Mapping dataset name -> method label -> list of per-dataset result
        dicts, one list element per completed run.
    """
    def read_json(path):
        with open(path) as handle:
            return json.load(handle)

    run_directories = []
    for candidate in glob("{}/*".format(directory)):
        if isdir(candidate) and "_sources" not in candidate:
            run_directories.append(candidate)

    # All configurations are read before any run file, one pass each.
    configurations = [
        read_json(path_join(run_directory, "config.json"))
        for run_directory in run_directories
    ]
    results = [
        read_json(path_join(run_directory, "run.json"))
        for run_directory in run_directories
    ]

    runs = defaultdict(lambda: defaultdict(list))

    for configuration, result in zip(configurations, results):
        if result["status"] != "COMPLETED":
            continue

        method = format_sampler(
            sampler=configuration["sampler"],
            stepsize=configuration["stepsize"]
        )
        METHODS.add(method)

        for dataset, dataset_result in result["result"].items():
            # Tag each result with the dataset it belongs to.
            dataset_result["dataset"] = dataset
            runs[dataset][method].append(dataset_result)

    return runs


def rename(name):
    """Translate an internal dataset or metric identifier into display text.

    Dataset identifiers are looked up first, metric identifiers second.  A
    name that is not known to either table is returned unchanged.

    Both "ll" and "log_likelihood" map to the same display text, so that a
    metric function called `log_likelihood` gets a proper caption instead of
    its raw identifier.
    """
    dataset_mappings = {
        "BostonHousing": "Boston Housing",
        "keras.datasets.boston_housing": "Boston Housing",
        "YachtHydrodynamics": "Yacht Hydrodynamics",
        "WineQualityRed": "Wine Quality Red",
    }
    metric_mappings = {
        "rmse": "Root mean squared error~(RMSE)",
        "ll": "Log likelihood",
        "log_likelihood": "Log likelihood",
    }

    def get_first_matching(name, dictionaries, default=None):
        # Return the entry of the first dictionary that knows `name`.
        for dictionary in dictionaries:
            value = dictionary.get(name)
            if value is not None:
                return value
        # Without a fallback an unknown name is an error.
        if default is None:
            raise KeyError("'{name}' is not contained in any of the given dictionaries!".format(name=name))
        return default

    return get_first_matching(name, (dataset_mappings, metric_mappings), default=name)

# `runs` maps dataset name -> formatted method label (sampler and stepsize)
# -> list of run result dicts (json content) that we can use to measure
# performance.
runs = load_results(RESULT_DIR)

# Datasets whose (renamed) names are selected as columns of the results table.
DATASETS = ("BostonHousing", "YachtHydrodynamics", "Concrete", "WineQualityRed")

def results_for(metric_function):
    """Build the results table for one metric.

    The table combines literature baselines (VI and PBP) with the performance
    of every method found in the module-level `runs`, where performance is
    defined by `metric_function`.

    Parameters
    ----------
    metric_function : callable
        Called once per run with the keyword arguments `y_true`,
        `prediction_mean` and `prediction_variance`.  Its `__name__` selects
        the literature baselines and must be "log_likelihood" or "rmse".

    Returns
    -------
    pandas.DataFrame
        One column per dataset in `DATASETS` (display names); the cells are
        LaTeX math strings of the form mean +- stddev.  The display name of
        the metric is attached as the attribute `metric_name`.

    Raises
    ------
    NotImplementedError
        If no literature baselines are known for `metric_function`.
    """
    # XXX: check if any of the "SGHMC" results in supplementary are also
    # useful to have here.

    # Literature baselines, keyed by the *display* name of each dataset so
    # that they end up in the same columns as our own results.  The values
    # are wrapped in "$" because "\pm" is only valid in math mode.
    literature_results = {
        "log_likelihood": {
            "Boston Housing": {
                "VI": "$-2.903 \\pm 0.071$",
                "PBP": "$-2.574 \\pm 0.089$",
            },
            "Yacht Hydrodynamics": {
                "VI": "$-3.439 \\pm 0.163$",
                "PBP": "$-1.634 \\pm 0.016$",
            },
            "Concrete": {
                "VI": "$-3.391 \\pm 0.017$",
                "PBP": "$-3.161 \\pm 0.019$",
            },
            "Wine Quality Red": {
                "VI": "$-0.980 \\pm 0.013$",
                "PBP": "$-0.968 \\pm 0.014$",
            },
        },
        "rmse": {
            "Boston Housing": {
                "VI": "$4.320 \\pm 0.2914$",
                "PBP": "$3.014 \\pm 0.1800$",
            },
            "Yacht Hydrodynamics": {
                "VI": "$6.887 \\pm 0.6749$",
                "PBP": "$1.015 \\pm 0.0542$",
            },
            "Concrete": {
                "VI": "$7.128 \\pm 0.1230$",
                "PBP": "$5.667 \\pm 0.0933$",
            },
            "Wine Quality Red": {
                "VI": "$0.646 \\pm 0.0081$",
                "PBP": "$0.635 \\pm 0.0079$",
            },
        },
    }

    metric_name = metric_function.__name__
    if metric_name not in literature_results:
        raise NotImplementedError(
            "We do not have any literature results for metric function {}".format(
                metric_name
            )
        )

    records = defaultdict(dict)
    for dataset, baselines in literature_results[metric_name].items():
        records[dataset].update(baselines)

    for dataset, method_results in runs.items():
        for method, method_runs in method_results.items():
            # One scalar per run; summarized as mean and stddev below.
            performances = np.asarray([
                metric_function(
                    y_true=run["y_test"],
                    prediction_mean=run["prediction_mean"],
                    prediction_variance=run["prediction_variance"],
                )
                for run in method_runs
            ])
            records[rename(dataset)][method] = format_performance(
                mean=performances.mean(),
                stddev=performances.std()
            )

    columns = [rename(dataset) for dataset in DATASETS]
    dataframe = pd.DataFrame.from_records(
        records, columns=columns,
        # index = XXX: use this to enforce a specific ordering
    )
    # Consumed by `prettyprint_results` to build the caption.
    dataframe.metric_name = rename(metric_name)
    return dataframe

def prettyprint_results(dataframe):
    """Render `dataframe` as LaTeX source with a caption.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Results table; must carry the attribute `metric_name`, which is used
        in the caption.

    Returns
    -------
    str
        LaTeX code consisting of the tabular, the caption and the enclosing
        environment.
    """
    # One left-aligned column per dataframe column plus one for the index.
    # The spec is passed to `to_latex` explicitly so that the brace group it
    # emits is exactly the one that is mapped back to itself below,
    # independent of how many methods were loaded.
    column_spec = "l" * (len(dataframe.columns) + 1)
    latex_table = dataframe.to_latex(escape=False, column_format=column_spec)

    # `str.format` collapses the doubled braces of the stepsize labels
    # (e.g. "10^{{-5}}") and fills an empty "{}" header cell, if present;
    # the brace groups "{tabular}" and the column spec must survive as-is.
    table = latex_table.format(
        "\\textbf{Method/Dataset}",
        tabular="{tabular}",
        **{column_spec: "{" + column_spec + "}"}
    ).rstrip("\n")
    caption = """\\caption{{{metric} for regression benchmarks from the UCI repository.
For comparison we include results for VI~(variational inference) and
PBP~(probabilistic backpropagation) for Bayesian neural network
training taken from Hernández-Lobato and Adams~\\cite{{uci-vi-pbp}}.
We report mean $\\pm$ stddev across $10$ runs.}}""".format(metric=rename(dataframe.metric_name))
    return """
\\begin{{figure}}
{table}
{caption}
\\end{{figure}}
""".format(table=table, caption=caption)



In [3]:
from sklearn.metrics import mean_squared_error


def rmse(y_true, prediction_mean, **kwargs):
    """Return the root mean squared error of `prediction_mean` against `y_true`.

    Any further keyword arguments are accepted and ignored, so that all
    metric functions can be called with the same set of arguments.
    """
    squared_error = mean_squared_error(y_true=y_true, y_pred=prediction_mean)
    return np.sqrt(squared_error)

# Metric functions are called with the keyword arguments `y_true`,
# `prediction_mean` and `prediction_variance` and return a scalar loss value.
metric_functions = (rmse,)

for metric_function in metric_functions:
    # Table of formatted results for this metric.
    metric_results = results_for(metric_function)
    print(metric_results)
    # LaTeX source of that table, including its caption.
    results_tablecode = prettyprint_results(metric_results)
    print(results_tablecode)
    # NOTE(review): every iteration writes to the same path, so with more
    # than one metric only the last table would remain on disk.
    with open("/tmp/t.tex", "w") as f:
        f.write(results_tablecode)
    
                                                   
    # XXX: Write to latex

# XXX Construct dataframe from results that looks like in bohamiann supplementary
# => use smart renaming on the dataframe to clearly mark approaches etc.
# => call dataframe.to_latex(escape=False) like in old notebook to directly write results tables
# => drop results tables in a location that allows us to directly compile them into the thesis.
    
    

                                   Boston Housing Yacht Hydrodynamics  \
PBP                              3.014 \pm 0.1800    1.015 \pm 0.0542   
SGHMC $\epsilon_0 = 0.001$      $3.129 \pm 0.395$   $5.515 \pm 0.958$   
SGHMC $\epsilon_0 = 0.01$       $3.099 \pm 0.385$   $5.702 \pm 1.069$   
SGHMC $\epsilon_0 = 10^{{-5}}$  $3.099 \pm 0.385$   $5.702 \pm 1.069$   
SGHMC $\epsilon_0 = 10^{{-7}}$  $3.099 \pm 0.385$   $5.702 \pm 1.069$   
SGHMC $\epsilon_0 = 10^{{-9}}$  $3.099 \pm 0.385$   $5.702 \pm 1.069$   
VI                               4.320 \pm 0.2914    6.887 \pm 0.6749   

                                         Concrete   Wine Quality Red  
PBP                              5.667 \pm 0.0933   0.635 \pm 0.0079  
SGHMC $\epsilon_0 = 0.001$       $8.042 \pm 0.38$  $0.606 \pm 0.033$  
SGHMC $\epsilon_0 = 0.01$       $7.949 \pm 0.456$  $0.608 \pm 0.032$  
SGHMC $\epsilon_0 = 10^{{-5}}$  $7.949 \pm 0.456$  $0.608 \pm 0.032$  
SGHMC $\epsilon_0 = 10^{{-7}}$  $7.949 \pm 0.456$  $0.608 \p