## Create Table to summarize results

This notebook creates a table with the normalized negative log-likelihoods (NLLHs) for different models. It expects to find pickled predictions for 10 folds with filenames of the following form:

 * ```<scenario_name>.floc.<dist_name>.<fold>.100.<model>.pkl```

`eval_model.py` automatically creates such files. 

**NOTE:** The first cell contains some variables that need to be adjusted. Evaluating all scenarios might take a while.

In [None]:
import os
import pickle
import sys

import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np
import scipy.stats as scst
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
import tabulate

% matplotlib inline

sys.path.append("../")
from helper import load_data, preprocess, data_source_release

# Models that are evaluated for the "mean" task
mean_exp = ["rf", "nn"]
sc_dict = data_source_release.get_sc_dict()

# 1) Select the distribution you want to fit.
#    The model-name prefix and the scipy distribution handle are selected
#    together via DIST_PREFIX so that they cannot get out of sync.
DISTRIBUTIONS = {
    "lognormal": scst.distributions.lognorm,
    "expon": scst.distributions.expon,
    "invgauss": scst.distributions.invgauss,
}
DIST_PREFIX = "lognormal"  # one of: "lognormal", "expon", "invgauss"

# Models that are evaluated for the "floc" task, e.g. "lognormal_distfit"
floc_exp = [DIST_PREFIX + suffix
            for suffix in ("_distfit", "_distfit.rf2", "_distfit.rf", "_nn")]
distribution_hndl = DISTRIBUTIONS[DIST_PREFIX]

# 2) Change the following to existing paths
output_dir = "../"        # directory that is scanned for pickled predictions
save_dir = "../results/"  # directory the computed statistics and tables are written to

# 3) Select whether to compute results for all scenarios or just a single scenario.
#    An empty list means "all scenarios for which predictions are found".
#    Examples for a single scenario:
#      ['clasp_factoring', ], ['saps-CVVAR', ], ['lpg-zeno', ], ['yalsat_qcp', ],
#      ['spear_qcp', ], ['spear_smallworlds', ], ['yalsat_smallworlds', ]
SCENARIOS = []

# If True, previously dumped statistics are read from save_dir instead of being recomputed
load = False

In [None]:
# Collect all matching prediction pickles into
#   res_dict[scenario][task][model][fold] = (train_pred, val_pred)
res_dict = dict()

# Sorted so that the result does not depend on the arbitrary os.listdir order
for fl in sorted(os.listdir(output_dir)):
    # Select only one seed and only pickles. The test is done on the file name
    # itself so that the directory part of the path cannot influence the filter.
    if ".100." not in fl or not fl.endswith(".pkl"):
        continue

    fl_path = os.path.join(output_dir, fl)
    if not os.path.isfile(fl_path):
        # Directories, broken links, ... cannot be unpickled
        continue

    # NOTE: unpickling can execute arbitrary code; only point output_dir at
    # files you created yourself.
    with open(fl_path, "rb") as fh:
        pkl = pickle.load(fh)

    # Each pickle holds (train predictions, validation predictions, meta data)
    train_pred = np.array(pkl[0])
    val_pred = np.array(pkl[1])
    add_info = pkl[2]
    task = add_info["task"]
    model = add_info["model"]
    scenario = add_info["scenario"]
    fold = add_info["fold"]

    if "nn" in model and task == "floc":
        print("%20s" % scenario, model, fold, add_info["loaded"])

    # Skip models that were not selected *before* registering the scenario;
    # otherwise scenarios without any usable result end up in SCENARIOS and
    # break the following cells.
    if model not in floc_exp and model not in mean_exp:
        continue

    fold_results = (res_dict.setdefault(scenario, dict())
                            .setdefault(task, dict())
                            .setdefault(model, dict()))
    fold_results[fold] = (train_pred, val_pred)

# An empty selection means "every scenario for which predictions were found"
if len(SCENARIOS) == 0:
    SCENARIOS = sorted(res_dict)

In [None]:
# Print, for every scenario and task, how many folds were found per model
for scen in sorted(SCENARIOS):
    print("### %s" % scen)
    scen_results = res_dict.get(scen, dict())
    for task in ("mean", "floc"):
        task_results = scen_results.get(task, dict())
        if task == "floc" and not task_results:
            # "mean" results are optional, missing "floc" results are worth a note
            print("floc: no predictions found")
        for model in sorted(task_results):
            print("%s %20s" % (task, model), len(task_results[model]))

In [None]:
# Compute (or reload) the statistics that are summarized further below:
#   nllh_dict[scen][model]              -> array [fold, (train, val)] with the mean normalized NLLH
#   mean_dict[scen][task][model][data]  -> one list per fold with the predicted means
#   var_dict[scen]["floc"][model][data] -> one list per fold with the predicted variances
#   mean_dict/var_dict[scen]["true"][data] -> one list per fold with the empirical
#                                             mean/variance of the (scaled) observations
# where data is "train" or "val".
nllh_dict = dict()
mean_dict = dict()
var_dict = dict()

# One file per statistic, named after the selected distribution
result_paths = {
    kind: os.path.join(save_dir, "%s.%s.pkl" % (distribution_hndl.name, kind))
    for kind in ("nllh", "mean", "var")
}

if load:
    # NOTE: unpickling can execute arbitrary code; only load files you created yourself.
    with open(result_paths["nllh"], "rb") as fh:
        nllh_dict = pickle.load(fh)

    with open(result_paths["mean"], "rb") as fh:
        mean_dict = pickle.load(fh)

    with open(result_paths["var"], "rb") as fh:
        var_dict = pickle.load(fh)
    print("LOADED")
else:
    # Scenario-independent, so fetched only once
    sc_dict = data_source_release.get_sc_dict()
    data_dir = data_source_release.get_data_dir()

    for scen in SCENARIOS:
        print("##### %s #####" % scen)
        nllh_dict[scen] = dict()
        mean_dict[scen] = {"true": {"train": [], "val": []}, "floc": dict(), "mean": dict()}
        var_dict[scen] = {"true": {"train": [], "val": []}, "floc": dict(), "mean": dict()}

        # 1) Load data
        runtimes, features, sat_ls = load_data.\
            get_data(scenario=scen, data_dir=data_dir,
                     sc_dict=sc_dict, retrieve=sc_dict[scen]['use'])

        # We do not need any features
        del features

        # Predictions available for this scenario; tolerate missing entries
        scen_results = res_dict.get(scen, dict())
        floc_results = scen_results.get("floc", dict())
        mean_results = scen_results.get("mean", dict())

        # Get CV splits
        sample_ids = list(range(runtimes.shape[0]))
        kf = KFold(n_splits=10, shuffle=True, random_state=0)
        for fold, (train, valid) in enumerate(kf.split(sample_ids)):
            y_tra_run = runtimes[train]
            y_val_run = runtimes[valid]

            # Scale all runtimes by the largest training runtime of this fold
            y_max_ = np.max(y_tra_run)
            y_min_ = 0
            y_tra_run = (y_tra_run - y_min_) / y_max_
            y_val_run = (y_val_run - y_min_) / y_max_

            task = "floc"
            for model in floc_exp:
                if model not in floc_results:
                    continue

                if model not in nllh_dict[scen]:
                    # nan marks folds for which no result is available
                    nllh_dict[scen][model] = np.full([10, 2], np.nan)
                    mean_dict[scen][task][model] = {"train": list(), "val": list()}
                    var_dict[scen][task][model] = {"train": list(), "val": list()}

                if fold not in floc_results[model]:
                    print("FAILED, %s, %s, %s, fold %d" % (scen, task, model, fold))
                    continue

                for observations, data, split_idx in ([y_tra_run, "train", 0], [y_val_run, "val", 1]):
                    predictions = floc_results[model][fold][split_idx]
                    assert observations.shape[0] == predictions.shape[0]
                    nllh = list()
                    pred_mean = list()
                    pred_var = list()
                    for obs, p in zip(observations, predictions):
                        # Bring the predicted parameters into the scipy layout
                        # (*shape parameters, loc, scale); loc has to be 0.
                        if distribution_hndl.name == "expon":
                            if np.ndim(p) == 0:
                                # A bare scalar is the scale parameter
                                p = [p, ]
                            if len(p) == 1: p = [0, p[0]]
                            assert p[0] == 0
                        else:
                            if len(p) == 2: p = [p[0], 0, p[1]]
                            assert p[1] == 0
                        shape_args, loc, scale = p[:-2], p[-2], p[-1]

                        # Log-likelihood of every observation in this row, shifted by
                        # the log of the row's largest observation (the normalization)
                        log_lik = distribution_hndl.logpdf(obs, *shape_args, loc=loc, scale=scale) + np.log(max(obs))
                        nllh.append(np.mean(-log_lik))
                        pred_mean.append(distribution_hndl.mean(*shape_args, loc=loc, scale=scale))
                        pred_var.append(distribution_hndl.var(*shape_args, loc=loc, scale=scale))
                    nllh_dict[scen][model][fold, split_idx] = np.mean(nllh)
                    mean_dict[scen][task][model][data].append(pred_mean)
                    var_dict[scen][task][model][data].append(pred_var)

            task = "mean"
            if task not in scen_results:
                continue
            for model in mean_exp:
                if model not in mean_results:
                    continue
                if model not in mean_dict[scen][task]:
                    mean_dict[scen][task][model] = {"train": list(), "val": list()}
                    var_dict[scen][task][model] = {"train": list(), "val": list()}
                if fold not in mean_results[model]:
                    print("FAILED, %s, %s, %s, fold %d" % (scen, task, model, fold))
                    continue

                for observations, data, split_idx in ([y_tra_run, "train", 0], [y_val_run, "val", 1]):
                    predictions = mean_results[model][fold][split_idx]
                    assert observations.shape[0] == len(predictions), \
                        (model, scen, observations.shape[0], len(predictions))
                    mean_ls = list(predictions)
                    tr_mean = [np.mean(obs) for obs in observations]
                    tr_var = [np.var(obs, ddof=1) for obs in observations]

                    mean_dict[scen][task][model][data].append(mean_ls)
                    # The empirical statistics do not depend on the model, so they are
                    # stored only once per fold (by the first model that reaches it).
                    # NOTE(review): if a fold is missing for every mean model, the
                    # statistics of all later folds are not stored either.
                    if len(mean_dict[scen]["true"][data]) == fold:
                        mean_dict[scen]["true"][data].append(tr_mean)
                        var_dict[scen]["true"][data].append(tr_var)

    # Do not lose the computed results just because the target directory is missing
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    with open(result_paths["nllh"], "wb") as fh:
        pickle.dump(nllh_dict, fh)

    with open(result_paths["mean"], "wb") as fh:
        pickle.dump(mean_dict, fh)

    with open(result_paths["var"], "wb") as fh:
        pickle.dump(var_dict, fh)
    print("dumped to " + save_dir)

In [None]:
# Generate NLLH table: two rows per scenario, the first one (labelled with the
# scenario name) holds the training NLLH, the second one the validation NLLH.
#
# Reading the table:
#   nan -> there are some folds missing
#   inf -> there was no results file found
header = ["sc"] + floc_exp
tab_data = list()
for scen in sorted(nllh_dict.keys()):
    # Stand-in for models without any result; averages to inf
    dummy_array = np.ones([2, 2]) * np.inf
    for column, label in ((0, scen), (1, "")):
        row = [label]
        for m in header[1:]:
            per_fold = nllh_dict[scen].get(m, dummy_array)[:, column]
            row.append(np.mean(per_fold))
        tab_data.append(row)

a = tabulate.tabulate(tab_data, headers=header, floatfmt="5.3f")
print(a)

# Keep a plain-text copy of the table next to the pickled results
table_path = "%s/nllh_%s.txt" % (save_dir, distribution_hndl.name)
with open(table_path, "w") as fh:
    fh.write(a)