## Create Table to summarize results

This notebook creates a table with the normalized negative log-likelihoods (NLLHs) of different models. It expects to find pickled predictions for 10 folds with the following filenames:

 * ```<scenario_name>.floc.<dist_name>.<fold>.100.<model>.pkl```

`eval_model.py` automatically creates such files. 

**NOTE:** The first code cell defines the variables (paths, models, distribution, scenarios) that need to be adjusted before running. Evaluating all scenarios might take a while.

In [73]:
%matplotlib inline

import os
import pickle
import sys
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np
import scipy.stats as scst
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
import tabulate

# 1) Paths
# output_dir is scanned for pickled predictions by the loading cell below;
# save_dir receives the cached result dictionaries and the final table.
output_dir = "../../DistNet_nn_pkl"        # where the .pkl files live
save_dir   = "../results/"  # where tables / figure go
os.makedirs(save_dir, exist_ok=True)

# 2) Helper imports
# The project-local `helper` package is looked up one directory above this
# notebook, so the search path has to be extended before importing it.
sys.path.append("../")
from helper import load_data, preprocess, data_source_release

# 3) General settings
# Model names that are accepted for the "mean" task when loading predictions.
mean_exp = ["rf", "nn"]
sc_dict  = data_source_release.get_sc_dict()

# 4) Distribution you want to fit
# Model names that are accepted for the "floc" task; they also become the
# columns of the NLLH table built at the end of the notebook.
floc_exp = ["lognormal_distfit",
            "lognormal_distfit.rf2",
            "lognormal_distfit.rf",
            "lognormal_nn"]
# scipy distribution used to evaluate logpdf / mean / var of the predicted
# parameters; its `.name` is also used to name the files written to save_dir.
distribution_hndl = scst.distributions.lognorm

# 5) Scenarios to evaluate (empty list = all)
# An empty list is replaced by all scenarios found in output_dir after loading.
SCENARIOS = []    # or pick specific keys
# If True, the evaluation cell restores previously dumped results from save_dir
# instead of recomputing them.
load = False

In [74]:
# Collect all pickled predictions into one nested dictionary:
#   res_dict[scenario][task][model][fold] = (train_pred, val_pred)
# Each pickle holds a sequence (train predictions, validation predictions,
# info dict); the info dict provides "task", "model", "scenario" and "fold".
res_dict = dict()

# Sorted listing -> deterministic processing order. If several files map to the
# same (scenario, task, model, fold), the one processed last wins.
# NOTE(review): the file names end in ".<fold>.<n>.pkl"; <n> looks like a seed,
# but nothing here selects a specific one -- confirm only one seed is present.
for fl in sorted(os.listdir(output_dir)):
    fl_path = os.path.join(output_dir, fl)

    # Only regular pickle files are of interest; sub-directories, logs, plots
    # and anything that is not a plain file are skipped.
    if not fl.endswith(".pkl") or not os.path.isfile(fl_path):
        continue
    print(fl, output_dir)

    # SECURITY: pickle.load can execute arbitrary code stored in the file.
    # Only point output_dir at prediction files you created yourself.
    with open(fl_path, "rb") as fh:
        pkl = pickle.load(fh)

    train_pred = np.array(pkl[0])
    val_pred = np.array(pkl[1])
    add_info = pkl[2]
    task = add_info["task"]
    model = add_info["model"]
    scenario = add_info["scenario"]
    fold = add_info["fold"]

    if "nn" in model and task == "floc":
        print("%20s" % scenario, model, fold, add_info["loaded"])

    # Filter BEFORE registering the scenario: otherwise a scenario for which
    # only unwanted models exist ends up as an empty entry and breaks the
    # lookups of the following cells.
    if model not in floc_exp and model not in mean_exp:
        continue

    model_folds = res_dict.setdefault(scenario, dict()) \
                          .setdefault(task, dict()) \
                          .setdefault(model, dict())
    model_folds[fold] = (train_pred, val_pred)

# No explicit selection -> evaluate every scenario we found predictions for
if len(SCENARIOS) == 0:
    SCENARIOS = list(res_dict.keys())

spear_swgcp.floc.lognormal_nn.3.1.pkl ../../DistNet_nn_pkl
         spear_swgcp lognormal_nn 3 False
clasp_factoring.floc.lognormal_nn.3.1.pkl ../../DistNet_nn_pkl
     clasp_factoring lognormal_nn 3 False
yalsat_swgcp.floc.lognormal_nn.1.1.pkl ../../DistNet_nn_pkl
        yalsat_swgcp lognormal_nn 1 False
yalsat_swgcp.floc.lognormal_nn.3.1.pkl ../../DistNet_nn_pkl
        yalsat_swgcp lognormal_nn 3 False
clasp_factoring.floc.lognormal_nn.1.1.pkl ../../DistNet_nn_pkl
     clasp_factoring lognormal_nn 1 False
yalsat_qcp.floc.lognormal_nn.8.1.pkl ../../DistNet_nn_pkl
          yalsat_qcp lognormal_nn 8 False
spear_swgcp.floc.lognormal_nn.1.1.pkl ../../DistNet_nn_pkl
         spear_swgcp lognormal_nn 1 False
spear_swgcp.floc.lognormal_nn.5.1.pkl ../../DistNet_nn_pkl
         spear_swgcp lognormal_nn 5 False
yalsat_swgcp.floc.lognormal_nn.7.1.pkl ../../DistNet_nn_pkl
        yalsat_swgcp lognormal_nn 7 False
spear_qcp.floc.lognormal_nn.8.1.pkl ../../DistNet_nn_pkl
           spear_qcp log

In [75]:
# Sanity check: print, for every scenario, how many folds were loaded per
# model and task. Ideally every model shows the full number of folds.
for scen in sorted(SCENARIOS):
    print("### %s" % scen)
    # A scenario or task without any loaded predictions simply prints nothing
    # instead of raising (or silently swallowing) an exception.
    scen_res = res_dict.get(scen, dict())
    for task in ("mean", "floc"):
        task_res = scen_res.get(task, dict())
        for model in sorted(task_res):
            print("%s %20s" % (task, model), len(task_res[model]))

### clasp_factoring
floc         lognormal_nn 10
### lpg-zeno
floc         lognormal_nn 10
### saps-CVVAR
floc         lognormal_nn 10
### spear_qcp
floc         lognormal_nn 10
### spear_swgcp
floc         lognormal_nn 10
### yalsat_qcp
floc         lognormal_nn 10
### yalsat_swgcp
floc         lognormal_nn 10


In [76]:
# Per-scenario results, either computed below or restored from save_dir:
#   nllh_dict[scen][model]             -> array [n_folds, 2]; column 0 = train, column 1 = val
#   mean_dict[scen][task][model][data] -> one list of per-instance means per fold
#   var_dict[scen][task][model][data]  -> one list of per-instance variances per fold
#   mean_dict / var_dict[scen]["true"][data] -> empirical values of the observations
nllh_dict = dict()
mean_dict = dict()
var_dict = dict()

# Cache files, named after the evaluated scipy distribution
nllh_path = os.path.join(save_dir, distribution_hndl.name + ".nllh.pkl")
mean_path = os.path.join(save_dir, distribution_hndl.name + ".mean.pkl")
var_path = os.path.join(save_dir, distribution_hndl.name + ".var.pkl")

# Number of cross-validation folds (also the number of rows of each NLLH array)
n_folds = 10

if load:
    # SECURITY: pickle.load can execute arbitrary code; only load files that
    # were written by this notebook.
    with open(nllh_path, "rb") as fh:
        nllh_dict = pickle.load(fh)

    with open(mean_path, "rb") as fh:
        mean_dict = pickle.load(fh)

    with open(var_path, "rb") as fh:
        var_dict = pickle.load(fh)
    print("LOADED")
else:
    for scen in SCENARIOS:
        print("##### %s #####" % scen)
        # Predictions loaded for this scenario (empty if none were found)
        scen_res = res_dict.get(scen, dict())
        nllh_dict[scen] = dict()
        mean_dict[scen] = {"true": {"train": [], "val": []}, "floc": dict(), "mean": dict()}
        var_dict[scen] = {"true": {"train": [], "val": []}, "floc": dict(), "mean": dict()}

        # 1) Load data
        sc_dict = data_source_release.get_sc_dict()
        data_dir = data_source_release.get_data_dir()

        runtimes, features, sat_ls = load_data.\
            get_data(scenario=scen, data_dir=data_dir,
                     sc_dict=sc_dict, retrieve=sc_dict[scen]['use'])

        # We do not need any features
        del features

        # Get CV splits
        # NOTE(review): presumably this has to be the very same split that was
        # used when the predictions were created -- confirm against eval_model.py.
        idx = list(range(runtimes.shape[0]))
        kf = KFold(n_splits=n_folds, shuffle=True, random_state=0)
        for fold, (train, valid) in enumerate(kf.split(idx)):
            y_tra_run = runtimes[train]
            y_val_run = runtimes[valid]

            # Scale both splits by the largest runtime of the training split
            y_max_ = np.max(y_tra_run)
            y_min_ = 0
            y_tra_run = (y_tra_run - y_min_) / y_max_
            y_val_run = (y_val_run - y_min_) / y_max_

            task = "floc"
            # A scenario may have no distribution predictions at all
            floc_res = scen_res.get(task, dict())
            for model in floc_exp:
                if model not in floc_res:
                    continue

                if model not in nllh_dict[scen]:
                    # NaN marks folds for which no prediction is available
                    nllh_dict[scen][model] = np.full([n_folds, 2], np.nan)
                    mean_dict[scen][task][model] = {"train": list(), "val": list()}
                    var_dict[scen][task][model] = {"train": list(), "val": list()}

                if fold not in floc_res[model]:
                    print("FAILED, %s, %s, %s, fold %d" % (scen, task, model, fold))
                    continue

                # `col` selects train (0) / val (1) both in the stored prediction
                # tuple and in the NLLH array. It is deliberately not called
                # `idx`, which is the index list handed to kf.split above.
                for observations, data, col in ((y_tra_run, "train", 0), (y_val_run, "val", 1)):
                    preds = floc_res[model][fold][col]
                    assert observations.shape[0] == preds.shape[0]
                    nllh = list()
                    mean = list()
                    var = list()
                    for obs, p in zip(observations, preds):
                        # Bring the predicted parameters into scipy's
                        # (*shape, loc, scale) layout; loc is required to be 0.
                        if distribution_hndl.name == "expon":
                            if type(p) == np.float64:
                                p = [p, ]
                            if len(p) == 1: p = [0, p[0]]
                            assert p[0] == 0
                        else:
                            if len(p) == 2: p = [p[0], 0, p[1]]
                            assert p[1] == 0
                        # NOTE(review): adding log(max(obs)) looks like the change of
                        # variables that normalizes the likelihood by the largest
                        # observation of this instance -- confirm.
                        nllh_per_inst = distribution_hndl.logpdf(obs, *p[:-2], loc=p[-2], scale=p[-1]) + np.log(max(obs))
                        nllh_per_inst = np.mean(-nllh_per_inst)
                        nllh.append(nllh_per_inst)
                        mean.append(distribution_hndl.mean(*p[:-2], loc=p[-2], scale=p[-1]))
                        var.append(distribution_hndl.var(*p[:-2], loc=p[-2], scale=p[-1]))
                    nllh_dict[scen][model][fold, col] = np.mean(nllh)
                    mean_dict[scen][task][model][data].append(mean)
                    var_dict[scen][task][model][data].append(var)

            task = "mean"
            if task not in scen_res:
                continue
            for model in mean_exp:
                if model not in scen_res[task]:
                    continue
                if model not in mean_dict[scen][task]:
                    mean_dict[scen][task][model] = {"train": list(), "val": list()}
                    var_dict[scen][task][model] = {"train": list(), "val": list()}
                if fold not in scen_res[task][model]:
                    print("FAILED, %s, %s, %s, fold %d" % (scen, task, model, fold))
                    continue

                for observations, data, col in ((y_tra_run, "train", 0), (y_val_run, "val", 1)):
                    preds = scen_res[task][model][fold][col]
                    # Report the scenario that is being evaluated and the size of
                    # the split that is actually compared.
                    assert observations.shape[0] == len(preds), (model, scen, observations.shape[0], len(preds))
                    mean_ls = list()
                    tr_mean = list()
                    tr_var = list()
                    for obs, mean in zip(observations, preds):
                        mean_ls.append(mean)
                        tr_mean.append(np.mean(obs))
                        tr_var.append(np.var(obs, ddof=1))

                    mean_dict[scen][task][model][data].append(mean_ls)
                    # Store the empirical statistics only once per fold, no
                    # matter how many "mean" models are evaluated
                    if len(mean_dict[scen]["true"][data]) == fold:
                        mean_dict[scen]["true"][data].append(tr_mean)
                        var_dict[scen]["true"][data].append(tr_var)

    # Cache the results so that a later run can set `load = True`
    with open(nllh_path, "wb") as fh:
        pickle.dump(nllh_dict, fh)

    with open(mean_path, "wb") as fh:
        pickle.dump(mean_dict, fh)

    with open(var_path, "wb") as fh:
        pickle.dump(var_dict, fh)
    print("dumped to " + save_dir)

##### spear_swgcp #####
Train data loaded
Test data loaded
/Users/gurmehersinghpuri/Freiburg/DL_LAB/Project/DistNet/notebooks/../data/spear_smallworlds/features.txt
(12499, 100) (12499, 100)
Discarding 0 (12499) instances because of CRASHED
Discarding 34 (12499) instances because of TIMEOUT
Discarding 0 (12465) instances because not stated TIMEOUTS
Discarding 0 (12465) instances because of constant features
Discarding 1283 (12465) instances because of UNSAT
(11182, 100)
##### clasp_factoring #####
Train data loaded
Test data loaded
/Users/gurmehersinghpuri/Freiburg/DL_LAB/Project/DistNet/notebooks/../data/clasp-3.0.4-p8_rand_factoring/features.txt
(2000, 113) (2000, 100)
Discarding 0 (2000) instances because of CRASHED
Discarding 0 (2000) instances because of TIMEOUT
Discarding 0 (2000) instances because not stated TIMEOUTS
Discarding 0 (2000) instances because of constant features
Discarding 0 (2000) instances because of UNSAT
(2000, 113)
##### yalsat_swgcp #####
Train data loaded
Tes

In [77]:
import numpy as np
import tabulate

# Assemble the complete NLLH table: one column per model in floc_exp and two
# rows per scenario (first row: mean over the train folds, second row: mean
# over the validation folds; the second row has an empty scenario label).
header = ["sc"] + floc_exp

# Stand-in for models that were not evaluated on a scenario -> shows up as inf
dummy = np.full([2, 2], np.inf)

tab_data = []
for scen in sorted(nllh_dict):
    for col, label in enumerate((scen, "")):
        fold_means = [np.mean(nllh_dict[scen].get(m, dummy)[:, col]) for m in header[1:]]
        tab_data.append([label] + fold_means)

# Reduce the table to the scenario label plus the DistNet column
distnet_col = "lognormal_nn"            # change to whatever your model key is
idx = header.index(distnet_col)
new_header = ["sc", distnet_col]
new_data = [[row[0], row[idx]] for row in tab_data]

# Render once, then show the table and store it next to the other results
out = tabulate.tabulate(new_data, headers=new_header, floatfmt="5.3f")
print(out)

with open(f"{save_dir}/nllh_{distribution_hndl.name}_distnet_only.txt", "w") as fh:
    fh.write(out)

sc                 lognormal_nn
---------------  --------------
clasp_factoring          -0.264
                         -0.084
lpg-zeno                 -0.860
                         -0.855
saps-CVVAR               -0.673
                         -0.606
spear_qcp                -1.129
                         -1.106
spear_swgcp              -0.528
                         -0.455
yalsat_qcp               -0.767
                         -0.764
yalsat_swgcp             -0.824
                         -0.800
