In [23]:
import copy
import json
import os
from pathlib import Path

import numpy as np
import pandas as pd
import wandb
from omegaconf import OmegaConf
from scipy.stats import ttest_ind

In [38]:
# Weights & Biases run used for each dataset, written as "entity/project/run_id".
# Earlier / alternative runs, kept for reference:
#   boston:  aalto-ml/sl-fast-updates/jb8zuhnp
#   boston:  aalto-ml/sl-fast-updates/n0tpvytt
#   airfoil: aalto-ml/sl-fast-updates/7l0oc7tu
#   boston:  aalto-ml/sl-fast-updates/outqf2oo  (train from D1 and D2)
#   airfoil: (none recorded)                    (train from D1 and D2)
WANDB_RUNS = OmegaConf.create(
    dict(
        boston="aalto-ml/sl-fast-updates/2vxkwnxl",  # triton
        airfoil="aalto-ml/sl-fast-updates/d1agchis",  # triton
        protein="aalto-ml/sl-fast-updates/8ad4u90g",  # triton
    )
)

# NOTE: every string containing LaTeX markup below is a raw string (r"...").
# Sequences such as "\s", "\o" and "\%" are not valid Python escapes; non-raw
# literals containing them trigger DeprecationWarning/SyntaxWarning on current
# Python versions. The raw strings hold exactly the same characters.

# Model identifiers, spelled as they appear in the "model" column of the W&B tables.
COLUMNS_TITLES = [
    "NN MAP",
    "BNN full",
    "GLM full",
    "GP Subset (GP)",
    "SFR (GP)",
    # "BNN full GRID",
    # "GLM full GRID",
    # "GP Subset (GP) BO",
    # "SFR (GP) BO",
]

# Two-level (top header, sub header) LaTeX column titles.
COLUMNS_TITLES_MULTI = [
    ("$N$", ""),
    ("$D$", ""),
    ("$C$", ""),
    (r"\sc nn map", ""),
    # (" ", "$N$"),
    # (" ", "$D$"),
    # (" ", "$C$"),
    # ("     ", "\sc nn map"),
    # ("No $\delta$ tuning", "\sc nn map"),
    (r"\sc Laplace", "full"),
    (r"\sc Laplace glm", "full"),
    (r"{\sc gp} subset", r"$M = 20\% \text{ of } N$"),
    (r"\our", r"$M = 20\% \text{ of } N$"),
    # ("$\delta$ tuning", "\sc bnn"),
    # ("$\delta$ tuning", "\sc glm"),
    # ("$\delta$ tuning", "{\sc gp} subset"),
    # ("$\delta$ tuning", "\our"),
]

# Model identifier -> LaTeX label.
COLUMNS_TITLES_DICT = {
    "NN MAP": r"\sc nn map",
    "BNN full": r"\sc bnn",
    "GLM full": r"\sc glm",
    # "BNN full GRID": "\sc bnn",
    # "GLM full GRID": "\sc glm",
    "GP Subset (GP)": r"{\sc gp} subset",
    "SFR (GP)": r"\our",
    # "GP Subset (GP) BO": "{\sc gp} subset",
    # "SFR (GP) BO": "\our",
}

# Dataset key -> LaTeX label.
DATASETS = {
    "australian": r"\sc Australian",
    "breast_cancer": r"\sc Breast cancer",
    "ionosphere": r"\sc Ionosphere",
    "glass": r"\sc Glass",
    "vehicle": r"\sc Vehicle",
    "waveform": r"\sc Waveform",
    "digits": r"\sc Digits",
    "satellite": r"\sc Satellite",
}

# Dataset key -> number of data points (presumably the "$N$" column; confirm against usage).
NUM_DATAS = {
    "australian": 690,
    "breast_cancer": 683,
    "ionosphere": 351,
    "glass": 214,
    "vehicle": 846,
    "waveform": 1000,
    "digits": 1797,
    "satellite": 6435,
}

# Dataset key -> input dimensionality (presumably the "$D$" column; confirm against usage).
INPUT_DIMS = {
    "australian": 14,
    "breast_cancer": 10,
    "ionosphere": 34,
    "glass": 9,
    "vehicle": 18,
    "waveform": 21,
    "digits": 64,
    "satellite": 35,
}


# Dataset key -> number of classes (presumably the "$C$" column; confirm against usage).
NUM_CLASSES = {
    "australian": 2,
    "breast_cancer": 2,
    "ionosphere": 2,
    "glass": 6,
    "vehicle": 4,
    "waveform": 3,
    "digits": 10,
    "satellite": 6,
}

In [39]:
# Start a W&B run; the returned handle is what the loading loop below calls
# `use_artifact` on to resolve each run's logged table.
run = wandb.init()

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01670420138333005, max=1.0)…

In [40]:
# Key of the logged W&B table to fetch for every run (the artifact is named
# "run-<run_id>-<table_name>").
table_name = "Metrics"

# Collect one DataFrame per dataset, then stack them into a single frame `df`.
dfs = []
for dataset in WANDB_RUNS.keys():
    print("Data set: {}".format(dataset))
    run_id = WANDB_RUNS[dataset]
    print("Getting data for seed with run_id: {}".format(run_id))

    # run_id is "entity/project/run"; the table artifact is addressed as
    # "entity/project/run-<run>-<table_name>:latest".
    table_artifact = run.use_artifact(
        "{0}/{1}/run-{2}-{3}:latest".format(*run_id.split("/")[:3], table_name),
        type="run_table",
    )

    # NOTE(review): download() below is called without an explicit root, so it is
    # not visible from here whether "old-artifacts/<name>" is ever populated by
    # this notebook or only by hand — confirm the intended cache location.
    save_dir = os.path.join("old-artifacts", table_artifact.name)
    if Path(save_dir).exists():
        # A local copy exists: rebuild the frame from the table's JSON dump.
        print("Loading artifact from json")
        table_path = f"{save_dir}/{table_name}.table.json"
        with open(table_path) as file:
            json_dict = json.load(file)
        df = pd.DataFrame(json_dict["data"], columns=json_dict["columns"])
    else:
        # No local copy: fetch the artifact and read the table through wandb.
        print("Downloading artifact")
        table_artifact.download()
        table = table_artifact.get(table_name)
        df = pd.DataFrame(data=table.data, columns=table.columns)

    print(df)
    dfs.append(df)

df = pd.concat(dfs)
print(30 * "-")
print(df)

Data set: boston
Getting data for seed with run_id: aalto-ml/sl-fast-updates/2vxkwnxl
Downloading artifact


[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  


   dataset     model   seed  num_inducing acc      nlpd ece       mse  \
0   boston    NN MAP     42           NaN      0.172699      0.058095   
1   boston  SFR (GP)     42         128.0      0.213941      0.094116   
2   boston  SFR (GP)     42         128.0      0.143940      0.187193   
3   boston    NN MAP     42           NaN      0.273552      0.041523   
4   boston  SFR (GP)     42         128.0      0.117305      0.059619   
5   boston    NN MAP     42           NaN      0.152292      0.039728   
6   boston  SFR (GP)     42         128.0      0.116249      0.056885   
7   boston    NN MAP     42           NaN      0.325562      0.059409   
8   boston  SFR (GP)     42         128.0      0.076843      0.035625   
9   boston    NN MAP    100           NaN      0.390829      0.123710   
10  boston  SFR (GP)    100         128.0      0.218388      0.354791   
11  boston  SFR (GP)    100         128.0      0.182122      0.468148   
12  boston    NN MAP    100           NaN      0.04

[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  


    dataset     model   seed  num_inducing acc      nlpd ece       mse  \
0   airfoil    NN MAP     42           NaN      6.263195      0.434843   
1   airfoil  SFR (GP)     42         128.0      0.567218      1.127642   
2   airfoil  SFR (GP)     42         128.0      0.535224      0.268844   
3   airfoil    NN MAP     42           NaN      0.082586      0.015205   
4   airfoil  SFR (GP)     42         128.0      0.503456      0.016848   
5   airfoil    NN MAP     42           NaN      0.137894      0.023671   
6   airfoil  SFR (GP)     42         128.0      0.492506      0.024569   
7   airfoil    NN MAP     42           NaN      0.134163      0.024534   
8   airfoil  SFR (GP)     42         128.0      0.496800      0.021662   
9   airfoil    NN MAP    100           NaN      2.163439      0.193873   
10  airfoil  SFR (GP)    100         128.0      0.550517      1.541458   
11  airfoil  SFR (GP)    100         128.0      0.480724      0.246924   
12  airfoil    NN MAP    100          

[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  


    dataset     model   seed  num_inducing acc      nlpd ece        mse  \
0   protein    NN MAP     42           NaN      0.132023       0.069541   
1   protein  SFR (GP)     42        1024.0      0.435724       4.345885   
2   protein  SFR (GP)     42        1024.0      0.159852       0.070752   
3   protein    NN MAP     42           NaN      0.130201       0.067723   
4   protein  SFR (GP)     42        1024.0      0.149093       0.067108   
5   protein    NN MAP     42           NaN      0.130917       0.068583   
6   protein  SFR (GP)     42        1024.0      0.140504       0.068205   
7   protein    NN MAP     42           NaN      0.126606       0.063312   
8   protein  SFR (GP)     42        1024.0      0.139203       0.063483   
9   protein    NN MAP    100           NaN      0.130121       0.067470   
10  protein  SFR (GP)    100        1024.0      0.425011       5.628676   
11  protein  SFR (GP)    100        1024.0      0.151537       0.099048   
12  protein    NN MAP    

In [41]:
# Only keeps models we want in table
# Keep only the rows that belong in the table: the listed update-experiment
# methods, and the SFR (GP) model. Both conditions are applied as one mask.
df = df[
    df["method"].isin(
        [
            "Train D1",
            "Train D1 -> Update D2",
            "Train D1+D2",
            # "Train D1 -> Train D1+D2",
        ]
    )
    & df["model"].isin(["SFR (GP)"])
]
print(df)

    dataset     model   seed  num_inducing acc      nlpd ece        mse  \
1    boston  SFR (GP)     42         128.0      0.213941       0.094116   
2    boston  SFR (GP)     42         128.0      0.143940       0.187193   
6    boston  SFR (GP)     42         128.0      0.116249       0.056885   
10   boston  SFR (GP)    100         128.0      0.218388       0.354791   
11   boston  SFR (GP)    100         128.0      0.182122       0.468148   
15   boston  SFR (GP)    100         128.0      0.137587       0.038704   
19   boston  SFR (GP)     48         128.0      0.249671       0.237561   
20   boston  SFR (GP)     48         128.0      0.145947       0.195901   
24   boston  SFR (GP)     48         128.0      0.124362       0.137787   
28   boston  SFR (GP)    412         128.0      0.258044       2.121921   
29   boston  SFR (GP)    412         128.0      0.204698       0.452240   
33   boston  SFR (GP)    412         128.0      0.103737       0.034442   
37   boston  SFR (GP)  46

In [42]:
# Collapse the rows of every (dataset, method) pair into the mean and standard
# deviation of NLPD and time, plus the number of time measurements.
df_with_stats = (
    df.groupby(["dataset", "method"])
    .agg(
        nlpd_mean=pd.NamedAgg(column="nlpd", aggfunc="mean"),
        nlpd_std=pd.NamedAgg(column="nlpd", aggfunc="std"),
        time_mean=pd.NamedAgg(column="time", aggfunc="mean"),
        time_std=pd.NamedAgg(column="time", aggfunc="std"),
        time_count=pd.NamedAgg(column="time", aggfunc="count"),
    )
    .reset_index()
)
print(df_with_stats)

   dataset                 method  nlpd_mean  nlpd_std  time_mean  time_std  \
0  airfoil               Train D1   0.568197  0.014776   9.138226  0.969055   
1  airfoil  Train D1 -> Update D2   0.523564  0.028442   0.076515  0.002528   
2  airfoil            Train D1+D2   0.508950  0.013600   8.947857  0.168052   
3   boston               Train D1   0.237513  0.019943  10.906910  1.044851   
4   boston  Train D1 -> Update D2   0.167867  0.025691   0.039092  0.000369   
5   boston            Train D1+D2   0.126152  0.017668  10.298200  0.280920   
6  protein               Train D1   0.439546  0.020930   9.101285  1.332234   
7  protein  Train D1 -> Update D2   0.158301  0.007530   0.810522  0.112258   
8  protein            Train D1+D2   0.137291  0.005571  11.529008  2.707221   

   time_count  
0           5  
1           5  
2           5  
3           5  
4           5  
5           5  
6           5  
7           5  
8           5  


In [43]:
def _format_val(mean, std, precision=2):
    r"""Render a mean/std pair as the LaTeX macro call ``\val{<mean>}{<std>}``.

    Args:
        mean: Number placed in the first macro argument.
        std: Number placed in the second macro argument.
        precision: Number of decimals used for both values (default 2).

    Returns:
        The string ``\val{<mean>}{<std>}`` with both numbers in fixed-point
        notation.
    """
    return "\\val{" + f"{mean:.{precision}f}" + "}{" + f"{std:.{precision}f}" + "}"


def bold_if_significant(row):
    r"""Format ``row['mean']`` and ``row['std']`` as ``\val{mean}{std}``.

    Despite the name, nothing is bolded at the moment. An earlier variant chose
    between plain and ``\mathbf{...}`` output based on ``row['pvalue'] < 0.05``
    (with a special case for "NN MAP" models), but that logic was disabled and
    every row is formatted the same way.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) with numeric ``'mean'``
            and ``'std'`` entries.

    Returns:
        The LaTeX string, with both values rounded to two decimals.
    """
    return _format_val(row["mean"], row["std"])


def bold_time(row):
    r"""Format ``row['time_mean']`` and ``row['time_std']`` as ``\val{mean}{std}``.

    No bolding is applied; values are rounded to two decimals.
    """
    return _format_val(row["time_mean"], row["time_std"])


def bold_nlpd(row):
    r"""Format ``row['nlpd_mean']`` and ``row['nlpd_std']`` as ``\val{mean}{std}``.

    No bolding is applied; values are rounded to two decimals.
    """
    return _format_val(row["nlpd_mean"], row["nlpd_std"])

In [44]:
# Add one LaTeX-formatted "\val{mean}{std}" column per metric, computed row by row.
for column, formatter in (
    ("nlpd_mean_pm_std", bold_nlpd),
    ("time_mean_pm_std", bold_time),
):
    df_with_stats[column] = df_with_stats.apply(formatter, axis=1)
print(df_with_stats)

   dataset                 method  nlpd_mean  nlpd_std  time_mean  time_std  \
0  airfoil               Train D1   0.568197  0.014776   9.138226  0.969055   
1  airfoil  Train D1 -> Update D2   0.523564  0.028442   0.076515  0.002528   
2  airfoil            Train D1+D2   0.508950  0.013600   8.947857  0.168052   
3   boston               Train D1   0.237513  0.019943  10.906910  1.044851   
4   boston  Train D1 -> Update D2   0.167867  0.025691   0.039092  0.000369   
5   boston            Train D1+D2   0.126152  0.017668  10.298200  0.280920   
6  protein               Train D1   0.439546  0.020930   9.101285  1.332234   
7  protein  Train D1 -> Update D2   0.158301  0.007530   0.810522  0.112258   
8  protein            Train D1+D2   0.137291  0.005571  11.529008  2.707221   

   time_count  nlpd_mean_pm_std   time_mean_pm_std  
0           5  \val{0.57}{0.01}   \val{9.14}{0.97}  
1           5  \val{0.52}{0.03}   \val{0.08}{0.00}  
2           5  \val{0.51}{0.01}   \val{8.95}{0.17}

In [45]:
# df_with_stats = df_with_stats.drop(columns=['time_count'])
# df_with_stats = df_with_stats.drop(columns=['time_mean', 'time_std', 'nlpd_mean', 'nlpd_std'])
# # df_with_stats.rename(columns=[""])
# print(df_with_stats)

In [46]:
# df_with_stats.rename(columns={"nlpd_mean_pm_std": "NLPD", "time_mean_pm_std": "Time"}, inplace=True)
# df_with_stats.rename(index={"boston": "\sc Boston", "Train D1": "No updates", "Train D1 -> Update D2": "Fast updates", "Train D1+D2": "Retrain"}, inplace=True)

In [47]:
print(df_with_stats.index)
# Reshape to one row per dataset, with two-level columns:
# (formatted metric column, method).
# Alternative layouts tried earlier: index=["dataset", "N", "D", "C"],
# or values="nlpd_mean_pm_std" only.
updates_table = pd.pivot(
    df_with_stats,
    index="dataset",
    columns="method",
    values=["nlpd_mean_pm_std", "time_mean_pm_std"],
)
print(updates_table)

RangeIndex(start=0, stop=9, step=1)
         nlpd_mean_pm_std                                          \
method           Train D1 Train D1 -> Update D2       Train D1+D2   
dataset                                                             
airfoil  \val{0.57}{0.01}      \val{0.52}{0.03}  \val{0.51}{0.01}   
boston   \val{0.24}{0.02}      \val{0.17}{0.03}  \val{0.13}{0.02}   
protein  \val{0.44}{0.02}      \val{0.16}{0.01}  \val{0.14}{0.01}   

          time_mean_pm_std                                           
method            Train D1 Train D1 -> Update D2        Train D1+D2  
dataset                                                              
airfoil   \val{9.14}{0.97}      \val{0.08}{0.00}   \val{8.95}{0.17}  
boston   \val{10.91}{1.04}      \val{0.04}{0.00}  \val{10.30}{0.28}  
protein   \val{9.10}{1.33}      \val{0.81}{0.11}  \val{11.53}{2.71}  


In [48]:
# Clear the axis names left over from the pivot ("dataset" on the index and
# "method" on the second column level) so they are not rendered as extra
# header labels in the printed / exported table.
updates_table.index.names = [None]
updates_table.columns.names = [None, None]
print(updates_table)

         nlpd_mean_pm_std                                          \
                 Train D1 Train D1 -> Update D2       Train D1+D2   
airfoil  \val{0.57}{0.01}      \val{0.52}{0.03}  \val{0.51}{0.01}   
boston   \val{0.24}{0.02}      \val{0.17}{0.03}  \val{0.13}{0.02}   
protein  \val{0.44}{0.02}      \val{0.16}{0.01}  \val{0.14}{0.01}   

          time_mean_pm_std                                           
                  Train D1 Train D1 -> Update D2        Train D1+D2  
airfoil   \val{9.14}{0.97}      \val{0.08}{0.00}   \val{8.95}{0.17}  
boston   \val{10.91}{1.04}      \val{0.04}{0.00}  \val{10.30}{0.28}  
protein   \val{9.10}{1.33}      \val{0.81}{0.11}  \val{11.53}{2.71}  


In [49]:
# Swap the internal column / method / dataset identifiers for the LaTeX labels
# used in the final table.
# - Raw strings are required: "\m", "\c" and "\s" are not valid Python escapes,
#   and non-raw literals containing them raise DeprecationWarning/SyntaxWarning.
# - The former "method" entries were dropped: rename() maps axis *labels*, and
#   no column or row is labelled "method", so they never matched anything.
updates_table = updates_table.rename(
    columns={
        "nlpd_mean_pm_std": "NLPD",
        "time_mean_pm_std": "Time (s)",
        "Train D1": r"Train w. $\mathcal{D}_1$",
        "Train D1 -> Update D2": r"Updates w. $\mathcal{D}_2$",
        "Train D1+D2": r"Retrain w. $\mathcal{D}_1 \cup \mathcal{D}_2$",
        # "Train D1 -> Train D1+D2": "Retrain",
    },
    index={
        "boston": r"\sc Boston",
        "airfoil": r"\sc Airfoil",
        "protein": r"\sc Protein",
    },
)

In [50]:
# Show the column MultiIndex to check the (metric, method) labels after renaming.
print(updates_table.columns)

MultiIndex([(    'NLPD',                      'Train w. $\mathcal{D}_1$'),
            (    'NLPD',                    'Updates w. $\mathcal{D}_2$'),
            (    'NLPD', 'Retrain w. $\mathcal{D}_1 \cup \mathcal{D}_2$'),
            ('Time (s)',                      'Train w. $\mathcal{D}_1$'),
            ('Time (s)',                    'Updates w. $\mathcal{D}_2$'),
            ('Time (s)', 'Retrain w. $\mathcal{D}_1 \cup \mathcal{D}_2$')],
           )


In [51]:
# Emit the table as a LaTeX tabular. escape=False leaves the \val{...}, \sc and
# $...$ markup in cells and headers untouched; multicolumn_format="c|" is the
# alignment used for the spanning "NLPD" / "Time (s)" header cells.
print(updates_table.to_latex(column_format="l|c|cc|c|cc", escape=False, multicolumn_format="c|"))

\begin{tabular}{l|c|cc|c|cc}
\toprule
{} & \multicolumn{3}{c|}{NLPD} & \multicolumn{3}{c|}{Time (s)} \\
{} & Train w. $\mathcal{D}_1$ & Updates w. $\mathcal{D}_2$ & Retrain w. $\mathcal{D}_1 \cup \mathcal{D}_2$ & Train w. $\mathcal{D}_1$ & Updates w. $\mathcal{D}_2$ & Retrain w. $\mathcal{D}_1 \cup \mathcal{D}_2$ \\
\midrule
\sc Airfoil &         \val{0.57}{0.01} &           \val{0.52}{0.03} &                              \val{0.51}{0.01} &         \val{9.14}{0.97} &           \val{0.08}{0.00} &                              \val{8.95}{0.17} \\
\sc Boston  &         \val{0.24}{0.02} &           \val{0.17}{0.03} &                              \val{0.13}{0.02} &        \val{10.91}{1.04} &           \val{0.04}{0.00} &                             \val{10.30}{0.28} \\
\sc Protein &         \val{0.44}{0.02} &           \val{0.16}{0.01} &                              \val{0.14}{0.01} &         \val{9.10}{1.33} &           \val{0.81}{0.11} &                             \val{11.53}{2.71} \\
\bot

  print(updates_table.to_latex(column_format="l|c|cc|c|cc", escape=False, multicolumn_format="c|"))
