In [54]:
"""
Notebook for post-processing the results
"""
import os
import numpy as np
import pandas as pd

database = "./database/"
filename = "input-uci-crispr-experiments.csv"
# NOTE(review): the file name mentions UCI/CRISPR experiments, while the rest
# of this script filters on image datasets (mnist, fmnist, cifar10) -- confirm
# this is the intended export.

# Load the experiment log.
data = pd.read_csv(os.path.join(database, filename))
# Keep only the runs whose 'State' column reads 'finished'.
finished_data = data.loc[data['State'].eq('finished')]

def get_mean_std(dt):
    """Summarise every column of ``dt`` as a ``(mean, std)`` pair.

    The pairs are returned as a list that follows the order of
    ``dt.columns``; each statistic comes from ``np.mean`` / ``np.std``
    applied to the column ``dt[name]``.
    """
    return [(np.mean(dt[name]), np.std(dt[name])) for name in dt.columns]

image = ['mnist', 'fmnist', 'cifar10']
ls = []

# Log-likelihood column used by each family of methods; the KL divergence and
# RMSE columns are shared by all of them.
_LL_POINT = 'test/loglik'
_LL_BAYES = 'test/loglik_bayes'
_SHARED_METRICS = ['test/kl_div', 'test/rmse']

# One entry per reported method: (result key, column == value filters,
# log-likelihood column). The order fixes the insertion order of `result`.
# NOTE(review): the rendered table of this notebook shows "nan (nan)" for the
# LL of 'mcdropout' and 'vi' -- 'test/loglik_bayes' appears to be empty for
# those runs; confirm which log-likelihood column they actually log.
_METHOD_SPECS = [
    # homoscedastic
    ('homo', {'likelihood': 'homoscedastic', 'method': 'marglik'}, _LL_BAYES),
    # naive
    ('naive_gs', {'likelihood': 'heteroscedastic', 'head': 'meanvar', 'method': 'map'}, _LL_POINT),
    # other methods
    ('betahalf', {'likelihood': 'heteroscedastic', 'method': 'betanll', 'beta': 0.5}, _LL_POINT),
    ('betaone', {'likelihood': 'heteroscedastic', 'method': 'betanll', 'beta': 1}, _LL_POINT),
    ('faith', {'likelihood': 'heteroscedastic', 'method': 'faithful'}, _LL_POINT),
    ('mcdropout', {'likelihood': 'heteroscedastic', 'method': 'mcdropout'}, _LL_BAYES),
    ('vi', {'likelihood': 'heteroscedastic', 'method': 'vi'}, _LL_BAYES),
    # naive, marginal-likelihood runs (same rows, two log-likelihood columns)
    ('naive_eb_pp', {'likelihood': 'heteroscedastic', 'head': 'meanvar', 'method': 'marglik'}, _LL_BAYES),
    ('naive_eb', {'likelihood': 'heteroscedastic', 'head': 'meanvar', 'method': 'marglik'}, _LL_POINT),
    # natural
    ('natural_gs_pp', {'likelihood': 'heteroscedastic', 'head': 'natural', 'method': 'map'}, _LL_BAYES),
    ('natural_gs', {'likelihood': 'heteroscedastic', 'head': 'natural', 'method': 'map'}, _LL_POINT),
    ('natural_eb_pp', {'likelihood': 'heteroscedastic', 'head': 'natural', 'method': 'marglik'}, _LL_BAYES),
    ('natural_eb', {'likelihood': 'heteroscedastic', 'head': 'natural', 'method': 'marglik'}, _LL_POINT),
]


def _select_runs(frame, filters):
    """Return the rows of ``frame`` where every ``column == value`` in ``filters`` holds."""
    keep = np.ones(len(frame), dtype=bool)
    for column, wanted in filters.items():
        keep &= (frame[column] == wanted).to_numpy()
    return frame[keep]


# Build one {method key: [(mean, std), ...]} dict per dataset, appended to
# `ls` in the order of `image`.
for dataset in image:
    data_class = finished_data[finished_data['dataset'] == dataset]
    result = {}
    for key, filters, ll_column in _METHOD_SPECS:
        runs = _select_runs(data_class, filters)
        # Column order (LL, KL, RMSE) matches the metric order of the table.
        result[key] = get_mean_std(runs[[ll_column, *_SHARED_METRICS]])
    ls.append(result)

    # Define the columns of the table
columns = pd.MultiIndex.from_product(
    [['MNIST with MLP', 'FashionMNIST with CNN', 'CIFAR10'], ['LL', 'D_KL', 'RMSE']],
    names=['Dataset', 'Metric']
)

# define the rows of the table
methods = [
    ('Homoscedastic', 'EB', '✓'),
    ('Naive NLL', 'GS', '✗'),
    ('β-NLL (0.5)', 'GS', '✗'),
    ('β-NLL (1)', 'GS', '✗'),
    ('Faithful', 'GS', '✗'),
    ('MC-Dropout', 'GS', '✓'),
    ('VI', 'GS', '✓'),
    ('Naive NLL', 'EB', '✓'),
    ('Naive NLL', 'EB', '✗'),
    ('Natural NLL', 'GS', '✓'),
    ('Natural NLL', 'GS', '✗'),
    ('Natural NLL', 'EB', '✓'),
    ('Natural NLL', 'EB', '✗')
]

# Define the methods to be used in the table
table_data_method = [
    'homo',
    'naive_gs',
    'betahalf',
    'betaone',
    'faith',
    'mcdropout',
    'vi',
    'naive_eb_pp',
    'naive_eb_pp',
    'natural_gs_pp',
    'natural_gs',
    'natural_eb_pp',
    'natural_eb'
]


table_data = []

# Fill the table
for method_name in table_data_method:
    row = []
    for dataset in ls:  # mnist, fmnist, cifar10
        method_key = method_name
        if method_key in dataset:
            mean_std_values = dataset[method_key]
            for mean, std in mean_std_values:
                row.append(f"{mean:.2f} ({std:.2f})")
        else:
            row.extend([""] * 3)
    table_data.append(row)

# Transfer to DataFrame
df = pd.DataFrame(table_data, index=pd.MultiIndex.from_tuples(methods, names=["Objective", "Regularization", "Posterior Predictive"]), columns=columns)

# Design the table style
df_styled = df.style.set_caption("Results Table").set_table_styles([
    {"selector": "th", "props": [("text-align", "center")]},
    {"selector": "td", "props": [("text-align", "center")]}
])

# Export to LaTeX
latex_table = df.to_latex(multicolumn=True, multicolumn_format='c', index=True)

with open("results_table.tex", "w") as file:
    file.write(latex_table)

# Save the table
df.to_csv("results_table.csv")


df_styled

Unnamed: 0_level_0,Unnamed: 1_level_0,Dataset,MNIST with MLP,MNIST with MLP,MNIST with MLP,FashionMNIST with CNN,FashionMNIST with CNN,FashionMNIST with CNN,CIFAR10,CIFAR10,CIFAR10
Unnamed: 0_level_1,Unnamed: 1_level_1,Metric,LL,D_KL,RMSE,LL,D_KL,RMSE,LL,D_KL,RMSE
Objective,Regularization,Posterior Predictive,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
Homoscedastic,EB,✓,-4.60 (0.68),2.74 (3.73),31.60 (23.61),-4.48 (0.76),0.87 (0.31),30.96 (22.39),-5.51 (0.19),1.59 (0.53),61.07 (12.29)
Naive NLL,GS,✗,-5.27 (0.40),1.35 (0.55),49.38 (15.93),-4.57 (0.58),0.64 (0.23),31.20 (22.48),-5.44 (0.18),1.52 (0.55),56.90 (10.24)
β-NLL (0.5),GS,✗,-4.59 (0.60),0.66 (0.16),33.04 (22.11),-4.58 (0.62),0.65 (0.14),32.96 (21.97),-5.37 (0.28),1.45 (0.47),54.08 (15.95)
β-NLL (1),GS,✗,-4.65 (0.52),0.73 (0.25),33.23 (21.45),-4.59 (0.65),0.66 (0.15),31.54 (23.56),-5.34 (0.27),1.42 (0.47),52.67 (14.58)
Faithful,GS,✗,-4.69 (0.61),0.77 (0.22),32.25 (21.36),-4.88 (0.59),0.96 (0.21),37.85 (24.62),-5.45 (0.22),1.58 (0.49),50.67 (13.44)
MC-Dropout,GS,✓,nan (nan),0.68 (0.20),32.79 (21.12),nan (nan),0.46 (0.12),31.93 (21.62),nan (nan),2.87 (0.94),757.90 (766.72)
VI,GS,✓,nan (nan),0.70 (0.22),32.14 (21.60),nan (nan),0.65 (0.31),35.04 (20.16),nan (nan),1.50 (0.42),56.85 (16.77)
Naive NLL,EB,✓,-4.48 (0.61),0.55 (0.15),30.95 (21.74),-4.48 (0.65),0.48 (0.12),31.54 (21.85),-5.27 (0.29),1.32 (0.43),50.01 (14.94)
Naive NLL,EB,✗,-4.48 (0.61),0.55 (0.15),30.95 (21.74),-4.48 (0.65),0.48 (0.12),31.54 (21.85),-5.27 (0.29),1.32 (0.43),50.01 (14.94)
Natural NLL,GS,✓,-4.62 (0.56),0.70 (0.21),31.97 (21.15),-4.37 (0.74),0.51 (0.11),31.73 (22.76),-5.18 (0.35),1.27 (0.40),48.02 (15.93)
