In [511]:
import jsonlines
import os
import pandas as pd
import numpy as np

In [524]:
def process_method(filename):
    options = {
        "results_spatial_plus_final.jsonl": "spatial+",
        "results_spatial_final.jsonl": "spatial",
        "results_GCNN_zero.jsonl": "gcnn ZERO",
        "results_GCNN_1hidden_layer.jsonl": "gcnn 1HIDDEN",
        "results_GCNN_lin.jsonl": "gcnn LIN",
        "results_GCN.jsonl": "gcnn ReLU",
        "results_GM_Lag.jsonl": "s2sls",
        "results_GM_Error.jsonl": "glmerr",
        "results_Ridge.jsonl": "ols",
        "results_dapsm_final.jsonl": "dapsm",
    }
    return options.get(filename)

In [525]:
def get_meanplusminus(datmean, datstd):
    return (
        datmean.map("{:02.2f}".format)
        + " ± {\small "
        + datstd.map("{:02.2f}".format)
        + "}"
    )

In [526]:
def get_databin(df):
    datbin_ate_mean = (
        df[df["treatment_type"] == "binary"]
        .groupby(["smoothness_binned", "confounding_binned"])["ate_se"]
        .mean()
    )
    datbin_ate_std = (
        df[df["treatment_type"] == "binary"]
        .groupby(["smoothness_binned", "confounding_binned"])["ate_se"]
        .std()
    )
    datbin_ate = get_meanplusminus(datbin_ate_mean, datbin_ate_std)
    datbin_pehe_mean = (
        df[df["treatment_type"] == "binary"]
        .groupby(["smoothness_binned", "confounding_binned"])["pehe_av"]
        .mean()
    )
    datbin_pehe_std = (
        df[df["treatment_type"] == "binary"]
        .groupby(["smoothness_binned", "confounding_binned"])["pehe_av"]
        .std()
    )
    datbin_pehe = get_meanplusminus(datbin_pehe_mean, datbin_pehe_std)
    datbin = pd.concat([datbin_ate, datbin_pehe], axis=1)
    return datbin

In [527]:
def get_datcon(df):
    datcon_erf_mean = (
        df[df["treatment_type"] != "binary"]
        .groupby(["smoothness_binned", "confounding_binned"])["erf_av"]
        .mean()
    )
    datcon_erf_std = (
        df[df["treatment_type"] != "binary"]
        .groupby(["smoothness_binned", "confounding_binned"])["erf_av"]
        .std()
    )
    datcon_erf = get_meanplusminus(datcon_erf_mean, datcon_erf_std)
    datcon_pehe_mean = (
        df[df["treatment_type"] != "binary"]
        .groupby(["smoothness_binned", "confounding_binned"])["pehe_av"]
        .mean()
    )
    datcon_pehe_std = (
        df[df["treatment_type"] != "binary"]
        .groupby(["smoothness_binned", "confounding_binned"])["pehe_av"]
        .std()
    )
    datcon_pehe = get_meanplusminus(datcon_pehe_mean, datcon_pehe_std)
    datcon = pd.concat([datcon_erf, datcon_pehe], axis=1)
    return datcon

In [528]:
def get_dapsm(df):
    datbin_ate_mean = (
        df[df["treatment_type"] == "binary"]
        .groupby(["smoothness_binned", "confounding_binned"])["ate_se"]
        .mean()
    )
    datbin_ate_std = (
        df[df["treatment_type"] == "binary"]
        .groupby(["smoothness_binned", "confounding_binned"])["ate_se"]
        .std()
    )
    datbin_ate = get_meanplusminus(datbin_ate_mean, datbin_ate_std)
    return datbin_ate

In [529]:
directory_path = "results"

files_list = []
# Iterate over all the files in the directory
for filename in os.listdir(directory_path):
    # If the filename starts with "results_"
    if filename.startswith("results_"):
        # Construct the full file path
        files_list.append(filename)

In [530]:
def mean_of_list(value):
    if isinstance(value, list):
        return np.mean(value)
    else:
        return value

In [531]:
files_list

['results_GCNN_1hidden_layer.jsonl',
 'results_GCN.jsonl',
 'results_GCN_hidden_new.jsonl',
 'results_GCN_first_MT.jsonl',
 'results_GCN_relu_16h.jsonl',
 'results_GCNN_lin.jsonl',
 'results_GCNN_zero.jsonl']

In [532]:
tlb_list = []

for filename in files_list:
    print(f"Filename: {filename}")
    with jsonlines.open(directory_path + "/" + filename) as reader:
        data = [obj for obj in reader]
    df = pd.DataFrame(data)
    df["treatment_type"] = np.where(
        df["envname"].str.contains("disc"), "binary", "continuous"
    )
    # df["smoothness_binned"] = pd.qcut(df["smoothness"], q=2, labels=["low", "high"])
    # df["confounding_binned"] = pd.qcut(df["confounding"], q=2, labels=["low", "high"])

    # Use pd.cut to create the bins
    df["smoothness_binned"] = pd.cut(
        df["smoothness"], bins=[-np.inf, 0.5, np.inf], labels=["low", "high"]
    )
    df["confounding_binned"] = pd.cut(
        df["confounding"], bins=[-np.inf, 0.025, np.inf], labels=["low", "high"]
    )

    df["ate_se"] = df["ate_se"].apply(lambda x: x[0] if isinstance(x, list) else x)

    df["ate_se"] = np.sqrt(df["ate_se"])  # df[""] * 100
    if filename == "results_dapsm_final.jsonl":
        dat = get_dapsm(df).to_frame()
        dat["pehe_bin"] = "n/a"
        dat["erf_av"] = "n/a"
        dat["pehe_con"] = "n/a"
    elif filename == "results_GCN_fin.jsonl":
        continue
    else:
        df["pehe_av"] = np.sqrt(df["pehe_av"])  # df[""] * 100
        df["erf_av"] = np.sqrt(df["erf_av"])  # * 100  df[""]

        datbin = get_databin(df).rename(columns={"pehe_av": "pehe_bin"})
        datcon = get_datcon(df).rename(columns={"pehe_av": "pehe_con"})
        dat = pd.concat([datbin, datcon], axis=1)

    method = process_method(filename)
    dat["method"] = method
    dat.set_index("method", append=True)
    tlb_list.append(dat)

Filename: results_GCNN_1hidden_layer.jsonl
Filename: results_GCN.jsonl
Filename: results_GCN_hidden_new.jsonl
Filename: results_GCN_first_MT.jsonl
Filename: results_GCN_relu_16h.jsonl
Filename: results_GCNN_lin.jsonl
Filename: results_GCNN_zero.jsonl


In [533]:
fin = (
    pd.concat(tlb_list)
    .reset_index()
    .rename(
        columns={"smoothness_binned": "smoothness", "confounding_binned": "confounding"}
    )
    .groupby(by=["smoothness", "confounding", "method"])
    .first()
)

In [534]:
fin

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,ate_se,pehe_bin,erf_av,pehe_con
smoothness,confounding,method,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
low,low,gcnn 1HIDDEN,0.13 ± {r'\small0.09},0.41 ± {r'\small0.11},1.27 ± {r'\small1.21},1.59 ± {r'\small1.19}
low,low,gcnn LIN,0.06 ± {r'\small0.05},0.46 ± {r'\small0.13},1.12 ± {r'\small1.17},1.75 ± {r'\small1.12}
low,low,gcnn ReLU,0.08 ± {r'\small0.06},0.29 ± {r'\small0.07},0.57 ± {r'\small0.55},0.75 ± {r'\small0.54}
low,low,gcnn ZERO,0.15 ± {r'\small0.10},0.51 ± {r'\small0.16},1.20 ± {r'\small1.20},1.69 ± {r'\small1.18}
low,high,gcnn 1HIDDEN,0.10 ± {r'\small0.08},0.34 ± {r'\small0.11},0.97 ± {r'\small0.74},1.19 ± {r'\small0.69}
low,high,gcnn LIN,0.06 ± {r'\small0.04},0.42 ± {r'\small0.12},0.69 ± {r'\small0.57},1.20 ± {r'\small0.52}
low,high,gcnn ReLU,0.06 ± {r'\small0.04},0.24 ± {r'\small0.07},0.55 ± {r'\small0.37},0.67 ± {r'\small0.36}
low,high,gcnn ZERO,0.09 ± {r'\small0.10},0.43 ± {r'\small0.20},1.21 ± {r'\small0.83},1.44 ± {r'\small0.79}
high,low,gcnn 1HIDDEN,0.09 ± {r'\small0.08},0.38 ± {r'\small0.10},1.05 ± {r'\small1.00},1.38 ± {r'\small0.97}
high,low,gcnn LIN,0.06 ± {r'\small0.05},0.47 ± {r'\small0.12},0.84 ± {r'\small0.83},1.49 ± {r'\small0.83}


In [535]:
with open("leaderboards.txt", "w") as f:
    f.write(
        fin.to_latex(
            index=True,
            escape=False,
            formatters={"name": str.upper},
            float_format="{:.3f}".format,
        )
    )

  fin.to_latex(
