In [173]:
import jsonlines
import os
import pandas as pd
import numpy as np

In [174]:
def process_method(filename):
    """Translate a results file name into the display name of its method.

    Args:
        filename: Base name of a results file, e.g. ``"results_OLS.jsonl"``.

    Returns:
        The human-readable method name, or ``None`` when the file name is
        not one of the known results files.
    """
    display_names = {
        "results_OLS.jsonl": "Ordinary Least Squares (OLS)",
        "results_Ridge.jsonl": "Ridge Regression",
        "results_GCN.jsonl": "Graph Convolutional Neural Network",
        "results_ML_Lag.jsonl": "Maximum Likelihood Estimation of the Spatial Lag",
        "results_ML_Error.jsonl": "Maximum Likelihood Estimation of the Spatial Error",
        "results_GM_Lag.jsonl": "Spatial Two Stage Least Squares",
        "results_GM_Error.jsonl": "GMM Estimation of the Spatial Error",
        "results_spatial_final.jsonl": "Spatial",
        "results_spatial_plus_final.jsonl": "Spatial Plus",
        "results_dapsm_final.jsonl": "DAPSM",
    }
    if filename not in display_names:
        return None
    return display_names[filename]

In [175]:
def get_databin(df):
    """Summarise the binary-treatment rows as "mean±std" strings per bin.

    Rows with ``treatment_type == "binary"`` are grouped by
    ``smoothness_binned`` and ``confounding_binned``. For ``ate_se`` and
    ``pehe_av`` the group mean and standard deviation are rounded to whole
    numbers and joined with "±".

    Args:
        df: Frame with ``treatment_type``, ``smoothness_binned``,
            ``confounding_binned``, ``ate_se`` and ``pehe_av`` columns.

    Returns:
        DataFrame indexed by (smoothness_binned, confounding_binned) with
        string columns ``ate_se`` and ``pehe_av``.
    """
    binary_rows = df.loc[df["treatment_type"] == "binary"]
    by_bin = binary_rows.groupby(["smoothness_binned", "confounding_binned"])

    def _mean_pm_std(column):
        # Rounded mean and rounded std rendered as text, e.g. "2.0±1.0".
        per_bin = by_bin[column]
        center = per_bin.mean().round().astype(str)
        spread = per_bin.std().round().astype(str)
        return center + "±" + spread

    summaries = [_mean_pm_std(metric) for metric in ("ate_se", "pehe_av")]
    return pd.concat(summaries, axis=1)

In [176]:
def get_datcon(df):
    """Summarise the non-binary-treatment rows as "mean±std" strings per bin.

    Rows with ``treatment_type != "binary"`` are grouped by
    ``smoothness_binned`` and ``confounding_binned``. For ``erf_av`` and
    ``pehe_av`` the group mean and standard deviation are rounded to whole
    numbers and joined with "±".

    Args:
        df: Frame with ``treatment_type``, ``smoothness_binned``,
            ``confounding_binned``, ``erf_av`` and ``pehe_av`` columns.

    Returns:
        DataFrame indexed by (smoothness_binned, confounding_binned) with
        string columns ``erf_av`` and ``pehe_av``.
    """
    non_binary_rows = df.loc[df["treatment_type"] != "binary"]
    by_bin = non_binary_rows.groupby(["smoothness_binned", "confounding_binned"])

    def _mean_pm_std(column):
        # Rounded mean and rounded std rendered as text, e.g. "12.0±3.0".
        per_bin = by_bin[column]
        center = per_bin.mean().round().astype(str)
        spread = per_bin.std().round().astype(str)
        return center + "±" + spread

    summaries = [_mean_pm_std(metric) for metric in ("erf_av", "pehe_av")]
    return pd.concat(summaries, axis=1)

In [177]:
def get_dapsm(df):
    """Summarise ``ate_se`` of the binary-treatment rows as "mean±std" strings.

    Rows with ``treatment_type == "binary"`` are grouped by
    ``smoothness_binned`` and ``confounding_binned``; the group mean and
    standard deviation of ``ate_se`` are rounded to whole numbers and joined
    with "±".

    Args:
        df: Frame with ``treatment_type``, ``smoothness_binned``,
            ``confounding_binned`` and ``ate_se`` columns.

    Returns:
        Series named ``ate_se`` indexed by
        (smoothness_binned, confounding_binned).
    """
    binary_rows = df.loc[df["treatment_type"] == "binary"]
    ate_per_bin = binary_rows.groupby(["smoothness_binned", "confounding_binned"])[
        "ate_se"
    ]
    center = ate_per_bin.mean().round().astype(str)
    spread = ate_per_bin.std().round().astype(str)
    return center + "±" + spread

In [178]:
directory_path = "results"

# Collect the names (not full paths) of every entry in the results
# directory whose name begins with "results_".
files_list = []
for filename in os.listdir(directory_path):
    if not filename.startswith("results_"):
        continue
    files_list.append(filename)

In [179]:
def mean_of_list(value):
    """Collapse a list to its mean; pass any other value through unchanged.

    Args:
        value: Either a ``list`` of numbers or an arbitrary object.

    Returns:
        ``np.mean(value)`` when ``value`` is a ``list``, otherwise ``value``
        itself.
    """
    return np.mean(value) if isinstance(value, list) else value

In [180]:
# Show which result files were picked up from the results directory.
files_list

['results_spatial_plus_final.jsonl',
 'results_GM_Lag.jsonl',
 'results_GM_Error.jsonl',
 'results_GCN.jsonl',
 'results_dapsm_final.jsonl',
 'results_Ridge.jsonl',
 'results_spatial_final.jsonl']

In [181]:
# One summary table per results file, each indexed by
# (smoothness_binned, confounding_binned, method).
tlb_list = []

for filename in files_list:
    print(f"Filename: {filename}")
    # Every line of the .jsonl file is one record; load them all into a frame.
    with jsonlines.open(os.path.join(directory_path, filename)) as reader:
        data = list(reader)
    df = pd.DataFrame(data)

    # Rows whose environment name contains "disc" are labelled binary,
    # every other row continuous.
    df["treatment_type"] = np.where(
        df["envname"].str.contains("disc"), "binary", "continuous"
    )

    # Split both scores into two fixed bins: values up to and including the
    # threshold are "low", values above it are "high".
    df["smoothness_binned"] = pd.cut(
        df["smoothness"], bins=[-np.inf, 0.5, np.inf], labels=["low", "high"]
    )
    df["confounding_binned"] = pd.cut(
        df["confounding"], bins=[-np.inf, 0.025, np.inf], labels=["low", "high"]
    )

    # ate_se may be stored as a list; in that case keep only its first element.
    df["ate_se"] = df["ate_se"].apply(lambda x: x[0] if isinstance(x, list) else x)

    # Metrics are scaled by 100 before being summarised.
    df["ate_se"] = df["ate_se"] * 100
    if filename == "results_dapsm_final.jsonl":
        # Only ate_se is summarised for this file; the remaining columns are
        # filled with empty strings so the table has the same columns as the
        # other methods.
        dat = get_dapsm(df).to_frame()
        dat["pehe_bin"] = ""
        dat["erf_av"] = ""
        dat["pehe_con"] = ""
    else:
        df["pehe_av"] = df["pehe_av"] * 100
        df["erf_av"] = df["erf_av"] * 100

        # Both summaries contain a "pehe_av" column; rename them apart before
        # placing them side by side.
        datbin = get_databin(df).rename(columns={"pehe_av": "pehe_bin"})
        datcon = get_datcon(df).rename(columns={"pehe_av": "pehe_con"})
        dat = pd.concat([datbin, datcon], axis=1)

    # process_method returns None for file names it does not know. Rows with
    # a missing method would be dropped by a later groupby on "method", so
    # fall back to the file name as the label.
    method = process_method(filename) or filename
    dat["method"] = method
    # set_index returns a new frame; the result has to be kept for the
    # "method" level to actually be added to the index.
    dat = dat.set_index("method", append=True)
    tlb_list.append(dat)

Filename: results_spatial_plus_final.jsonl
Filename: results_GM_Lag.jsonl
Filename: results_GM_Error.jsonl
Filename: results_GCN.jsonl
Filename: results_dapsm_final.jsonl
Filename: results_Ridge.jsonl
Filename: results_spatial_final.jsonl


In [182]:
# Stack the per-method tables, give the bin levels their short names, and
# keep the first row found for each (smoothness, confounding, method) key.
fin = (
    pd.concat(tlb_list)
    .rename_axis(
        index={"smoothness_binned": "smoothness", "confounding_binned": "confounding"}
    )
    .reset_index()
    .groupby(["smoothness", "confounding", "method"])
    .first()
)

In [183]:
# Display the combined leaderboard table.
fin

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,ate_se,pehe_bin,erf_av,pehe_con
smoothness,confounding,method,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
low,low,DAPSM,5.0±4.0,,,
low,low,GMM Estimation of the Spatial Error,0.0±0.0,2.0±1.0,28.0±36.0,30.0±37.0
low,low,Graph Convolutional Neural Network,1.0±1.0,87.0±5.0,30.0±41.0,103.0±23.0
low,low,Ridge Regression,0.0±0.0,44.0±18.0,32.0±41.0,76.0±38.0
low,low,Spatial,0.0±0.0,2.0±1.0,32.0±41.0,34.0±42.0
low,low,Spatial Plus,0.0±0.0,2.0±1.0,32.0±41.0,34.0±42.0
low,low,Spatial Two Stage Least Squares,2.0±9.0,3.0±4.0,25.0±35.0,26.0±36.0
low,high,DAPSM,3.0±4.0,,,
low,high,GMM Estimation of the Spatial Error,0.0±0.0,2.0±1.0,9.0±22.0,12.0±22.0
low,high,Graph Convolutional Neural Network,1.0±1.0,90.0±6.0,53.0±58.0,119.0±47.0


In [184]:
# Export the leaderboard as a LaTeX table.
# The cells contain the non-ASCII "±" character, so the encoding is fixed to
# UTF-8 instead of depending on the platform's default text encoding.
# The former formatters={"name": ...} argument was dropped: it is keyed on a
# column called "name", which is not among the table's columns, so it never
# applied to anything.
with open("leaderboards.txt", "w", encoding="utf-8") as f:
    f.write(fin.to_latex(index=True, float_format="{:.3f}".format))

  fin.to_latex(
