In [37]:
import jsonlines
import os
import pandas as pd
import numpy as np

In [38]:
%load_ext lab_black

The lab_black extension is already loaded. To reload it, use:
  %reload_ext lab_black


In [39]:
def process_method(filename):
    options = {
        "results_ML_Lag.jsonl": "Maximum Likelihood Estimation of the Spatial Lag",
        "results_spatial_plus_final.jsonl": "Spatial Plus",
        "results_spatial_final.jsonl": "Spatial",
        "results_GCN.jsonl": "Graph Convolutional Neural Network",
        "results_GM_Lag.jsonl": "Spatial Two Stage Least Squares",
        "results_GM_Error.jsonl": "General Methods of Moments (GMM) Estimation of the Spatial Error",
        "results_ML_Error.jsonl": "Maximum Likelihood Estimation of the Spatial Error",
        "results_OLS.jsonl": "Ordinary Least Squares (OLS)",
        "results_Ridge.jsonl": "Ridge Regression",
        "results_dapsm_final.jsonl": "DAPSM",
    }
    return options.get(filename)

In [80]:
def get_databin(df):
    datbin_ate_mean = (
        df[df["treatment_type"] == "binary"]
        .groupby(["smoothness_binned", "confounding_binned"])["ate_se"]
        .mean()
    )
    datbin_ate_std = (
        df[df["treatment_type"] == "binary"]
        .groupby(["smoothness_binned", "confounding_binned"])["ate_se"]
        .std()
    )
    datbin_ate = (
        datbin_ate_mean.round().astype(str) + "±" + datbin_ate_std.round().astype(str)
    )
    datbin_pehe_mean = (
        df[df["treatment_type"] == "binary"]
        .groupby(["smoothness_binned", "confounding_binned"])["pehe_av"]
        .mean()
    )
    datbin_pehe_std = (
        df[df["treatment_type"] == "binary"]
        .groupby(["smoothness_binned", "confounding_binned"])["pehe_av"]
        .std()
    )
    datbin_pehe = (
        datbin_pehe_mean.round().astype(str) + "±" + datbin_pehe_std.round().astype(str)
    )
    datbin = pd.concat([datbin_ate, datbin_pehe], axis=1)
    return datbin

In [81]:
def get_datcon(df):
    datcon_erf_mean = (
        df[df["treatment_type"] != "binary"]
        .groupby(["smoothness_binned", "confounding_binned"])["erf_av"]
        .mean()
    )
    datcon_erf_std = (
        df[df["treatment_type"] != "binary"]
        .groupby(["smoothness_binned", "confounding_binned"])["erf_av"]
        .std()
    )
    datcon_erf = (
        datcon_erf_mean.round().astype(str) + "±" + datcon_erf_std.round().astype(str)
    )
    datcon_pehe_mean = (
        df[df["treatment_type"] != "binary"]
        .groupby(["smoothness_binned", "confounding_binned"])["pehe_av"]
        .mean()
    )
    datcon_pehe_std = (
        df[df["treatment_type"] != "binary"]
        .groupby(["smoothness_binned", "confounding_binned"])["pehe_av"]
        .std()
    )
    datcon_pehe = (
        datcon_pehe_mean.round().astype(str) + "±" + datcon_pehe_std.round().astype(str)
    )
    datcon = pd.concat([datcon_erf, datcon_pehe], axis=1)
    return datcon

In [40]:
directory_path = "results"

files_list = []
# Iterate over all the files in the directory
for filename in os.listdir(directory_path):
    # If the filename starts with "results_"
    if filename.startswith("results_"):
        # Construct the full file path
        files_list.append(filename)

In [41]:
def mean_of_list(value):
    if isinstance(value, list):
        return np.mean(value)
    else:
        return value

In [42]:
files_list

['results_spatial_plus_final.jsonl',
 'results_GM_Lag.jsonl',
 'results_GM_Error.jsonl',
 'results_GCN.jsonl',
 'results_dapsm_final.jsonl',
 'results_Ridge.jsonl',
 'results_spatial_final.jsonl']

In [43]:
print(f"Filename: {filename}")
with jsonlines.open(directory_path + "/" + "results_GCN.jsonl") as reader:
    data = [obj for obj in reader]
df = pd.DataFrame(data)
df["treatment_type"] = np.where(
    df["envname"].str.contains("disc"), "binary", "continuous"
)

Filename: _results_ML_Error.jsonl


In [47]:
df["smoothness_binned"] = pd.qcut(df["smoothness"], q=2, labels=["low", "high"])
df["confounding_binned"] = pd.qcut(df["confounding"], q=2, labels=["low", "high"])

In [75]:
df = pd.concat([datbin, datcon], axis=1)

In [77]:
df["method"] = "met"

In [78]:
df.set_index("method", append=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,ate_se,pehe_av,erf_av,pehe_av
smoothness_binned,confounding_binned,method,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
low,low,met,0.0±0.0,1.0±0.0,0.0±0.0,1.0±0.0
low,high,met,0.0±0.0,1.0±0.0,0.0±1.0,1.0±0.0
high,low,met,0.0±0.0,1.0±0.0,0.0±1.0,1.0±0.0
high,high,met,0.0±0.0,1.0±0.0,0.0±0.0,1.0±0.0


In [34]:
tlb_list = []
bin_list = []
con_list = []

for filename in files_list[1:]:
    print(f"Filename: {filename}")
    with jsonlines.open(directory_path + "/" + filename) as reader:
        data = [obj for obj in reader]
    df = pd.DataFrame(data)
    df["treatment_type"] = np.where(
        df["envname"].str.contains("disc"), "binary", "continuous"
    )
    df["smoothness_binned"] = pd.qcut(df["smoothness"], q=2, labels=["low", "high"])
    df["confounding_binned"] = pd.qcut(df["confounding"], q=2, labels=["low", "high"])

    # df['erf_error'] = df['erf_error'].apply(mean_of_list)

    df["ate_se"] = df["ate_se"].apply(lambda x: x[0] if isinstance(x, list) else x)

    method = process_method(filename)
    if filename == "results_dapsm_final.jsonl":
        # binary leaderboard
        blb = (
            df[df["treatment_type"] == "binary"]
            .groupby(["smoothness_binned", "confounding_binned"])["ate_se"]
            .agg(["mean", "std"])
        )
        blb["method"] = method
        bin_list.append(blb)

    else:
        # total leaderboard
        tlb = df.groupby(["smoothness_binned", "confounding_binned"])["pehe_av"].agg(
            ["mean", "std"]
        )
        tlb["method"] = method

        tlb_list.append(tlb)
        # binary leaderboard
        blb = (
            df[df["treatment_type"] == "binary"]
            .groupby(["smoothness_binned", "confounding_binned"])["ate_se"]
            .agg(["mean", "std"])
        )
        blb["method"] = method
        bin_list.append(blb)

        # cont leaderboard
        clb = (
            df[df["treatment_type"] != "binary"]
            .groupby(["smoothness_binned", "confounding_binned"])["erf_av"]
            .agg(["mean", "std"])
        )
        clb["method"] = method
        con_list.append(clb)

Filename: results_GM_Lag.jsonl
Filename: results_GM_Error.jsonl
Filename: results_GCN.jsonl
Filename: results_dapsm_final.jsonl
Filename: results_Ridge.jsonl
Filename: results_spatial_final.jsonl


In [22]:
with open("leaderboards.txt", "w") as f:
    pass


def write_results(leaderboard, name):
    leaderboard = leaderboard.reset_index().rename(
        columns={"smoothness_binned": "smoothness", "confounding_binned": "confounding"}
    )
    df_sorted = leaderboard.sort_values(
        ["smoothness", "confounding", "mean", "std"],
        ascending=[False, False, True, True],
    )
    df_sorted["mean"] = df_sorted["mean"].round(2)
    df_sorted["std"] = df_sorted["std"].round(2)
    df_sorted.set_index(["smoothness", "confounding"], inplace=True)
    with open("leaderboards.txt", "a") as f:
        f.write(
            df_sorted.to_latex(
                index=True,
                formatters={"name": str.upper},
                float_format="{:.3f}".format,
                caption=name,
            )
        )

In [23]:
tlb = pd.concat(tlb_list)
tbin = pd.concat(bin_list)
cont = pd.concat(con_list)

In [None]:
df_mean.round().astype(str)++

In [31]:
tlb.set_index(tlb["method"], append=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,mean,std,method
smoothness_binned,confounding_binned,method,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
low,low,Spatial Two Stage Least Squares,0.193896,0.310359,Spatial Two Stage Least Squares
low,high,Spatial Two Stage Least Squares,0.1935,0.318968,Spatial Two Stage Least Squares
high,low,Spatial Two Stage Least Squares,0.152912,0.23013,Spatial Two Stage Least Squares
high,high,Spatial Two Stage Least Squares,0.94735,4.754943,Spatial Two Stage Least Squares
low,low,General Methods of Moments (GMM) Estimation of the Spatial Error,0.190598,0.325091,General Methods of Moments (GMM) Estimation of...
low,high,General Methods of Moments (GMM) Estimation of the Spatial Error,0.163331,0.287262,General Methods of Moments (GMM) Estimation of...
high,low,General Methods of Moments (GMM) Estimation of the Spatial Error,0.154917,0.259438,General Methods of Moments (GMM) Estimation of...
high,high,General Methods of Moments (GMM) Estimation of the Spatial Error,0.267908,0.37019,General Methods of Moments (GMM) Estimation of...
low,low,Graph Convolutional Neural Network,0.941348,0.174629,Graph Convolutional Neural Network
low,high,Graph Convolutional Neural Network,1.063746,0.373687,Graph Convolutional Neural Network


In [15]:
write_results(), "Both Binary and Continuous Treatment Combined")
write_results(pd.concat(bin_list), "Binary Treatment")
write_results(pd.concat(con_list), "Continuous Treatment")

  df_sorted.to_latex(
  df_sorted.to_latex(
  df_sorted.to_latex(
