In [None]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import os
import glob

from utils import names, name_order, name_shapes, name_colors, name_linestyles

# Directory that holds the experiment result CSVs (relative to this notebook).
FOLDER_PATH = "../results"

# Default colour cycle applied to every matplotlib figure created below.
mpl.rcParams["axes.prop_cycle"] = mpl.cycler(
    color=[
        "#377eb8", "#ff7f00", "#4daf4a",
        "#f781bf", "#a65628", "#984ea3",
        "#999999", "#e41a1c", "#dede00",
    ]
)

import warnings  # To suppress some warnings

# Silence FutureWarning messages raised from seaborn modules only.
warnings.filterwarnings("ignore", category=FutureWarning, module="seaborn")

# List the five most recently modified CSV files in FOLDER_PATH (newest first).
csv_files = sorted(
    glob.glob(os.path.join(FOLDER_PATH, "*.csv")),
    key=os.path.getmtime,
    reverse=True,
)
recent_files = csv_files[:5]
print("Five most recent CSV files:")
print(*recent_files, sep="\n")

In [None]:
# Result files to analyse: file stems inside FOLDER_PATH (the ".csv" suffix is
# appended when loading). These are hard-coded example names.
FILE_NAMES = [
    "experiment_B1-20250115_100145",
    "experiment_B2-20250115_100144",
    "experiment_B2-20250116_153744",
    "experiment_B3-20250115_100244",
    "experiment_B4-20250115_100244",
    "experiment_B4-20250116_153844",
]

# Read each listed CSV into its own DataFrame.
dfs = [
    pd.read_csv(os.path.join(FOLDER_PATH, f"{file_name}.csv"))
    for file_name in FILE_NAMES
]

# Stack all runs into one DataFrame with a fresh integer index.
df = pd.concat(dfs, ignore_index=True)

# Replace raw method identifiers with their display names using the `names`
# lookup imported from utils; print the values before and after for inspection.
print("method found:\n", df["method"].unique())
df["method"] = df["method"].replace(names)
print("renamed:\n", df["method"].unique())

In [None]:
# Experiment B1 summary table: mean detection and its standard error for each
# method, broken down by confounding, transportability and DGP degree.
table = df.loc[df["experiment"] == "experiment_B1"]
print("Methods:", table["method"].unique())
table = table.drop(columns=["pval", "iterations"])

#table = table[table["method"].isin(selected_methods_table)]

# Aggregate per (confounding, degree, method, transportability) cell.
# The standard error is the sample standard deviation (ddof=1) over sqrt(n).
# The grouping columns are then renamed to the headers used in the final table.
group_keys = ["conf_strength", "degree", "method", "transportability_violation"]
table = (
    table.groupby(by=group_keys)
    .agg(
        detection_mean=("detection", "mean"),
        detection_se=("detection", lambda x: np.std(x, ddof=1) / np.sqrt(len(x))),
    )
    .reset_index()
    .rename(
        columns={
            "conf_strength": "Unmeasured confounder",
            "degree": "DGP",
            "method": "Method",
            "transportability_violation": "Transportability",
        }
    )
)

# Replace the numeric codes in each header column with readable labels.
value_labels = {
    "Unmeasured confounder": {
        0.0: "No unmeasured confounder",
        1.0: "Unmeasured confounder present",
    },
    "Transportability": {1.0: "Violated", 0.0: "Holds"},
    "DGP": {1: "Linear", 3: "Cubic"},
}
for column, mapping in value_labels.items():
    table[column] = table[column].replace(mapping)

# One row per method; columns form a hierarchy of
# (statistic, confounding, transportability, DGP).
table_detection = table.pivot_table(
    index="Method",
    columns=["Unmeasured confounder", "Transportability", "DGP"],
    values=["detection_mean", "detection_se"],
)

# Merge mean and SE into a single "mean (.SE)" string per cell, e.g. "0.85 (.03)";
# the leading zero of the SE is stripped.
formatted_table_detection = table_detection.apply(
    lambda row: row["detection_mean"].map("{:.2f}".format)
    + " ("
    + row["detection_se"].map("{:.2f}".format).str.lstrip("0")
    + ")",
    axis=1,
)

# Order the rows following `name_order` (imported from utils) rather than alphabetically.
formatted_table_detection.index = pd.Categorical(
    formatted_table_detection.index, categories=name_order, ordered=True
)
formatted_table_detection = formatted_table_detection.sort_index()

# Show the table in the notebook output.
print(formatted_table_detection)

# Export the table as LaTeX with centred multi-column headers.
formatted_table_detection.to_latex(
    "output/table_B1_detection.tex",
    multicolumn=True,
    multicolumn_format="c",
    escape=False,
)

In [13]:
def pre_process_df(dataframe, groupby_var):
    """Summarise ``detection`` per method and per value of ``groupby_var``.

    Rows are grouped by ``method`` and ``groupby_var``. For each group the mean
    and the number of non-null ``detection`` values are computed, and a standard
    error ``sqrt(mean * (1 - mean) / count)`` is derived from them (the formula
    for a proportion; this presumes ``detection`` is a 0/1 indicator).

    Only the columns ``method``, ``groupby_var`` and ``detection`` are read, so
    any other columns may be present or absent, and ``groupby_var`` may be any
    column of the frame (including e.g. ``"conf_strength"`` or ``"degree"``).

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Input rows; must contain ``method``, ``groupby_var`` and ``detection``.
        The input frame is not modified.
    groupby_var : str
        Name of the column to group by in addition to ``method``.

    Returns
    -------
    pandas.DataFrame
        One row per (``method``, ``groupby_var``) group with the columns
        ``method``, ``groupby_var``, ``detection_mean``, ``count`` and
        ``detection_se``.

    Raises
    ------
    KeyError
        If ``method``, ``groupby_var`` or ``detection`` is missing.
    """
    # The named aggregation reads only `detection`, so unrelated columns need
    # not be dropped first; dropping them used to break grouping by one of them.
    summary = (
        dataframe.groupby(by=["method", groupby_var])
        .agg(detection_mean=("detection", "mean"), count=("detection", "count"))
        .reset_index()
    )
    rate = summary["detection_mean"]
    summary["detection_se"] = np.sqrt(rate * (1 - rate) / summary["count"])
    return summary

In [None]:
# Four-panel figure at one fixed confounding strength:
#   panels 0-1: experiment B2, x = number of environments, one panel per sample size
#   panels 2-3: experiment B4, x = number of samples, one panel per environment count

# Methods shown in the figure (display names, i.e. after renaming).
selected_methods_plots_final = [
    "HGIC (Pearson)",
    #"HGIC (Pearson-Tippett)",
    "HGIC (KCIT-Tippett)",
    #"Ours without bootstrap (Linear)",
    "Ours (Linear)",
    "Transp. test (Pearson)",
    #"Transp. test (KCIT)",
]

df_B2 = df.loc[df["experiment"] == "experiment_B2"]
df_B4 = df.loc[df["experiment"] == "experiment_B4"]

print("Methods:", df_B2["method"].unique())

# Keep only the selected methods.
df_B2 = df_B2.loc[df_B2["method"].isin(selected_methods_plots_final)]
df_B4 = df_B4.loc[df_B4["method"].isin(selected_methods_plots_final)]

# Confounding strength shown in every panel of this figure.
conf_strength = 1.0
sns.set_context("talk", font_scale=1.1)

fig, ax = plt.subplots(1, 4, figsize=(20, 5))

# --- Panels 0-1: experiment B2, one panel per fixed sample size ---
methods_B2 = df_B2.method.unique()
for i, ns in enumerate([25, 100]):
    panel = ax[i]
    keep = (df_B2.n_samples == ns) & (df_B2.conf_strength == conf_strength)
    tmp1 = pre_process_df(df_B2[keep], "n_envs")

    sns.lineplot(
        data=tmp1,
        ax=panel,
        x="n_envs",
        y="detection_mean",
        hue="method",
        style="method",
        markers=name_shapes,
        palette=name_colors,
        dashes=name_linestyles,
        linewidth=3,
        # Legend/colour order follows name_order, restricted to methods present
        hue_order=[mname for mname in name_order if mname in methods_B2],
    )
    # Overlay standard-error bars in each method's line colour.
    for method, group in tmp1.groupby("method"):
        panel.errorbar(
            group["n_envs"],
            group["detection_mean"],
            yerr=group["detection_se"],
            fmt="none",  # bars only, no extra marker
            capsize=5,  # cap length
            color=name_colors[method],
        )

    # Only the left-most panel carries the shared y-axis label.
    panel.set_ylabel("Falsification rate" if i == 0 else "")
    panel.set_ylim(-0.02, 1.02)
    panel.set_xlabel("Number of environments")
    panel.legend_.remove()  # per-panel legend is replaced by one figure legend
    panel.set_title(f"Fixed N = {ns}")


# --- Panels 2-3: experiment B4, one panel per fixed number of environments ---
methods_B4 = df_B4.method.unique()
for i, ne in enumerate([10, 50]):
    panel = ax[i + 2]
    keep = (df_B4.n_envs == ne) & (df_B4.conf_strength == conf_strength)
    tmp2 = pre_process_df(df_B4[keep], "n_samples")

    sns.lineplot(
        data=tmp2,
        ax=panel,
        x="n_samples",
        y="detection_mean",
        hue="method",
        style="method",
        markers=name_shapes,
        palette=name_colors,
        dashes=name_linestyles,
        linewidth=3,
        hue_order=[mname for mname in name_order if mname in methods_B4],
    )
    for method, group in tmp2.groupby("method"):
        panel.errorbar(
            group["n_samples"],
            group["detection_mean"],
            yerr=group["detection_se"],
            fmt="none",  # bars only, no extra marker
            capsize=5,  # cap length
            color=name_colors[method],
        )

    panel.set_ylabel("")
    panel.set_ylim(-0.02, 1.02)
    panel.set_xlabel("Number of samples")
    panel.legend_.remove()
    panel.set_title(f"Fixed K = {ne}")


# Without unmeasured confounding, mark y = 0.05 on every panel.
if conf_strength == 0.0:
    for panel in ax:
        panel.axhline(y=0.05, color="black", linestyle="dotted", linewidth=1.5)

# One shared legend below the panels, built from the first panel's entries.
handles, labels = ax[0].get_legend_handles_labels()
fig.legend(
    handles,
    labels,
    loc="center",
    bbox_to_anchor=(0.5, -0.075),
    ncol=5,
    fontsize="small",
    title_fontsize="small",
)


plt.tight_layout()
output_path = (
    f"output/simulation_study_combined-{conf_strength}-vary_nsamples_nenvironments.pdf"
)
plt.savefig(output_path, bbox_inches="tight")

In [None]:
# Experiment B3: falsification rate against the number of observed covariates,
# one panel per confounding setting (no unmeasured confounder / confounder present).

df_B3 = df[df.experiment == "experiment_B3"]
print(df_B3.method.unique())

# Methods shown in the figure (display names, i.e. after renaming).
selected_methods_plots_final = [
    "HGIC (Pearson)",
    #"HGIC (Pearson-Tippett)",
    "HGIC (KCIT-Tippett)",
    "Ours (Linear)",
    "Transp. test (Pearson)",
    "Transp. test (KCIT)",
]

df_B3 = df_B3[df_B3["method"].isin(selected_methods_plots_final)]

sns.set_context(
    "talk", font_scale=1.1
)  # 'talk' is larger than default, scale up if needed

fig, ax = plt.subplots(1, 2, figsize=(10, 5))


# One panel per confounding strength: left = 0.0, right = 1.0
for i, c in enumerate([0.0, 1.0]):
    tmp3 = df_B3[(df_B3.conf_strength == c)]
    tmp3 = pre_process_df(tmp3, "n_observed_confounders")

    sns.lineplot(
        data=tmp3,
        ax=ax[i],
        x="n_observed_confounders",
        y="detection_mean",
        hue="method",
        style="method",
        markers=name_shapes,
        palette=name_colors,
        dashes=name_linestyles,
        # Legend/colour order follows name_order, restricted to methods present
        hue_order=[mname for mname in name_order if mname in df_B3.method.unique()],
    )
    # Overlay standard-error bars in each method's line colour.
    for method, group in tmp3.groupby("method"):
        ax[i].errorbar(
            group["n_observed_confounders"],
            group["detection_mean"],
            yerr=group["detection_se"],
            fmt="none",  # 'none' so that the error bars don't come with a marker
            capsize=5,  # Length of the error bar caps
            color=name_colors[method],  # Set the color to match the line
        )

    # Only the left panel carries the shared y-axis label.
    if i == 0:
        ax[i].set_ylabel("Falsification rate")
    else:
        ax[i].set_ylabel("")
    ax[i].set_ylim(-0.02, 1.02)
    ax[i].set_xlabel("Number of observed covariates")
    ax[i].legend_.remove()  # per-panel legend is replaced by one figure legend
    ax[i].set_title(
        "No unmeasured confounder" if c == 0.0 else "Unmeasured confounder present"
    )
    # ax[i].set_xticks([0,5,10,15])
    # Mark y = 0.05 (presumably the nominal test level) on the panel without
    # unmeasured confounding. The check uses this panel's own strength `c`, not
    # the `conf_strength` global set in another cell.
    if c == 0.0:
        ax[i].axhline(y=0.05, color="black", linestyle="dotted", linewidth=1.5)


# Add a common legend below the plots, using the first panel's entries
handles, labels = ax[0].get_legend_handles_labels()
fig.legend(
    handles,
    labels,
    loc="center",
    bbox_to_anchor=(0.5, -0.075),
    ncol=3,
    fontsize="small",
    title_fontsize="small",
)


plt.tight_layout()
output_path = "figures/simulation_study_combined-vary_n_confounders.pdf"
plt.savefig(output_path, bbox_inches="tight")

In [None]:
# Combined five-panel figure at one fixed confounding strength:
#   panels 0-1: experiment B2, x = number of environments K (one panel per sample size N)
#   panels 2-3: experiment B4, x = number of samples N (one panel per K)
#   panel  4  : experiment B3, x = number of covariates d

# Methods shown in the figure (display names, i.e. after renaming).
selected_methods_plots_final = [
    "HGIC (Pearson)",
    "HGIC (KCIT)",
    "Ours (Linear)",
    "Transp. test (Pearson)",
    # "Transp. test (KCIT)",
]
# Confounding strength shown in every panel of this figure.
conf_strength = 1.0

df_B2 = df[df.experiment == "experiment_B2"]
df_B3 = df[df.experiment == "experiment_B3"]
df_B4 = df[df.experiment == "experiment_B4"]

print("Methods:", df_B2["method"].unique())

# Keep only the selected methods in each experiment.
df_B2 = df_B2[df_B2["method"].isin(selected_methods_plots_final)]
df_B3 = df_B3[df_B3["method"].isin(selected_methods_plots_final)]
df_B4 = df_B4[df_B4["method"].isin(selected_methods_plots_final)]

# assert df_B2['method'].unique() == df_B3['method'].unique() == df_B4['method'].unique()


sns.set_context("talk", font_scale=1.1)
fig, ax = plt.subplots(1, 5, figsize=(25, 5))

# --- Panels 0-1: experiment B2, one panel per fixed sample size ---
for i, ns in enumerate([25, 100]):
    tmp1 = df_B2[(df_B2.n_samples == ns) & (df_B2.conf_strength == conf_strength)]
    # The panel title reports d, so it must be constant within the panel's data.
    assert len(tmp1["n_observed_confounders"].unique()) == 1
    n_observed_confounders_B2 = tmp1["n_observed_confounders"].unique()[0]
    tmp1 = pre_process_df(tmp1, "n_envs")

    sns.lineplot(
        data=tmp1,
        ax=ax[i],
        x="n_envs",
        y="detection_mean",
        hue="method",
        style="method",
        markers=name_shapes,
        palette=name_colors,
        dashes=name_linestyles,
        linewidth=3,
        # Legend/colour order follows name_order, restricted to methods present
        hue_order=[mname for mname in name_order if mname in df_B2.method.unique()],
    )
    # Overlay standard-error bars in each method's line colour.
    for method, group in tmp1.groupby("method"):
        ax[i].errorbar(
            group["n_envs"],
            group["detection_mean"],
            yerr=group["detection_se"],
            fmt="none",  # 'none' so that the error bars don't come with a marker
            capsize=5,  # Length of the error bar caps
            color=name_colors[method],  # Set the color to match the line
        )

    # Only the left-most panel carries the shared y-axis label.
    if i == 0:
        ax[i].set_ylabel("Falsification rate")
    else:
        ax[i].set_ylabel("")
    ax[i].set_ylim(-0.02, 1.02)
    ax[i].set_xlabel("Number of environments $K$")
    ax[i].legend_.remove()  # per-panel legend is replaced by one figure legend
    ax[i].set_title(f"Fixed $(N,d) = ({ns},{n_observed_confounders_B2})$")


# --- Panels 2-3: experiment B4, one panel per fixed number of environments ---
for i, ne in enumerate([10, 50]):
    tmp2 = df_B4[(df_B4.n_envs == ne) & (df_B4.conf_strength == conf_strength)]
    # The panel title reports d, so it must be constant within the panel's data.
    assert len(tmp2["n_observed_confounders"].unique()) == 1
    n_observed_confounders_B4 = tmp2["n_observed_confounders"].unique()[0]
    tmp2 = pre_process_df(tmp2, "n_samples")

    sns.lineplot(
        data=tmp2,
        ax=ax[i + 2],
        x="n_samples",
        y="detection_mean",
        hue="method",
        style="method",
        markers=name_shapes,
        palette=name_colors,
        dashes=name_linestyles,
        linewidth=3,
        hue_order=[mname for mname in name_order if mname in df_B4.method.unique()],
    )
    for method, group in tmp2.groupby("method"):
        ax[i + 2].errorbar(
            group["n_samples"],
            group["detection_mean"],
            yerr=group["detection_se"],
            fmt="none",  # 'none' so that the error bars don't come with a marker
            capsize=5,  # Length of the error bar caps
            color=name_colors[method],  # Set the color to match the line
        )

    ax[i + 2].set_ylabel("")
    ax[i + 2].set_ylim(-0.02, 1.02)
    ax[i + 2].set_xlabel("Number of samples $N$")
    ax[i + 2].legend_.remove()
    ax[i + 2].set_title(f"Fixed $(K,d) = ({ne},{n_observed_confounders_B4})$")


# --- Panel 4: experiment B3, varying the number of observed covariates ---
tmp3 = df_B3[(df_B3.conf_strength == conf_strength)]
# The panel title reports K and N, so both must be constant in this subset.
assert (len(tmp3["n_envs"].unique()) == 1) and (len(tmp3["n_samples"].unique()) == 1)
n_environments_B3 = tmp3["n_envs"].unique()[0]
n_samples_B3 = tmp3["n_samples"].unique()[0]
tmp3 = pre_process_df(tmp3, "n_observed_confounders")

sns.lineplot(
    data=tmp3,
    ax=ax[4],
    x="n_observed_confounders",
    y="detection_mean",
    hue="method",
    style="method",
    markers=name_shapes,
    palette=name_colors,
    dashes=name_linestyles,
    linewidth=3,
    hue_order=[mname for mname in name_order if mname in df_B3.method.unique()],
)
for method, group in tmp3.groupby("method"):
    ax[4].errorbar(
        group["n_observed_confounders"],
        group["detection_mean"],
        yerr=group["detection_se"],
        fmt="none",  # 'none' so that the error bars don't come with a marker
        capsize=5,  # Length of the error bar caps
        color=name_colors[method],  # Set the color to match the line
    )

ax[4].set_ylabel("")
ax[4].set_ylim(-0.02, 1.02)
ax[4].set_xlabel("Number of covariates $d$")
ax[4].legend_.remove()

ax[4].set_title(f"Fixed $(K,N) = ({n_environments_B3},{n_samples_B3})$")

# Without unmeasured confounding, mark y = 0.05 on every panel. Iterating over
# `ax` covers all five subplots, including the B3 panel.
if conf_strength == 0.0:
    for panel in ax:
        panel.axhline(y=0.05, color="black", linestyle="dotted", linewidth=1.5)

# Add a common legend below the plots, using the first panel's entries
handles, labels = ax[0].get_legend_handles_labels()
fig.legend(
    handles,
    labels,
    loc="center",
    bbox_to_anchor=(0.5, -0.075),
    ncol=5,
    fontsize="small",
    title_fontsize="small",
)


plt.tight_layout()
output_path = f"figures/simulation_study_combined-{conf_strength}.pdf"
plt.savefig(output_path, bbox_inches="tight")