In [None]:
import glob
import os
import warnings

import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
import pandas as pd
import seaborn as sns

#from utils import names, name_order, name_shapes, name_colors, name_linestyles

# Directory that the result CSVs are globbed and read from.
FOLDER_PATH = "../results"

# Default colour cycle for matplotlib axes, in the order the colours are used.
mpl.rcParams["axes.prop_cycle"] = mpl.cycler(
    "color",
    [
        "#377eb8", "#ff7f00", "#4daf4a",
        "#f781bf", "#a65628", "#984ea3",
        "#999999", "#e41a1c", "#dede00",
    ],
)

import warnings  # To suppress some warnings

# Suppress the specific FutureWarning
warnings.filterwarnings("ignore", category=FutureWarning, module="seaborn")

# List every CSV in FOLDER_PATH, newest modification time first, and show
# the top five.
csv_files = sorted(
    glob.glob(os.path.join(FOLDER_PATH, "*.csv")),
    key=os.path.getmtime,
    reverse=True,
)
recent_files = csv_files[:5]
print("Five most recent CSV files:", "\n".join(recent_files), sep="\n")

In [None]:
# Run identifiers embedded in the names of the result files to analyse.
FILE_NAMES = [
    "20250114_104951",  # example
]

# Read one comparison CSV per identifier, then stack them into one frame.
csv_paths = [
    os.path.join(FOLDER_PATH, f"comparison_hgic-{file_name}.csv")
    for file_name in FILE_NAMES
]
dfs = [pd.read_csv(csv_path) for csv_path in csv_paths]
df = pd.concat(dfs, ignore_index=True)

# Show which method labels occur in the loaded results.
print('method found:\n', df["method"].unique())


In [None]:
# Build the detection summary table: "mean (.SE)" of `detection` for each
# method (rows) and confounding setting (columns), printed and saved as LaTeX.

# Drop the columns that this table does not use.
table = df.drop(columns=["pval", "iterations"])

#table = table[table["method"].isin(selected_methods_table)]

# Mean and standard error of `detection` per (conf_strength, method) group.
# Series.sem excludes missing values from both the standard deviation and the
# sample count, so the two stay consistent if some detections are NaN.
table = (
    table.groupby(by=["conf_strength", "method"])
    .agg(
        detection_mean=("detection", "mean"),
        detection_se=("detection", lambda x: x.sem(ddof=1)),
    )
    .reset_index()
)

# Use presentation names for the grouping columns.
table = table.rename(
    columns={
        "conf_strength": "Unmeasured confounder",
        "method": "Method",
    }
)
# Replace the 0.0 / 1.0 confounder codes with readable column headings;
# any other value is left as it is.
table["Unmeasured confounder"] = table["Unmeasured confounder"].replace(
    {0.0: "No unmeasured confounder", 1.0: "Unmeasured confounder present"}
)

# One row per method; columns are (statistic, confounder setting) pairs.
table_detection = table.pivot_table(
    index="Method",
    columns=["Unmeasured confounder"],
    values=["detection_mean", "detection_se"],
)


def _format_mean_se(row: pd.Series) -> pd.Series:
    """Render one pivoted row as "mean (.se)" strings, one per confounder setting."""
    mean_text = row["detection_mean"].map("{:.2f}".format)
    # Strip the leading zero of the standard error, e.g. "0.03" -> ".03".
    se_text = row["detection_se"].map("{:.2f}".format).str.lstrip("0")
    return mean_text + " (" + se_text + ")"


formatted_table_detection = table_detection.apply(_format_mean_se, axis=1)

# Sort based on method name
#formatted_table_detection.index = pd.Categorical(
#    formatted_table_detection.index, categories=name_order, ordered=True
#)
formatted_table_detection = formatted_table_detection.sort_index()

# Print the formatted table
print(formatted_table_detection)

# Save the formatted table as a LaTeX file. to_latex does not create missing
# directories, so make sure the target directory exists first.
latex_path = "output/table_hgic_comparison.tex"
os.makedirs(os.path.dirname(latex_path), exist_ok=True)
formatted_table_detection.to_latex(
    latex_path,
    multicolumn=True,
    multicolumn_format="c",
    escape=False,  # cell text and labels are written verbatim, not LaTeX-escaped
)