## Evaluation Table Simulation

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy
from pathlib import Path
from itertools import product

In [None]:
# Parent of the current working directory. It is not referenced by the
# visible cells; the result files below are resolved against the CWD itself.
project_root = Path.cwd().parents[0]

# One pickled result frame per simulation scenario.
# NOTE: unpickling can execute arbitrary code - only load trusted files.
df_1, df_2, df_3, df_4 = map(
    pd.read_pickle,
    (
        "results/results_high_dimensional.pkl",
        "results/results_overlap.pkl",
        "results/results_prop_misspecification.pkl",
        "results/results_unbalanced.pkl",
    ),
)

In [3]:
# Constant columns that a given result frame does not carry itself. The
# evaluation loop further down uses n_obs/dim_x/R2_d/overlap/share_treated as
# id and grouping columns, so every frame needs all of them.
_placeholder_columns = (
    (df_1, {"overlap": 0, "share_treated": 0}),
    (df_2, {"R2_d": 0, "dim_x": 3, "share_treated": 0}),
    (df_3, {"R2_d": 0, "dim_x": 4, "overlap": 0, "share_treated": 0}),
    (df_4, {"R2_d": 0, "dim_x": 20, "overlap": 0}),
)

for _frame, _constants in _placeholder_columns:
    for _column, _value in _constants.items():
        _frame[_column] = _value

In [4]:
# Defaults shared by the simulation settings; each entry below overrides
# only what differs.
_common_setting = {
    "n_obs": 2000,
    "clipping_threshold": 1e-12,
    "clipping_threshold_2": 0.01,
    "R2_d": 0,
    "overlap": 0,
    "share_treated": 0,
}

# One entry per scenario. The evaluation loop filters the aggregated results
# on n_obs, dim_x, R2_d, overlap, share_treated and the clipping thresholds,
# and passes "theta" to evaluate_estimation as the reference value.
settings = [
    {**_common_setting, "name": "Setting 1", "df": df_1, "dim_x": 200, "R2_d": 0.5, "theta": 0},
    {**_common_setting, "name": "Setting 2", "df": df_2, "dim_x": 3, "overlap": 0.5, "theta": -1},
    {**_common_setting, "name": "Setting 3", "df": df_3, "dim_x": 4, "theta": 1.6257},
    {**_common_setting, "name": "Setting 4", "df": df_4, "n_obs": 4000, "dim_x": 20,
     "share_treated": 0.1, "theta": 1},
]

# Outcome learner the tables are restricted to.
learner_g = 'LGBM'

# Raw method identifiers -> table labels (disabled algorithms kept for reference).
method_mapping = {
    "alg-1-uncalibrated": "Alg-1-uncalib",
    #"alg-2-nested-cross-fitting-calib": "Alg-2-nested-cf",
    #"alg-3-cross-fitted-calib": "Alg-3-cf",
    #"alg-4-single-split-calib": "Alg-4-single-split",
    "alg-5-full-sample-calib": "Alg-5-full-sample"
}

# Raw calibration identifiers -> table labels.
calib_mapping = {
    "uncalibrated": "Uncalib",
    "isotonic": "Iso",
    "platt": "Platt",
    "ivap": "IVAP"
}

# Propensity learners to tabulate (a list, despite the name).
learner_dict_m = ["Logit", "RF", "LGBM"]

In [42]:
def evaluate_estimation(ate: np.ndarray, theta: float,
                        ci_length: np.ndarray, cover: np.ndarray) -> dict:
    """Summarise repeated ATE estimates against a reference value.

    Parameters
    ----------
    ate : array-like
        Point estimates, one per repetition.
    theta : float
        Reference value the estimates are compared with.
    ci_length : array-like
        Confidence-interval lengths, one per repetition.
    cover : array-like
        Coverage indicators, one per repetition.

    Returns
    -------
    dict
        Keys ``'RMSE'``, ``'Std. dev.'``, ``'MAE'``, ``'Cover'`` and
        ``'CI_length'``. NaN entries are ignored in every statistic;
        ``'Std. dev.'`` uses NumPy's default ``ddof=0``.
    """
    # Work in float and subtract the scalar directly. np.full_like(ate, theta)
    # would inherit ate's dtype and silently truncate a fractional theta
    # whenever the estimates arrive as integers.
    estimates = np.asarray(ate, dtype=float)
    bias = estimates - theta
    return {
        'RMSE': np.sqrt(np.nanmean(bias ** 2)),
        'Std. dev.': np.nanstd(estimates),
        'MAE': np.nanmean(np.abs(bias)),
        'Cover': np.nanmean(np.asarray(cover, dtype=float)),
        'CI_length': np.nanmean(np.asarray(ci_length, dtype=float)),
    }

In [79]:
# Build one summary table per estimation procedure.
#
# For every setting and procedure the raw repetitions are aggregated with
# evaluate_estimation, restricted to the rows matching the setting, and
# pivoted so that each row is a method and each column is "<metric>-<learner_m>".
procedures = ["IPW", "IRM", "PLR", "Match", "TMLE"]

# Per-procedure tables are collected here and concatenated once at the end
# instead of growing a DataFrame inside the loop.
tables_by_procedure = {procedure: [] for procedure in procedures}

calib_methods = ["uncalibrated", "isotonic", "platt", "ivap"]
methods = [
    "alg-1-uncalibrated",
    #"alg-2-nested-cross-fitting-calib",
    #"alg-3-cross-fitted-calib",
    #"alg-4-single-split-calib",
    "alg-5-full-sample-calib"
]

# Method labels that are reported at clipping_threshold_2 instead of
# clipping_threshold.
methods_with_threshold_2 = ['Alg-2-nested-cf-Iso', 'Alg-3-cf-IVAP', 'Alg-4-single-split-Iso']

metric_cols = ['RMSE', 'Std. dev.', 'MAE', 'Cover', 'CI_length']
grouping_cols = [
    'n_obs', 'dim_x', 'learner_g', 'learner_m', 'R2_d',
    'clipping_threshold', 'procedure', 'method', 'calib_method',
    'overlap', 'share_treated']

for setting in settings:
    # Bound up front so the progress message below is always valid, even when
    # a procedure produces no rows for this setting.
    setting_name = setting["name"]
    n_obs = setting["n_obs"]
    dim_x = setting["dim_x"]
    clipping_threshold = setting["clipping_threshold"]
    clipping_threshold_2 = setting["clipping_threshold_2"]
    R2_d = setting["R2_d"]
    overlap = setting["overlap"]
    share_treated = setting["share_treated"]
    theta = setting["theta"]

    # Harmonise column names: estimates are named after the procedure and the
    # TMLE interval columns follow the lower-case pattern of the others.
    df = setting["df"].rename(columns={
        "irm_coefs": "IRM",
        "ipw_coefs": "IPW",
        "plr_coefs": "PLR",
        "match_coefs": "Match",
        "TMLE_coefs": "TMLE",
        "TMLE_cover": "tmle_cover",
        "TMLE_ci_length": "tmle_ci_length",
    })
    df = df[df["calib_method"].isin(calib_methods) & df["method"].isin(methods)]

    for value_var in procedures:
        ci_col = f"{value_var.lower()}_ci_length"
        cover_col = f"{value_var.lower()}_cover"
        grouping_columns = [
            "n_obs", "dim_x", "learner_g", "learner_m", "method", "calib_method",
            "clipping_threshold", "R2_d", "rmses", "K", "overlap", "share_treated",
            ci_col,
            cover_col
        ]

        # Long format: one row per repetition with this procedure's estimate
        # and its interval length / coverage under procedure-independent names.
        temp_df = df.melt(
            id_vars=grouping_columns,
            value_vars=[value_var],
            var_name="procedure",
            value_name="estimate"
        ).rename(columns={ci_col: "ci_length", cover_col: "cover"})

        # Aggregate the repetitions of each configuration into the metrics.
        df_eval = temp_df.groupby(grouping_cols)[['estimate', 'ci_length', 'cover']].apply(
            lambda g: pd.Series(
                evaluate_estimation(
                    g['estimate'].values,
                    theta=theta,
                    ci_length=g['ci_length'].values,
                    cover=g['cover'].values
                )
            )
        ).reset_index()

        # One row per (configuration, metric).
        df_eval = pd.melt(
            df_eval,
            id_vars=grouping_cols,
            value_vars=metric_cols,
            var_name='Metrics',
            value_name='estimate'
        )

        row_data = []

        if not df_eval.empty:
            # Labelling and the setting filter do not depend on learner_m, so
            # they are computed once rather than once per learner.
            labelled = df_eval.copy()
            labelled["method"] = labelled["method"].replace(method_mapping)
            labelled["calib_method"] = labelled["calib_method"].replace(calib_mapping)
            labelled.insert(1, "Method", labelled[['method', 'calib_method']].agg('-'.join, axis=1))
            labelled["Method"] = labelled["Method"].replace({"Alg-1-uncalib-Uncalib": "Alg-1-Uncalib"})

            in_setting = (
                (labelled['n_obs'] == n_obs) &
                (labelled['dim_x'] == dim_x) &
                (labelled['learner_g'] == learner_g) &
                (labelled['R2_d'] == R2_d) &
                (labelled['overlap'] == overlap) &
                (labelled['share_treated'] == share_treated)
            )
            uses_threshold_2 = labelled['Method'].isin(methods_with_threshold_2)
            at_threshold = labelled['clipping_threshold'] == clipping_threshold
            at_threshold_2 = labelled['clipping_threshold'] == clipping_threshold_2

            # Regular rows: each method at the threshold it is reported with.
            keep_main = in_setting & (
                (uses_threshold_2 & at_threshold_2) | (~uses_threshold_2 & at_threshold)
            )
            # Extra rows: the uncalibrated baseline at the second threshold,
            # reported under its own label "Alg-1-Clipped".
            keep_clipped = in_setting & at_threshold_2 & (labelled['Method'] == "Alg-1-Uncalib")

            for learner_m in learner_dict_m:
                is_learner = labelled['learner_m'] == learner_m

                df_eval_new = labelled[is_learner & keep_main].copy()
                df_alg1_clipped = labelled[is_learner & keep_clipped].copy()
                df_alg1_clipped.loc[:, 'Method'] = "Alg-1-Clipped"

                df_eval_new = pd.concat([df_eval_new, df_alg1_clipped], ignore_index=True)

                # Tag every metric with the propensity learner: "<metric>-<learner_m>".
                df_eval_new['Metrics'] = df_eval_new['Metrics'].str.cat([learner_m]*len(df_eval_new), sep='-')

                row_data.append(df_eval_new[['Method', 'Metrics', 'estimate']])

        # Combine all learners for the current setting and procedure.
        if row_data:
            combined_results = pd.concat(row_data, ignore_index=True)
            # NOTE: pivot_table averages duplicate cells and drops metric
            # columns that are entirely NaN.
            combined_results = combined_results.pivot_table(values='estimate', index=['Method'], columns='Metrics').reset_index()
            combined_results['Setting'] = setting_name
            combined_results['Procedure'] = value_var

            tables_by_procedure[value_var].append(combined_results)
        print(f"{setting_name}: Processed Method: {value_var}")

# Final tables; a procedure without any rows keeps an empty DataFrame.
results_by_procedure = {
    procedure: pd.concat(tables, ignore_index=True) if tables else pd.DataFrame()
    for procedure, tables in tables_by_procedure.items()
}

Setting 1: Processed Method: IPW
Setting 1: Processed Method: IRM
Setting 1: Processed Method: PLR


  'CI_length': np.nanmean(ci_length),


Setting 1: Processed Method: Match
Setting 1: Processed Method: TMLE
Setting 2: Processed Method: IPW
Setting 2: Processed Method: IRM
Setting 2: Processed Method: PLR
Setting 2: Processed Method: Match
Setting 2: Processed Method: TMLE
Setting 3: Processed Method: IPW
Setting 3: Processed Method: IRM
Setting 3: Processed Method: PLR
Setting 3: Processed Method: Match
Setting 3: Processed Method: TMLE
Setting 4: Processed Method: IPW
Setting 4: Processed Method: IRM
Setting 4: Processed Method: PLR
Setting 4: Processed Method: Match
Setting 4: Processed Method: TMLE


In [84]:
def reorder_columns(df, learners=('Logit', 'LGBM'),
                    metric_order=('MAE-', 'RMSE-', 'Std. dev.-', 'CI_length-', 'Cover-')):
    """Return ``df`` with its "<metric>-<learner>" columns moved to the front.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose columns are to be reordered.
    learners : sequence of str, optional
        Learner suffixes, in the order their column groups should appear.
    metric_order : sequence of str, optional
        Metric prefixes (including the trailing "-"), in the order they
        should appear within each learner group.

    Returns
    -------
    pandas.DataFrame
        The same columns as ``df``: first every existing ``metric + learner``
        column, grouped by learner, then all remaining columns in their
        original order. Combinations that do not exist in ``df`` are skipped.
    """
    present = set(df.columns)
    new_order = [
        metric + learner
        for learner in learners
        for metric in metric_order
        if metric + learner in present
    ]

    # Everything that is not a selected metric column keeps its relative order.
    remaining_cols = [col for col in df.columns if col not in new_order]
    new_order.extend(remaining_cols)

    return df[new_order]

# Bring every procedure's table into the common column order.
for _procedure in ("IPW", "IRM", "PLR", "Match", "TMLE"):
    results_by_procedure[_procedure] = reorder_columns(results_by_procedure[_procedure])

In [85]:
# Show the IPW summary table as the cell output.
results_by_procedure["IPW"] 

Metrics,MAE-Logit,RMSE-Logit,Std. dev.-Logit,CI_length-Logit,Cover-Logit,MAE-LGBM,RMSE-LGBM,Std. dev.-LGBM,CI_length-LGBM,Cover-LGBM,Method,Setting,Procedure
0,0.207637,0.241962,0.1278,0.208763,0.27,0.516462,0.535927,0.143127,0.2018,0.0,Alg-1-Clipped,Setting 1,IPW
1,0.325195,0.428792,0.281204,0.211143,0.18,0.690507,0.720668,0.20631,0.200543,0.0,Alg-1-Uncalib,Setting 1,IPW
2,0.208322,0.216719,0.059742,0.206874,0.03,0.136909,0.149634,0.061381,0.208131,0.32,Alg-5-full-sample-IVAP,Setting 1,IPW
3,0.20699,0.214971,0.058029,0.206621,0.05,0.132817,0.145385,0.061828,0.208176,0.31,Alg-5-full-sample-Iso,Setting 1,IPW
4,0.255163,0.261265,0.056133,0.205812,0.0,0.216495,0.223004,0.053485,0.205924,0.02,Alg-5-full-sample-Platt,Setting 1,IPW
5,0.084283,0.107039,0.105763,0.371206,0.96,0.237433,0.284267,0.208531,0.384436,0.44,Alg-1-Clipped,Setting 2,IPW
6,0.084283,0.107039,0.105763,0.371206,0.96,0.247973,0.295698,0.217804,0.384912,0.42,Alg-1-Uncalib,Setting 2,IPW
7,0.087057,0.108131,0.108008,0.371614,0.97,0.17806,0.207255,0.10996,0.369964,0.56,Alg-5-full-sample-IVAP,Setting 2,IPW
8,0.086483,0.107741,0.1077,0.371592,0.96,0.179165,0.207261,0.107936,0.369994,0.55,Alg-5-full-sample-Iso,Setting 2,IPW
9,0.084178,0.106475,0.105379,0.371195,0.96,0.216362,0.238683,0.102694,0.369196,0.33,Alg-5-full-sample-Platt,Setting 2,IPW


In [86]:
def custom_formatter(x):
    """Format numeric cells for the LaTeX table; pass everything else through."""
    if isinstance(x, (int, float)):
        if np.abs(x) >= 1e6:
            return "{:,.2e}".format(x)  # Very large values: scientific notation
        else:
            return "{:,.2f}".format(x)  # Two decimals with thousands separator
    return x  # Leave non-numeric values (labels) as is


# Short labels used in the printed tables.
Setting_mapping = {
    "Setting 1": "1",
    "Setting 2": "2",
    "Setting 3": "3",
    "Setting 4": "4"}
learner_titles = {"RF": "Random Forest"}
metric_titles = {"CI_length": "CI Length"}

# Print one LaTeX table per procedure.
for procedure, combined_results in results_by_procedure.items():
    if combined_results.empty:
        print(f"No results found for procedure {procedure}")
        continue

    # Fresh, sorted copy so the stored table is left untouched.
    combined_results = (
        combined_results
        .reset_index(drop=True)
        .sort_values(by=['Setting', 'Method'])
    )

    # Setting and Method first, then the metric columns; Procedure is dropped
    # because it is already the table caption.
    value_columns = [col for col in combined_results.columns if col not in ['Setting', 'Method', 'Metrics', 'index', 'Procedure']]
    combined_results = combined_results[['Setting', 'Method'] + value_columns].copy()
    combined_results["Setting"] = combined_results["Setting"].replace(Setting_mapping)

    # Derive the two-level header from the "<metric>-<learner>" column names.
    # A fixed, positional header mislabels the table whenever the column order
    # differs and raises whenever the number of columns differs (e.g. after
    # pivot_table dropped an all-NaN metric).
    top_level = ["Setting", "Method"]
    bottom_level = ["", ""]
    for col in value_columns:
        metric, sep, learner = str(col).rpartition("-")
        if sep:
            top_level.append(f"m = {learner_titles.get(learner, learner)}")
            bottom_level.append(metric_titles.get(metric, metric))
        else:
            # Not a metric column: keep its own name as the header.
            top_level.append(str(col))
            bottom_level.append("")
    cidx = pd.MultiIndex.from_arrays([top_level, bottom_level])

    # Re-wrap the values under the display header and format the numbers.
    styler = pd.DataFrame(combined_results.to_numpy(), columns=cidx, index=combined_results.index).style
    styler = styler.format(custom_formatter)

    filename = f'{procedure}'

    # Convert to LaTeX
    df_tex = styler.hide(axis="index").to_latex(
        caption=filename,
        convert_css=True,
        position_float="centering",
        multicol_align="|c|",
        hrules=True,
    )

    # Print the final LaTeX table
    print(df_tex)

\begin{table}
\centering
\caption{IPW}
\begin{tabular}{llllllllllll}
\toprule
Setting & Method & \multicolumn{5}{|c|}{m = Logit} & \multicolumn{5}{|c|}{m = LGBM} \\
 &  & MAE & RMSE & Std. dev. & CI Length & Cover & MAE & RMSE & Std. dev. & CI Length & Cover \\
\midrule
1 & Alg-1-Clipped & 0.21 & 0.24 & 0.13 & 0.21 & 0.27 & 0.52 & 0.54 & 0.14 & 0.20 & 0.00 \\
1 & Alg-1-Uncalib & 0.33 & 0.43 & 0.28 & 0.21 & 0.18 & 0.69 & 0.72 & 0.21 & 0.20 & 0.00 \\
1 & Alg-5-full-sample-IVAP & 0.21 & 0.22 & 0.06 & 0.21 & 0.03 & 0.14 & 0.15 & 0.06 & 0.21 & 0.32 \\
1 & Alg-5-full-sample-Iso & 0.21 & 0.21 & 0.06 & 0.21 & 0.05 & 0.13 & 0.15 & 0.06 & 0.21 & 0.31 \\
1 & Alg-5-full-sample-Platt & 0.26 & 0.26 & 0.06 & 0.21 & 0.00 & 0.22 & 0.22 & 0.05 & 0.21 & 0.02 \\
2 & Alg-1-Clipped & 0.08 & 0.11 & 0.11 & 0.37 & 0.96 & 0.24 & 0.28 & 0.21 & 0.38 & 0.44 \\
2 & Alg-1-Uncalib & 0.08 & 0.11 & 0.11 & 0.37 & 0.96 & 0.25 & 0.30 & 0.22 & 0.38 & 0.42 \\
2 & Alg-5-full-sample-IVAP & 0.09 & 0.11 & 0.11 & 0.37 & 0.97 & 0

# Evaluation Table Lalonde

In [95]:
# Parent of the current working directory. It is not referenced by the
# visible cells; the result files below are resolved against the CWD itself.
project_root = Path.cwd().parents[0]

# One pickled result frame per Lalonde sample.
# NOTE: unpickling can execute arbitrary code - only load trusted files.
df_1, df_2, df_3 = map(
    pd.read_pickle,
    (
        "results/df_lalonde_exp.pkl",
        "results/df_lalonde_nonexp.pkl",
        "results/df_lalonde_restricted.pkl",
    ),
)


In [96]:
# The evaluation loop further down uses R2_d/overlap/share_treated as id and
# grouping columns; add them as constant zero columns to every frame.
for _frame in (df_1, df_2, df_3):
    for _column in ("R2_d", "overlap", "share_treated"):
        _frame[_column] = 0



In [104]:
# Values shared by all Lalonde settings; only the frame and its sample size
# differ between entries.
_common_setting = {
    "dim_x": 9,
    "clipping_threshold": 1e-12,
    "clipping_threshold_2": 0.01,
    "R2_d": 0,
    "overlap": 0,
    "share_treated": 0,
    "theta": 1794.34,
}

# One entry per sample. The evaluation loop filters the aggregated results on
# n_obs, dim_x, R2_d, overlap, share_treated and the clipping thresholds, and
# passes "theta" to evaluate_estimation as the reference value.
settings = [
    {**_common_setting, "name": "Setting 1", "df": df_1, "n_obs": 445},
    {**_common_setting, "name": "Setting 2", "df": df_2, "n_obs": 2675},
    {**_common_setting, "name": "Setting 3", "df": df_3, "n_obs": 390},
]

# Outcome learner the tables are restricted to.
learner_g = 'LGBM'

# Raw method identifiers -> table labels (disabled algorithms kept for reference).
method_mapping = {
    "alg-1-uncalibrated": "Alg-1-uncalib",
    #"alg-2-nested-cross-fitting-calib": "Alg-2-nested-cf",
    #"alg-3-cross-fitted-calib": "Alg-3-cf",
    #"alg-4-single-split-calib": "Alg-4-single-split",
    "alg-5-full-sample-calib": "Alg-5-full-sample"
}

# Raw calibration identifiers -> table labels.
calib_mapping = {
    "uncalibrated": "Uncalib",
    "isotonic": "Iso",
    "platt": "Platt",
    "ivap": "IVAP"
}

# Propensity learners to tabulate (a list, despite the name).
learner_dict_m = ["Logit", "RF", "LGBM"]

In [110]:
def evaluate_estimation(ate: np.ndarray, theta: float,
                        ci_length: np.ndarray, cover: np.ndarray) -> dict:
    """Summarise ATE estimates against a reference value.

    Parameters
    ----------
    ate : array-like
        Point estimates, one per repetition.
    theta : float
        Reference value the estimates are compared with.
    ci_length : array-like
        Confidence-interval lengths, one per repetition.
    cover : array-like
        Coverage indicators, one per repetition.

    Returns
    -------
    dict
        Keys ``'ATE'`` (mean estimate), ``'RMSE'``, ``'Cover'`` and
        ``'CI_length'``. NaN entries are ignored in every statistic.
    """
    # Work in float and subtract the scalar directly. np.full_like(ate, theta)
    # would inherit ate's dtype and silently truncate a fractional theta
    # whenever the estimates arrive as integers.
    estimates = np.asarray(ate, dtype=float)
    bias = estimates - theta
    return {
        'ATE': np.nanmean(estimates),
        'RMSE': np.sqrt(np.nanmean(bias ** 2)),
        'Cover': np.nanmean(np.asarray(cover, dtype=float)),
        'CI_length': np.nanmean(np.asarray(ci_length, dtype=float)),
    }

In [111]:
# Build one summary table per estimation procedure.
#
# For every setting and procedure the raw results are aggregated with
# evaluate_estimation, restricted to the rows matching the setting, and
# pivoted so that each row is a method and each column is "<metric>-<learner_m>".
procedures = ["IPW", "IRM", "PLR", "Match", "TMLE"]

# Per-procedure tables are collected here and concatenated once at the end
# instead of growing a DataFrame inside the loop.
tables_by_procedure = {procedure: [] for procedure in procedures}

calib_methods = ["uncalibrated", "isotonic", "platt", "ivap"]
methods = [
    "alg-1-uncalibrated",
    #"alg-2-nested-cross-fitting-calib",
    #"alg-3-cross-fitted-calib",
    #"alg-4-single-split-calib",
    "alg-5-full-sample-calib"
]

# Method labels that are reported at clipping_threshold_2 instead of
# clipping_threshold.
methods_with_threshold_2 = ['Alg-2-nested-cf-Iso', 'Alg-3-cf-IVAP', 'Alg-4-single-split-Iso']

metric_cols = ['ATE','RMSE', 'Cover', 'CI_length']
grouping_cols = [
    'n_obs', 'dim_x', 'learner_g', 'learner_m', 'R2_d',
    'clipping_threshold', 'procedure', 'method', 'calib_method',
    'overlap', 'share_treated']

for setting in settings:
    # Bound up front so the progress message below is always valid, even when
    # a procedure produces no rows for this setting.
    setting_name = setting["name"]
    n_obs = setting["n_obs"]
    dim_x = setting["dim_x"]
    clipping_threshold = setting["clipping_threshold"]
    clipping_threshold_2 = setting["clipping_threshold_2"]
    R2_d = setting["R2_d"]
    overlap = setting["overlap"]
    share_treated = setting["share_treated"]
    theta = setting["theta"]

    # Harmonise column names: estimates are named after the procedure and the
    # TMLE interval columns follow the lower-case pattern of the others.
    df = setting["df"].rename(columns={
        "irm_coefs": "IRM",
        "ipw_coefs": "IPW",
        "plr_coefs": "PLR",
        "match_coefs": "Match",
        "TMLE_coefs": "TMLE",
        "TMLE_cover": "tmle_cover",
        "TMLE_ci_length": "tmle_ci_length",
    })
    df = df[df["calib_method"].isin(calib_methods) & df["method"].isin(methods)]

    for value_var in procedures:
        ci_col = f"{value_var.lower()}_ci_length"
        cover_col = f"{value_var.lower()}_cover"
        grouping_columns = [
            "n_obs", "dim_x", "learner_g", "learner_m", "method", "calib_method",
            "clipping_threshold", "R2_d", "rmses", "K", "overlap", "share_treated",
            ci_col,
            cover_col
        ]

        # Long format: one row per result with this procedure's estimate and
        # its interval length / coverage under procedure-independent names.
        temp_df = df.melt(
            id_vars=grouping_columns,
            value_vars=[value_var],
            var_name="procedure",
            value_name="estimate"
        ).rename(columns={ci_col: "ci_length", cover_col: "cover"})

        # Aggregate the rows of each configuration into the metrics.
        df_eval = temp_df.groupby(grouping_cols)[['estimate', 'ci_length', 'cover']].apply(
            lambda g: pd.Series(
                evaluate_estimation(
                    g['estimate'].values,
                    theta=theta,
                    ci_length=g['ci_length'].values,
                    cover=g['cover'].values
                )
            )
        ).reset_index()

        # One row per (configuration, metric).
        df_eval = pd.melt(
            df_eval,
            id_vars=grouping_cols,
            value_vars=metric_cols,
            var_name='Metrics',
            value_name='estimate'
        )

        row_data = []

        if not df_eval.empty:
            # Labelling and the setting filter do not depend on learner_m, so
            # they are computed once rather than once per learner.
            labelled = df_eval.copy()
            labelled["method"] = labelled["method"].replace(method_mapping)
            labelled["calib_method"] = labelled["calib_method"].replace(calib_mapping)
            labelled.insert(1, "Method", labelled[['method', 'calib_method']].agg('-'.join, axis=1))
            labelled["Method"] = labelled["Method"].replace({"Alg-1-uncalib-Uncalib": "Alg-1-Uncalib"})

            in_setting = (
                (labelled['n_obs'] == n_obs) &
                (labelled['dim_x'] == dim_x) &
                (labelled['learner_g'] == learner_g) &
                (labelled['R2_d'] == R2_d) &
                (labelled['overlap'] == overlap) &
                (labelled['share_treated'] == share_treated)
            )
            uses_threshold_2 = labelled['Method'].isin(methods_with_threshold_2)
            at_threshold = labelled['clipping_threshold'] == clipping_threshold
            at_threshold_2 = labelled['clipping_threshold'] == clipping_threshold_2

            # Regular rows: each method at the threshold it is reported with.
            keep_main = in_setting & (
                (uses_threshold_2 & at_threshold_2) | (~uses_threshold_2 & at_threshold)
            )
            # Extra rows: the uncalibrated baseline at the second threshold,
            # reported under its own label "Alg-1-Clipped".
            keep_clipped = in_setting & at_threshold_2 & (labelled['Method'] == "Alg-1-Uncalib")

            for learner_m in learner_dict_m:
                is_learner = labelled['learner_m'] == learner_m

                df_eval_new = labelled[is_learner & keep_main].copy()
                df_alg1_clipped = labelled[is_learner & keep_clipped].copy()
                df_alg1_clipped.loc[:, 'Method'] = "Alg-1-Clipped"

                df_eval_new = pd.concat([df_eval_new, df_alg1_clipped], ignore_index=True)

                # Tag every metric with the propensity learner: "<metric>-<learner_m>".
                df_eval_new['Metrics'] = df_eval_new['Metrics'].str.cat([learner_m]*len(df_eval_new), sep='-')

                row_data.append(df_eval_new[['Method', 'Metrics', 'estimate']])

        # Combine all learners for the current setting and procedure.
        if row_data:
            combined_results = pd.concat(row_data, ignore_index=True)
            # NOTE: pivot_table averages duplicate cells and drops metric
            # columns that are entirely NaN.
            combined_results = combined_results.pivot_table(values='estimate', index=['Method'], columns='Metrics').reset_index()
            combined_results['Setting'] = setting_name
            combined_results['Procedure'] = value_var

            tables_by_procedure[value_var].append(combined_results)
        print(f"{setting_name}: Processed Method: {value_var}")

# Final tables; a procedure without any rows keeps an empty DataFrame.
results_by_procedure = {
    procedure: pd.concat(tables, ignore_index=True) if tables else pd.DataFrame()
    for procedure, tables in tables_by_procedure.items()
}

Setting 1: Processed Method: IPW
Setting 1: Processed Method: IRM
Setting 1: Processed Method: PLR
Setting 1: Processed Method: Match
Setting 1: Processed Method: TMLE
Setting 2: Processed Method: IPW
Setting 2: Processed Method: IRM
Setting 2: Processed Method: PLR
Setting 2: Processed Method: Match
Setting 2: Processed Method: TMLE
Setting 3: Processed Method: IPW
Setting 3: Processed Method: IRM
Setting 3: Processed Method: PLR
Setting 3: Processed Method: Match
Setting 3: Processed Method: TMLE


In [112]:
def reorder_columns(df, learners=('Logit', 'LGBM'),
                    metric_order=('ATE-', 'RMSE-', 'CI_length-', 'Cover-')):
    """Return ``df`` with its "<metric>-<learner>" columns moved to the front.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose columns are to be reordered.
    learners : sequence of str, optional
        Learner suffixes, in the order their column groups should appear.
    metric_order : sequence of str, optional
        Metric prefixes (including the trailing "-"), in the order they
        should appear within each learner group.

    Returns
    -------
    pandas.DataFrame
        The same columns as ``df``: first every existing ``metric + learner``
        column, grouped by learner, then all remaining columns in their
        original order. Combinations that do not exist in ``df`` are skipped.
    """
    present = set(df.columns)
    new_order = [
        metric + learner
        for learner in learners
        for metric in metric_order
        if metric + learner in present
    ]

    # Everything that is not a selected metric column keeps its relative order.
    remaining_cols = [col for col in df.columns if col not in new_order]
    new_order.extend(remaining_cols)

    return df[new_order]

# Bring every procedure's table into the common column order.
for _procedure in ("IPW", "IRM", "PLR", "Match", "TMLE"):
    results_by_procedure[_procedure] = reorder_columns(results_by_procedure[_procedure])

In [113]:
# Show the TMLE summary table as the cell output.
results_by_procedure["TMLE"]

Metrics,ATE-Logit,RMSE-Logit,CI_length-Logit,Cover-Logit,ATE-LGBM,RMSE-LGBM,CI_length-LGBM,Cover-LGBM,Method,Setting,Procedure
0,1645.136635,149.203365,2322.756695,1.0,1830.081133,35.741133,3557.57,1.0,Alg-1-Clipped,Setting 1,TMLE
1,1645.136635,149.203365,2322.756695,1.0,1830.081133,35.741133,3557.57,1.0,Alg-1-Uncalib,Setting 1,TMLE
2,1620.260701,174.079299,2170.129713,1.0,1671.20377,123.13623,2110.983,1.0,Alg-5-full-sample-IVAP,Setting 1,TMLE
3,1590.985105,203.354895,2107.057824,1.0,1666.10045,128.23955,2106.836,1.0,Alg-5-full-sample-Iso,Setting 1,TMLE
4,1614.062571,180.277429,2144.501425,1.0,1555.450633,238.889367,2086.11,1.0,Alg-5-full-sample-Platt,Setting 1,TMLE
5,-3507.407521,5301.747521,2900.059211,0.0,-4333.42868,6127.76868,3812.381,0.0,Alg-1-Clipped,Setting 2,TMLE
6,-15040.216692,16834.556692,6466.964408,0.0,-6064.597107,7858.937107,1513959.0,1.0,Alg-1-Uncalib,Setting 2,TMLE
7,-10342.365867,12136.705867,2577.092034,0.0,-3207.53139,5001.87139,5801.527,0.0,Alg-5-full-sample-IVAP,Setting 2,TMLE
8,-15931.593848,17725.933848,3101.15637,0.0,-12954.960251,14749.300251,5551.586,0.0,Alg-5-full-sample-Iso,Setting 2,TMLE
9,-14764.863106,16559.203106,1739.256781,0.0,-3891.547929,5685.887929,1540.875,0.0,Alg-5-full-sample-Platt,Setting 2,TMLE


In [114]:
def custom_formatter(x):
    """Format numeric cells for the LaTeX table; pass everything else through."""
    if isinstance(x, (int, float)):
        if np.abs(x) >= 1e6:
            return "{:,.2e}".format(x)  # Very large values: scientific notation
        else:
            return "{:,.2f}".format(x)  # Two decimals with thousands separator
    return x  # Leave non-numeric values (labels) as is


# Short labels used in the printed tables.
Setting_mapping = {
    "Setting 1": "1",
    "Setting 2": "2",
    "Setting 3": "3"}
learner_titles = {"RF": "Random Forest"}
metric_titles = {"CI_length": "CI Length"}

# Print one LaTeX table per procedure.
for procedure, combined_results in results_by_procedure.items():
    if combined_results.empty:
        print(f"No results found for procedure {procedure}")
        continue

    # Fresh, sorted copy so the stored table is left untouched.
    combined_results = (
        combined_results
        .reset_index(drop=True)
        .sort_values(by=['Setting', 'Method'])
    )

    # Setting and Method first, then the metric columns; Procedure is dropped
    # because it is already the table caption.
    value_columns = [col for col in combined_results.columns if col not in ['Setting', 'Method', 'Metrics', 'index', 'Procedure']]
    combined_results = combined_results[['Setting', 'Method'] + value_columns].copy()
    combined_results["Setting"] = combined_results["Setting"].replace(Setting_mapping)

    # Derive the two-level header from the "<metric>-<learner>" column names.
    # A fixed, positional header mislabels the table whenever the column order
    # differs and raises whenever the number of columns differs (e.g. after
    # pivot_table dropped an all-NaN metric).
    top_level = ["Setting", "Method"]
    bottom_level = ["", ""]
    for col in value_columns:
        metric, sep, learner = str(col).rpartition("-")
        if sep:
            top_level.append(f"m = {learner_titles.get(learner, learner)}")
            bottom_level.append(metric_titles.get(metric, metric))
        else:
            # Not a metric column: keep its own name as the header.
            top_level.append(str(col))
            bottom_level.append("")
    cidx = pd.MultiIndex.from_arrays([top_level, bottom_level])

    # Re-wrap the values under the display header and format the numbers.
    styler = pd.DataFrame(combined_results.to_numpy(), columns=cidx, index=combined_results.index).style
    styler = styler.format(custom_formatter)

    filename = f'{procedure}'

    # Convert to LaTeX
    df_tex = styler.hide(axis="index").to_latex(
        caption=filename,
        convert_css=True,
        position_float="centering",
        multicol_align="|c|",
        hrules=True,
    )

    # Print the final LaTeX table
    print(df_tex)

\begin{table}
\centering
\caption{IPW}
\begin{tabular}{llllllllll}
\toprule
Setting & Method & \multicolumn{4}{|c|}{m = Logit} & \multicolumn{4}{|c|}{m = LGBM} \\
 &  & ATE & RMSE & CI Length & Cover & ATE & RMSE & CI Length & Cover \\
\midrule
1 & Alg-1-Clipped & 1,806.89 & 12.55 & 2,562.03 & 1.00 & 2,073.16 & 278.82 & 2,736.22 & 1.00 \\
1 & Alg-1-Uncalib & 1,806.89 & 12.55 & 2,562.03 & 1.00 & 2,073.16 & 278.82 & 2,736.22 & 1.00 \\
1 & Alg-5-full-sample-IVAP & 1,831.67 & 37.33 & 2,556.81 & 1.00 & 1,845.34 & 51.00 & 2,532.98 & 1.00 \\
1 & Alg-5-full-sample-Iso & 1,798.10 & 3.76 & 2,536.11 & 1.00 & 1,838.91 & 44.57 & 2,543.56 & 1.00 \\
1 & Alg-5-full-sample-Platt & 1,850.58 & 56.24 & 2,549.05 & 1.00 & 1,744.58 & 49.76 & 2,506.05 & 1.00 \\
2 & Alg-1-Clipped & -10,563.31 & 12,357.65 & 2,405.94 & 0.00 & -12,138.11 & 13,932.45 & 2,004.23 & 0.00 \\
2 & Alg-1-Uncalib & -14,612.12 & 16,406.46 & 1,641.37 & 0.00 & -8,777.79 & 10,572.13 & 7,770.59 & 0.00 \\
2 & Alg-5-full-sample-IVAP & -12,589.18