## Evaluation Table

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy
from pathlib import Path
from itertools import product


In [2]:
# Parent directory of the current working directory.
project_root = Path.cwd().parents[0]

# Load the four pickled result frames; the paths are relative to the current
# working directory. NOTE: unpickling can execute arbitrary code, so only
# load files from a trusted source.
df_1, df_2, df_3, df_4 = (
    pd.read_pickle(f"results/results_{tag}.pkl")
    for tag in ("irm", "drug", "nonlinear", "unbalanced")
)

In [6]:
# Constant columns written onto each result frame. The values match the
# corresponding entries of the `settings` list, which are later used to filter
# on these columns (presumably the columns are not recorded in the respective
# result files -- TODO confirm).
_constant_columns = (
    (df_1, {"overlap": 0, "share_treated": 0}),
    (df_2, {"R2_d": 0, "dim_x": 3, "share_treated": 0}),
    (df_3, {"R2_d": 0, "dim_x": 4, "overlap": 0, "share_treated": 0}),
    (df_4, {"R2_d": 0, "dim_x": 20, "overlap": 0}),
)

for _frame, _fills in _constant_columns:
    for _column, _constant in _fills.items():
        _frame[_column] = _constant

In [7]:
# Define settings
# Each entry pairs a result frame with the parameter values used to select its
# rows and with `theta`, the value passed to the evaluation as the true effect.
# `clipping_threshold_2` is the alternative threshold applied to a subset of
# methods when the tables are built.
settings = [
    dict(
        name="Setting 1", df=df_1, n_obs=2000, dim_x=20,
        clipping_threshold=1e-12, clipping_threshold_2=0.01,
        R2_d=0.5, overlap=0, share_treated=0, theta=0,
    ),
    dict(
        name="Setting 2", df=df_2, n_obs=2000, dim_x=3,
        clipping_threshold=1e-12, clipping_threshold_2=0.01,
        R2_d=0, overlap=0.5, share_treated=0, theta=-1,
    ),
    dict(
        name="Setting 3", df=df_3, n_obs=2000, dim_x=4,
        clipping_threshold=1e-12, clipping_threshold_2=0.01,
        R2_d=0, overlap=0, share_treated=0, theta=1.6257,
    ),
    dict(
        name="Setting 4", df=df_4, n_obs=4000, dim_x=20,
        clipping_threshold=1e-12, clipping_threshold_2=0.01,
        share_treated=0.1, R2_d=0, overlap=0, theta=1,
    ),
]

# Define learner_g
learner_g = 'LGBM'

# Short display labels for the algorithm identifiers stored in the results
method_mapping = {
    "alg-1-uncalibrated": "Alg-1-uncalib",
    "alg-2-nested-cross-fitting-calib": "Alg-2-nested-cf",
    "alg-3-cross-fitted-calib": "Alg-3-cf",
    "alg-4-single-split-calib": "Alg-4-single-split",
    "alg-5-full-sample-calib": "Alg-5-full-sample",
}

# Short display labels for the calibration identifiers stored in the results
calib_mapping = {
    "uncalibrated": "Uncalib",
    "isotonic": "Iso",
    "platt": "Platt",
    "ivap": "IVAP",
}

# Values of `learner_m` reported in the tables (a list, despite the name)
learner_dict_m = ["Logit", "RF", "LGBM"]

In [None]:
def evaluate_estimation(ate: np.ndarray, theta: float, level: float = 0.9) -> dict:
    """Calculate estimation metrics for ATE results.

    All metrics ignore NaN entries in ``ate``.

    Args:
        ate: Array-like of ATE estimates (NumPy array or pandas Series).
        theta: True treatment effect value.
        level: Currently unused; kept so existing callers that pass it
            continue to work.

    Returns:
        Dictionary with the keys ``'RMSE'``, ``'Std. dev.'`` and ``'MAE'``
        (in that order). The standard deviation is the population form
        (``ddof=0``), as computed by ``np.nanstd``.
    """
    # Compute in float64: building the reference with np.full_like(ate, theta)
    # would cast theta to ate's dtype and truncate a fractional theta whenever
    # the estimates happen to be stored as integers.
    estimates = np.asarray(ate, dtype=float)
    bias = estimates - theta

    return {
        'RMSE': np.sqrt(np.nanmean(bias ** 2)),
        'Std. dev.': np.nanstd(estimates),
        'MAE': np.nanmean(np.abs(bias)),
    }

In [16]:
# Build one summary table per procedure (IPW / IRM / PLR). For every setting
# the raw estimates are aggregated with `evaluate_estimation`, filtered to the
# setting's parameter values, and pivoted so that each row is a method and each
# column is a "<metric>-<learner_m>" pair.
results_by_procedure = {"IPW": pd.DataFrame(), "IRM": pd.DataFrame(), "PLR": pd.DataFrame()}

# --- Loop-invariant configuration (defined once, not on every iteration) ---
calib_methods = ["uncalibrated", "isotonic", "platt", "ivap"]
methods = [
    "alg-1-uncalibrated",
    "alg-2-nested-cross-fitting-calib",
    "alg-3-cross-fitted-calib",
    "alg-4-single-split-calib",
    "alg-5-full-sample-calib"
]

# Columns carried through the melt as identifiers
grouping_columns = [
    "n_obs", "dim_x", "learner_g", "learner_m", "method", "calib_method",
    "clipping_threshold", "R2_d", "rmses", "ci_length", "K",
    "cover", "overlap", "share_treated"
]

# Keys that define one evaluation cell
eval_group_keys = [
    'n_obs', 'dim_x', 'learner_g', 'learner_m', 'R2_d',
    'clipping_threshold', 'procedure', 'method',
    'calib_method', 'overlap', 'share_treated'
]

# evaluate_estimation returns a dict, so the groupby result gains one unnamed
# index level holding the metric names. reset_index() labels an unnamed level
# "level_<position>", and its position equals the number of group keys.
# Deriving the label keeps it correct if the key list ever changes.
metrics_level_name = f"level_{len(eval_group_keys)}"

# Collapse the redundant label of the uncalibrated baseline
Method_mapping = {
    "Alg-1-uncalib-Uncalib": "Alg-1-Uncalib"
}

# Methods that are reported at clipping_threshold_2 instead of clipping_threshold
methods_with_threshold_2 = ['Alg-2-nested-cf-Iso', 'Alg-3-cf-IVAP', 'Alg-4-single-split-Iso']


for setting in settings:
    # Bind the name up front: the progress message below is printed even when
    # no rows were produced for a procedure.
    setting_name = setting["name"]

    df = setting["df"]
    n_obs = setting["n_obs"]
    dim_x = setting["dim_x"]
    clipping_threshold = setting["clipping_threshold"]
    clipping_threshold_2 = setting["clipping_threshold_2"]
    R2_d = setting["R2_d"]
    overlap = setting["overlap"]
    share_treated = setting["share_treated"]
    theta = setting["theta"]

    df = df.rename(columns={
        "coefs": "IRM",
        "ipw_coefs": "IPW",
        "plr_coefs": "PLR",
    })

    df = df[df["calib_method"].isin(calib_methods)]
    df = df[df["method"].isin(methods)]

    # Collect results for the current setting
    for value_var in ["IPW", "IRM", "PLR"]:
        temp_df = df.melt(
            id_vars=grouping_columns,
            value_vars=[value_var],
            var_name="procedure",
            value_name="estimate"
        )
        # Not used by the aggregation below; retained on temp_df as before.
        temp_df["bias"] = temp_df["estimate"] - theta

        # Evaluating estimations: one row per (group, metric)
        df_eval = (
            temp_df.groupby(eval_group_keys)
            .estimate.apply(lambda x: evaluate_estimation(x, theta=theta))
            .reset_index()
            .rename(columns={metrics_level_name: "Metrics"})
        )

        # Without a 'Metrics' column the pivot below cannot be built
        if 'Metrics' not in df_eval.columns:
            print(f"Warning: 'Metrics' column not found in df_eval for setting {setting['name']} and procedure {value_var}")
            continue

        row_data = []

        if not df_eval.empty:
            # Relabelling and the setting filter do not depend on learner_m,
            # so they are computed once per (setting, procedure).
            df_labelled = df_eval.copy()
            df_labelled["method"] = df_labelled["method"].replace(method_mapping)
            df_labelled["calib_method"] = df_labelled["calib_method"].replace(calib_mapping)
            df_labelled.insert(1, "Method", df_labelled[['method', 'calib_method']].agg('-'.join, axis=1))
            df_labelled["Method"] = df_labelled["Method"].replace(Method_mapping)

            in_setting = (
                (df_labelled['n_obs'] == n_obs)
                & (df_labelled['dim_x'] == dim_x)
                & (df_labelled['learner_g'] == learner_g)
                & (df_labelled['R2_d'] == R2_d)
                & (df_labelled['overlap'] == overlap)
                & (df_labelled['share_treated'] == share_treated)
            )
            at_threshold = df_labelled['clipping_threshold'] == clipping_threshold
            at_threshold_2 = df_labelled['clipping_threshold'] == clipping_threshold_2
            uses_threshold_2 = df_labelled['Method'].isin(methods_with_threshold_2)

            # Listed methods are taken at clipping_threshold_2, all others at
            # the original clipping_threshold.
            main_rows = in_setting & (
                (uses_threshold_2 & at_threshold_2) | (~uses_threshold_2 & at_threshold)
            )
            # The uncalibrated baseline evaluated at clipping_threshold_2 is
            # reported as an extra row named "Alg-1-Clipped".
            alg1_clipped_rows = (
                in_setting & at_threshold_2 & (df_labelled['Method'] == "Alg-1-Uncalib")
            )

        for learner_m in learner_dict_m:

            print(f"Processing learner_m: {learner_m}, DataFrame size: {df_eval.shape}")

            if df_eval.empty:
                continue

            is_learner = df_labelled['learner_m'] == learner_m

            df_eval_new = df_labelled[is_learner & main_rows].copy()

            df_alg1_clipped = df_labelled[is_learner & alg1_clipped_rows].copy()
            df_alg1_clipped["Method"] = "Alg-1-Clipped"

            # Combine the filtered DataFrames
            df_eval_new = pd.concat([df_eval_new, df_alg1_clipped], ignore_index=True)

            # Add suffix to 'Metrics' column with {learner_m}
            df_eval_new['Metrics'] = df_eval_new['Metrics'] + '-' + learner_m

            # Keep only necessary columns
            df_eval_new = df_eval_new[['Method', 'Metrics', 'estimate']]

            row_data.append(df_eval_new)

        # Combine all metrics for the current setting and procedure
        if row_data:
            combined_results = pd.concat(row_data, ignore_index=True)
            combined_results = combined_results.pivot_table(values='estimate', index=['Method'], columns='Metrics').reset_index()
            combined_results['Setting'] = setting_name
            combined_results['Procedure'] = value_var

            # Concatenate the results to the overall dictionary
            results_by_procedure[value_var] = pd.concat([results_by_procedure[value_var], combined_results], ignore_index=True)
        print(f"{setting_name}: Processed Method: {value_var}")

Processing learner_m: Logit, DataFrame size: (78975, 13)
Processing learner_m: RF, DataFrame size: (78975, 13)
Processing learner_m: LGBM, DataFrame size: (78975, 13)
Setting 1: Processed Method: IPW
Processing learner_m: Logit, DataFrame size: (78975, 13)
Processing learner_m: RF, DataFrame size: (78975, 13)
Processing learner_m: LGBM, DataFrame size: (78975, 13)
Setting 1: Processed Method: IRM
Processing learner_m: Logit, DataFrame size: (78975, 13)
Processing learner_m: RF, DataFrame size: (78975, 13)
Processing learner_m: LGBM, DataFrame size: (78975, 13)
Setting 1: Processed Method: PLR
Processing learner_m: Logit, DataFrame size: (15795, 13)
Processing learner_m: RF, DataFrame size: (15795, 13)
Processing learner_m: LGBM, DataFrame size: (15795, 13)
Setting 2: Processed Method: IPW
Processing learner_m: Logit, DataFrame size: (15795, 13)
Processing learner_m: RF, DataFrame size: (15795, 13)
Processing learner_m: LGBM, DataFrame size: (15795, 13)
Setting 2: Processed Method: IRM


In [42]:
def reorder_columns(df):
    """Return ``df`` with its metric columns grouped by learner.

    Metric columns are named ``"<metric>-<learner>"``. They are placed first,
    learner by learner (Logit, RF, LGBM), and within each learner in the fixed
    order MAE, RMSE, Std. dev. This order matches the header layout used by
    the LaTeX export of these tables. Metric columns that are absent from
    ``df`` are skipped. All remaining columns (e.g. Method, Setting) follow in
    their existing relative order.

    Args:
        df: DataFrame whose columns are to be reordered; may be empty.

    Returns:
        A DataFrame with the same columns as ``df`` in the new order.
    """
    learners = ['Logit', 'RF', 'LGBM']
    metric_order = ['MAE-', 'RMSE-', 'Std. dev.-']

    # Walk the desired order and keep what exists. Filtering df.columns
    # instead would silently fall back to whatever order df already has.
    present = set(df.columns)
    new_order = [
        metric + learner
        for learner in learners
        for metric in metric_order
        if metric + learner in present
    ]

    # Add any remaining columns that are not learner-specific
    placed = set(new_order)
    new_order += [col for col in df.columns if col not in placed]

    return df[new_order]

# Apply the column ordering to the table of every procedure
for _procedure_key in ("IPW", "IRM", "PLR"):
    results_by_procedure[_procedure_key] = reorder_columns(results_by_procedure[_procedure_key])


In [50]:
# Print one LaTeX table per procedure from `results_by_procedure`.

# Header labels for the learner suffix of each metric column
learner_labels = {"Logit": "Logit", "RF": "Random Forest", "LGBM": "LGBM"}

# Shorten the setting names to their number for the table body
Setting_mapping = {
    "Setting 1": "1",
    "Setting 2": "2",
    "Setting 3": "3",
    "Setting 4": "4",
}


def custom_formatter(x):
    """Format numeric cells with two decimals; pass other values through.

    Values with magnitude of at least 1e6 are shown in scientific notation.
    """
    if isinstance(x, (int, float)):
        if np.abs(x) >= 1e6:
            return "{:,.2e}".format(x)  # Scientific notation with thousands separator
        else:
            return "{:,.2f}".format(x)  # Regular format with thousands separator
    return x  # Leave non-numeric values as is


def _header_cells(column):
    """Map a flat column name to its (top, bottom) header cells.

    "<metric>-<learner>" becomes ("m = <learner label>", "<metric>"); a name
    without "-" (e.g. "Setting", "Method") becomes (name, "").
    """
    metric, separator, learner = str(column).rpartition("-")
    if not separator:
        return (str(column), "")
    return (f"m = {learner_labels.get(learner, learner)}", metric)


for procedure, combined_results in results_by_procedure.items():
    if combined_results.empty:
        print(f"No results found for procedure {procedure}")
        continue

    # Sort by Setting and Method without mutating the stored frame
    combined_results = (
        combined_results
        .reset_index(drop=True)
        .sort_values(by=['Setting', 'Method'])
    )

    # Rearrange columns to move Setting to the front and exclude Procedure
    column_order = ['Setting', 'Method'] + [
        col for col in combined_results.columns
        if col not in ['Setting', 'Method', 'Metrics', 'index', 'Procedure']
    ]
    combined_results = combined_results[column_order].assign(
        Setting=lambda frame: frame["Setting"].replace(Setting_mapping)
    )

    # Derive the two-level header from the actual column names so every
    # label is tied to its own column. A positional, fixed-width header
    # would break or mislabel when a metric column is missing or reordered.
    cidx = pd.MultiIndex.from_tuples([_header_cells(col) for col in combined_results.columns])

    # Create a styled DataFrame object
    styler = pd.DataFrame(combined_results.to_numpy(), columns=cidx, index=combined_results.index).style

    # Apply the custom formatter
    styler = styler.format(custom_formatter)

    # The procedure name doubles as the table caption
    filename = f'{procedure}'

    # Convert to LaTeX
    df_tex = styler.hide(axis="index").to_latex(
        caption=filename,
        convert_css=True,
        position_float="centering",
        multicol_align="|c|",
        hrules=True,
    )

    # Print the final LaTeX table
    print(df_tex)

\begin{table}
\centering
\caption{IPW}
\begin{tabular}{lllllllllll}
\toprule
Setting & Method & \multicolumn{3}{|c|}{m = Logit} & \multicolumn{3}{|c|}{m = Random Forest} & \multicolumn{3}{|c|}{m = LGBM} \\
 &  & MAE & RMSE & Std. dev. & MAE & RMSE & Std. dev. & MAE & RMSE & Std. dev. \\
\midrule
1 & Alg-1-Clipped & 0.08 & 0.11 & 0.11 & 0.18 & 0.19 & 0.06 & 0.94 & 0.98 & 0.31 \\
1 & Alg-1-Uncalib & 0.10 & 0.16 & 0.16 & 1.09e+06 & 1.09e+07 & 1.09e+07 & 1.57 & 1.77 & 0.83 \\
1 & Alg-2-nested-cf-IVAP & 0.10 & 0.12 & 0.09 & 0.14 & 0.15 & 0.07 & 0.17 & 0.19 & 0.08 \\
1 & Alg-2-nested-cf-Iso & 0.20 & 0.25 & 0.19 & 0.13 & 0.17 & 0.16 & 0.13 & 0.16 & 0.16 \\
1 & Alg-2-nested-cf-Platt & 0.10 & 0.17 & 0.17 & 0.14 & 0.15 & 0.06 & 0.22 & 0.23 & 0.06 \\
1 & Alg-3-cf-IVAP & 0.11 & 0.13 & 0.07 & 0.14 & 0.16 & 0.07 & 0.17 & 0.18 & 0.07 \\
1 & Alg-3-cf-Iso & 0.09 & 0.11 & 0.07 & 0.13 & 0.15 & 0.06 & 0.15 & 0.17 & 0.06 \\
1 & Alg-3-cf-Platt & 0.13 & 0.14 & 0.06 & 0.35 & 0.36 & 0.05 & 0.31 & 0.31 & 0.05 \