This code adds additional outcomes to the study; it can be appended to the end of study.notebook.

In [None]:
# =============================================================================
# CELL 14: UNIFIED AIPW MULTI-OUTCOME ENGINE
# =============================================================================
# Runs run_cross_fitted_aipw once per secondary endpoint and stores each result
# in `all_outcome_data`, keyed by endpoint name.
print("\n--- CELL 14: Running AIPW for Secondary Endpoints ---")

# 1. Standardize Endpoint Definitions
# Maps endpoint name -> (event-date column in df_final, window in days).
ADDITIONAL_ENDPOINTS = {
    'AKI_7': ('date_AKI_30', 7),
    'MORTALITY_30': ('date_DEATH', 30),
    'THYROID_90': ('date_THYROID_90', 90),
}

# Dictionary to store full results for plotting/policy analysis
all_outcome_data = {}

for name, (date_col, window) in ADDITIONAL_ENDPOINTS.items():
    print(f"\n>>> PROCESSING OUTCOME: {name} ({window} day window) <<<")

    # Define Binary Y: 1 when the event date is at most `window` days after
    # index_date; rows with a missing date become 0.
    # NOTE(review): a date *before* index_date (negative offset) also counts
    # as an event here -- confirm the upstream cohort logic rules that out.
    Y_vec_new = (
        (df_final[date_col] - df_final['index_date']).dt.days <= window
    ).fillna(False).astype(int).values

    # Audit event counts
    n_events = Y_vec_new.sum()
    print(f"  Event Count: {n_events} ({100*n_events/len(df_final):.2f}%)")

    if n_events < 10:
        print(f"  SKIPPING {name}: Insufficient events for robust SuperLearner.")
        continue

    # Run the Gold Standard Engine (defined in Cell 9)
    # This uses the same X_final and T_final used for AKI_30.
    # The result is bound to `stats_new` (not `stats`) on purpose: Cell 15
    # reads the global `stats` as the primary-outcome result, so rebinding it
    # here would make the primary row report the last secondary endpoint.
    stats_new, preds_new = run_cross_fitted_aipw(
        X_final, T_final, Y_vec_new, n_folds=5
    )

    # Store for next cells
    all_outcome_data[name] = {
        'stats': stats_new,
        'preds': preds_new,
        'Y': Y_vec_new,
    }

print("\n--- AIPW Inference Complete for all Endpoints ---")

In [None]:
# =============================================================================
# CELL 15: CONSOLIDATED INFERENCE, CALIBRATION & E-VALUE SUITE
# =============================================================================
print("\n--- CELL 15: Full Methodological Suite (Calibrated) ---")


def calculate_e_value_internal(rr):
    """Return the E-value for a risk ratio.

    The value is ``RR + sqrt(RR * (RR - 1))``. A ratio below 1 is replaced
    by its reciprocal first, so protective and harmful effects of the same
    strength give the same result. A NaN or non-positive ratio yields 1.0.
    """
    if np.isnan(rr) or rr <= 0:
        return 1.0
    # Work on the ">= 1" side of the scale: invert protective ratios.
    ratio = 1 / rr if rr < 1 else rr
    return ratio + np.sqrt(ratio * (ratio - 1))

# 1. Collect and Format Main Results
# One row per outcome: the primary result (globals `outcome_name` / `stats`),
# then every entry of `all_outcome_data`.
summary_rows = []
if 'stats' in globals() and 'outcome_name' in globals():
    # Guard against the global `stats` having been rebound to a secondary
    # endpoint's result (e.g. by a loop that reuses the name). In that case it
    # is the very same object stored in `all_outcome_data`, and labelling it
    # with `outcome_name` would put wrong numbers in the primary row.
    stats_is_secondary = any(
        stats is data['stats'] for data in all_outcome_data.values()
    )
    if stats_is_secondary:
        print(
            "Warning: global `stats` holds a secondary endpoint's result; "
            f"not reporting it as '{outcome_name}'."
        )
    else:
        summary_rows.append({'Outcome': outcome_name, **stats})

for name, data in all_outcome_data.items():
    summary_rows.append({'Outcome': name, **data['stats']})

if not summary_rows:
    # Without this check the cell fails further down with KeyError: 'RR'.
    raise RuntimeError("CELL 15: no outcome results available to summarise.")

df_all = pd.DataFrame(summary_rows)

# 2. Merge with Negative Controls for Calibration
calibration_applied = 'df_nc' in globals() and not df_nc.empty
if calibration_applied:
    # Rename the negative-control columns to the names used for the main
    # results below, and prefix their outcome labels so they can be filtered
    # out again after calibration.
    df_nc_cal = df_nc.rename(columns={'OR_Adj': 'HR_Cox', 'CI_Low': 'HR_CI_Low', 'CI_High': 'HR_CI_High'})
    df_nc_cal['Outcome'] = 'NC_' + df_nc_cal['Outcome']

    # Prep main results for calibration engine
    df_main_cal = df_all.copy()
    df_main_cal['HR_Cox'] = df_main_cal['RR']
    # Approximation: SE / |ATE| is used as the standard error of log(RR).
    # NOTE(review): this is a rough stand-in, and ATE == 0 makes it infinite
    # (interval becomes 0..inf) -- confirm that is acceptable.
    rel_se = df_main_cal['SE'] / df_main_cal['ATE'].abs()
    df_main_cal['HR_CI_Low'] = df_main_cal['RR'] * np.exp(-1.96 * rel_se)
    df_main_cal['HR_CI_High'] = df_main_cal['RR'] * np.exp(1.96 * rel_se)

    df_to_cal = pd.concat([df_main_cal, df_nc_cal], ignore_index=True, sort=False)
    df_calibrated = calibrate_estimates(df_to_cal)

    # Fix the NaN P-Value: set a floor of 1e-16 for extremely significant
    # results. The imputed outcomes are printed so the substitution is never
    # silent (a NaN could also mean the calibration itself failed).
    if 'P_Calibrated' in df_calibrated.columns:
        p_floor = 1e-16
        nan_p_mask = df_calibrated['P_Calibrated'].isna()
        if nan_p_mask.any():
            imputed = df_calibrated.loc[nan_p_mask, 'Outcome'].tolist()
            print(f"  Note: NaN P_Calibrated set to {p_floor:g} for: {imputed}")
        df_calibrated['P_Calibrated'] = df_calibrated['P_Calibrated'].fillna(p_floor)

    # Drop the negative-control rows; keep only the study outcomes.
    df_final_res = df_calibrated[~df_calibrated['Outcome'].str.startswith('NC_')].copy()
else:
    print("Warning: Skipping Calibration (df_nc not found).")
    df_final_res = df_all.copy()

# 3. Add Final Causal Metrics (E-Value)
df_final_res['E_Value'] = df_final_res['RR'].apply(calculate_e_value_internal)

# 4. Final Clean Table
cols = ['Outcome', 'Risk_1', 'Risk_0', 'ATE', 'RR', 'E_Value', 'P_Value']
if 'P_Calibrated' in df_final_res.columns:
    cols.append('P_Calibrated')

print("\n--- FINAL CONSOLIDATED CAUSAL TABLE ---")
display(df_final_res[cols].sort_values('ATE').round(4))

# Visualization: Forest Plot of ATE with CI_Lower / CI_Upper as error bars
plt.figure(figsize=(10, 5))
y_pos = np.arange(len(df_final_res))
plt.errorbar(df_final_res['ATE'], y_pos,
             xerr=[df_final_res['ATE'] - df_final_res['CI_Lower'],
                   df_final_res['CI_Upper'] - df_final_res['ATE']],
             fmt='o', color='black', capsize=6, elinewidth=2, ms=8)
plt.axvline(0, color='darkred', linestyle='--', alpha=0.7)
plt.yticks(y_pos, df_final_res['Outcome'], fontsize=12)
plt.xlabel("Absolute Risk Difference (ATE)", fontsize=12)
# Only claim calibration in the title when the calibration branch actually ran.
if calibration_applied:
    plot_title = "Gold-Standard AIPW Effects (Empirically Calibrated)"
else:
    plot_title = "Gold-Standard AIPW Effects (Uncalibrated)"
plt.title(plot_title, fontsize=14)
plt.grid(True, axis='x', linestyle=':', alpha=0.6)
plt.tight_layout()
plt.show()

In [None]:
# =============================================================================
# CELL 16A: BIVARIATE ITE DISTRIBUTION & NET UTILITY ANALYSIS
# =============================================================================
# Plots the joint distribution of per-patient effect estimates for AKI-7 and
# Mortality-30, first as a hexbin with marginals and then as density contours
# over a background coloured by the summed ("net utility") effect.
import matplotlib.cm as cm
from scipy.stats import gaussian_kde

print("--- CELL 16A: Bivariate ITE Distributions ---")

# 0. Pre-flight check: both endpoints must have been estimated. An endpoint is
# dropped upstream when it has fewer than 10 events, so fail with a clear
# message instead of a bare KeyError on the lookup below.
_required_endpoints = ('AKI_7', 'MORTALITY_30')
_missing_endpoints = [
    key for key in _required_endpoints if key not in all_outcome_data
]
if _missing_endpoints:
    raise KeyError(
        f"CELL 16A needs AIPW results for {_missing_endpoints}, but they are "
        "missing from all_outcome_data (endpoint skipped or not yet run)."
    )

# 1. Prepare Data
# ITE is computed as mu0 - mu1.
# NOTE(review): presumably mu0 / mu1 are predicted risks under control /
# treatment, making positive values a risk *reduction* -- confirm, since the
# axis labels below only say "Risk Difference".
preds_aki = all_outcome_data['AKI_7']['preds']
preds_mort = all_outcome_data['MORTALITY_30']['preds']
ite_aki = preds_aki['mu0'] - preds_aki['mu1']
ite_mort = preds_mort['mu0'] - preds_mort['mu1']
# Net Utility (Additive ITE): both endpoints weighted equally.
ite_net = ite_aki + ite_mort

df_ite_viz = pd.DataFrame({
    'ITE_AKI_7': ite_aki,
    'ITE_MORT_30': ite_mort,
    'Net_Utility': ite_net,
})

# 2. Plot 1: Hexbin Density with Marginals
g = sns.JointGrid(data=df_ite_viz, x='ITE_AKI_7', y='ITE_MORT_30', space=0)
g.plot_joint(plt.hexbin, gridsize=40, cmap='inferno', mincnt=1, edgecolors='none')
g.plot_marginals(sns.kdeplot, fill=True, color='black', alpha=0.2)
# Zero lines split the plane into the four sign quadrants.
g.ax_joint.axhline(0, color='black', lw=1, ls='-')
g.ax_joint.axvline(0, color='black', lw=1, ls='-')
g.ax_joint.set_xlabel("ITE: AKI-7 (Risk Difference)", fontsize=12)
g.ax_joint.set_ylabel("ITE: Mortality-30 (Risk Difference)", fontsize=12)
# Leave head-room so the suptitle does not overlap the top marginal.
plt.subplots_adjust(top=0.9)
g.fig.suptitle("Bivariate Distribution of Individual Treatment Effects (ITE)", fontsize=14)
plt.show()

# 3. Plot 2: Contour Plot with Utility Gradient
plt.figure(figsize=(10, 8))
# Create a grid for the background utility color
x_range = np.linspace(df_ite_viz['ITE_AKI_7'].min(), df_ite_viz['ITE_AKI_7'].max(), 100)
y_range = np.linspace(df_ite_viz['ITE_MORT_30'].min(), df_ite_viz['ITE_MORT_30'].max(), 100)
X, Y = np.meshgrid(x_range, y_range)
Z_utility = X + Y  # Additive Net Utility

# Background Utility Color
plt.imshow(Z_utility, extent=[x_range.min(), x_range.max(), y_range.min(), y_range.max()],
           origin='lower', cmap='RdYlGn', alpha=0.3, aspect='auto')
plt.colorbar(label='Net Clinical Benefit (Combined ITE)')

# Density Contours
sns.kdeplot(data=df_ite_viz, x='ITE_AKI_7', y='ITE_MORT_30', levels=10, color="black", linewidths=1, alpha=0.6)

plt.axhline(0, color='black', lw=1)
plt.axvline(0, color='black', lw=1)
plt.xlabel("ITE: AKI-7")
plt.ylabel("ITE: Mortality-30")
plt.title("Isocontours of Patient Density vs. Net Clinical Utility", fontsize=14)
plt.show()

    