## 1. Setup and Configuration

In [None]:
# Import required libraries
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import warnings
warnings.filterwarnings('ignore')

# Import custom analysis modules
from cdv_utils.results_analysis import (
    prepare_dataframes_for_analysis,
    calculate_ate_mse_decomposition,
    calculate_ate_by_estimator,
    calculate_ite_mse_pehe,
    calculate_ite_mse_pehe_no_estimator,
    perform_ate_mse_statistical_tests,
    perform_cate_mse_statistical_tests,
    create_comprehensive_statistical_table,
    create_comprehensive_cate_statistical_table
)

from cdv_utils.visualization import (
    setup_plotting_style,
    plot_ate_bias_variance_tradeoff,
    plot_cate_bias_variance_tradeoff,
    plot_statistical_results_summary,
    create_results_visualization_dashboard
)

# Configuration
# Label used for the "best estimator per seed" rows: it is written into the
# 'estimator' column of the summary tables and handed to the plotting and
# statistical-test helpers further down.
BEST_ESTIMATOR_NAME = 'Best Estimators Selection Per Seed'

# Setup plotting
setup_plotting_style()
# Render floats with four decimals and never hide columns of wide result tables.
pd.set_option('display.float_format', '{:.4f}'.format)
pd.set_option('display.max_columns', None)

# Status lines (the check mark was previously emitted as mis-decoded bytes).
print("✓ Setup complete! Libraries imported successfully.")
print(f"✓ Working directory: {os.getcwd()}")

## 2. Data Loading and Preparation

In [None]:
# Load the multi-seed experiment results
# The default location is machine-specific; set the CDV_RESULTS_FILE environment
# variable to point the notebook at a different pickle without editing this cell.
DEFAULT_RESULTS_FILE = r"C:\Users\Guy\OneDrive\Shared Documents\Technion\Msc\process_mining_BP_CDM\thesis\code\realcause\results\multi_seed_experiment_results_w_validation_with_global_0_till_500.pkl"
results_file = os.environ.get("CDV_RESULTS_FILE", DEFAULT_RESULTS_FILE)

print(f"Loading results from: {results_file}")
print("="*80)

# Only the file read is guarded, so the "Error loading file" message is not
# printed for unrelated failures in the inspection code below.
try:
    # NOTE(security): pickle.load can execute arbitrary code contained in the
    # file. Only load result files produced by a trusted run.
    with open(results_file, 'rb') as f:
        results_by_seed = pickle.load(f)
except Exception as e:
    print(f"❌ Error loading file: {e}")
    raise

print(f"✓ Successfully loaded results for {len(results_by_seed)} seeds")
seeds = sorted(results_by_seed.keys())
print(f"Seeds: {seeds}")

# Fail with an explicit message instead of an IndexError on seeds[0].
if not seeds:
    raise ValueError(f"No seeds found in results file: {results_file}")

# Display structure for first seed (only the entries that are DataFrames)
first_seed = seeds[0]
print(f"\nData structure for seed {first_seed}:")
seed_frames = {
    key: value
    for key, value in results_by_seed[first_seed].items()
    if isinstance(value, pd.DataFrame)
}
for key, df in seed_frames.items():
    print(f"  {key}: {df.shape} - {type(df)}")

# Show the columns of the first DataFrame entry. The first dictionary value is
# not assumed to be a DataFrame, matching the isinstance filter above.
if seed_frames:
    sample_df = next(iter(seed_frames.values()))
    print(f"\nSample columns: {list(sample_df.columns)}")
else:
    print(f"\nNo DataFrame entries found for seed {first_seed}")

In [None]:
# Prepare dataframes for analysis
# The helper returns a mapping that is indexed below by fixed names.
dataframes = prepare_dataframes_for_analysis(results_by_seed, seeds)

# Extract individual dataframes
# Later cells use the *_BEST_* frames for the best-estimator analyses and the
# *_ALL_* frames for the per-estimator analyses; GLOBAL / VARIANT correspond to
# the two methods being compared.
DF_BEST_GLOBAL = dataframes['DF_BEST_GLOBAL']
DF_BEST_VARIANT = dataframes['DF_BEST_VARIANT']
DF_ALL_GLOBAL = dataframes['DF_ALL_GLOBAL']
DF_ALL_VARIANT = dataframes['DF_ALL_VARIANT']
DF_ALL_VARIANT_TRUE = dataframes['DF_ALL_VARIANT_TRUE']

print("\n✓ Dataframes prepared and ready for analysis")
print("📊 Summary:")
print(f"   - Seeds analyzed: {len(seeds)}")
print(f"   - Variants: {sorted(DF_BEST_GLOBAL['variant'].unique())}")
# The estimator list is only shown when the frame actually carries that column.
if 'estimator' in DF_ALL_GLOBAL.columns:
    print(f"   - Estimators: {sorted(DF_ALL_GLOBAL['estimator'].unique())}")

## 3. ATE (Average Treatment Effect) Analysis

This section analyzes the performance of estimating Average Treatment Effects (ATE) comparing:
- **Global Method**: Single model trained on all data
- **CPV Partition Method**: Variant-specific models

### 3.1 ATE MSE Decomposition

In [None]:
# Calculate ATE MSE decomposition for best estimators
# Two tables come back: a decomposition summary (displayed below) and a second
# table that, judging by its name, is per seed -- confirm in
# cdv_utils.results_analysis.
ate_decomposition_df, ate_by_seed_df = calculate_ate_mse_decomposition(
    DF_BEST_GLOBAL, DF_BEST_VARIANT, seeds
)

print("\n📈 ATE MSE Decomposition Results:")
display(ate_decomposition_df[['method', 'bias', 'variance', 'bias_squared', 'mse', 'n_seeds']])

In [None]:
# Calculate ATE decomposition for all estimators
# The summary table feeds the combined ATE summary; both tables are passed to
# the ATE significance tests later in the notebook.
estimator_ate_summary_df, estimator_ate_by_seed_df = calculate_ate_by_estimator(
    DF_ALL_GLOBAL, DF_ALL_VARIANT, seeds
)

print("\n📊 ATE Analysis Complete - All Estimators")

In [None]:
# Create comprehensive ATE summary
# Columns shared by both source tables in the combined summary.
ate_summary_cols = ['estimator', 'method', 'bias_squared', 'variance', 'mse']

# The best-estimator table is tagged with the shared label so that its rows
# can be told apart from the per-estimator rows after concatenation.
ate_decomposition_df['estimator'] = BEST_ESTIMATOR_NAME

# bias_squared is (re)computed from 'bias' on both tables so they use the same
# definition. The columns are added in place on purpose: later cells receive
# these same frames.
ate_decomposition_df['bias_squared'] = ate_decomposition_df['bias'] ** 2
estimator_ate_summary_df['bias_squared'] = estimator_ate_summary_df['bias'] ** 2

ate_summary_df = pd.concat(
    [
        ate_decomposition_df[ate_summary_cols],
        estimator_ate_summary_df[ate_summary_cols],
    ],
    ignore_index=True,
)

# bias^2 + variance, shown next to the reported 'mse' so the two can be compared.
ate_summary_df['calculated_mse'] = ate_summary_df['bias_squared'] + ate_summary_df['variance']

print("📋 ATE Summary DataFrame:")
display(ate_summary_df)

### 3.2 ATE Bias-Variance Visualization

In [None]:
# Create ATE bias-variance tradeoff plot
# Input is the combined ATE summary table (best-per-seed rows plus the rows
# for every individual estimator). BEST_ESTIMATOR_NAME is passed so the helper
# can identify the best-per-seed rows -- presumably to highlight them; confirm
# in cdv_utils.visualization.
# save_path is a relative file name, so the image presumably ends up in the
# current working directory -- TODO confirm the helper does not redirect it.
plot_ate_bias_variance_tradeoff(
    ate_summary_df, 
    best_estimator_name=BEST_ESTIMATOR_NAME,
    title_prefix="ATE",
    save_path="ate_bias_variance_tradeoff.png"
)

### 3.3 ATE Statistical Significance Testing

In [None]:
# Perform statistical significance tests for ATE
# Inputs: the per-seed and summary ATE tables for all estimators, plus the
# best-estimator frames, which are labelled with BEST_ESTIMATOR_NAME.
# NOTE(review): the key-findings cell reads 'P_Value' and 'Improvement_Pct'
# from the returned frame, so the helper is expected to provide both columns.
ate_statistical_results_df = perform_ate_mse_statistical_tests(
    estimator_ate_by_seed_df, 
    estimator_ate_summary_df,
    df_best_global=DF_BEST_GLOBAL,
    df_best_variant=DF_BEST_VARIANT,
    best_estimator_name=BEST_ESTIMATOR_NAME
)

In [None]:
# Create comprehensive ATE statistical results table
# Built from the ATE significance-test results; the resulting table is
# displayed here and written to CSV by a later cell.
comprehensive_ate_statistical_table = create_comprehensive_statistical_table(
    ate_statistical_results_df
)

print("📊 Comprehensive ATE Statistical Results:")
display(comprehensive_ate_statistical_table)



1. **Statistical Testing**: One-sided t-tests were performed to test the hypothesis that CPV partition methods achieve lower MSE than global methods.

2. **Bias-Variance Decomposition**: The analysis decomposes prediction errors into bias² and variance components, providing insights into the sources of improvement.

3. **Effect Sizes**: Cohen's d effect sizes quantify the practical significance of improvements beyond statistical significance.

4. **Confidence Intervals**: 95% confidence intervals provide uncertainty estimates for MSE values.

### Interpretation Guide

- **P-value < 0.05**: Statistically significant improvement (marked with *)
- **P-value < 0.01**: Highly significant improvement (marked with **)
- **P-value < 0.001**: Very highly significant improvement (marked with ***)
- **Effect Size ≥ 0.2**: Small practical effect
- **Effect Size ≥ 0.5**: Medium practical effect
- **Effect Size ≥ 0.8**: Large practical effect

In [None]:
# Save ATE results
# The file name is defined once so the written path and the reported path
# cannot drift apart. The path is relative to the current working directory.
ate_table_path = "comprehensive_ate_statistical_table.csv"
comprehensive_ate_statistical_table.to_csv(ate_table_path)
print(f"✓ ATE statistical results saved to '{ate_table_path}'")

## 4. CATE/ITE (Individual Treatment Effect) Analysis

This section analyzes how accurately Individual Treatment Effects (ITE), also referred to here as
Conditional Average Treatment Effects (CATE), are estimated. Estimation accuracy is measured using
PEHE (Precision in Estimation of Heterogeneous Effect).

### 4.1 CATE MSE and PEHE Calculation

In [None]:
# Calculate ITE MSE and PEHE for all estimators
# Two tables come back; the decomposition table is the one combined into the
# CATE summary later in the notebook.
ite_summary_df, ite_decomposition_df = calculate_ite_mse_pehe(
    DF_ALL_GLOBAL, DF_ALL_VARIANT
)

print("📈 ITE/CATE Analysis Complete - All Estimators")

In [None]:
# Calculate ITE for best estimators (no estimator column)
# The best-estimator frames carry no 'estimator' column, so BEST_ESTIMATOR_NAME
# is passed in -- presumably used as the estimator label in the output; confirm
# in cdv_utils.results_analysis.
ite_summary_best, ite_decomp_best = calculate_ite_mse_pehe_no_estimator(
    DF_BEST_GLOBAL, DF_BEST_VARIANT, BEST_ESTIMATOR_NAME
)

print("📈 ITE/CATE Analysis Complete - Best Estimators")

In [None]:
# Create comprehensive CATE summary
wanted_cols = ['method', 'estimator', 'bias_squared', 'variance', 'mse_empirical']

# Both decomposition tables get the same treatment: keep the shared columns and
# rename 'mse_empirical' to 'mse'. All-estimator rows come first, then the
# best-estimator rows.
cate_summary_df = pd.concat(
    [
        frame[wanted_cols].rename(columns={'mse_empirical': 'mse'})
        for frame in (ite_decomposition_df, ite_decomp_best)
    ],
    ignore_index=True,
)

print("📋 CATE Summary DataFrame:")
display(cate_summary_df)

### 4.2 CATE Bias-Variance Visualization

In [None]:
# Create CATE bias-variance tradeoff plot
# Input is the combined CATE summary table (all-estimator rows followed by the
# best-estimator rows). BEST_ESTIMATOR_NAME is passed so the helper can
# identify the best-estimator rows -- presumably to highlight them; confirm in
# cdv_utils.visualization.
# save_path is a relative file name, so the image presumably ends up in the
# current working directory -- TODO confirm the helper does not redirect it.
plot_cate_bias_variance_tradeoff(
    cate_summary_df,
    best_estimator_name=BEST_ESTIMATOR_NAME,
    title_prefix="CATE",
    save_path="cate_bias_variance_tradeoff.png"
)

### 4.3 CATE Statistical Significance Testing

In [None]:
# Perform statistical significance tests for CATE
# Unlike the ATE tests, this call is given the raw all-estimator frames rather
# than pre-aggregated tables, plus the best-estimator frames labelled with
# BEST_ESTIMATOR_NAME.
# NOTE(review): the key-findings cell reads 'P_Value' and 'Improvement_Pct'
# from the returned frame, so the helper is expected to provide both columns.
cate_statistical_results_df = perform_cate_mse_statistical_tests(
    DF_ALL_GLOBAL,
    DF_ALL_VARIANT,
    df_best_global=DF_BEST_GLOBAL,
    df_best_variant=DF_BEST_VARIANT,
    best_estimator_name=BEST_ESTIMATOR_NAME
)

In [None]:
# Create comprehensive CATE statistical results table
# Built from the CATE significance-test results; the resulting table is
# displayed here and written to CSV by a later cell.
comprehensive_cate_statistical_table = create_comprehensive_cate_statistical_table(
    cate_statistical_results_df
)

print("📊 Comprehensive CATE Statistical Results:")
display(comprehensive_cate_statistical_table)



1. **Statistical Testing**: One-sided t-tests were performed to test the hypothesis that CPV partition methods achieve lower MSE than global methods.

2. **Bias-Variance Decomposition**: The analysis decomposes prediction errors into bias² and variance components, providing insights into the sources of improvement.

3. **Effect Sizes**: Cohen's d effect sizes quantify the practical significance of improvements beyond statistical significance.

4. **Confidence Intervals**: 95% confidence intervals provide uncertainty estimates for MSE values.

### Interpretation Guide

- **P-value < 0.05**: Statistically significant improvement (marked with *)
- **P-value < 0.01**: Highly significant improvement (marked with **)
- **P-value < 0.001**: Very highly significant improvement (marked with ***)
- **Effect Size ≥ 0.2**: Small practical effect
- **Effect Size ≥ 0.5**: Medium practical effect
- **Effect Size ≥ 0.8**: Large practical effect

In [None]:
# Save CATE results
# The file name is defined once so the written path and the reported path
# cannot drift apart. The path is relative to the current working directory.
cate_table_path = "comprehensive_cate_statistical_table.csv"
comprehensive_cate_statistical_table.to_csv(cate_table_path)
print(f"✓ CATE statistical results saved to '{cate_table_path}'")

## 5. Summary and Key Findings

### Statistical Significance Summary

In [None]:
# Generate summary statistics
# p-value threshold below which an improvement is counted as significant.
SIGNIFICANCE_LEVEL = 0.05


def _report_significance(label, results_df, alpha=SIGNIFICANCE_LEVEL):
    """Print the headline significance statistics for one results table.

    Args:
        label: Heading text printed above the statistics.
        results_df: DataFrame with 'P_Value' and 'Improvement_Pct' columns;
            each row is counted as one tested estimator.
        alpha: Rows whose P_Value is strictly below this value are counted as
            statistically significant.

    Returns:
        A tuple (n_significant, n_total, mean_improvement_pct,
        max_improvement_pct), or None when results_df is empty.
    """
    # An empty table is reported explicitly instead of being skipped silently;
    # it also keeps the percentage below from dividing by zero.
    if results_df.empty:
        print(f"\n📊 {label} Results: no statistical results available")
        return None

    significant = (results_df['P_Value'] < alpha).sum()
    total = len(results_df)
    avg_improvement = results_df['Improvement_Pct'].mean()
    best_improvement = results_df['Improvement_Pct'].max()

    print(f"\n📊 {label} Results:")
    print(f"   • Total estimators tested: {total}")
    print(f"   • Statistically significant improvements: {significant}/{total} ({significant/total*100:.1f}%)")
    print(f"   • Average MSE improvement: {avg_improvement:.1f}%")
    print(f"   • Best MSE improvement: {best_improvement:.1f}%")
    return significant, total, avg_improvement, best_improvement


print("🎯 KEY FINDINGS SUMMARY")
print("="*80)

# The individual statistics stay available under their original names for any
# cell that reads them after this one.
ate_stats = _report_significance("ATE (Average Treatment Effect)", ate_statistical_results_df)
if ate_stats is not None:
    ate_significant, ate_total, ate_avg_improvement, ate_best_improvement = ate_stats

cate_stats = _report_significance("CATE (Individual Treatment Effect)", cate_statistical_results_df)
if cate_stats is not None:
    cate_significant, cate_total, cate_avg_improvement, cate_best_improvement = cate_stats
