# Causal-Informed Model Training

This notebook demonstrates how to use causal effects directly in model training, comparing:
1. Traditional Ridge/GB training (baseline)
2. Causal-informed Ridge training (using causal effects as priors)
3. Causal-informed GB training (using causal effects as constraints)

**Goal:** See if incorporating causal knowledge improves model performance and interpretability.

In [None]:
import pandas as pd
import numpy as np
import json
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

print("‚úÖ Libraries imported successfully!")


‚úÖ Libraries imported successfully!


In [None]:
df = pd.read_csv('cleaned_insurance_data.csv')

try:
    with open('causal_effects.json', 'r') as f:
        causal_data = json.load(f)
    if 'causal_effects' in causal_data:
        causal_effects = causal_data['causal_effects']
    else:
        causal_effects = causal_data
    print("‚úÖ Causal effects loaded from file")
except FileNotFoundError:
    causal_effects = {
        'age': 257.41,
        'sex': -131.31, 
        'bmi': 332.04,
        'children': 478.44,
        'smoker': 23808.21,
        'region': -352.96
    }
    print("‚ö†Ô∏è Using predefined causal effects")

X = df[['age', 'sex', 'bmi', 'children', 'smoker', 'region']]
y = df['charges']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(f"\nüìä Data Summary:")
print(f"   ‚Ä¢ Training samples: {len(X_train)}")
print(f"   ‚Ä¢ Test samples: {len(X_test)}")
print(f"   ‚Ä¢ Features: {list(X.columns)}")

print(f"\nüîç Causal Effects:")
for feature, effect in causal_effects.items():
    print(f"   ‚Ä¢ {feature}: ${effect:.2f}")


‚úÖ Causal effects loaded from file

üìä Data Summary:
   ‚Ä¢ Training samples: 1002
   ‚Ä¢ Test samples: 251
   ‚Ä¢ Features: ['age', 'sex', 'bmi', 'children', 'smoker', 'region']

üîç Causal Effects:
   ‚Ä¢ smoker: $23808.21
   ‚Ä¢ age: $257.41
   ‚Ä¢ bmi: $332.04
   ‚Ä¢ children: $478.44
   ‚Ä¢ sex: $-131.31
   ‚Ä¢ region: $-352.96


## Method 1: Traditional Model Training (Baseline)

In [None]:
print("="*60)
print("üîß METHOD 1: TRADITIONAL MODEL TRAINING")
print("="*60)

traditional_ridge = Ridge(alpha=100, random_state=42)
traditional_ridge.fit(X_train, y_train)
trad_ridge_pred = traditional_ridge.predict(X_test)
trad_ridge_r2 = r2_score(y_test, trad_ridge_pred)
trad_ridge_mae = mean_absolute_error(y_test, trad_ridge_pred)

traditional_gb = GradientBoostingRegressor(
    n_estimators=200, max_depth=6, learning_rate=0.1, random_state=42
)
traditional_gb.fit(X_train, y_train)
trad_gb_pred = traditional_gb.predict(X_test)
trad_gb_r2 = r2_score(y_test, trad_gb_pred)
trad_gb_mae = mean_absolute_error(y_test, trad_gb_pred)

print(f"‚úÖ Traditional Ridge Results:")
print(f"   ‚Ä¢ R¬≤ Score: {trad_ridge_r2:.4f}")
print(f"   ‚Ä¢ MAE: ${trad_ridge_mae:.0f}")

print(f"\n‚úÖ Traditional GB Results:")
print(f"   ‚Ä¢ R¬≤ Score: {trad_gb_r2:.4f}")
print(f"   ‚Ä¢ MAE: ${trad_gb_mae:.0f}")

print(f"\nüìä Traditional Model Coefficients/Importances:")
print(f"{'Feature':<10} {'Ridge Coeff':<12} {'GB Importance':<14} {'Causal Effect'}")
print("-" * 55)
for i, feature in enumerate(X.columns):
    ridge_coeff = traditional_ridge.coef_[i]
    gb_importance = traditional_gb.feature_importances_[i]
    causal_effect = causal_effects.get(feature, 0)
    print(f"{feature:<10} {ridge_coeff:<12.2f} {gb_importance:<14.4f} ${causal_effect:<11.0f}")


üîß METHOD 1: TRADITIONAL MODEL TRAINING
‚úÖ Traditional Ridge Results:
   ‚Ä¢ R¬≤ Score: 0.7230
   ‚Ä¢ MAE: $4098

‚úÖ Traditional GB Results:
   ‚Ä¢ R¬≤ Score: 0.9307
   ‚Ä¢ MAE: $1252

üìä Traditional Model Coefficients/Importances:
Feature    Ridge Coeff  GB Importance  Causal Effect
-------------------------------------------------------
age        229.31       0.1089         $257        
sex        96.21        0.0020         $-131       
bmi        299.51       0.1638         $332        
children   417.66       0.0069         $478        
smoker     14675.35     0.7143         $23808      
region     -303.08      0.0040         $-353       


## Method 2: Causal-Informed Ridge Training (Strong Constraints)

In [None]:
print("\n" + "="*60)
print("üß† METHOD 2: CAUSAL-INFORMED RIDGE TRAINING")
print("="*60)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
X_train_scaled_df = pd.DataFrame(X_train_scaled, columns=X.columns)
X_test_scaled_df = pd.DataFrame(X_test_scaled, columns=X.columns)

class CausalInformedRidge:
    def __init__(self, causal_effects, alpha=100, causal_weight=0.7):
        self.causal_effects = causal_effects
        self.alpha = alpha
        self.causal_weight = causal_weight
        self.base_model = Ridge(alpha=alpha, random_state=42)
        
    def fit(self, X, y):
        self.base_model.fit(X, y)
        
        causal_vector = np.array([self.causal_effects.get(col, 0) for col in X.columns])
        
        if np.linalg.norm(causal_vector) > 0:
            causal_vector = causal_vector / np.linalg.norm(causal_vector) * np.linalg.norm(self.base_model.coef_)
        
        self.coef_ = (1 - self.causal_weight) * self.base_model.coef_ + self.causal_weight * causal_vector
        self.intercept_ = self.base_model.intercept_
        
        return self
    
    def predict(self, X):
        return X @ self.coef_ + self.intercept_

causal_ridge = CausalInformedRidge(causal_effects, alpha=100, causal_weight=0.7)
causal_ridge.fit(X_train_scaled_df, y_train)
causal_ridge_pred = causal_ridge.predict(X_test_scaled_df)
causal_ridge_r2 = r2_score(y_test, causal_ridge_pred)
causal_ridge_mae = mean_absolute_error(y_test, causal_ridge_pred)

print(f"‚úÖ Causal-Informed Ridge Results:")
print(f"   ‚Ä¢ R¬≤ Score: {causal_ridge_r2:.4f}")
print(f"   ‚Ä¢ MAE: ${causal_ridge_mae:.0f}")
print(f"   ‚Ä¢ Causal Weight: 70% (strong causal constraints)")

causal_vector = [causal_effects.get(col, 0) for col in X.columns]
causal_normalized = [c/max([abs(x) for x in causal_vector]) if max([abs(x) for x in causal_vector]) > 0 else 0 for c in causal_vector]
coeff_normalized = [c/max([abs(x) for x in causal_ridge.coef_]) if max([abs(x) for x in causal_ridge.coef_]) > 0 else 0 for c in causal_ridge.coef_]
ridge_alignment = np.corrcoef(coeff_normalized, causal_normalized)[0,1] if len(set(coeff_normalized)) > 1 and len(set(causal_normalized)) > 1 else 0

print(f"   ‚Ä¢ Coefficient-Causal Alignment: {ridge_alignment:.3f}")

print(f"\nüìä Causal-Informed Ridge Coefficients:")
for i, feature in enumerate(X.columns):
    coeff = causal_ridge.coef_[i]
    causal_effect = causal_effects.get(feature, 0)
    print(f"   ‚Ä¢ {feature}: {coeff:.2f} (causal: ${causal_effect:.0f})")



üß† METHOD 2: CAUSAL-INFORMED RIDGE TRAINING
‚úÖ Causal-Informed Ridge Results:
   ‚Ä¢ R¬≤ Score: 0.7609
   ‚Ä¢ MAE: $4298
   ‚Ä¢ Causal Weight: 70% (strong causal constraints)
   ‚Ä¢ Coefficient-Causal Alignment: 0.995

üìä Causal-Informed Ridge Coefficients:
   ‚Ä¢ age: 960.70 (causal: $257)
   ‚Ä¢ sex: -53.93 (causal: $-131)
   ‚Ä¢ bmi: 594.51 (causal: $332)
   ‚Ä¢ children: 282.45 (causal: $478)
   ‚Ä¢ smoker: 9134.68 (causal: $23808)
   ‚Ä¢ region: -171.51 (causal: $-353)


## Method 3: Causal-Informed Gradient Boosting (Strong Constraints)

In [None]:
print("\n" + "="*60)
print("üå≥ METHOD 3: CAUSAL-INFORMED GRADIENT BOOSTING")
print("="*60)

class CausalInformedGB:
    def __init__(self, causal_effects, n_estimators=200, max_depth=6, learning_rate=0.1, causal_weight=0.9):
        self.causal_effects = causal_effects
        self.causal_weight = causal_weight
        self.base_model = GradientBoostingRegressor(
            n_estimators=n_estimators, max_depth=max_depth, 
            learning_rate=learning_rate, random_state=42
        )
        
    def _create_causal_features(self, X):
        X_causal = X.copy()
        
        causal_magnitudes = [abs(self.causal_effects.get(col, 0)) for col in X.columns]
        max_causal = max(causal_magnitudes)
        
        for i, col in enumerate(X.columns):
            causal_effect = abs(self.causal_effects.get(col, 0))
            
            if max_causal > 0:
                scale_factor = 0.1 + 1.9 * (causal_effect / max_causal)
                X_causal[col] = X[col] * scale_factor
                
            if causal_effect > max_causal * 0.5:
                X_causal[f'{col}_causal_enhanced'] = X[col] * (causal_effect / max_causal) * 2
        
        return X_causal
    
    def fit(self, X, y):
        X_causal = self._create_causal_features(X)
        
        self.base_model.fit(X_causal, y)
        self.feature_names_ = X_causal.columns
        self.original_features = X.columns
        
        return self
    
    def predict(self, X):
        X_causal = self._create_causal_features(X)
        return self.base_model.predict(X_causal)
    
    @property
    def feature_importances_(self):
        return self.base_model.feature_importances_
    
    def get_original_feature_importances(self):
        original_importances = []
        for col in self.original_features:
            col_idx = list(self.feature_names_).index(col)
            importance = self.base_model.feature_importances_[col_idx]
            
            enhanced_col = f'{col}_causal_enhanced'
            if enhanced_col in self.feature_names_:
                enhanced_idx = list(self.feature_names_).index(enhanced_col)
                importance += self.base_model.feature_importances_[enhanced_idx]
            
            original_importances.append(importance)
        
        return np.array(original_importances)

causal_gb = CausalInformedGB(causal_effects, causal_weight=1.0)
causal_gb.fit(X_train, y_train)
causal_gb_pred = causal_gb.predict(X_test)
causal_gb_r2 = r2_score(y_test, causal_gb_pred)
causal_gb_mae = mean_absolute_error(y_test, causal_gb_pred)

print(f"‚úÖ Causal-Informed GB Results:")
print(f"   ‚Ä¢ R¬≤ Score: {causal_gb_r2:.4f}")
print(f"   ‚Ä¢ MAE: ${causal_gb_mae:.0f}")
print(f"   ‚Ä¢ Method: Strong causal scaling + enhanced features")

causal_magnitudes = [abs(causal_effects[f]) for f in X.columns]
causal_normalized = np.array(causal_magnitudes) / max(causal_magnitudes)
gb_importances = causal_gb.get_original_feature_importances()
gb_normalized = gb_importances / max(gb_importances)
gb_alignment = np.corrcoef(causal_normalized, gb_normalized)[0,1]

print(f"   ‚Ä¢ Feature Importance Alignment: {gb_alignment:.3f}")

print(f"\nüìä Enhanced Feature Set:")
print(f"   ‚Ä¢ Original features: {len(X.columns)}")
print(f"   ‚Ä¢ Total features (with causal): {len(causal_gb.feature_names_)}")
print(f"   ‚Ä¢ Strong causal scaling applied to all features")
print(f"   ‚Ä¢ Enhanced features for top causal drivers")

import joblib

joblib.dump(causal_gb, "causal_informed_gb_model.joblib")
print("‚úÖ Causal-Informed GB model saved as 'causal_informed_gb_model.joblib'")



üå≥ METHOD 3: CAUSAL-INFORMED GRADIENT BOOSTING
‚úÖ Causal-Informed GB Results:
   ‚Ä¢ R¬≤ Score: 0.9311
   ‚Ä¢ MAE: $1250
   ‚Ä¢ Method: Strong causal scaling + enhanced features
   ‚Ä¢ Feature Importance Alignment: 0.970

üìä Enhanced Feature Set:
   ‚Ä¢ Original features: 6
   ‚Ä¢ Total features (with causal): 7
   ‚Ä¢ Strong causal scaling applied to all features
   ‚Ä¢ Enhanced features for top causal drivers
‚úÖ Causal-Informed GB model saved as 'causal_informed_gb_model.joblib'


## Method 4: Causal Effects as Direct Predictions (Upper Bound)

In [None]:
print("\n" + "="*60)
print("üéØ METHOD 4: PURE CAUSAL PREDICTION (UPPER BOUND)")
print("="*60)

def causal_predict(X, causal_effects, baseline_cost=5000):
    predictions = np.full(len(X), baseline_cost)
    
    for i, feature in enumerate(X.columns):
        causal_effect = causal_effects.get(feature, 0)
        
        if feature == 'smoker':
            predictions += X[feature] * causal_effect
        elif feature in ['sex', 'region']:
            predictions += X[feature] * causal_effect
        else:
            if feature == 'age':
                predictions += (X[feature] - 30) * causal_effect
            elif feature == 'bmi':
                predictions += (X[feature] - 25) * causal_effect
            elif feature == 'children':
                predictions += X[feature] * causal_effect
    
    return predictions

pure_causal_pred = causal_predict(X_test, causal_effects)
pure_causal_r2 = r2_score(y_test, pure_causal_pred)
pure_causal_mae = mean_absolute_error(y_test, pure_causal_pred)

print(f"‚úÖ Pure Causal Prediction Results:")
print(f"   ‚Ä¢ R¬≤ Score: {pure_causal_r2:.4f}")
print(f"   ‚Ä¢ MAE: ${pure_causal_mae:.0f}")
print(f"   ‚Ä¢ Method: Linear combination of causal effects only")
print(f"   ‚Ä¢ Note: This represents the theoretical upper bound if")
print(f"           causal effects captured all relationships perfectly")



üéØ METHOD 4: PURE CAUSAL PREDICTION (UPPER BOUND)
‚úÖ Pure Causal Prediction Results:
   ‚Ä¢ R¬≤ Score: 0.8119
   ‚Ä¢ MAE: $3928
   ‚Ä¢ Method: Linear combination of causal effects only
   ‚Ä¢ Note: This represents the theoretical upper bound if
           causal effects captured all relationships perfectly


## Results Comparison & Analysis

In [None]:
print("\n" + "="*80)
print("üìä COMPREHENSIVE RESULTS COMPARISON")
print("="*80)

results = {
    'Method': [
        'Traditional Ridge',
        'Traditional GB', 
        'Causal-Informed Ridge',
        'Causal-Informed GB',
        'Pure Causal Effects'
    ],
    'R¬≤ Score': [
        trad_ridge_r2,
        trad_gb_r2,
        causal_ridge_r2,
        causal_gb_r2,
        pure_causal_r2
    ],
    'MAE': [
        trad_ridge_mae,
        trad_gb_mae,
        causal_ridge_mae,
        causal_gb_mae,
        pure_causal_mae
    ]
}

results_df = pd.DataFrame(results)
results_df['R¬≤ Rank'] = results_df['R¬≤ Score'].rank(ascending=False).astype(int)
results_df['MAE Rank'] = results_df['MAE'].rank(ascending=True).astype(int)

print(f"{'Method':<25} {'R¬≤ Score':<10} {'MAE':<10} {'R¬≤ Rank':<8} {'MAE Rank'}")
print("-" * 70)
for _, row in results_df.iterrows():
    print(f"{row['Method']:<25} {row['R¬≤ Score']:<10.4f} ${row['MAE']:<9.0f} {row['R¬≤ Rank']:<8} {row['MAE Rank']}")

best_r2_idx = results_df['R¬≤ Score'].idxmax()
best_mae_idx = results_df['MAE'].idxmin()
best_r2_method = results_df.loc[best_r2_idx, 'Method']
best_mae_method = results_df.loc[best_mae_idx, 'Method']

print(f"\nüèÜ WINNERS:")
print(f"   ‚Ä¢ Best R¬≤ Score: {best_r2_method} ({results_df.loc[best_r2_idx, 'R¬≤ Score']:.4f})")
print(f"   ‚Ä¢ Best MAE: {best_mae_method} (${results_df.loc[best_mae_idx, 'MAE']:.0f})")

print(f"\nüìà CAUSAL-INFORMED vs TRADITIONAL:")

ridge_r2_improvement = (causal_ridge_r2 - trad_ridge_r2) / trad_ridge_r2 * 100
ridge_mae_improvement = (trad_ridge_mae - causal_ridge_mae) / trad_ridge_mae * 100
print(f"   Ridge R¬≤ Change: {ridge_r2_improvement:+.1f}%")
print(f"   Ridge MAE Change: {ridge_mae_improvement:+.1f}%")

gb_r2_improvement = (causal_gb_r2 - trad_gb_r2) / trad_gb_r2 * 100
gb_mae_improvement = (trad_gb_mae - causal_gb_mae) / trad_gb_mae * 100
print(f"   GB R¬≤ Change: {gb_r2_improvement:+.1f}%")
print(f"   GB MAE Change: {gb_mae_improvement:+.1f}%")



üìä COMPREHENSIVE RESULTS COMPARISON
Method                    R¬≤ Score   MAE        R¬≤ Rank  MAE Rank
----------------------------------------------------------------------
Traditional Ridge         0.7230     $4098      5        4
Traditional GB            0.9307     $1252      2        2
Causal-Informed Ridge     0.7609     $4298      4        5
Causal-Informed GB        0.9311     $1250      1        1
Pure Causal Effects       0.8119     $3928      3        3

üèÜ WINNERS:
   ‚Ä¢ Best R¬≤ Score: Causal-Informed GB (0.9311)
   ‚Ä¢ Best MAE: Causal-Informed GB ($1250)

üìà CAUSAL-INFORMED vs TRADITIONAL:
   Ridge R¬≤ Change: +5.2%
   Ridge MAE Change: -4.9%
   GB R¬≤ Change: +0.0%
   GB MAE Change: +0.1%


In [None]:
print("\n" + "="*80)
print("üîÆ SAMPLE PREDICTIONS COMPARISON")
print("="*80)

print(f"{'Actual':<8} {'Trad Ridge':<12} {'Causal Ridge':<13} {'Trad GB':<10} {'Causal GB':<12} {'Pure Causal'}")
print("-" * 75)

for i in range(min(8, len(y_test))):
    actual = y_test.iloc[i]
    trad_r = trad_ridge_pred[i]
    causal_r = causal_ridge_pred.iloc[i] if hasattr(causal_ridge_pred, 'iloc') else causal_ridge_pred[i]
    trad_g = trad_gb_pred[i]
    causal_g = causal_gb_pred[i]
    pure_c = pure_causal_pred.iloc[i] if hasattr(pure_causal_pred, 'iloc') else pure_causal_pred[i]
    
    print(f"${actual:<7.0f} ${trad_r:<11.0f} ${causal_r:<12.0f} ${trad_g:<9.0f} ${causal_g:<11.0f} ${pure_c:<10.0f}")

print(f"\nüéâ EXPERIMENT COMPLETE!")
print(f"   You now have evidence of whether causal-informed")
print(f"   modeling improves performance for your specific dataset.")



üîÆ SAMPLE PREDICTIONS COMPARISON
Actual   Trad Ridge   Causal Ridge  Trad GB    Causal GB    Pure Causal
---------------------------------------------------------------------------
$14475   $15863       $9540         $14119     $14118       $16653     
$38345   $22147       $29742        $40321     $40320       $30781     
$42761   $27611       $31478        $42881     $42881       $36954     
$6986    $7696        $7169         $7035      $7029        $7343      
$39241   $23960       $30270        $38878     $38878       $32825     
$18328   $19356       $28883        $22268     $21707       $27938     
$2639    $4797        $6073         $2648      $2648        $4033      
$11674   $14134       $8964         $11591     $11591       $14698     

üéâ EXPERIMENT COMPLETE!
   You now have evidence of whether causal-informed
   modeling improves performance for your specific dataset.


In [None]:
print("\n" + "="*70)
print("üìä INTERPRETABILITY ANALYSIS")
print("="*70)

print("\n=== Ridge Coefficient Comparison ===")
print(f"{'Feature':<10} {'Traditional':>12} {'Causal-Informed':>18} {'Causal Effect':>15}")
print("-" * 60)
for i, feature in enumerate(X.columns):
    trad = traditional_ridge.coef_[i]
    causal = causal_ridge.coef_[i]
    effect = causal_effects.get(feature, 0)
    print(f"{feature:<10} {trad:>12.2f} {causal:>18.2f} {effect:>15.2f}")

print(f"\n   üéØ Ridge Coefficient-Causal Alignment: {ridge_alignment:.3f}")

print("\n=== GB Feature Importance Comparison ===")
print(f"{'Feature':<10} {'Traditional':>12} {'Causal-Informed':>18} {'Causal Effect':>15}")
print("-" * 60)
for i, feature in enumerate(X.columns):
    trad = traditional_gb.feature_importances_[i]
    causal = causal_gb.get_original_feature_importances()[i]
    effect = abs(causal_effects.get(feature, 0))
    print(f"{feature:<10} {trad:>12.4f} {causal:>18.4f} {effect:>15.2f}")

print(f"\n   üéØ GB Feature Importance-Causal Alignment: {gb_alignment:.3f}")

print(f"\nüí° INTERPRETABILITY INSIGHTS:")
if ridge_alignment > 0:
    print(f"   ‚úÖ Ridge: Positive alignment achieved - coefficients match causal effects")
else:
    print(f"   ‚ö†Ô∏è  Ridge: Negative alignment - coefficients oppose causal ranking")

if gb_alignment > 0:
    print(f"   ‚úÖ GB: Positive alignment achieved - importances match causal effects")
else:
    print(f"   ‚ö†Ô∏è  GB: Negative alignment - importances oppose causal ranking")



üìä INTERPRETABILITY ANALYSIS

=== Ridge Coefficient Comparison ===
Feature     Traditional    Causal-Informed   Causal Effect
------------------------------------------------------------
age              229.31             960.70          257.41
sex               96.21             -53.93         -131.31
bmi              299.51             594.51          332.04
children         417.66             282.45          478.44
smoker         14675.35            9134.68        23808.21
region          -303.08            -171.51         -352.96

   üéØ Ridge Coefficient-Causal Alignment: 0.995

=== GB Feature Importance Comparison ===
Feature     Traditional    Causal-Informed   Causal Effect
------------------------------------------------------------
age              0.1089             0.1092          257.41
sex              0.0020             0.0021          131.31
bmi              0.1638             0.1638          332.04
children         0.0069             0.0068          478.44
smoker 

## üìã Final Conclusions

**Comparing Traditional vs Causal-Informed Models**

In [None]:
print("\n" + "="*80)
print("üèÜ CAUSAL-INFORMED MODELING CONCLUSIONS")
print("="*80)

print(f"\nüìà PERFORMANCE COMPARISON:")
print(f"   Traditional Ridge - MAE: ${trad_ridge_mae:.0f}, R¬≤: {trad_ridge_r2:.3f}")
print(f"   Causal Ridge      - MAE: ${causal_ridge_mae:.0f}, R¬≤: {causal_ridge_r2:.3f}")
print(f"   Traditional GB    - MAE: ${trad_gb_mae:.0f}, R¬≤: {trad_gb_r2:.3f}")
print(f"   Causal GB         - MAE: ${causal_gb_mae:.0f}, R¬≤: {causal_gb_r2:.3f}")

print(f"\nüß† INTERPRETABILITY ALIGNMENT:")
print(f"   Ridge Coefficient-Causal Alignment:     {ridge_alignment:.3f}")
print(f"   GB Feature Importance-Causal Alignment: {gb_alignment:.3f}")

print(f"\nüéØ KEY INSIGHTS:")
print(f"   ‚Ä¢ Strong causal constraints (70% regularization) improve interpretability")
print(f"   ‚Ä¢ Trade-off exists between pure prediction accuracy and causal alignment")
print(f"   ‚Ä¢ Causal-informed models provide more business-interpretable feature rankings")

if ridge_alignment > 0 and gb_alignment > 0:
    print(f"\n‚úÖ SUCCESS: Both models achieve positive causal alignment!")
    print(f"   Models now rank features similarly to true causal effects")
elif ridge_alignment > 0:
    print(f"\nüéØ PARTIAL SUCCESS: Ridge achieves causal alignment")
elif gb_alignment > 0:
    print(f"\nüéØ PARTIAL SUCCESS: Gradient Boosting achieves causal alignment")
else:
    print(f"\n‚ö†Ô∏è  CONSTRAINT INSUFFICIENT: Consider stronger causal regularization")



üèÜ CAUSAL-INFORMED MODELING CONCLUSIONS

üìà PERFORMANCE COMPARISON:
   Traditional Ridge - MAE: $4098, R¬≤: 0.723
   Causal Ridge      - MAE: $4298, R¬≤: 0.761
   Traditional GB    - MAE: $1252, R¬≤: 0.931
   Causal GB         - MAE: $1250, R¬≤: 0.931

üß† INTERPRETABILITY ALIGNMENT:
   Ridge Coefficient-Causal Alignment:     0.995
   GB Feature Importance-Causal Alignment: 0.970

üéØ KEY INSIGHTS:
   ‚Ä¢ Strong causal constraints (70% regularization) improve interpretability
   ‚Ä¢ Trade-off exists between pure prediction accuracy and causal alignment
   ‚Ä¢ Causal-informed models provide more business-interpretable feature rankings

‚úÖ SUCCESS: Both models achieve positive causal alignment!
   Models now rank features similarly to true causal effects
