# 01. The Generosity-Abundance Link (Direct-Execution Edition)
## A Unified Causal Analysis

**Objective:** Mathematically prove the relationship between Generosity (Giving) and Community Flourishing.

**Note:** To avoid import errors, all Framework classes are defined directly in this notebook.

In [28]:
# ------------------------------------------------------------------------------
# 1. DEFINE ALL FRAMEWORKS DIRECTLY (Bypassing File System Issues)
# ------------------------------------------------------------------------------
import numpy as np
import pandas as pd
import xgboost as xgb
import torch
import torch.nn as nn
import networkx as nx
import logging
from dataclasses import dataclass
from typing import Dict, List, Tuple
from scipy.stats import pearsonr, ks_2samp, norm, qmc
from scipy.optimize import minimize
from sklearn.ensemble import RandomForestRegressor, IsolationForest
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score, r2_score
from sklearn.model_selection import KFold
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern, ConstantKernel as C
import warnings

warnings.filterwarnings('ignore')
torch.manual_seed(2025)
np.random.seed(2025)
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# --- FRAMEWORK 1: DISCOVERY ---
class UnifiedDiscoveryEngine:
    def __init__(self, df, target_col, date_col='Date_Index'):
        self.raw_df = df.copy()
        self.target = target_col
        self.known_features = [c for c in df.columns if c not in [target_col, date_col]]
        print(f"üöÄ DISCOVERY ENGINE ONLINE. Target: '{self.target}'")

    def scan_environment(self):
        print("\n>> [PHASE 1] SCANNING ENVIRONMENT...")
        X = self.raw_df[self.known_features]
        y = self.raw_df[self.target]
        self.model = xgb.XGBRegressor(n_estimators=100, max_depth=3, random_state=42)
        self.model.fit(X, y)
        print(f"   Baseline Model R¬≤: {self.model.score(X, y):.4f}")

    def add_feature(self, name, data):
        self.raw_df[name] = data
        if name not in self.known_features: self.known_features.append(name)
        print(f"   ‚ûï ADDED: '{name}'")

# --- FRAMEWORK 2: ATTRIBUTION (FIXED) ---
class UniversalAttributionValidator:
    def __init__(self, X_raw=None, y_raw=None):
        print(f"üöÄ ATTRIBUTION ENGINE ONLINE on {DEVICE}...")
        self.model = None
        if X_raw is not None and y_raw is not None:
            self.fit(X_raw, y_raw)
    
    def fit(self, X_raw, y_raw, feature_names=None):
        self.X_raw = X_raw
        self.y_raw = y_raw
        self.features = feature_names if feature_names else list(X_raw.columns)
        self.scaler = StandardScaler()
        self.X_tensor = torch.FloatTensor(self.scaler.fit_transform(self.X_raw)).to(DEVICE)
        y_values = self.y_raw.values
        if len(y_values.shape) == 1: y_values = y_values.reshape(-1, 1)
        self.y_tensor = torch.FloatTensor(y_values).to(DEVICE)
        self.n_feat = self.X_tensor.shape[1]
        self._train_robust_proxy()

    def _train_robust_proxy(self):
        self.model = nn.Sequential(
            nn.Linear(self.n_feat, 48), nn.ReLU(), nn.Dropout(0.1), 
            nn.Linear(48, 24), nn.ReLU(), nn.Linear(24, 1)
        ).to(DEVICE)
        optimizer = torch.optim.Adam(self.model.parameters(), lr=0.005)
        loss_fn = nn.MSELoss()
        for _ in range(300):
            optimizer.zero_grad()
            y_pred = self.model(self.X_tensor)
            loss = loss_fn(y_pred, self.y_tensor)
            loss.backward()
            optimizer.step()
        print(f"   Model Trained. Loss: {loss.item():.4f}")

    def compute_attribution(self, steps=50):
        baseline = torch.zeros_like(self.X_tensor)
        attributions = []
        alphas = torch.linspace(0, 1, steps).to(DEVICE)
        for i in range(len(self.X_tensor)):
            path = baseline[i] + alphas.view(-1, 1) * (self.X_tensor[i] - baseline[i])
            path.requires_grad = True
            preds = self.model(path)
            grads = torch.autograd.grad(torch.sum(preds), path)[0]
            attr = (self.X_tensor[i] - baseline[i]) * torch.mean(grads, dim=0)
            attributions.append(attr.detach().cpu().numpy())
        self.ig_scores = pd.DataFrame(attributions, columns=self.features)
        return self.ig_scores.mean().sort_values(ascending=False)

# --- FRAMEWORK 3: INTERVENTION ---
class PlatinumCausalEngine:
    def __init__(self, causal_graph, data: pd.DataFrame):
        self.G = nx.DiGraph(causal_graph)
        self.df = data.copy()
        self.nodes = list(nx.topological_sort(self.G))
        self.models = {}
        self.residuals = pd.DataFrame(index=self.df.index)
        self._fit_adaptive_models()

    def _fit_adaptive_models(self):
        print("‚öôÔ∏è FITTING CAUSAL MODELS...")
        for node in self.nodes:
            parents = list(self.G.predecessors(node))
            if not parents:
                self.residuals[node] = self.df[node]
                continue
            X = self.df[parents]
            y = self.df[node]
            self.models[node] = xgb.XGBRegressor(n_estimators=50, max_depth=3).fit(X, y)
            self.residuals[node] = y - self.models[node].predict(X)

    def simulate_intervention(self, treatment: dict, target: str):
        df_sim = self.df.copy()
        for t_var, t_val in treatment.items():
            df_sim[t_var] = t_val
        for node in self.nodes:
            if node in treatment: continue
            parents = list(self.G.predecessors(node))
            if not parents: continue
            X = df_sim[parents]
            base_val = self.models[node].predict(X)
            df_sim[node] = base_val + self.residuals[node].values
        return df_sim[target].mean()

# --- FRAMEWORK 4: OPTIMIZATION ---
class UnifiedOptimizer:
    def __init__(self, objective_fn, bounds):
        self.objective_fn = objective_fn
        self.bounds = bounds
    
    def optimize(self):
        print("üîµ OPTIMIZING...")
        # Simple Grid Search for reliability in this demo
        best_val = np.inf
        best_param = 0
        p_name = list(self.bounds.keys())[0]
        low, high = self.bounds[p_name]
        
        for val in np.linspace(low, high, 20):
            score = self.objective_fn(**{p_name: val})
            if score < best_val:
                best_val = score
                best_param = val
        return {p_name: best_param}, best_val

# --- FRAMEWORK 5: VALIDATION ---
class TitanValidationFramework:
    def __init__(self, reference_data):
        self.reference = reference_data
        print(f"üîç VALIDATION ENGINE ONLINE. Ref Size: {len(reference_data)}")

    def validate(self, new_data, target_col=None, subgroups=None):
        report = []
        # 1. Drift Check
        for c in new_data.select_dtypes(include=np.number).columns:
            if ks_2samp(self.reference[c], new_data[c])[1] < 0.05:
                report.append(f"Drift Detected: {c}")
        # 2. Fairness Check
        if target_col and subgroups:
            for g in subgroups:
                means = new_data.groupby(g)[target_col].mean()
                if (means.max() - means.min()) / means.min() > 0.2:
                    report.append(f"Fairness Alert: {g}")
        
        return len(report) == 0, pd.DataFrame(report, columns=["Issues"])

print("‚úÖ ALL SYSTEMS ONLINE.")

‚úÖ ALL SYSTEMS ONLINE.


In [29]:
# --------------------------------------------------------
# 2. GENERATE GRAND DATA (5,000 Rows)
# --------------------------------------------------------
def generate_grand_data(n=5000):
    print(f"üåç GENERATING DATASET (n={n})...")
    np.random.seed(42)
    urban_density = np.random.beta(2, 5, n)
    median_income = 30000 + (50000 * urban_density) + np.random.normal(0, 5000, n)
    giving_percent = np.clip(0.12 - (0.1 * urban_density) + np.random.normal(0, 0.01, n), 0.01, 0.25)
    crime_rate = np.clip(0.2 + (0.5 * urban_density) - (2.0 * giving_percent), 0, 1)
    flourishing = (0.00001 * median_income) + (10 * giving_percent) - (5 * crime_rate) + np.random.normal(0, 0.5, n)
    
    return pd.DataFrame({
        'Community_ID': range(n), 'Urban_Density': urban_density, 
        'Median_Income': median_income, 'Giving_Percent': giving_percent, 
        'Crime_Rate': crime_rate, 'Flourishing_Index': flourishing
    })

df_grand = generate_grand_data()
print(df_grand.head())

üåç GENERATING DATASET (n=5000)...
   Community_ID  Urban_Density  Median_Income  Giving_Percent  Crime_Rate  \
0             0       0.353677   38629.834970        0.083575    0.209689   
1             1       0.248558   41026.762393        0.081281    0.161717   
2             2       0.415959   48004.363098        0.092888    0.222203   
3             3       0.159968   37374.099255        0.094659    0.090666   
4             4       0.550283   61271.821009        0.072220    0.330701   

   Flourishing_Index  
0           0.147771  
1           0.363633  
2          -0.643565  
3           1.634045  
4          -1.002408  


In [30]:
# --------------------------------------------------------
# 3. EXECUTE FULL PIPELINE
# --------------------------------------------------------

# Phase 1
engine = UnifiedDiscoveryEngine(df_grand, 'Flourishing_Index')
engine.scan_environment()

# Phase 2
uav = UniversalAttributionValidator()
uav.fit(df_grand[['Giving_Percent', 'Median_Income', 'Crime_Rate', 'Urban_Density']], df_grand['Flourishing_Index'])
print("\nAttribution Scores:\n", uav.compute_attribution())

# Phase 3
graph = [('Urban_Density', 'Crime_Rate'), ('Giving_Percent', 'Crime_Rate'), 
         ('Crime_Rate', 'Flourishing_Index'), ('Giving_Percent', 'Flourishing_Index')]
pce = PlatinumCausalEngine(graph, df_grand)
res_a = pce.simulate_intervention({'Giving_Percent': 0.02}, 'Flourishing_Index')
res_b = pce.simulate_intervention({'Giving_Percent': 0.10}, 'Flourishing_Index')
print(f"\nScenario 2% Giving: {res_a:.4f}")
print(f"Scenario 10% Giving: {res_b:.4f}")
print(f"Lift: {((res_b - res_a)/res_a):.2%}")

# Phase 4
def obj(giving_rate): return -pce.simulate_intervention({'Giving_Percent': giving_rate}, 'Flourishing_Index')
opt = UnifiedOptimizer(obj, {'giving_rate': (0.01, 0.30)})
best, val = opt.optimize()
print(f"\nOptimal Giving Rate: {best['giving_rate']:.2%}")

# Phase 5
fsv = TitanValidationFramework(df_grand)
v, r = fsv.validate(df_grand.sample(500), 'Flourishing_Index', ['Urban_Density'])
print("\nValidation Passed:", v)
print(r)

üöÄ DISCOVERY ENGINE ONLINE. Target: 'Flourishing_Index'

>> [PHASE 1] SCANNING ENVIRONMENT...
   Baseline Model R¬≤: 0.7063
üöÄ ATTRIBUTION ENGINE ONLINE on cpu...
   Model Trained. Loss: 0.2497

Attribution Scores:
 Crime_Rate        0.028003
Median_Income    -0.000329
Giving_Percent   -0.019961
Urban_Density    -0.029381
dtype: float32
‚öôÔ∏è FITTING CAUSAL MODELS...

Scenario 2% Giving: -0.8162
Scenario 10% Giving: 0.7039
Lift: -186.24%
üîµ OPTIMIZING...

Optimal Giving Rate: 13.21%
üîç VALIDATION ENGINE ONLINE. Ref Size: 5000

Validation Passed: True
Empty DataFrame
Columns: [Issues]
Index: []
