In [1]:
# PHASE 2 v7.0 FINAL: CUMULATIVE DRIFT VALIDATION (CLEAN DATA)
# ==========================================================================
# Complete analysis with real-time detection logic applied to lab data.
# Duplicates (sub-18/19/20/21) removed. Only the 16 real subjects are analyzed.
#
# Key Results:
#   - Trial-level: Drift increases error_risk by 6.21 points (p < 0.001)
#   - Session-level: ρ = 0.739 (cumulative drift ↔ error_risk)
#   - High-drift segment: 6 subjects with 15%+ mean drift = $384/year value
#   - Medium-drift segment: 4 subjects with 10-15% drift = $320/year value

import pandas as pd
import numpy as np
from pathlib import Path
from collections import deque, Counter
from scipy.stats import spearmanr, ttest_ind
import warnings
warnings.filterwarnings('ignore')

# ============================================================================
# CONFIG
# ============================================================================

# Input folder with the lab CSVs; results go to a sub-folder created on import.
LAB_DATA_DIR = Path(r"C:\Users\rapol\Downloads\lab_analysis_v6_0_grounded")
OUTPUT_DIR = LAB_DATA_DIR.joinpath("phase2_v7_final")
OUTPUT_DIR.mkdir(exist_ok=True)

# State labels, grouped into the two categories compared in the validation.
OPTIMAL_STATES = [
    'Optimal-Engaged',
    'Optimal-Monitoring',
]
DRIFT_STATES = [
    'Mind-Wandering',
    'Fatigue',
    'Overload',
]

# Real-time detection parameters
TEMPORAL_WINDOW_SIZE = 10        # trials kept by the temporal smoother
CUMULATIVE_DRIFT_WINDOW = 120    # seconds
INTERVENTION_THRESHOLD = 50.0    # cumulative drift percentage that triggers an alert
INTERVENTION_COOLDOWN = 30       # minimum number of trials between two alerts

# z-score cut-offs consulted by detect_drift_markers, one table per marker family.
MW_THRESHOLDS = dict(
    tbr_moderate=0.25,
    alpha_decrease=-0.15,
    pe_decrease=-0.2,
    lz_decrease=-0.3,
)
FATIGUE_THRESHOLDS = dict(
    alpha_increase=0.8,
    delta_increase=0.3,
    theta_increase=0.2,
    beta_decrease=-0.3,
)
OVERLOAD_THRESHOLDS = dict(
    theta_extreme=2.0,
    pac_extreme=1.2,
)

# ============================================================================
# ENGINES
# ============================================================================

class TemporalSmoothingEngine:
    """Rolling window over the most recent trials' state labels and z-scores.

    Both histories are bounded deques of length ``window_size``; once the
    window is full, appending a trial discards the oldest one.
    """

    def __init__(self, window_size=10):
        self.state_history = deque(maxlen=window_size)
        self.zscore_history = deque(maxlen=window_size)

    def add_trial(self, state: str, zscores: dict):
        """Append one trial's state label and z-score mapping to the window."""
        self.state_history.append(state)
        self.zscore_history.append(zscores)

    def get_smoothed_state(self):
        """Return the most frequent state in the window.

        With fewer than three trials no vote is taken: the latest state is
        returned, or ``'Calibrating'`` when the window is empty.
        """
        if len(self.state_history) < 3:
            return self.state_history[-1] if self.state_history else 'Calibrating'
        return Counter(self.state_history).most_common(1)[0][0]

    def get_smoothed_zscores(self):
        """Return the per-feature mean z-score over the window.

        Returns ``None`` when no trial has been added. Each feature is
        averaged over the trials that actually contain it.
        """
        if not self.zscore_history:
            return None
        # Collect values over the union of keys. Taking the key set from the
        # oldest trial alone would silently drop any feature that happens to
        # be missing from that one trial.
        collected = {}
        for trial in self.zscore_history:
            for key, value in trial.items():
                collected.setdefault(key, []).append(value)
        return {key: np.mean(values) for key, values in collected.items()}

class CumulativeDriftTracker:
    """Rolling percentage of drift trials over a fixed-length time window.

    Args:
        window_seconds: Length of the window in seconds.
        seconds_per_trial: Duration attributed to one trial when converting
            the window to a trial count. Defaults to 2.0, the value that was
            previously hard-coded.

    Raises:
        ValueError: If ``seconds_per_trial`` is not positive.
    """

    def __init__(self, window_seconds=120, seconds_per_trial=2.0):
        if seconds_per_trial <= 0:
            raise ValueError("seconds_per_trial must be positive")
        self.window_trials = int(window_seconds / seconds_per_trial)
        self.drift_history = deque(maxlen=self.window_trials)

    def add_trial(self, is_drift: bool):
        """Record one trial as 1 (drift) or 0 (no drift)."""
        self.drift_history.append(1 if is_drift else 0)

    def get_pct(self):
        """Return the share of drift trials in the window as a percentage.

        Returns 0.0 while the window is still empty.
        """
        if not self.drift_history:
            return 0.0
        return sum(self.drift_history) / len(self.drift_history) * 100

class WindowedStateClassifier:
    """Majority-vote state classifier over a rolling window of trials.

    Args:
        window_size: Number of recent trials that take part in the vote.
            Defaults to 10, the smoothing engine's own default.
    """

    def __init__(self, window_size=10):
        self.temporal_smoother = TemporalSmoothingEngine(window_size)

    def add_trial(self, instant_state: str, zscores: dict):
        """Feed one trial's instantaneous state and z-scores into the window."""
        self.temporal_smoother.add_trial(instant_state, zscores)

    def get_windowed_state(self):
        """Return ``(state, confidence)`` for the current window.

        With fewer than three trials the latest state (or ``'Calibrating'``
        for an empty window) is returned with a fixed confidence of 0.5.
        Otherwise the most frequent state is returned together with the
        fraction of window trials that carry it.
        """
        history = self.temporal_smoother.state_history
        if len(history) < 3:
            state = history[-1] if history else 'Calibrating'
            return state, 0.5
        # Count once; the winner and its vote share come from the same tally.
        state, votes = Counter(history).most_common(1)[0]
        return state, votes / len(history)

class InterventionManager:
    """Decide when an intervention alert fires, with a cooldown between alerts.

    Args:
        threshold: Cumulative drift percentage at or above which an alert
            may fire.
        cooldown_trials: Minimum number of trials between two alerts.
    """

    def __init__(self, threshold=50.0, cooldown_trials=30):
        self.threshold = threshold
        self.cooldown_trials = cooldown_trials
        # -inf means "no intervention yet": the cooldown test below can never
        # suppress the first alert, whatever the cooldown length. A finite
        # sentinel such as -999 would do so for very long cooldowns.
        self.last_intervention = float('-inf')
        self.count = 0

    def check(self, trial_num: int, cumulative_drift: float):
        """Return True and record an intervention if one should fire now.

        An alert fires when ``cumulative_drift`` has reached the threshold and
        at least ``cooldown_trials`` trials have passed since the last alert.
        """
        if cumulative_drift < self.threshold:
            return False
        if trial_num - self.last_intervention < self.cooldown_trials:
            return False
        self.last_intervention = trial_num
        self.count += 1
        return True

# ============================================================================
# DRIFT DETECTION
# ============================================================================

def detect_drift_markers(z: dict):
    """Compare z-scores against the module thresholds and list what fired.

    Args:
        z: Mapping of z-score names to values; a missing name counts as 0.

    Returns:
        Dict with keys ``'mw'``, ``'fat'`` and ``'ol'``, each holding the
        labels of the markers in that family whose threshold was crossed.
    """
    # One row per marker: (family, label, z-score key, cut-off, fires when above?)
    rules = (
        ('mw', 'TBR', 'z_theta_beta_ratio', MW_THRESHOLDS['tbr_moderate'], True),
        ('mw', 'Alpha', 'z_alpha', MW_THRESHOLDS['alpha_decrease'], False),
        ('mw', 'PE', 'z_pe', MW_THRESHOLDS['pe_decrease'], False),
        ('mw', 'LZ', 'z_lz', MW_THRESHOLDS['lz_decrease'], False),
        ('fat', 'Alpha', 'z_alpha', FATIGUE_THRESHOLDS['alpha_increase'], True),
        ('fat', 'Delta', 'z_delta', FATIGUE_THRESHOLDS['delta_increase'], True),
        ('fat', 'Theta', 'z_theta', FATIGUE_THRESHOLDS['theta_increase'], True),
        ('fat', 'Beta', 'z_beta', FATIGUE_THRESHOLDS['beta_decrease'], False),
        ('ol', 'Theta', 'z_theta', OVERLOAD_THRESHOLDS['theta_extreme'], True),
        ('ol', 'PAC', 'z_pac', OVERLOAD_THRESHOLDS['pac_extreme'], True),
    )

    found = {'mw': [], 'fat': [], 'ol': []}
    for family, label, key, cutoff, fires_above in rules:
        value = z.get(key, 0)
        crossed = value > cutoff if fires_above else value < cutoff
        if crossed:
            found[family].append(label)
    return found

def compute_drift_strength(markers):
    """Turn the per-family marker lists into a single drift score.

    Each family contributes a fixed number of points once it has enough
    markers: 20 for two or more ``'mw'``, 10 for two or more ``'fat'``,
    30 for at least one ``'ol'``. The sum is capped at 100.
    """
    # (family, markers required, points awarded)
    scoring = (('mw', 2, 20), ('fat', 2, 10), ('ol', 1, 30))
    total = sum(
        points
        for family, required, points in scoring
        if len(markers[family]) >= required
    )
    return min(100, total)

# ============================================================================
# PROCESS LAB DATA (16 REAL SUBJECTS)
# ============================================================================

print("="*120)
print("PHASE 2 v7.0 FINAL: CUMULATIVE DRIFT VALIDATION")
print("="*120)
print()

# The real participants are sub-01 .. sub-16. A single whitelist is applied to
# both the session table and the per-session files so the two inputs cannot
# disagree about which subjects are included.
real_subjects = [f"sub-{i:02d}" for i in range(1, 17)]

# Load session-level error risk
session_eff = pd.read_csv(LAB_DATA_DIR / "drift_analysis_results_phase2_session_summary.csv")

# REMOVE DUPLICATES: Keep only sub-01 through sub-16
session_eff = session_eff[session_eff['subject'].isin(real_subjects)]
print(f"✓ Loaded {len(session_eff)} sessions (16 real subjects, duplicates removed)")
print()

csv_files = sorted(LAB_DATA_DIR.glob("*_6STATES_v6_0.csv"))

# Filter to only real subjects. The subject is the file-stem token before the
# first underscore; comparing that token exactly avoids accidental substring
# matches elsewhere in the file name.
csv_files = [f for f in csv_files if f.stem.split('_')[0] in real_subjects]

print(f"✓ Found {len(csv_files)} session CSV files (16 real subjects)")
print()

# Accumulators filled by the per-session processing loop.
all_trials = []
session_stats = []

for fpath in csv_files:
    # File stems start with "<subject>_<session>_".
    stem_parts = fpath.stem.split('_')
    subject = stem_parts[0]
    session = stem_parts[1]

    df = pd.read_csv(fpath)
    if df.empty:
        # No trials to classify; the percentages below would have no denominator.
        print(f"  {subject}/{session}: no trials found, skipping")
        continue

    # Get session error_risk (NaN when the summary table has no matching row)
    sess = session_eff[(session_eff['subject'] == subject) & (session_eff['session'] == session)]
    error_risk = sess['mean_error_risk'].values[0] if len(sess) > 0 else np.nan

    # Initialize engines: fresh instances per session so no state carries over
    ts = TemporalSmoothingEngine(TEMPORAL_WINDOW_SIZE)
    cd = CumulativeDriftTracker(CUMULATIVE_DRIFT_WINDOW)
    wc = WindowedStateClassifier()
    im = InterventionManager(INTERVENTION_THRESHOLD, INTERVENTION_COOLDOWN)

    # The z-score columns are the same for every row; resolve them once per file.
    z_cols = [c for c in df.columns if c.startswith('z_')]

    trials = []

    # Number trials 1..N by position rather than by the frame's index labels.
    for trial_num, (_, row) in enumerate(df.iterrows(), start=1):
        instant_state = row['cognitive_state']

        # Extract z-scores (missing values are left out of the mapping)
        zscores = {c: row[c] for c in z_cols if pd.notna(row[c])}

        # Windowed classification
        wc.add_trial(instant_state, zscores)
        windowed_state, confidence = wc.get_windowed_state()

        # Temporal smoothing: push the CURRENT trial's raw z-scores first, then
        # read the window mean. Feeding the previous smoothed values back in
        # would freeze the average at the first trial for the whole session.
        ts.add_trial(windowed_state, zscores)
        smoothed_z = ts.get_smoothed_zscores()

        # Drift detection
        markers = detect_drift_markers(smoothed_z)
        drift_strength = compute_drift_strength(markers)

        # Cumulative drift
        is_drift = windowed_state in DRIFT_STATES
        cd.add_trial(is_drift)
        cum_drift = cd.get_pct()

        # Intervention trigger
        alert = im.check(trial_num, cum_drift)

        trials.append({
            'subject': subject,
            'session': session,
            'trial': trial_num,
            'instant_state': instant_state,
            'windowed_state': windowed_state,
            'confidence': confidence,
            'drift_strength': drift_strength,
            'cumulative_drift_pct': cum_drift,
            'alert': int(alert),
            'error_risk': error_risk,
        })

    # Session summary
    trials_df = pd.DataFrame(trials)

    n_optimal = (trials_df['windowed_state'].isin(OPTIMAL_STATES)).sum()
    n_drift = (trials_df['windowed_state'].isin(DRIFT_STATES)).sum()
    pct_optimal = 100 * n_optimal / len(trials_df)
    pct_drift = 100 * n_drift / len(trials_df)
    mean_cum_drift = trials_df['cumulative_drift_pct'].mean()

    session_stats.append({
        'subject': subject,
        'session': session,
        'n_trials': len(trials_df),
        'pct_optimal': pct_optimal,
        'pct_drift': pct_drift,
        'mean_cumulative_drift': mean_cum_drift,
        'max_cumulative_drift': trials_df['cumulative_drift_pct'].max(),
        'n_alerts': im.count,
        'error_risk': error_risk,
    })

    all_trials.extend(trials)

    print(f"  {subject}/{session}: {len(df):4d} trials | drift={pct_drift:5.1f}% | cum_drift={mean_cum_drift:5.1f}% | alerts={im.count:2d} | error_risk={error_risk:5.1f}")

# Processing summary (blank line before and after).
print(
    "",
    f"✓ Processed {len(all_trials)} trials",
    f"✓ Created {len(session_stats)} session summaries (16 subjects × 3 sessions = 48 sessions)",
    "",
    sep="\n",
)

# Save outputs: one CSV with every trial, one CSV with one row per session.
trials_df_all = pd.DataFrame(all_trials)
trials_df_all.to_csv(OUTPUT_DIR.joinpath("phase2_v7_trials.csv"), index=False)

sessions_df = pd.DataFrame(session_stats)
sessions_df.to_csv(OUTPUT_DIR.joinpath("phase2_v7_sessions.csv"), index=False)

print(f"✓ Saved to {OUTPUT_DIR}", "", sep="\n")

# ============================================================================
# VALIDATION: Cumulative Drift ↔ Error Risk
# ============================================================================

print("="*120)
print("VALIDATION: CUMULATIVE DRIFT PREDICTS ERROR RISK")
print("="*120)
print()

# Trial-level
# NOTE(review): error_risk is a per-session value repeated on every trial row,
# so trial-level samples are not independent and this p-value is probably
# optimistic. Treat the session-level correlations as the primary evidence.
opt = trials_df_all[trials_df_all['windowed_state'].isin(OPTIMAL_STATES)]['error_risk'].dropna()
drift = trials_df_all[trials_df_all['windowed_state'].isin(DRIFT_STATES)]['error_risk'].dropna()

if len(opt) > 0 and len(drift) > 0:
    t, p = ttest_ind(opt, drift)
    # Cohen's d using the root-mean-square of the two group standard deviations.
    d = (drift.mean() - opt.mean()) / np.sqrt((opt.std()**2 + drift.std()**2) / 2)

    print("TRIAL-LEVEL (Windowed State):")
    print(f"  Optimal (n={len(opt):6d}):  error_risk = {opt.mean():.2f} ± {opt.std():.2f}")
    print(f"  Drift   (n={len(drift):6d}):  error_risk = {drift.mean():.2f} ± {drift.std():.2f}")
    print(f"  Δ = {drift.mean() - opt.mean():.2f} points ({100*(drift.mean()-opt.mean())/opt.mean():.1f}%) | t={t:.3f} | p={p:.2e} | d={d:.3f}")
    print()

# Session-level
# Sessions with no matching summary row carry NaN error_risk. spearmanr
# propagates NaN by default, which would turn every correlation into NaN, so
# those sessions are excluded from the correlations only.
sessions_valid = sessions_df.dropna(subset=['error_risk'])
rho1, p1 = spearmanr(sessions_valid['pct_drift'], sessions_valid['error_risk'])
rho2, p2 = spearmanr(sessions_valid['mean_cumulative_drift'], sessions_valid['error_risk'])
rho3, p3 = spearmanr(sessions_valid['max_cumulative_drift'], sessions_valid['error_risk'])

print("SESSION-LEVEL CORRELATIONS (Spearman):")
print(f"  % drift vs error_risk:          ρ = {rho1:7.3f}, p = {p1:.2e}")
print(f"  mean cumulative drift vs risk:  ρ = {rho2:7.3f}, p = {p2:.2e}")
print(f"  max cumulative drift vs risk:   ρ = {rho3:7.3f}, p = {p3:.2e}")
print()

# Drift segmentation (sessions binned by mean cumulative drift percentage)
high = sessions_df[sessions_df['mean_cumulative_drift'] >= 15]
medium = sessions_df[(sessions_df['mean_cumulative_drift'] >= 10) & (sessions_df['mean_cumulative_drift'] < 15)]
low = sessions_df[sessions_df['mean_cumulative_drift'] < 10]

print(f"SEGMENTATION (n={len(sessions_df)} sessions):")
print(f"  HIGH drift (≥15%, n={len(high):2d}):    error_risk = {high['error_risk'].mean():.2f} ± {high['error_risk'].std():.2f}")
print(f"  MEDIUM drift (10-15%, n={len(medium):2d}): error_risk = {medium['error_risk'].mean():.2f} ± {medium['error_risk'].std():.2f}")
print(f"  LOW drift (<10%, n={len(low):2d}):     error_risk = {low['error_risk'].mean():.2f} ± {low['error_risk'].std():.2f}")
print()

# High vs low
high_risk = high['error_risk'].mean() - low['error_risk'].mean()
print(f"HIGH vs LOW DRIFT:")
print(f"  Δ error_risk = {high_risk:.2f} points ({100*high_risk/low['error_risk'].mean():.1f}% worse)")
print(f"  Annual value per high-drift user: ${high_risk * 50:.0f} (at $50/point prevention)")
print()

# Global
print("GLOBAL METRICS:")
print(f"  Total interventions:            {sessions_df['n_alerts'].sum():3d}")
print(f"  Sessions with ≥1 alert:        {(sessions_df['n_alerts'] > 0).sum():2d}/{len(sessions_df)}")
print(f"  Mean cumulative drift:          {trials_df_all['cumulative_drift_pct'].mean():.2f}%")
print(f"  Peak cumulative drift:          {sessions_df['max_cumulative_drift'].max():.2f}%")
print(f"  Mean error_risk (global):       {trials_df_all['error_risk'].mean():.2f}")
print()

# Subject segmentation
print("="*120)
print("SUBJECT-LEVEL BREAKDOWN (Individual Differences)")
print("="*120)
print()

# One row per subject: mean/std across that subject's sessions, alerts summed.
subj_stats = sessions_df.groupby('subject').agg({
    'mean_cumulative_drift': ['mean', 'std'],
    'error_risk': ['mean', 'std'],
    'n_alerts': 'sum',
}).round(2)

# Flatten the two-level column index produced by the multi-function agg.
subj_stats.columns = ['drift_mean', 'drift_std', 'risk_mean', 'risk_std', 'total_alerts']
subj_stats = subj_stats.sort_values('drift_mean', ascending=False)

print(subj_stats.to_string())
print()

# Segmentation summary (same 10% / 15% cut-offs as the session-level bins)
high_subj = subj_stats[subj_stats['drift_mean'] >= 15]
med_subj = subj_stats[(subj_stats['drift_mean'] >= 10) & (subj_stats['drift_mean'] < 15)]
low_subj = subj_stats[subj_stats['drift_mean'] < 10]

# Value per user = error_risk gap to the low-drift group, at $50 per point.
# Computed once so the segment report and the closing summary cannot diverge.
low_risk_mean = low_subj['risk_mean'].mean()
high_value = (high_subj['risk_mean'].mean() - low_risk_mean) * 50
med_value = (med_subj['risk_mean'].mean() - low_risk_mean) * 50

print("="*120)
print("MARKET SEGMENTATION (By Subject)")
print("="*120)
print()
print(f"HIGH DRIFT SUBJECTS (n={len(high_subj)}, ≥15% mean drift):")
print(f"  Subjects: {', '.join(high_subj.index.tolist())}")
print(f"  Avg error_risk: {high_subj['risk_mean'].mean():.2f}")
print(f"  Total alerts: {high_subj['total_alerts'].sum()}")
print(f"  Annual value per user: ${high_value:.0f}")
print()

print(f"MEDIUM DRIFT SUBJECTS (n={len(med_subj)}, 10-15% mean drift):")
print(f"  Subjects: {', '.join(med_subj.index.tolist())}")
print(f"  Avg error_risk: {med_subj['risk_mean'].mean():.2f}")
print(f"  Total alerts: {med_subj['total_alerts'].sum()}")
print(f"  Annual value per user: ${med_value:.0f}")
print()

print(f"LOW DRIFT SUBJECTS (n={len(low_subj)}, <10% mean drift):")
print(f"  Subjects: {', '.join(low_subj.index.tolist())}")
print(f"  Avg error_risk: {low_risk_mean:.2f}")
print(f"  Total alerts: {low_subj['total_alerts'].sum()}")
print()

print("="*120)
print("✅ PHASE 2 v7.0 FINAL COMPLETE (CLEAN DATA)")
print("="*120)
print()
print(f"Key Finding:")
# Report the p-value that was actually computed instead of asserting a fixed
# significance level that may not hold for the data at hand.
print(f"  Cumulative drift (ρ = {rho2:.3f}, p = {p2:.2e}) predicts error_risk")
print(f"  High-drift users: {len(high_subj)} subjects with ${high_value:.0f}/year potential value")
print(f"  Medium-drift users: {len(med_subj)} subjects with ${med_value:.0f}/year potential value")
print()
print(f"Output: {OUTPUT_DIR}")
print()

PHASE 2 v7.0 FINAL: CUMULATIVE DRIFT VALIDATION

✓ Loaded 48 sessions (16 real subjects, duplicates removed)

✓ Found 48 session CSV files (16 real subjects)

  sub-01/ses-S1: 1906 trials | drift=  8.2% | cum_drift=  8.2% | alerts= 6 | error_risk= 40.6
  sub-01/ses-S2: 1641 trials | drift=  9.6% | cum_drift=  9.6% | alerts= 4 | error_risk= 38.2
  sub-01/ses-S3: 1934 trials | drift=  4.8% | cum_drift=  4.8% | alerts= 0 | error_risk= 40.2
  sub-02/ses-S1:  717 trials | drift=  2.1% | cum_drift=  2.1% | alerts= 0 | error_risk= 32.0
  sub-02/ses-S2: 1727 trials | drift=  6.6% | cum_drift=  6.5% | alerts= 0 | error_risk= 48.1
  sub-02/ses-S3: 1467 trials | drift= 14.2% | cum_drift= 14.2% | alerts= 8 | error_risk= 44.1
  sub-03/ses-S1: 1229 trials | drift=  2.4% | cum_drift=  2.4% | alerts= 0 | error_risk= 35.7
  sub-03/ses-S2: 1309 trials | drift= 10.5% | cum_drift= 10.5% | alerts= 2 | error_risk= 40.7
  sub-03/ses-S3: 1940 trials | drift= 12.5% | cum_drift= 12.5% | alerts=10 | error_risk= 

In [1]:
#!/usr/bin/env python3
"""
PHASE 2 v7.0 FINAL: CUMULATIVE DRIFT + IG-BASED EFFICIENCY (USING REAL v6.0 IG METRICS)
==========================================================================
Complete analysis with real-time detection logic + real Information Geometry.
Duplicates (sub-18/19/20/21) removed. Only 16 real subjects.

Key Results:
  - Trial-level: Drift increases error_risk by 5.16 points (11.7%, p < 0.001)
  - Trial-level: Drift decreases efficiency by ~27 points (42.5% drop, p < 0.001)
  - Session-level: ρ = 0.672 (cumulative drift ↔ error_risk)
  - Session-level: ρ = -0.784 (cumulative drift ↔ efficiency)
  - High-drift segment: 6 subjects with 15%+ mean drift = $338/year value
  - Medium-drift segment: 4 subjects with 10-15% drift = $274/year value
"""

import pandas as pd
import numpy as np
from pathlib import Path
from collections import deque, Counter
from scipy.stats import spearmanr, ttest_ind
import warnings
warnings.filterwarnings('ignore')

# ============================================================================
# CONFIG
# ============================================================================

# Input folder with the lab CSVs; results go to a sub-folder created on import.
LAB_DATA_DIR = Path(r"C:\Users\rapol\Downloads\lab_analysis_v6_0_grounded")
OUTPUT_DIR = LAB_DATA_DIR.joinpath("phase2_v7_final")
OUTPUT_DIR.mkdir(exist_ok=True)

# State labels, grouped into the two categories compared in the validation.
OPTIMAL_STATES = [
    'Optimal-Engaged',
    'Optimal-Monitoring',
]
DRIFT_STATES = [
    'Mind-Wandering',
    'Fatigue',
    'Overload',
]

# Real-time detection parameters
TEMPORAL_WINDOW_SIZE = 10        # trials kept by the temporal smoother
CUMULATIVE_DRIFT_WINDOW = 120    # seconds
INTERVENTION_THRESHOLD = 50.0    # cumulative drift percentage that triggers an alert
INTERVENTION_COOLDOWN = 30       # minimum number of trials between two alerts

# z-score cut-offs consulted by detect_drift_markers, one table per marker family.
MW_THRESHOLDS = dict(
    tbr_moderate=0.25,
    alpha_decrease=-0.15,
    pe_decrease=-0.2,
    lz_decrease=-0.3,
)
FATIGUE_THRESHOLDS = dict(
    alpha_increase=0.8,
    delta_increase=0.3,
    theta_increase=0.2,
    beta_decrease=-0.3,
)
OVERLOAD_THRESHOLDS = dict(
    theta_extreme=2.0,
    pac_extreme=1.2,
)

# IG-based efficiency weights
IG_WEIGHTS = dict(
    mahal=0.50,      # Information geometry Mahalanobis distance weight
    kl=0.20,         # Kullback-Leibler divergence weight
    intensity=0.30,  # Engagement intensity weight
)

# ============================================================================
# LOAD IG METRICS FROM v6.0
# ============================================================================

print("="*120)
print("PHASE 2 v7.0 FINAL: CUMULATIVE DRIFT + REAL IG-BASED EFFICIENCY")
print("="*120)
print()

print("Loading pre-computed IG metrics from Phase 2 v6.0...")
ig_metrics_path = LAB_DATA_DIR.joinpath("phase2_complete_v6", "ig_metrics_computed_v6.csv")

if not ig_metrics_path.exists():
    # No metrics file: continue with an empty table so the efficiency
    # computation uses its state-based fallback.
    print(f"⚠️  WARNING: {ig_metrics_path} not found")
    print("   Using fallback: state-based penalties")
    print()
    df_ig = pd.DataFrame()
    ig_metrics_available = False
else:
    df_ig = pd.read_csv(ig_metrics_path)
    print(f"✓ Loaded IG metrics for {len(df_ig)} states")
    print()
    # Echo the per-state metrics table for a visual sanity check.
    print(df_ig[['state', 'mahalanobis_distance', 'kl_divergence', 'riemannian_distance', 'avg_intensity']].to_string(index=False))
    print()
    ig_metrics_available = True

# ============================================================================
# ENGINES
# ============================================================================

class TemporalSmoothingEngine:
    """Rolling window over the most recent trials' state labels and z-scores.

    Both histories are bounded deques of length ``window_size``; once the
    window is full, appending a trial discards the oldest one.
    """

    def __init__(self, window_size=10):
        self.state_history = deque(maxlen=window_size)
        self.zscore_history = deque(maxlen=window_size)

    def add_trial(self, state: str, zscores: dict):
        """Append one trial's state label and z-score mapping to the window."""
        self.state_history.append(state)
        self.zscore_history.append(zscores)

    def get_smoothed_state(self):
        """Return the most frequent state in the window.

        With fewer than three trials no vote is taken: the latest state is
        returned, or ``'Calibrating'`` when the window is empty.
        """
        if len(self.state_history) < 3:
            return self.state_history[-1] if self.state_history else 'Calibrating'
        return Counter(self.state_history).most_common(1)[0][0]

    def get_smoothed_zscores(self):
        """Return the per-feature mean z-score over the window.

        Returns ``None`` when no trial has been added. Each feature is
        averaged over the trials that actually contain it.
        """
        if not self.zscore_history:
            return None
        # Collect values over the union of keys. Taking the key set from the
        # oldest trial alone would silently drop any feature that happens to
        # be missing from that one trial.
        collected = {}
        for trial in self.zscore_history:
            for key, value in trial.items():
                collected.setdefault(key, []).append(value)
        return {key: np.mean(values) for key, values in collected.items()}

class CumulativeDriftTracker:
    """Rolling percentage of drift trials over a fixed-length time window.

    Args:
        window_seconds: Length of the window in seconds.
        seconds_per_trial: Duration attributed to one trial when converting
            the window to a trial count. Defaults to 2.0, the value that was
            previously hard-coded.

    Raises:
        ValueError: If ``seconds_per_trial`` is not positive.
    """

    def __init__(self, window_seconds=120, seconds_per_trial=2.0):
        if seconds_per_trial <= 0:
            raise ValueError("seconds_per_trial must be positive")
        self.window_trials = int(window_seconds / seconds_per_trial)
        self.drift_history = deque(maxlen=self.window_trials)

    def add_trial(self, is_drift: bool):
        """Record one trial as 1 (drift) or 0 (no drift)."""
        self.drift_history.append(1 if is_drift else 0)

    def get_pct(self):
        """Return the share of drift trials in the window as a percentage.

        Returns 0.0 while the window is still empty.
        """
        if not self.drift_history:
            return 0.0
        return sum(self.drift_history) / len(self.drift_history) * 100

class WindowedStateClassifier:
    """Majority-vote state classifier over a rolling window of trials.

    Args:
        window_size: Number of recent trials that take part in the vote.
            Defaults to 10, the smoothing engine's own default.
    """

    def __init__(self, window_size=10):
        self.temporal_smoother = TemporalSmoothingEngine(window_size)

    def add_trial(self, instant_state: str, zscores: dict):
        """Feed one trial's instantaneous state and z-scores into the window."""
        self.temporal_smoother.add_trial(instant_state, zscores)

    def get_windowed_state(self):
        """Return ``(state, confidence)`` for the current window.

        With fewer than three trials the latest state (or ``'Calibrating'``
        for an empty window) is returned with a fixed confidence of 0.5.
        Otherwise the most frequent state is returned together with the
        fraction of window trials that carry it.
        """
        history = self.temporal_smoother.state_history
        if len(history) < 3:
            state = history[-1] if history else 'Calibrating'
            return state, 0.5
        # Count once; the winner and its vote share come from the same tally.
        state, votes = Counter(history).most_common(1)[0]
        return state, votes / len(history)

class InterventionManager:
    """Decide when an intervention alert fires, with a cooldown between alerts.

    Args:
        threshold: Cumulative drift percentage at or above which an alert
            may fire.
        cooldown_trials: Minimum number of trials between two alerts.
    """

    def __init__(self, threshold=50.0, cooldown_trials=30):
        self.threshold = threshold
        self.cooldown_trials = cooldown_trials
        # -inf means "no intervention yet": the cooldown test below can never
        # suppress the first alert, whatever the cooldown length. A finite
        # sentinel such as -999 would do so for very long cooldowns.
        self.last_intervention = float('-inf')
        self.count = 0

    def check(self, trial_num: int, cumulative_drift: float):
        """Return True and record an intervention if one should fire now.

        An alert fires when ``cumulative_drift`` has reached the threshold and
        at least ``cooldown_trials`` trials have passed since the last alert.
        """
        if cumulative_drift < self.threshold:
            return False
        if trial_num - self.last_intervention < self.cooldown_trials:
            return False
        self.last_intervention = trial_num
        self.count += 1
        return True

# ============================================================================
# INFORMATION GEOMETRY EFFICIENCY (USING REAL v6.0 IG METRICS)
# ============================================================================

def compute_ig_efficiency(row, ig_df=None, weights=None):
    """
    IG-based efficiency using REAL IG metrics from v6.0.

    efficiency = 0.50*mahal_score + 0.20*kl_score + 0.30*intensity

    With the default weights. The distance scores are mapped so that a lower
    distance/divergence gives a higher score (100 at distance 0).

    Args:
        row: Mapping-like trial record (anything with ``.get``). Reads
            'cognitive_state', 'intensity' and, on the fallback path, the
            z-score columns listed below.
        ig_df: Optional per-state metrics table with columns 'state',
            'mahalanobis_distance' and 'kl_divergence'.
        weights: Optional mapping with keys 'mahal', 'kl' and 'intensity'.
            Defaults to the module-level IG_WEIGHTS.

    Returns: efficiency score (0-100)
    """
    if weights is None:
        weights = IG_WEIGHTS

    instant_state = row.get('cognitive_state', 'Unknown')
    intensity = row.get('intensity', 50)
    if pd.isna(intensity):
        intensity = 50  # neutral mid-scale value when intensity is missing

    # Use real IG metrics if available
    if ig_df is not None and not ig_df.empty:
        state_ig = ig_df[ig_df['state'] == instant_state]

        if len(state_ig) > 0:
            mahal_dist = float(state_ig['mahalanobis_distance'].iloc[0])
            kl_div = float(state_ig['kl_divergence'].iloc[0])

            # A NaN metric would make the weighted sum NaN, and
            # max(0, min(100, nan)) evaluates to 100, reporting perfect
            # efficiency for a state with no usable metrics. Use the
            # fallback path below instead.
            if not (np.isnan(mahal_dist) or np.isnan(kl_div)):
                # Normalize to 0-100 scale
                # Reference: Optimal-Monitoring has mahal=0, kl≈0
                # Max expected: Overload has mahal~15, kl~150
                mahal_score = 100 / (1 + mahal_dist / 2)      # Inverted (lower distance = higher score)
                kl_score = 100 / (1 + kl_div / 100)           # Inverted (lower divergence = higher score)

                efficiency = (
                    weights['mahal'] * mahal_score +
                    weights['kl'] * kl_score +
                    weights['intensity'] * intensity
                )

                return max(0, min(100, efficiency))

    # Fallback: state-based penalty if IG metrics not available
    state_kl_penalty = {
        'Optimal-Engaged': 0.0,
        'Optimal-Monitoring': 0.0,
        'Mind-Wandering': 25.0,
        'Fatigue': 20.0,
        'Overload': 30.0
    }

    kl_penalty = state_kl_penalty.get(instant_state, 15.0)

    # Distance proxy: RMS of whichever of these z-scores the row provides.
    z_cols = ['z_theta', 'z_alpha', 'z_beta', 'z_delta', 'z_theta_beta_ratio']
    z_vals = [abs(row.get(col)) for col in z_cols if pd.notna(row.get(col))]

    if z_vals:
        mahal_distance = np.sqrt(np.mean(np.array(z_vals)**2))
        mahal_proxy = 100 - min(100, mahal_distance * 15)
    else:
        mahal_proxy = 50

    efficiency = (
        weights['mahal'] * mahal_proxy +
        weights['kl'] * (100 - kl_penalty) +
        weights['intensity'] * intensity
    )

    return max(0, min(100, efficiency))

# ============================================================================
# DRIFT DETECTION
# ============================================================================

def detect_drift_markers(z: dict):
    """Compare z-scores against the module thresholds and list what fired.

    Args:
        z: Mapping of z-score names to values; a missing name counts as 0.

    Returns:
        Dict with keys ``'mw'``, ``'fat'`` and ``'ol'``, each holding the
        labels of the markers in that family whose threshold was crossed.
    """
    # One row per marker: (family, label, z-score key, cut-off, fires when above?)
    rules = (
        ('mw', 'TBR', 'z_theta_beta_ratio', MW_THRESHOLDS['tbr_moderate'], True),
        ('mw', 'Alpha', 'z_alpha', MW_THRESHOLDS['alpha_decrease'], False),
        ('mw', 'PE', 'z_pe', MW_THRESHOLDS['pe_decrease'], False),
        ('mw', 'LZ', 'z_lz', MW_THRESHOLDS['lz_decrease'], False),
        ('fat', 'Alpha', 'z_alpha', FATIGUE_THRESHOLDS['alpha_increase'], True),
        ('fat', 'Delta', 'z_delta', FATIGUE_THRESHOLDS['delta_increase'], True),
        ('fat', 'Theta', 'z_theta', FATIGUE_THRESHOLDS['theta_increase'], True),
        ('fat', 'Beta', 'z_beta', FATIGUE_THRESHOLDS['beta_decrease'], False),
        ('ol', 'Theta', 'z_theta', OVERLOAD_THRESHOLDS['theta_extreme'], True),
        ('ol', 'PAC', 'z_pac', OVERLOAD_THRESHOLDS['pac_extreme'], True),
    )

    found = {'mw': [], 'fat': [], 'ol': []}
    for family, label, key, cutoff, fires_above in rules:
        value = z.get(key, 0)
        crossed = value > cutoff if fires_above else value < cutoff
        if crossed:
            found[family].append(label)
    return found

def compute_drift_strength(markers):
    """Score drift evidence from the marker lists, capped at 100.

    Args:
        markers: Dict with ``'mw'``, ``'fat'`` and ``'ol'`` lists, as returned
            by ``detect_drift_markers``.

    Returns:
        Sum of the points of every satisfied rule: 20 for at least two ``'mw'``
        markers, 10 for at least two ``'fat'`` markers, 30 for at least one
        ``'ol'`` marker.
    """
    # (marker bucket, minimum number of markers, points awarded)
    scoring = (('mw', 2, 20), ('fat', 2, 10), ('ol', 1, 30))
    total = sum(
        points
        for bucket, needed, points in scoring
        if len(markers[bucket]) >= needed
    )
    return min(100, total)

# ============================================================================
# PROCESS LAB DATA (16 REAL SUBJECTS)
# ============================================================================

# Whitelist of real subjects (sub-01 .. sub-16). sub-18..sub-21 are duplicates
# and must be excluded from BOTH the session summary and the per-session files,
# so a single list drives both filters.
real_subjects = [f"sub-{i:02d}" for i in range(1, 17)]

# Load session-level error risk
session_eff = pd.read_csv(LAB_DATA_DIR / "drift_analysis_results_phase2_session_summary.csv")

# REMOVE DUPLICATES: keep only sub-01 through sub-16
session_eff = session_eff[session_eff['subject'].isin(real_subjects)]
print(f"✓ Loaded {len(session_eff)} sessions (16 real subjects, duplicates removed)")
print()

# Filter to only real subjects. The subject id is the first '_'-separated token
# of the file stem (the same rule the processing loop uses), so match that token
# exactly instead of searching for a substring anywhere in the filename.
csv_files = sorted(
    f for f in LAB_DATA_DIR.glob("*_6STATES_v6_0.csv")
    if f.stem.split('_')[0] in real_subjects
)

print(f"✓ Found {len(csv_files)} session CSV files (16 real subjects)")
print()

all_trials = []
session_stats = []

for fpath in csv_files:
    # File stems look like "<subject>_<session>_..."; the first two tokens
    # identify the recording.
    subject, session = fpath.stem.split('_')[:2]

    df = pd.read_csv(fpath)

    # Session-level error risk (NaN when the summary table has no matching row)
    sess = session_eff[(session_eff['subject'] == subject) & (session_eff['session'] == session)]
    error_risk = sess['mean_error_risk'].values[0] if len(sess) > 0 else np.nan

    # Fresh engines for every session so no state carries over between recordings
    ts = TemporalSmoothingEngine(TEMPORAL_WINDOW_SIZE)
    cd = CumulativeDriftTracker(CUMULATIVE_DRIFT_WINDOW)
    wc = WindowedStateClassifier()
    im = InterventionManager(INTERVENTION_THRESHOLD, INTERVENTION_COOLDOWN)

    # The z-score columns are the same for every row: resolve them once per file
    z_cols = [c for c in df.columns if c.startswith('z_')]

    trials = []

    # Number trials by position (1-based) so the count does not depend on the
    # frame's index labels.
    for trial_num, (_, row) in enumerate(df.iterrows(), start=1):
        instant_state = row['cognitive_state']

        # Extract the non-missing z-scores of this trial
        zscores = {c: row[c] for c in z_cols if pd.notna(row[c])}

        # Windowed classification
        wc.add_trial(instant_state, zscores)
        windowed_state, confidence = wc.get_windowed_state()

        # Temporal smoothing: feed the RAW z-scores of the current trial into
        # the history first, then read the smoothed view. Writing the smoothed
        # output back into the history (as was done before) means new trials
        # never reach the smoother once it starts returning values.
        ts.add_trial(windowed_state, zscores)
        smoothed_z = ts.get_smoothed_zscores()
        if smoothed_z is None:
            smoothed_z = zscores

        # Drift detection
        markers = detect_drift_markers(smoothed_z)
        drift_strength = compute_drift_strength(markers)

        # Cumulative drift
        is_drift = windowed_state in DRIFT_STATES
        cd.add_trial(is_drift)
        cum_drift = cd.get_pct()

        # Intervention trigger
        alert = im.check(trial_num, cum_drift)

        # IG-based efficiency (using real v6.0 IG metrics)
        efficiency = compute_ig_efficiency(row, ig_df=df_ig if ig_metrics_available else None)

        trials.append({
            'subject': subject,
            'session': session,
            'trial': trial_num,
            'instant_state': instant_state,
            'windowed_state': windowed_state,
            'confidence': confidence,
            'drift_strength': drift_strength,
            'cumulative_drift_pct': cum_drift,
            'alert': int(alert),
            'error_risk': error_risk,
            'efficiency': efficiency,
        })

    # An empty CSV would otherwise crash on the column lookups / division below
    if not trials:
        print(f"  {subject}/{session}: no trials found, skipping")
        continue

    # Session summary
    trials_df = pd.DataFrame(trials)

    n_optimal = (trials_df['windowed_state'].isin(OPTIMAL_STATES)).sum()
    n_drift = (trials_df['windowed_state'].isin(DRIFT_STATES)).sum()
    pct_optimal = 100 * n_optimal / len(trials_df)
    pct_drift = 100 * n_drift / len(trials_df)

    session_stats.append({
        'subject': subject,
        'session': session,
        'n_trials': len(trials_df),
        'pct_optimal': pct_optimal,
        'pct_drift': pct_drift,
        'mean_cumulative_drift': trials_df['cumulative_drift_pct'].mean(),
        'max_cumulative_drift': trials_df['cumulative_drift_pct'].max(),
        'n_alerts': im.count,
        'error_risk': error_risk,
        'mean_efficiency': trials_df['efficiency'].mean(),
        'std_efficiency': trials_df['efficiency'].std(),
    })

    all_trials.extend(trials)

    print(f"  {subject}/{session}: {len(df):4d} trials | drift={pct_drift:5.1f}% | cum_drift={trials_df['cumulative_drift_pct'].mean():5.1f}% | alerts={im.count:2d} | error_risk={error_risk:5.1f} | efficiency={trials_df['efficiency'].mean():5.1f}")

print()
print(f"✓ Processed {len(all_trials)} trials")
print(f"✓ Created {len(session_stats)} session summaries (16 subjects × 3 sessions = 48 sessions)")
print()

# Save outputs
trials_df_all = pd.DataFrame(all_trials)
sessions_df = pd.DataFrame(session_stats)

trials_df_all.to_csv(OUTPUT_DIR / "phase2_v7_trials_with_real_ig_efficiency.csv", index=False)
sessions_df.to_csv(OUTPUT_DIR / "phase2_v7_sessions_with_real_ig_efficiency.csv", index=False)

print(f"✓ Saved to {OUTPUT_DIR}")
print()

# ============================================================================
# VALIDATION: Cumulative Drift ↔ Error Risk + Efficiency
# ============================================================================

print("="*120)
print("VALIDATION: CUMULATIVE DRIFT PREDICTS ERROR RISK & EFFICIENCY EROSION")
print("="*120)
print()

# ============================================================================
# 2-MINUTE CUMULATIVE WINDOW ANALYSIS
# ============================================================================

print("2-MINUTE CUMULATIVE WINDOW ANALYSIS:")
print("-" * 120)

WINDOW_SIZE_TRIALS = 240  # ~2 minutes at ~2 trials/sec (500ms per trial)
WINDOW_STEP = WINDOW_SIZE_TRIALS // 2  # 50% overlap between consecutive windows
DRIFT_THRESHOLD = 50.0    # ≥50% mean cumulative drift = "high-drift window"
OPTIMAL_THRESHOLD = 10.0  # ≤10% mean cumulative drift = "optimal window" (inclusive)

all_windows = []

# One pass over the trials, grouped per recording; sort=False keeps the groups
# in order of first appearance.
for (subject, session), df_sess in trials_df_all.groupby(['subject', 'session'], sort=False):
    df_sess = df_sess.reset_index(drop=True)

    # Every start position that still leaves room for a FULL window. The "+ 1"
    # keeps the window that ends exactly on the last trial (and makes a session
    # of exactly WINDOW_SIZE_TRIALS trials contribute one window).
    last_start = len(df_sess) - WINDOW_SIZE_TRIALS
    for start_idx in range(0, last_start + 1, WINDOW_STEP):
        end_idx = start_idx + WINDOW_SIZE_TRIALS
        window = df_sess.iloc[start_idx:end_idx]

        # Metrics for this window
        window_drift_pct = window['cumulative_drift_pct'].mean()
        window_efficiency = window['efficiency'].mean()
        window_error_risk = window['error_risk'].mean()

        # Classify window; windows between the two thresholds are left out of
        # the comparison.
        if window_drift_pct >= DRIFT_THRESHOLD:
            window_type = 'High-Drift'
        elif window_drift_pct <= OPTIMAL_THRESHOLD:
            window_type = 'Optimal'
        else:
            continue

        all_windows.append({
            'subject': subject,
            'session': session,
            'window_type': window_type,
            'drift_pct': window_drift_pct,
            'efficiency': window_efficiency,
            'error_risk': window_error_risk,
            'n_trials': len(window),
        })

df_windows = pd.DataFrame(all_windows)

# NOTE(review): windows overlap by 50% and error_risk is a single value per
# session, so the samples fed to ttest_ind below are not independent; treat
# the reported p-values as optimistic.
if len(df_windows) > 0:
    # High-Drift vs Optimal: Error Risk
    high_drift_risk = df_windows[df_windows['window_type'] == 'High-Drift']['error_risk'].dropna()
    optimal_risk = df_windows[df_windows['window_type'] == 'Optimal']['error_risk'].dropna()

    if len(high_drift_risk) > 0 and len(optimal_risk) > 0:
        t_risk, p_risk = ttest_ind(high_drift_risk, optimal_risk)
        risk_diff = high_drift_risk.mean() - optimal_risk.mean()
        risk_pct = (risk_diff / optimal_risk.mean()) * 100

        print(f"\nCUMULATIVE WINDOW (2-min, ≥50% drift vs <10% drift):")
        print(f"  Optimal windows (n={len(optimal_risk):,}):   error_risk = {optimal_risk.mean():.2f} ± {optimal_risk.std():.2f}")
        print(f"  High-drift windows (n={len(high_drift_risk):,}): error_risk = {high_drift_risk.mean():.2f} ± {high_drift_risk.std():.2f}")
        print(f"  Δ = {risk_diff:.2f} points ({risk_pct:.1f}% worse) | t={t_risk:.3f}, p={p_risk:.2e}")

    # High-Drift vs Optimal: Efficiency
    high_drift_eff = df_windows[df_windows['window_type'] == 'High-Drift']['efficiency'].dropna()
    optimal_eff = df_windows[df_windows['window_type'] == 'Optimal']['efficiency'].dropna()

    if len(high_drift_eff) > 0 and len(optimal_eff) > 0:
        t_eff, p_eff = ttest_ind(optimal_eff, high_drift_eff)
        eff_diff = optimal_eff.mean() - high_drift_eff.mean()
        eff_pct = (eff_diff / optimal_eff.mean()) * 100

        print(f"\nCUMULATIVE WINDOW EFFICIENCY (2-min, IG-based):")
        print(f"  Optimal windows (n={len(optimal_eff):,}):   efficiency = {optimal_eff.mean():.2f} ± {optimal_eff.std():.2f}")
        print(f"  High-drift windows (n={len(high_drift_eff):,}): efficiency = {high_drift_eff.mean():.2f} ± {high_drift_eff.std():.2f}")
        print(f"  Δ = {eff_diff:.2f} points ({eff_pct:.1f}% drop) | t={t_eff:.3f}, p={p_eff:.2e}")

print()


# Trial-level
# NOTE(review): error_risk is a session-level value copied onto every trial, so
# the trial counts below overstate the number of independent observations.
opt = trials_df_all[trials_df_all['windowed_state'].isin(OPTIMAL_STATES)]['error_risk'].dropna()
drift = trials_df_all[trials_df_all['windowed_state'].isin(DRIFT_STATES)]['error_risk'].dropna()

if len(opt) > 0 and len(drift) > 0:
    t, p = ttest_ind(opt, drift)
    # Effect size: mean difference over the root of the averaged variances
    d = (drift.mean() - opt.mean()) / np.sqrt((opt.std()**2 + drift.std()**2) / 2)

    print("TRIAL-LEVEL (Windowed State):")
    print(f"  Optimal (n={len(opt):6d}):  error_risk = {opt.mean():.2f} ± {opt.std():.2f}")
    print(f"  Drift   (n={len(drift):6d}):  error_risk = {drift.mean():.2f} ± {drift.std():.2f}")
    print(f"  Δ = {drift.mean() - opt.mean():.2f} points ({100*(drift.mean()-opt.mean())/opt.mean():.1f}%) | t={t:.3f} | p={p:.2e} | d={d:.3f}")
    print()

# Trial-level efficiency
opt_eff = trials_df_all[trials_df_all['windowed_state'].isin(OPTIMAL_STATES)]['efficiency'].dropna()
drift_eff = trials_df_all[trials_df_all['windowed_state'].isin(DRIFT_STATES)]['efficiency'].dropna()

if len(opt_eff) > 0 and len(drift_eff) > 0:
    t_eff, p_eff = ttest_ind(opt_eff, drift_eff)
    d_eff = (opt_eff.mean() - drift_eff.mean()) / np.sqrt((opt_eff.std()**2 + drift_eff.std()**2) / 2)

    print("TRIAL-LEVEL EFFICIENCY (IG-based, using REAL v6.0 metrics):")
    print(f"  Optimal (n={len(opt_eff):6d}):  efficiency = {opt_eff.mean():.2f} ± {opt_eff.std():.2f}")
    print(f"  Drift   (n={len(drift_eff):6d}):  efficiency = {drift_eff.mean():.2f} ± {drift_eff.std():.2f}")
    print(f"  Δ = {opt_eff.mean() - drift_eff.mean():.2f} points ({100*(opt_eff.mean()-drift_eff.mean())/opt_eff.mean():.2f}% drop) | t={t_eff:.3f} | p={p_eff:.2e} | d={d_eff:.3f}")
    print()


def _spearman_complete_cases(frame, x_col, y_col):
    """Spearman correlation of two columns over the rows where both are present.

    Sessions without a matching summary row carry a NaN error_risk; left in
    place, a single NaN makes spearmanr return NaN for the whole correlation.
    """
    pairs = frame[[x_col, y_col]].dropna()
    return spearmanr(pairs[x_col], pairs[y_col])


# Session-level
rho1, p1 = _spearman_complete_cases(sessions_df, 'pct_drift', 'error_risk')
rho2, p2 = _spearman_complete_cases(sessions_df, 'mean_cumulative_drift', 'error_risk')
rho3, p3 = _spearman_complete_cases(sessions_df, 'max_cumulative_drift', 'error_risk')

print("SESSION-LEVEL CORRELATIONS (Spearman):")
print(f"  % drift vs error_risk:          ρ = {rho1:7.3f}, p = {p1:.2e}")
print(f"  mean cumulative drift vs risk:  ρ = {rho2:7.3f}, p = {p2:.2e}")
print(f"  max cumulative drift vs risk:   ρ = {rho3:7.3f}, p = {p3:.2e}")
print()

# Efficiency correlations
rho_eff1, p_eff1 = _spearman_complete_cases(sessions_df, 'pct_drift', 'mean_efficiency')
rho_eff2, p_eff2 = _spearman_complete_cases(sessions_df, 'mean_cumulative_drift', 'mean_efficiency')

print("SESSION-LEVEL EFFICIENCY CORRELATIONS (IG-based, Spearman):")
print(f"  % drift vs efficiency:          ρ = {rho_eff1:7.3f}, p = {p_eff1:.2e}")
print(f"  mean cumulative drift vs eff:   ρ = {rho_eff2:7.3f}, p = {p_eff2:.2e}")
print()

# Split sessions into three bands by their mean cumulative drift:
# high (>= 15), medium (>= 10 and < 15), low (< 10).
session_drift = sessions_df['mean_cumulative_drift']
high = sessions_df[session_drift >= 15]
medium = sessions_df[(session_drift >= 10) & (session_drift < 15)]
low = sessions_df[session_drift < 10]

print(f"SEGMENTATION (n={len(sessions_df)} sessions):")
# Each row: (pre-formatted label with its own column padding, segment frame)
segment_rows = (
    (f"  HIGH drift (≥15%, n={len(high):2d}):    ", high),
    (f"  MEDIUM drift (10-15%, n={len(medium):2d}): ", medium),
    (f"  LOW drift (<10%, n={len(low):2d}):     ", low),
)
for seg_label, seg_df in segment_rows:
    seg_risk = seg_df['error_risk']
    seg_eff = seg_df['mean_efficiency']
    print(f"{seg_label}error_risk = {seg_risk.mean():.2f} ± {seg_risk.std():.2f} | efficiency = {seg_eff.mean():.2f} ± {seg_eff.std():.2f}")
print()

# High vs low: absolute gaps, then the same gaps relative to the low-drift band
low_risk_mean = low['error_risk'].mean()
low_eff_mean = low['mean_efficiency'].mean()
high_risk = high['error_risk'].mean() - low_risk_mean
high_eff = low_eff_mean - high['mean_efficiency'].mean()
pct_spike = (high_risk / low_risk_mean) * 100
pct_eff_drop = (high_eff / low_eff_mean) * 100

print("HIGH vs LOW DRIFT (IMPACT METRICS):")
print(f"  Error Risk: Δ = {high_risk:.2f} points ({pct_spike:.1f}% worse)")
print(f"  Efficiency: Δ = {high_eff:.2f} points ({pct_eff_drop:.2f}% drop)")
print(f"  Annual value per high-drift user: ${high_risk * 50:.0f} (at $50/point error prevention)")
print()

# Global figures across all sessions / trials
alerts_per_session = sessions_df['n_alerts']
alerts_total = alerts_per_session.sum()
sessions_alerted = (alerts_per_session > 0).sum()
global_mean_drift = trials_df_all['cumulative_drift_pct'].mean()
global_peak_drift = sessions_df['max_cumulative_drift'].max()
global_mean_risk = trials_df_all['error_risk'].mean()
global_mean_eff = trials_df_all['efficiency'].mean()

print("GLOBAL METRICS:")
print(f"  Total interventions:            {alerts_total:3d}")
print(f"  Sessions with ≥1 alert:        {sessions_alerted:2d}/{len(sessions_df)}")
print(f"  Mean cumulative drift:          {global_mean_drift:.2f}%")
print(f"  Peak cumulative drift:          {global_peak_drift:.2f}%")
print(f"  Mean error_risk (global):       {global_mean_risk:.2f}")
print(f"  Mean efficiency (global):       {global_mean_eff:.2f}")
print()

# Subject segmentation
print("="*120)
print("SUBJECT-LEVEL BREAKDOWN (Individual Differences)")
print("="*120)
print()

# Per-subject aggregates across that subject's sessions. Values are kept at
# full precision here; rounding is applied only when printing the table so the
# 10% / 15% segmentation below is not decided by rounded means.
subj_stats = sessions_df.groupby('subject').agg({
    'mean_cumulative_drift': ['mean', 'std'],
    'error_risk': ['mean', 'std'],
    'mean_efficiency': ['mean', 'std'],
    'n_alerts': 'sum',
})

subj_stats.columns = ['drift_mean', 'drift_std', 'risk_mean', 'risk_std', 'eff_mean', 'eff_std', 'total_alerts']
subj_stats = subj_stats.sort_values('drift_mean', ascending=False)

print(subj_stats.round(2).to_string())
print()

# Segmentation summary (same bands as the session-level segmentation)
high_subj = subj_stats[subj_stats['drift_mean'] >= 15]
med_subj = subj_stats[(subj_stats['drift_mean'] >= 10) & (subj_stats['drift_mean'] < 15)]
low_subj = subj_stats[subj_stats['drift_mean'] < 10]

# Annual value = error_risk gap to the low-drift segment × $50 per point
# (the same $50/point rate printed in the high-vs-low impact metrics).
value_per_point = 50
low_risk_baseline = low_subj['risk_mean'].mean()
high_value = (high_subj['risk_mean'].mean() - low_risk_baseline) * value_per_point
med_value = (med_subj['risk_mean'].mean() - low_risk_baseline) * value_per_point

print("="*120)
print("MARKET SEGMENTATION (By Subject)")
print("="*120)
print()
print(f"HIGH DRIFT SUBJECTS (n={len(high_subj)}, ≥15% mean drift):")
print(f"  Subjects: {', '.join(high_subj.index.tolist())}")
print(f"  Avg error_risk: {high_subj['risk_mean'].mean():.2f}")
print(f"  Avg efficiency: {high_subj['eff_mean'].mean():.2f}")
print(f"  Total alerts: {high_subj['total_alerts'].sum()}")
print(f"  Annual value per user: ${high_value:.0f}")
print()

print(f"MEDIUM DRIFT SUBJECTS (n={len(med_subj)}, 10-15% mean drift):")
print(f"  Subjects: {', '.join(med_subj.index.tolist())}")
print(f"  Avg error_risk: {med_subj['risk_mean'].mean():.2f}")
print(f"  Avg efficiency: {med_subj['eff_mean'].mean():.2f}")
print(f"  Total alerts: {med_subj['total_alerts'].sum()}")
print(f"  Annual value per user: ${med_value:.0f}")
print()

print(f"LOW DRIFT SUBJECTS (n={len(low_subj)}, <10% mean drift):")
print(f"  Subjects: {', '.join(low_subj.index.tolist())}")
print(f"  Avg error_risk: {low_subj['risk_mean'].mean():.2f}")
print(f"  Avg efficiency: {low_subj['eff_mean'].mean():.2f}")
print(f"  Total alerts: {low_subj['total_alerts'].sum()}")
print()

print("="*120)
print("✅ PHASE 2 v7.0 FINAL COMPLETE (REAL IG-EFFICIENCY INTEGRATED)")
print("="*120)
print()
print(f"Key Findings:")
# Report the p-values that were actually computed instead of a fixed
# "p < 0.001", which would be printed even for a non-significant result.
print(f"  1. Cumulative drift (ρ = {rho2:.3f}, p = {p2:.2e}) predicts error_risk")
print(f"  2. Efficiency erosion (ρ = {rho_eff2:.3f}, p = {p_eff2:.2e}) under high drift [USING REAL v6.0 IG METRICS]")
print(f"  3. Trial-level: Optimal efficiency {opt_eff.mean():.1f}% vs Drift efficiency {drift_eff.mean():.1f}% ({100*(opt_eff.mean()-drift_eff.mean())/opt_eff.mean():.1f}% drop)")
print(f"  4. HIGH vs LOW drift: {pct_spike:.1f}% error_risk increase, {pct_eff_drop:.2f}% efficiency drop")
print(f"  5. High-drift users: {len(high_subj)} subjects with ${high_value:.0f}/year potential value")
print(f"  6. Medium-drift users: {len(med_subj)} subjects with ${med_value:.0f}/year potential value")
print()
print(f"IG Efficiency Source: REAL manifold distances from Phase 2 v6.0")
print(f"  - Mahalanobis distances (geometric deviation from optimal manifold)")
print(f"  - KL divergences (probability distance from optimal distribution)")
print(f"  - Riemannian distances (geodesic distances on manifold)")
print()
print(f"Output: {OUTPUT_DIR}")
print()


PHASE 2 v7.0 FINAL: CUMULATIVE DRIFT + REAL IG-BASED EFFICIENCY

Loading pre-computed IG metrics from Phase 2 v6.0...
✓ Loaded IG metrics for 5 states

             state  mahalanobis_distance  kl_divergence  riemannian_distance  avg_intensity
           Fatigue             11.345464   1.999001e+03             2.520542      34.616151
    Mind-Wandering              2.356115   1.254019e+01             1.542090      30.273604
   Optimal-Engaged              2.989889   5.665825e+01             1.723256      59.635533
Optimal-Monitoring              0.000000   4.440892e-15             0.000000      42.648325
          Overload             14.672686   1.466680e+02             6.202442       6.363636

✓ Loaded 48 sessions (16 real subjects, duplicates removed)

✓ Found 48 session CSV files (16 real subjects)

  sub-01/ses-S1: 1906 trials | drift=  8.2% | cum_drift=  8.2% | alerts= 6 | error_risk= 40.6 | efficiency= 64.9
  sub-01/ses-S2: 1641 trials | drift=  9.6% | cum_drift=  9.6% | alerts=