# NeuroAdaptive Interface - Calibration & Evaluation

This notebook provides:
1. Quick calibration protocol (5-10 minutes)
2. Model training and evaluation
3. Performance validation
4. Real-time testing

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import VotingClassifier, RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
import joblib
import time
from pathlib import Path
import sys
import os

# Add project paths
sys.path.append('../src')

from feature_extraction.features import CFEMExtractor
from acquisition.lsl_acquire import EEGAcquisition
from p300.p300_online import P300OnlineProcessor

plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

## 1. Quick Calibration Protocol

**Duration: 5-10 minutes**

1. **Baseline (2 min)**: Eyes open, relaxed state
2. **Focused Task (3 min)**: Simple arithmetic or reading
3. **Stroop/Oddball (3 min)**: 120 stimuli for P300 and cognitive load
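4. **Distraction Task (2 min)**: Multitasking or interruptions

> **Note:** the durations above are the planned protocol (10 minutes in total). The `CalibrationProtocol` class below currently records the four phases for 120 s, 180 s, 120 s and 90 s respectively (about 8.5 minutes in total).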

In [None]:
class CalibrationProtocol:
    """Guided, multi-phase EEG recording session that builds a labelled feature set.

    Each phase shows an instruction, then repeatedly pulls the most recent EEG
    window from the acquisition object, runs it through the feature extractor
    and stores the resulting feature dict together with the phase label.
    """

    def __init__(self, fs=256):
        """Create the acquisition, feature-extraction and P300 helpers.

        Args:
            fs: Sampling rate forwarded unchanged to every helper.
        """
        self.fs = fs
        self.feature_extractor = CFEMExtractor(fs=fs)
        self.eeg_acq = EEGAcquisition(fs=fs)
        self.p300_processor = P300OnlineProcessor(fs=fs)

        # One feature dict and one label string per collected window.
        self.calibration_features = []
        self.calibration_labels = []

    def run_calibration(self):
        """Run the four calibration phases back to back.

        Returns:
            True when all phases completed; False when the EEG connection
            failed or the user interrupted the run with Ctrl-C.
        """
        print("üß† Starting NAI Calibration Protocol")
        print("Duration: ~8 minutes")

        # Connect to EEG; without a stream there is nothing to record.
        try:
            self.eeg_acq.connect()
            self.eeg_acq.start_acquisition()
            print("‚úÖ EEG connected")
        except Exception as e:
            print(f"‚ùå EEG connection failed: {e}")
            return False

        try:
            # Phase 1: Baseline (Relaxed)
            self._run_phase("Relaxed", 120, "Sit comfortably with eyes open, breathe naturally")

            # Phase 2: Focused Task
            self._run_phase("Focused", 180, "Perform mental arithmetic: count backwards from 1000 by 7s")

            # Phase 3: Cognitive Load (Overload)
            self._run_phase("Overload", 120, "Stroop task: name colors while ignoring words")

            # Phase 4: Distraction
            self._run_phase("Distracted", 90, "Listen to music while doing simple math")

            print("\n‚úÖ Calibration completed!")
            return True

        except KeyboardInterrupt:
            print("\n‚èπÔ∏è Calibration stopped by user")
            return False
        finally:
            # Always release the stream, whether the run finished or not.
            self.eeg_acq.stop_acquisition()

    def _run_phase(self, label, duration_sec, instruction):
        """Record one phase and append its windows to the calibration buffers.

        Args:
            label: Class label stored alongside every window of this phase.
            duration_sec: Wall-clock length of the recording loop in seconds.
            instruction: Text shown to the user before the countdown.
        """
        print(f"\nüìã Phase: {label} ({duration_sec}s)")
        print(f"Instruction: {instruction}")

        # Countdown
        for i in range(3, 0, -1):
            print(f"Starting in {i}...")
            time.sleep(1)
        print("‚ñ∂Ô∏è START!")

        start_time = time.time()
        window_count = 0
        bar_length = 20

        while time.time() - start_time < duration_sec:
            # NOTE(review): the argument is presumably the window length in
            # seconds; polling every 0.25 s would then give overlapping
            # windows. Confirm against EEGAcquisition.
            eeg_data, timestamps = self.eeg_acq.get_latest_window(1.0)

            if eeg_data is not None:
                features = self.feature_extractor.extract_features(eeg_data)

                if features is not None:
                    self.calibration_features.append(features)
                    self.calibration_labels.append(label)
                    window_count += 1

            # Progress indicator. The elapsed time is re-read after the work
            # above, so it can overshoot the phase length; clamp to 100%.
            elapsed = time.time() - start_time
            progress = min(elapsed / duration_sec, 1.0)
            filled_length = int(bar_length * progress)
            bar = '‚ñà' * filled_length + '-' * (bar_length - filled_length)
            print(f"\r[{bar}] {progress:.1%} ({window_count} windows)", end='', flush=True)

            time.sleep(0.25)  # poll at most ~4 times per second

        print(f"\n‚úÖ {label} phase completed: {window_count} windows collected")

    def get_calibration_data(self):
        """Return the collected windows as arrays.

        Returns:
            A ``(X, y, feature_names)`` tuple. ``X`` has one row per window
            and one column per feature, ordered like the keys of the first
            stored feature dict; ``y`` holds the phase labels. When nothing
            has been collected the tuple is ``(None, None, None)`` so callers
            can always unpack three values.
        """
        if not self.calibration_features:
            return None, None, None

        # The first window fixes the column order for every row.
        feature_names = list(self.calibration_features[0].keys())
        X = np.array([[f[name] for name in feature_names] for f in self.calibration_features])
        y = np.array(self.calibration_labels)

        return X, y, feature_names

    def save_calibration_data(self, filename='calibration_data.npz'):
        """Write the collected data to an ``.npz`` archive.

        Args:
            filename: Destination path; missing parent directories are created.

        Returns:
            True when a file was written, False when there was no data.
        """
        X, y, feature_names = self.get_calibration_data()

        if X is None:
            return False

        # Do not lose a finished recording just because the folder is missing.
        Path(filename).parent.mkdir(parents=True, exist_ok=True)
        np.savez(filename, X=X, y=y, feature_names=feature_names)
        print(f"üíæ Calibration data saved: {filename}")
        return True

## 2. Run Calibration (Interactive)

In [None]:
# Build the calibration helper. Constructing it only creates the acquisition,
# feature-extraction and P300 objects; nothing is recorded until
# run_calibration() is called.
calibration = CalibrationProtocol()

# Live calibration is opt-in. Uncomment the three lines below to record a
# session and store it where load_training_data() looks for it.
# success = calibration.run_calibration()
# if success:
#     calibration.save_calibration_data('../data_raw/calibration_data.npz')

print("üìù To run calibration, uncomment the lines above and ensure EEG is connected")

## 3. Load and Prepare Training Data

In [None]:
def load_training_data():
    """Assemble the training set from saved calibration data plus synthetic data.

    Calibration data is read from ``../data_raw/calibration_data.npz`` when
    that file exists. Synthetic demonstration data is always appended; when
    calibration data was loaded, the synthetic block is generated with the
    same number of feature columns so the two can be stacked.

    Returns:
        ``(X, y)``: the stacked feature matrix and the matching label vector.
    """
    X_parts, y_parts = [], []
    n_features = None

    # Try to load calibration data
    calib_file = Path('../data_raw/calibration_data.npz')
    if calib_file.exists():
        # np.load keeps the archive open for lazy reads; copy out what is
        # needed and let the context manager close the file handle.
        with np.load(calib_file) as data:
            X_calib = data['X']
            y_calib = data['y']
        X_parts.append(X_calib)
        y_parts.append(y_calib)
        n_features = X_calib.shape[1]
        print(f"‚úÖ Loaded calibration data: {X_calib.shape}")
    else:
        print("‚ö†Ô∏è No calibration data found")

    # Generate synthetic data for demonstration
    print("üîß Generating synthetic training data...")
    if n_features is None:
        X_synth, y_synth = generate_synthetic_data(n_samples=1000)
    else:
        # Match the calibration width, otherwise np.vstack below would fail.
        X_synth, y_synth = generate_synthetic_data(n_samples=1000, n_features=n_features)
    X_parts.append(X_synth)
    y_parts.append(y_synth)

    return np.vstack(X_parts), np.hstack(y_parts)

def generate_synthetic_data(n_samples=1000, n_features=25):
    """Generate synthetic EEG-like features for demonstration.

    Every state gets ``n_samples // 4`` rows drawn from a per-state normal
    distribution. The first five columns use state-specific means and
    standard deviations; the remaining columns share one filler mean and
    standard deviation per state. Small extra noise is added and the absolute
    value is taken so all features are non-negative.

    The global NumPy random state is re-seeded with 42 on every call, so the
    output is reproducible and later ``np.random`` draws are affected too.

    Args:
        n_samples: Requested total row count; the result has
            ``4 * (n_samples // 4)`` rows.
        n_features: Number of feature columns; must be at least 5.

    Returns:
        ``(X, y)`` with ``X`` of shape ``(rows, n_features)`` and ``y`` the
        array of state names, grouped by state.

    Raises:
        ValueError: If ``n_features`` is smaller than 5.
    """
    n_leading = 5
    if n_features < n_leading:
        raise ValueError(
            f"n_features must be at least {n_leading}, got {n_features}"
        )

    np.random.seed(42)

    # (state, leading means, filler mean, leading stds, filler std).
    # The band descriptions are the original author's notes on each profile.
    state_profiles = (
        # Higher alpha, lower beta
        ('Relaxed', [0.8, 0.6, 1.2, 0.4, 0.3], 0.5, [0.2, 0.15, 0.3, 0.1, 0.1], 0.2),
        # Moderate alpha, higher beta
        ('Focused', [0.6, 0.8, 0.9, 0.8, 0.5], 0.6, [0.15, 0.2, 0.2, 0.2, 0.15], 0.2),
        # Variable patterns, higher theta
        ('Distracted', [0.5, 1.0, 0.7, 0.6, 0.4], 0.5, [0.3, 0.3, 0.25, 0.2, 0.15], 0.25),
        # High beta/gamma, low alpha
        ('Overload', [0.3, 0.4, 0.5, 1.2, 1.0], 0.7, [0.1, 0.1, 0.15, 0.3, 0.25], 0.2),
    )

    n_per_class = n_samples // len(state_profiles)
    n_filler = n_features - n_leading

    blocks = []
    labels = []
    # Draw order matters: it fixes which random numbers each state receives.
    for state, lead_mean, fill_mean, lead_std, fill_std in state_profiles:
        means = lead_mean + [fill_mean] * n_filler
        stds = lead_std + [fill_std] * n_filler
        blocks.append(np.random.normal(means, stds, (n_per_class, n_features)))
        labels.extend([state] * n_per_class)

    X = np.vstack(blocks)
    y = np.array(labels)

    # Add some noise and ensure positive values
    X = np.abs(X + np.random.normal(0, 0.05, X.shape))

    return X, y

# Build the module-level training set (X, y) consumed by the later cells.
X, y = load_training_data()

if X is None:
    print("‚ùå No training data available")
else:
    # Short sanity summary: size, label set and per-class counts.
    print(f"üìä Training data shape: {X.shape}")
    print(f"üìä Classes: {np.unique(y)}")
    print(f"üìä Class distribution: {pd.Series(y).value_counts()}")

## 4. Model Training and Evaluation

In [None]:
def train_nai_model(X, y, use_smote=True):
    """Train the NAI soft-voting classifier (random forest + linear SVM).

    Two defects of the earlier version are addressed here:

    * Soft voting averages ``predict_proba`` outputs, which ``LinearSVC`` does
      not provide. The SVM is therefore wrapped in ``CalibratedClassifierCV``
      so it exposes calibrated probabilities.
    * Scaling and SMOTE used to be fitted on the full data set before
      cross-validation, letting validation rows (and synthetic neighbours of
      them) leak into training folds. Cross-validation now runs a pipeline
      that re-fits the scaler and SMOTE inside every training fold.

    Args:
        X: Feature matrix, one row per sample.
        y: Class labels aligned with the rows of ``X``.
        use_smote: When True, oversample minority classes with SMOTE before
            fitting (inside each CV fold and for the final model).

    Returns:
        ``(voting_clf, scaler)``: the fitted ``VotingClassifier`` and the
        ``StandardScaler`` fitted on all of ``X``. Inputs must be passed
        through ``scaler.transform`` before calling the classifier.
    """
    # Local imports keep this cell self-contained; both packages are already
    # dependencies of the notebook.
    from sklearn.base import clone
    from sklearn.calibration import CalibratedClassifierCV
    from imblearn.pipeline import Pipeline as ImbPipeline

    print("ü§ñ Training NAI Model...")

    # Create voting classifier
    rf = RandomForestClassifier(
        n_estimators=200,
        max_depth=10,
        min_samples_split=5,
        random_state=42,
        n_jobs=-1
    )

    linear_svc = LinearSVC(
        max_iter=20000,
        random_state=42,
        C=1.0
    )
    # Calibration wrapper supplies the predict_proba that soft voting needs.
    svm = CalibratedClassifierCV(linear_svc, cv=3)

    voting_clf = VotingClassifier(
        estimators=[('rf', rf), ('svm', svm)],
        voting='soft'
    )

    # Cross-validation on the raw data; preprocessing is part of the pipeline
    # so every fold fits its own scaler and oversampler on training rows only.
    cv_steps = [('scaler', StandardScaler())]
    if use_smote:
        cv_steps.append(('smote', SMOTE(random_state=42)))
    cv_steps.append(('clf', clone(voting_clf)))
    cv_pipeline = ImbPipeline(cv_steps)

    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    cv_scores = cross_val_score(cv_pipeline, X, y, cv=cv, scoring='f1_macro')

    print(f"üìä Cross-validation F1-macro: {cv_scores.mean():.3f} ¬± {cv_scores.std():.3f}")

    # Final model: preprocessing fitted on the complete data set.
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # Handle class imbalance
    if use_smote:
        smote = SMOTE(random_state=42)
        X_balanced, y_balanced = smote.fit_resample(X_scaled, y)
        print(f"üìà SMOTE applied: {X_scaled.shape} ‚Üí {X_balanced.shape}")
    else:
        X_balanced, y_balanced = X_scaled, y

    # Train final model
    voting_clf.fit(X_balanced, y_balanced)

    # Performance on training data (optimistic by construction; the
    # cross-validation score above is the generalisation estimate).
    y_pred = voting_clf.predict(X_balanced)
    print("\nüìà Training Performance:")
    print(classification_report(y_balanced, y_pred))

    return voting_clf, scaler

def evaluate_model_latency(model, X_sample, n_trials=1000):
    """Measure single-sample inference latency of ``model``.

    Each trial picks one random row of ``X_sample`` and times a ``predict``
    call followed by a ``predict_proba`` call on that row.

    Args:
        model: Object exposing ``predict`` and ``predict_proba``.
        X_sample: Indexable collection of already-preprocessed rows.
        n_trials: Number of timed trials; must be at least 1.

    Returns:
        NumPy array with one latency in milliseconds per trial.

    Raises:
        ValueError: If ``X_sample`` is empty or ``n_trials`` is below 1.
    """
    n_rows = len(X_sample)
    if n_rows == 0:
        raise ValueError("X_sample must contain at least one row")
    if n_trials < 1:
        raise ValueError("n_trials must be at least 1")

    print(f"‚è±Ô∏è Evaluating inference latency ({n_trials} trials)...")

    latencies = np.empty(n_trials)

    for trial in range(n_trials):
        # Random sample, sliced so it keeps its 2-D (1, n_features) shape.
        idx = np.random.randint(0, n_rows)
        sample = X_sample[idx:idx+1]

        # Time inference; the results themselves are not needed.
        start_time = time.perf_counter()
        model.predict(sample)
        model.predict_proba(sample)
        end_time = time.perf_counter()

        latencies[trial] = (end_time - start_time) * 1000

    print("üìä Inference Latency Statistics:")
    print(f"   Mean: {latencies.mean():.2f} ms")
    print(f"   Median: {np.median(latencies):.2f} ms")
    print(f"   95th percentile: {np.percentile(latencies, 95):.2f} ms")
    print(f"   Max: {latencies.max():.2f} ms")

    return latencies

# Train the model and visualise latency and confusion matrix.
# `model`, `scaler` and `latencies` are left at module level because the
# later save and report cells look them up by name.
if X is not None:
    model, scaler = train_nai_model(X, y)
    
    # Evaluate latency on the full data set, scaled with the returned scaler
    X_scaled = scaler.transform(X)
    latencies = evaluate_model_latency(model, X_scaled)
    
    # The two panels below rely on pyplot's implicit "current axes", so the
    # order of the plt.subplot() calls and the drawing calls matters.
    plt.figure(figsize=(10, 4))
    
    # Left panel: latency histogram with mean and 95th-percentile markers
    plt.subplot(1, 2, 1)
    plt.hist(latencies, bins=50, alpha=0.7, edgecolor='black')
    plt.axvline(latencies.mean(), color='red', linestyle='--', label=f'Mean: {latencies.mean():.1f} ms')
    plt.axvline(np.percentile(latencies, 95), color='orange', linestyle='--', label=f'95th: {np.percentile(latencies, 95):.1f} ms')
    plt.xlabel('Inference Latency (ms)')
    plt.ylabel('Frequency')
    plt.title('Model Inference Latency Distribution')
    plt.legend()
    plt.grid(True, alpha=0.3)
    
    # Right panel: confusion matrix. Predictions are made on the same X the
    # model was trained from, so this shows training fit, not held-out
    # performance.
    plt.subplot(1, 2, 2)
    y_pred = model.predict(X_scaled)
    cm = confusion_matrix(y, y_pred)
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', 
                xticklabels=model.classes_, yticklabels=model.classes_)
    plt.title('Confusion Matrix')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    
    plt.tight_layout()
    plt.show()
    
else:
    print("‚ùå Cannot train model without data")

## 5. Save Trained Model

In [None]:
def save_model(model, scaler, model_path='../models/nai_voting_model.pkl'):
    """Persist the trained model, its scaler and some metadata with joblib.

    The saved object is a dict with the keys ``model``, ``scaler``,
    ``timestamp`` (seconds since the epoch), ``classes`` (list of class
    labels) and ``n_features`` (number of columns the scaler was fitted on).

    Args:
        model: Fitted voting classifier; ``classes_`` and ``estimators`` are read.
        scaler: Fitted scaler; ``mean_`` is read for the feature count.
        model_path: Destination file; missing parent directories are created.

    Returns:
        True once the file has been written.
    """
    # Create the target directory, including any missing intermediate levels.
    Path(model_path).parent.mkdir(parents=True, exist_ok=True)

    n_features = len(scaler.mean_)

    # Save model with metadata
    model_data = {
        'model': model,
        'scaler': scaler,
        'timestamp': time.time(),
        'classes': model.classes_.tolist(),
        'n_features': n_features
    }

    joblib.dump(model_data, model_path)
    print(f"üíæ Model saved: {model_path}")

    # Model info
    print("üìä Model Info:")
    print(f"   Classes: {model.classes_}")
    print(f"   Features: {n_features}")
    print(f"   Estimators: {[name for name, _ in model.estimators]}")

    return True

# Persist the model only when the training cell actually produced one.
if 'model' not in locals() or model is None:
    print("‚ùå No trained model to save")
else:
    save_model(model, scaler)

## 6. Real-time Testing

In [None]:
def test_realtime_pipeline():
    """Time one pass of the saved model over a synthetic EEG window.

    Loads ``../models/nai_voting_model.pkl``, extracts features from a random
    8 x 256 array, scales them, runs prediction and prints per-stage and
    total latency together with the predicted state.

    Returns:
        True when the pipeline ran end to end; False when the model could not
        be loaded, feature extraction returned nothing, or the extracted
        feature count does not match what the model was trained on.
    """
    print("üîÑ Testing Real-time Pipeline...")

    # Load model
    # NOTE: joblib.load unpickles the file and can execute arbitrary code;
    # only load model files that were produced locally or by a trusted source.
    try:
        model_data = joblib.load('../models/nai_voting_model.pkl')
        model = model_data['model']
        scaler = model_data['scaler']
        print("‚úÖ Model loaded successfully")
    except Exception as e:
        print(f"‚ùå Failed to load model: {e}")
        return False

    # Initialize components
    feature_extractor = CFEMExtractor()

    # Test with synthetic data
    print("üß™ Testing with synthetic EEG data...")

    # Generate test EEG window (8 channels, 256 samples = 1 second at 256 Hz)
    test_eeg = np.random.randn(8, 256) * 10  # Simulate EEG data

    # Complete pipeline timing
    start_time = time.perf_counter()

    # 1. Feature extraction
    t1 = time.perf_counter()
    features = feature_extractor.extract_features(test_eeg)
    t2 = time.perf_counter()
    feature_time = (t2 - t1) * 1000

    if features is None:
        print("‚ùå Feature extraction failed")
        return False

    # 2. Preprocessing
    t3 = time.perf_counter()
    # NOTE(review): column order follows the dict's insertion order and is
    # assumed to match the order used at training time. Confirm.
    feature_vector = np.array(list(features.values())).reshape(1, -1)

    # Report a width mismatch clearly instead of failing inside the scaler.
    expected_features = model_data.get('n_features')
    if expected_features is not None and feature_vector.shape[1] != expected_features:
        print(
            f"Feature count mismatch: extractor produced {feature_vector.shape[1]} "
            f"features, model expects {expected_features}"
        )
        return False

    X_scaled = scaler.transform(feature_vector)
    t4 = time.perf_counter()
    preprocess_time = (t4 - t3) * 1000

    # 3. Inference
    t5 = time.perf_counter()
    prediction = model.predict(X_scaled)[0]
    probabilities = model.predict_proba(X_scaled)[0]
    confidence = np.max(probabilities)
    t6 = time.perf_counter()
    inference_time = (t6 - t5) * 1000

    total_time = (t6 - start_time) * 1000

    # Results
    print("\nüìä Pipeline Performance:")
    print(f"   Feature Extraction: {feature_time:.2f} ms")
    print(f"   Preprocessing: {preprocess_time:.2f} ms")
    print(f"   Inference: {inference_time:.2f} ms")
    print(f"   Total Latency: {total_time:.2f} ms")

    print("\nüéØ Prediction Results:")
    print(f"   State: {prediction}")
    print(f"   Confidence: {confidence:.3f}")
    print(f"   Probabilities: {dict(zip(model.classes_, probabilities))}")

    # Performance check
    if total_time < 50:
        print("\n‚úÖ Pipeline meets real-time requirements (<50ms)")
    else:
        print(f"\n‚ö†Ô∏è Pipeline latency high: {total_time:.1f}ms (target: <50ms)")

    return True

# Run the end-to-end timing check against the model file written by the
# save cell above.
test_realtime_pipeline()

## 7. Validation Metrics Summary

In [None]:
def generate_validation_report():
    """Print a validation summary built from the notebook's global results.

    The model section is printed when ``model``, ``scaler``, ``X`` and ``y``
    exist at module level, and the latency section when ``latencies`` exists.
    These names are looked up through ``globals()``: inside a function,
    ``locals()`` only contains the function's own variables, so the previous
    ``'model' in locals()`` checks were always False and both sections were
    silently skipped.

    The static requirement and recommendation lists are always printed.
    """
    print("üìã NAI System Validation Report")
    print("=" * 50)

    notebook_ns = globals()
    trained_model = notebook_ns.get('model')
    fitted_scaler = notebook_ns.get('scaler')
    X_data = notebook_ns.get('X')
    y_data = notebook_ns.get('y')
    measured_latencies = notebook_ns.get('latencies')

    # Model performance (identity checks only: `==` on arrays is elementwise)
    model_inputs = (trained_model, fitted_scaler, X_data, y_data)
    if all(item is not None for item in model_inputs):
        print("\nü§ñ Model Performance:")
        # Scored on the data the model was trained from, so these numbers
        # describe training fit rather than held-out performance.
        y_pred = trained_model.predict(fitted_scaler.transform(X_data))

        from sklearn.metrics import f1_score, accuracy_score
        accuracy = accuracy_score(y_data, y_pred)
        f1_macro = f1_score(y_data, y_pred, average='macro')

        print(f"   ‚úÖ Accuracy: {accuracy:.3f}")
        print(f"   ‚úÖ F1-Macro: {f1_macro:.3f} (Target: >0.70)")

        if f1_macro > 0.70:
            print("   üéØ Model meets performance requirements")
        else:
            print("   ‚ö†Ô∏è Model below target performance")

    # Latency performance
    if measured_latencies is not None:
        print("\n‚è±Ô∏è Latency Performance:")
        print(f"   ‚úÖ Mean Inference: {measured_latencies.mean():.1f} ms")
        print(f"   ‚úÖ 95th Percentile: {np.percentile(measured_latencies, 95):.1f} ms")

        if measured_latencies.mean() < 20:
            print("   üéØ Inference meets real-time requirements")
        else:
            print("   ‚ö†Ô∏è Inference latency high")

    # System requirements
    print("\nüîß System Requirements:")
    print("   ‚úÖ 4-class cognitive state classification")
    print("   ‚úÖ Real-time EEG processing (256 Hz)")
    print("   ‚úÖ P300 fatigue monitoring")
    print("   ‚úÖ Adaptive feedback system")
    print("   ‚úÖ LSL integration")
    print("   ‚úÖ Streamlit dashboard")

    # Recommendations
    print("\nüí° Recommendations:")
    print("   1. Run 5-10 minute calibration for each user")
    print("   2. Monitor model confidence in real-time")
    print("   3. Adjust intervention thresholds based on user feedback")
    print("   4. Collect more training data for improved performance")
    print("   5. Consider online learning for adaptation")

    print("\n" + "=" * 50)
    print("üìä Validation Complete")

# Print the summary using whatever results the earlier cells produced.
generate_validation_report()

## 8. Next Steps

### To run the complete NAI system:

1. **Start EEG acquisition**: Ensure LSL EEG stream is running
2. **Start inference server**: `python src/inference/infer_server.py`
3. **Launch dashboard**: `streamlit run src/dashboard/app.py`
4. **Optional - Arduino markers**: Upload `src/atm/arduino_code.ino` for precise timing

### For production deployment:

1. **Collect real EEG data** using the calibration protocol
2. **Retrain model** with user-specific data
3. **Optimize thresholds** based on user feedback
4. **Conduct user study** (n=3-5 participants)
5. **Create demo video** showing real-time operation

### Performance targets:

- **Model performance**: F1-macro > 0.70 for 4-class classification
- **Inference latency**: <20ms for model prediction
- **Total pipeline latency**: <50ms (acquisition → feedback)
- **P300 detection**: Real-time fatigue index computation
- **Intervention effectiveness**: Measurable reduction in overload episodes