In [None]:
# Phase 1 checkpoint decision
print("Phase 1 Checkpoint: Multi-Scenario Readiness Assessment")
print("=" * 54)


def _json_default(obj):
    """Fallback encoder for objects ``json`` cannot serialize natively.

    Objects exposing ``tolist()`` (NumPy arrays and scalars, for example) are
    written as real JSON lists/numbers rather than as their ``str()``
    rendering; anything else still falls back to ``str(obj)``.
    """
    to_list = getattr(obj, 'tolist', None)
    if callable(to_list):
        return to_list()
    return str(obj)


# Single go/no-go flag derived from the baseline validation outcome.
proceed_to_phase2 = bool(phase1_success)

if proceed_to_phase2:
    print("✅ CHECKPOINT PASSED: Proceeding to Phase 2")
    print("   Both controllers validated against V2-8/V2-9 baselines")
    print("   Ready for multi-scenario performance comparison")
    print("   Expected strategy differences confirmed")
else:
    print("⚠️  CHECKPOINT CONCERNS: Controllers need attention")
    print("   One or both controllers differ from expected baselines")
    print("   Recommend investigation before proceeding")

# Build the Phase 1 record once; only the failure path carries an 'issues' entry.
phase1_results = {
    'timestamp': datetime.now().isoformat(),
    'baseline_validation': baseline_results,
    'controllers_ready': proceed_to_phase2,
}
if not proceed_to_phase2:
    phase1_results['issues'] = 'Baseline validation differs from expected'
phase1_results['next_phase'] = 'Phase 2' if proceed_to_phase2 else 'Investigation needed'

# Save results to file
phase1_results_file = RESULTS_PATH / "phase1_results.json"
with open(phase1_results_file, 'w') as f:
    json.dump(phase1_results, f, indent=2, default=_json_default)

print(f"\n📋 Phase 1 results saved to: {phase1_results_file}")
print(f"🚀 Next action: {'Proceed to Phase 2' if proceed_to_phase2 else 'Investigate baseline differences'}")

### Phase 1 Checkpoint: Ready for Multi-Scenario Comparison?

**Decision Point**: Based on the baseline validation results, determine whether both controllers are ready for comprehensive comparison or whether additional debugging is needed.

In [None]:
def validate_controller_baselines():
    """Validate both controllers produce expected actions from V2-8/V2-9.

    Expected Results:
    - V1: [162.90, 556.22, 33.04]
    - V2: [130.0, 550.0, 30.0]

    Relies on notebook-level state created by earlier cells:
    ``v1_controller``, ``v1_config``, ``v1_cmas_df``, ``v1_cpps_df``,
    ``v2_controller``, ``test_cmas`` and ``test_cpps``.

    Returns:
        tuple: ``(results, phase1_success)``. ``results`` maps ``'v1'`` and
        ``'v2'`` to a dict holding ``expected``, ``actual``, ``error``,
        ``status`` (``'perfect'``, ``'good'``, ``'different'`` or
        ``'failed'``) and, once the controller call has returned, ``time``
        in seconds. ``phase1_success`` is True only when both statuses are
        ``'perfect'`` or ``'good'``.
    """

    print("Phase 1.4: Baseline Validation (V2-8/V2-9 Reproduction)")
    print("=" * 56)

    # Test conditions IDENTICAL to V2-8/V2-9
    test_setpoint = np.array([450.0, 1.4])  # d50=450μm, LOD=1.4%

    print("Test Conditions (IDENTICAL to V2-8/V2-9):")
    print(f"  Setpoint: d50={test_setpoint[0]:.0f}μm, LOD={test_setpoint[1]:.1f}%")
    print("  Data: indices 2000-2036 (identical segment)")

    results = {
        'v1': {'expected': np.array([162.90, 556.22, 33.04]), 'actual': None, 'error': None},
        'v2': {'expected': np.array([130.0, 550.0, 30.0]), 'actual': None, 'error': None}
    }

    # Test V1 Controller
    print("\n🔍 Testing V1 Controller:")
    try:
        # Create target for V1 (repeated for horizon)
        horizon = v1_config['horizon']
        target_cmas_v1 = np.tile(test_setpoint, (horizon, 1))

        # perf_counter is monotonic and high-resolution, so short calls do
        # not collapse to a 0.0 duration.
        start_time = time.perf_counter()
        v1_action = v1_controller.suggest_action(
            v1_cmas_df,
            v1_cpps_df,
            target_cmas_v1
        )
        v1_time = time.perf_counter() - start_time

        results['v1']['actual'] = v1_action
        results['v1']['time'] = v1_time

        print(f"   ✅ V1 Action: {v1_action}")
        print(f"   ✅ Expected:  {results['v1']['expected']}")
        print(f"   ✅ Time: {v1_time:.3f}s")

        # Check accuracy
        results['v1']['status'] = _classify_baseline('V1', v1_action, results['v1']['expected'])

    except Exception as e:
        print(f"   ❌ V1 Controller failed: {e}")
        results['v1']['error'] = str(e)
        results['v1']['status'] = 'failed'
        # Same diagnostics as the V2 branch so failures are equally debuggable.
        traceback.print_exc()

    # Test V2 Controller
    print("\n🔍 Testing V2 Controller:")
    try:
        # Convert test state to arrays for V2
        current_cmas_array = np.array([test_cmas['d50'], test_cmas['lod']])
        current_cpps_array = np.array([test_cpps['spray_rate'], test_cpps['air_flow'], test_cpps['carousel_speed']])

        start_time = time.perf_counter()
        v2_action = v2_controller.suggest_action(
            noisy_measurement=current_cmas_array,
            control_input=current_cpps_array,
            setpoint=test_setpoint
        )
        v2_time = time.perf_counter() - start_time

        results['v2']['actual'] = v2_action
        results['v2']['time'] = v2_time

        print(f"   ✅ V2 Action: {v2_action}")
        print(f"   ✅ Expected:  {results['v2']['expected']}")
        print(f"   ✅ Time: {v2_time:.3f}s")

        # Check accuracy
        results['v2']['status'] = _classify_baseline('V2', v2_action, results['v2']['expected'])

    except Exception as e:
        print(f"   ❌ V2 Controller failed: {e}")
        results['v2']['error'] = str(e)
        results['v2']['status'] = 'failed'
        traceback.print_exc()

    # Overall validation assessment
    print("\n📊 BASELINE VALIDATION SUMMARY:")
    print("=" * 35)

    v1_ok = results['v1']['status'] in ['perfect', 'good']
    v2_ok = results['v2']['status'] in ['perfect', 'good']
    phase1_success = v1_ok and v2_ok

    if phase1_success:
        print("🎉 PHASE 1 SUCCESS: Both controllers validated successfully")
        print(f"   V1 Controller: {results['v1']['status'].upper()}")
        print(f"   V2 Controller: {results['v2']['status'].upper()}")
        print("   ✅ Ready for Phase 2: Multi-scenario comparison")
    else:
        print("⚠️  PHASE 1 ISSUES DETECTED:")
        if not v1_ok:
            print(f"   V1 Controller: {results['v1']['status'].upper()}")
        if not v2_ok:
            print(f"   V2 Controller: {results['v2']['status'].upper()}")
        print("   🔄 May need investigation before Phase 2")

    # Performance comparison preview (only meaningful when both runs succeeded)
    if phase1_success:
        print("\n🔍 INITIAL PERFORMANCE PREVIEW:")
        v1_act = results['v1']['actual']
        v2_act = results['v2']['actual']

        max_diff = np.max(np.abs(v1_act - v2_act))

        print(f"   Controller action difference: {max_diff:.3f}")
        print(f"   V1 strategy: {v1_act}")
        print(f"   V2 strategy: {v2_act}")

        # Guard the ratio: a zero V1 duration must not abort the summary.
        v1_elapsed = results['v1']['time']
        if v1_elapsed > 0:
            print(f"   Time ratio V2/V1: {results['v2']['time']/v1_elapsed:.2f}x")
        else:
            print("   Time ratio V2/V1: n/a (V1 time below timer resolution)")

    return results, phase1_success


def _classify_baseline(label, action, expected):
    """Print the tolerance verdict for one controller and return its status.

    The largest absolute element-wise deviation between ``action`` and
    ``expected`` decides the result: below 0.1 -> ``'perfect'``, below 1.0 ->
    ``'good'``, otherwise ``'different'``.
    """
    max_diff = np.max(np.abs(action - expected))

    if max_diff < 0.1:
        print(f"   🎉 {label} BASELINE PERFECT: Max diff {max_diff:.6f}")
        return 'perfect'
    if max_diff < 1.0:
        print(f"   ✅ {label} BASELINE GOOD: Max diff {max_diff:.3f}")
        return 'good'
    print(f"   ⚠️  {label} BASELINE DIFFERS: Max diff {max_diff:.3f}")
    return 'different'

# Run the Phase 1.4 check; both outputs stay at notebook level because the
# checkpoint cell reads `baseline_results` and `phase1_success`.
(
    baseline_results,
    phase1_success,
) = validate_controller_baselines()

# One-line wrap-up whose wording depends on the validation outcome.
if phase1_success:
    print("\n🎯 Phase 1 Complete - Controllers validated")
else:
    print("\n🎯 Phase 1 Complete - Controllers need attention")

### Phase 1.4: Baseline Validation

In [None]:
def create_v2_controller_validated():
    """Create V2 controller using exact patterns validated in V2-9.

    Expected result: action [130.0, 550.0, 30.0] with setpoint d50=450μm LOD=1.4%

    Reads the notebook-level ``comparison_data``, ``comparison_scalers``,
    ``comparison_model`` and ``V2_CONFIG_PATH``.

    Returns:
        tuple: ``(v2_controller, v2_data_buffer, v2_config, final_cmas,
        final_cpps)``. ``final_cmas`` / ``final_cpps`` are dicts holding the
        values of the last row of ``comparison_data``.

    Raises:
        ValueError: If ``comparison_data`` has no rows, since the buffer and
            the estimator's initial state are both seeded from it.
    """

    print("Creating V2 Controller (Validated V2-9 Patterns)")
    print("=" * 47)

    # Import V2 components exactly as in V2-9
    from V2.robust_mpc.core import RobustMPCController
    from V2.robust_mpc.estimators import KalmanStateEstimator
    from V2.robust_mpc.optimizers import GeneticOptimizer
    from V2.robust_mpc.data_buffer import DataBuffer

    # Fail early with a clear message instead of a later TypeError on None.
    if len(comparison_data) == 0:
        raise ValueError("comparison_data is empty; cannot seed the V2 buffer or estimator")

    # Load V2 configuration first so the readiness checks below use the same
    # lookback that is handed to the controller.
    with open(V2_CONFIG_PATH, 'r') as f:
        v2_config_base = yaml.safe_load(f)

    mpc_config = v2_config_base['mpc']
    process_vars = v2_config_base['process_variables']
    kalman_config = v2_config_base['kalman']

    lookback = mpc_config['lookback']
    buffer_size = 150

    # Create V2 DataBuffer with identical data from comparison_data
    v2_data_buffer = DataBuffer(
        cma_features=2,  # d50, lod
        cpp_features=3,  # spray_rate, air_flow, carousel_speed
        buffer_size=buffer_size,
        validate_sequence=True
    )

    print(f"✓ V2 DataBuffer created with capacity: {buffer_size}")

    # Populate buffer with the identical data sequence, one atomic
    # add_sample call per row, in row order.
    sample_columns = ['d50', 'lod', 'spray_rate', 'air_flow', 'carousel_speed']
    sample_rows = comparison_data[sample_columns].itertuples(index=False, name=None)
    for d50, lod, spray_rate, air_flow, carousel_speed in sample_rows:
        cma_array = np.array([d50, lod], dtype=float)
        cpp_array = np.array([spray_rate, air_flow, carousel_speed], dtype=float)
        v2_data_buffer.add_sample(cma_array, cpp_array)

    # Store final state for testing (last row of the segment)
    last_row = comparison_data.iloc[-1]
    final_cmas = {'d50': last_row['d50'], 'lod': last_row['lod']}
    final_cpps = {
        'spray_rate': last_row['spray_rate'],
        'air_flow': last_row['air_flow'],
        'carousel_speed': last_row['carousel_speed']
    }

    current_size = len(v2_data_buffer)
    print(f"✓ Buffer populated: {current_size}/{buffer_size} steps")
    print(f"  Buffer ready for {lookback}-step lookback: {current_size >= lookback}")

    # Build complete configuration (using V2-9 patterns)
    v2_config = {
        # Root level keys (required by V2 controller)
        'cma_names': process_vars['cma_names'],
        'cpp_names': process_vars['cpp_names'],
        'cpp_full_names': process_vars['cpp_full_names'],
        'lookback': lookback,
        'horizon': mpc_config['horizon'],
        'mc_samples': mpc_config['mc_samples'],
        'cpp_constraints': process_vars['cpp_constraints'],
        'scalers': comparison_scalers,

        # Genetic algorithm config (fixed key name from V2-9)
        'ga_config': {
            'population_size': mpc_config.get('population_size', 40),
            'num_generations': mpc_config.get('generations', 15),  # num_generations not generations
            'mutation_rate': mpc_config.get('mutation_rate', 0.1),
            'crossover_rate': mpc_config.get('crossover_rate', 0.7)
        },

        # Kalman parameters
        'kalman': {
            'process_noise': kalman_config.get('process_noise_std', 1.0),
            'measurement_noise': kalman_config.get('measurement_noise_std', 15.0),
            'initial_uncertainty': kalman_config.get('initial_covariance_scale', 1.0)
        },

        # MPC parameters
        'mpc': mpc_config,
        'verbose': False  # Silent operation for comparison
    }

    print("✓ V2 configuration created with all required keys")

    # Create KalmanStateEstimator (using V2-9 patterns)
    n_states = len(v2_config['cma_names'])  # 2
    n_controls = len(v2_config['cpp_names'])  # 3

    transition_matrix = np.eye(n_states) * 0.95
    control_matrix = np.ones((n_states, n_controls)) * 0.1
    initial_state = np.array([final_cmas['d50'], final_cmas['lod']])

    estimator = KalmanStateEstimator(
        transition_matrix=transition_matrix,
        control_matrix=control_matrix,
        initial_state_mean=initial_state,
        process_noise_std=v2_config['kalman']['process_noise'],
        measurement_noise_std=v2_config['kalman']['measurement_noise']
    )

    print("✓ KalmanStateEstimator created")

    # Create RobustMPCController
    v2_controller = RobustMPCController(
        model=comparison_model,
        estimator=estimator,
        optimizer_class=GeneticOptimizer,  # Pass class, not instance
        config=v2_config,
        scalers=v2_config['scalers'],
        history_buffer=v2_data_buffer  # Pre-populated buffer
    )

    print("✓ V2 RobustMPCController created successfully")
    print(f"  Model device: {next(v2_controller.model.parameters()).device}")
    print(f"  Estimator: {type(estimator).__name__}")
    print(f"  Buffer ready: {len(v2_data_buffer) >= lookback}")

    return v2_controller, v2_data_buffer, v2_config, final_cmas, final_cpps

# Build the V2 controller; the last-row state is kept as test_cmas/test_cpps
# for the baseline validation cell.
(
    v2_controller,
    v2_data_buffer,
    v2_config,
    test_cmas,
    test_cpps,
) = create_v2_controller_validated()

print("\n🎯 Phase 1.3 Complete - V2 controller ready for baseline testing")
print("   Final test state: d50={:.1f}μm, LOD={:.2f}%".format(test_cmas['d50'], test_cmas['lod']))

### Phase 1.3: V2 Controller Recreation

### Phase 1.2: V1 Controller Recreation

In [None]:
def create_v1_controller_validated():
    """Build the V1 MPC controller and its input frames following the V2-8 setup.

    Expected result: action [162.90, 556.22, 33.04] with setpoint d50=450μm LOD=1.4%

    Reads the notebook-level ``comparison_data``, ``comparison_model`` and
    ``comparison_scalers``.

    Returns:
        tuple: ``(v1_controller, v1_cmas_df, v1_cpps_df, v1_config)``.
    """

    print("Creating V1 Controller (Validated V2-8 Patterns)")
    print("=" * 47)

    # Import V1 components exactly as in V2-8
    from V1.src.mpc_controller import MPCController as V1Controller
    from V1.src.model_architecture import GranulationPredictor

    history_len = 36

    # Unscaled input frames: CMAs (material attributes) and CPPs (process
    # parameters plus the two soft-sensor columns).
    cma_cols = ['d50', 'lod']
    cpp_cols = ['spray_rate', 'air_flow', 'carousel_speed', 'specific_energy', 'froude_number_proxy']
    v1_cmas_df = comparison_data[cma_cols].copy()
    v1_cpps_df = comparison_data[cpp_cols].copy()

    print("✓ V1 DataFrames created:")
    print(f"  CMAs shape: {v1_cmas_df.shape}, columns: {list(v1_cmas_df.columns)}")
    print(f"  CPPs shape: {v1_cpps_df.shape}, columns: {list(v1_cpps_df.columns)}")

    # Sanity check on units: a d50 maximum above 100 is taken to mean the
    # data is unscaled; anything else only triggers a warning.
    d50_max = v1_cmas_df['d50'].max()
    looks_unscaled = d50_max > 100
    if not looks_unscaled:
        print(f"  ❌ WARNING: Data appears scaled (d50 max: {d50_max:.3f})")
    else:
        print(f"  ✓ Data in engineering units (d50 max: {d50_max:.1f} μm)")

    # Actuator limits as (name, min, max, max change per step).
    limit_table = (
        ('spray_rate', 80.0, 180.0, 15.0),
        ('air_flow', 400.0, 700.0, 30.0),
        ('carousel_speed', 20.0, 40.0, 3.0),
    )
    cpp_constraints = {
        name: {'min_val': low, 'max_val': high, 'max_change_per_step': step}
        for name, low, high, step in limit_table
    }

    # V1 configuration exactly as in V2-8 (key order kept as in the original).
    v1_config = dict(
        lookback=history_len,
        horizon=72,
        cpp_names=['spray_rate', 'air_flow', 'carousel_speed'],
        cma_names=['d50', 'lod'],
        cpp_names_and_soft_sensors=[
            'spray_rate', 'air_flow', 'carousel_speed', 'specific_energy', 'froude_number_proxy'
        ],
        control_effort_lambda=0.01,
        discretization_steps=5,
        cpp_constraints=cpp_constraints,
    )

    # The controller receives the same constraints object stored in the config.
    v1_controller = V1Controller(
        model=comparison_model,
        config=v1_config,
        constraints=cpp_constraints,
        scalers=comparison_scalers
    )

    print("✓ V1 controller created successfully")
    print(f"  Device: {v1_controller.device}")
    model_device = next(v1_controller.model.parameters()).device
    print(f"  Model device: {model_device}")

    return v1_controller, v1_cmas_df, v1_cpps_df, v1_config

# Build the V1 controller and keep its input frames and config at notebook
# level for the baseline validation cell.
(
    v1_controller,
    v1_cmas_df,
    v1_cpps_df,
    v1_config,
) = create_v1_controller_validated()

print("\n🎯 Phase 1.2 Complete - V1 controller ready for baseline testing")

# V2-10: Comprehensive V1 vs V2 Performance Comparison

**Project:** RobustMPC-Pharma V2  
**Version:** 2.10 - Comprehensive Controller Performance Analysis  
**Date:** 2025-08-24  

## Project Context

Building on successful debugging in V2-8 (V1 controller) and V2-9 (V2 controller), this notebook performs a systematic performance comparison between the two controllers using the established 4-phase methodology.

### Expected Baseline Results (from V2-8/V2-9)
- **V1 Controller**: [162.90, 556.22, 33.04] (grid search optimization)
- **V2 Controller**: [130.0, 550.0, 30.0] (genetic algorithm optimization)
- **Test Conditions**: Data indices 2000-2036, setpoint d50=450μm LOD=1.4%

## Strategic Objectives
- **Direct Performance Comparison**: Test both controllers under identical conditions across multiple scenarios
- **Statistical Analysis**: Compare optimization strategies, convergence behavior, and control effectiveness  
- **Production Readiness Assessment**: Validate industrial deployment capabilities
- **Decision Framework**: Provide data-driven controller selection criteria

## 4-Phase Methodology
**Phase 1:** Controller Setup & Validation (reproduce V2-8/V2-9 results)  
**Phase 2:** Multi-Scenario Performance Testing (statistical comparison)  
**Phase 3:** Advanced Feature Comparison (uncertainty, Kalman, optimization)  
**Phase 4:** Analysis & Recommendations (deployment guidance)  

## Phase 1: Controller Setup & Validation

Ensure both controllers are properly configured with identical test data and reproduce expected results from V2-8/V2-9.

### Phase 1.1: Environment & Data Preparation

In [None]:
# System imports
import torch
import joblib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import sys
from pathlib import Path
import traceback
from typing import Dict, List, Tuple, Optional
import yaml
import time
from datetime import datetime
import json

# Suppress all Python warnings for the rest of the session.
warnings.filterwarnings('ignore')

# Enhanced plotting setup
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Resolve the compute device once so the banner and DEVICE cannot disagree.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

print("V2-10: Comprehensive V1 vs V2 Performance Comparison")
print("=" * 55)
print(f"Session Start: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print(f"PyTorch: {torch.__version__}")
print(f"Device: {DEVICE.upper()}")

# Configuration (paths are relative to the notebook's working directory)
V1_DATA_PATH = Path("../../V1/data")
V2_CONFIG_PATH = Path("../../V2/config.yaml")
V2_MODEL_PATH = Path("../../V2/models")
RESULTS_PATH = Path("../../V2/comparison_results")
# parents=True: create missing intermediate directories instead of raising.
RESULTS_PATH.mkdir(parents=True, exist_ok=True)

print("\nConfiguration:")
print(f"  V1 Data: {V1_DATA_PATH}")
print(f"  V2 Config: {V2_CONFIG_PATH}")
print(f"  Results: {RESULTS_PATH}")
print("  Using IDENTICAL test conditions from V2-8/V2-9")

In [None]:
def load_identical_training_data():
    """Load identical training data used in V2-8 and V2-9.

    CRITICAL: Must produce IDENTICAL data to V2-8/V2-9 for baseline validation.

    Raw data is taken from the first available source, in order:
    ``train_data_raw.csv``, ``granulation_data_raw.csv``, or ``train_data.csv``
    inverse-transformed column by column with the saved scalers. All files are
    read from ``V1_DATA_PATH``.

    Returns:
        tuple: ``(data_segment, scalers, model, model_source)`` where
        ``data_segment`` is a 36-row slice of the raw data (rows 2000-2036
        when the dataset is long enough, otherwise a fallback slice near the
        end), and ``model_source`` is ``"V2"`` or ``"V1"`` depending on which
        model file was loaded.
    """

    print("Loading Identical Training Data (V2-8/V2-9 Reproduction)")
    print("=" * 57)

    scalers_path = V1_DATA_PATH / "scalers.joblib"
    scalers = None

    # Load exact same data as V2-8/V2-9: try the raw files in priority order.
    raw_candidates = (
        ("train_data_raw.csv", "training"),
        ("granulation_data_raw.csv", "granulation"),
    )
    for filename, label in raw_candidates:
        try:
            raw_data = pd.read_csv(V1_DATA_PATH / filename)
        except FileNotFoundError:
            continue
        print(f"✓ Raw {label} data loaded: {len(raw_data):,} samples")
        break
    else:
        # Neither raw file exists: rebuild engineering units from scaled data.
        print("Raw data not found, generating from scaled data...")
        scaled_data = pd.read_csv(V1_DATA_PATH / "train_data.csv")
        scalers = joblib.load(scalers_path)

        raw_data = scaled_data.copy()
        for col in scaled_data.columns:
            if col in scalers:
                raw_data[col] = scalers[col].inverse_transform(scaled_data[[col]]).flatten()

        print(f"✓ Generated unscaled data from scaled data: {len(raw_data):,} samples")

    # Load V1 scalers (reused if the fallback above already read them)
    if scalers is None:
        scalers = joblib.load(scalers_path)
    print(f"✓ V1 scalers loaded: {list(scalers.keys())}")

    # Load model for testing (try V2 model first, fallback to V1)
    from V2.robust_mpc.models import load_trained_model

    v2_model_path = V2_MODEL_PATH / "best_model.pth"
    v1_model_path = V1_DATA_PATH / "best_predictor_model.pth"

    if v2_model_path.exists():
        model = load_trained_model(v2_model_path, device=DEVICE, validate=True)
        model_source = "V2"
        print(f"✓ V2 model loaded: {v2_model_path}")
    else:
        model = load_trained_model(v1_model_path, device=DEVICE, validate=True)
        model_source = "V1"
        print(f"✓ V1 model loaded as fallback: {v1_model_path}")

    # CRITICAL: Use IDENTICAL data segment as V2-8/V2-9 (indices 2000-2036)
    start_idx = 2000
    lookback = 36
    end_idx = start_idx + lookback  # 2036
    segment_kind = "IDENTICAL"

    if len(raw_data) < end_idx:
        # Dataset too short for the reference window: fall back to a window
        # near the end, clamped so the indices never go negative or run past
        # the data, and say so explicitly because baselines will not match.
        start_idx = max(0, len(raw_data) - lookback - 100)
        end_idx = min(start_idx + lookback, len(raw_data))
        segment_kind = "FALLBACK"
        print(f"\n⚠️  Dataset has only {len(raw_data):,} rows; "
              f"using fallback segment {start_idx}-{end_idx} instead of 2000-2036")

    data_segment = raw_data.iloc[start_idx:end_idx].copy()

    print(f"\n✓ Extracted {segment_kind} data segment: indices {start_idx}-{end_idx}")
    print(f"  Shape: {data_segment.shape}")
    print(f"  Columns: {list(data_segment.columns)}")

    # Validate data ranges (should match V2-8/V2-9)
    print("\nData ranges (should match V2-8/V2-9 exactly):")
    for col in ['d50', 'lod', 'spray_rate', 'air_flow', 'carousel_speed']:
        if col in data_segment.columns:
            print(f"  {col}: [{data_segment[col].min():.1f}, {data_segment[col].max():.1f}]")

    return data_segment, scalers, model, model_source

# Load the shared data segment, scalers and model that both the V1 and V2
# controller cells consume.
(
    comparison_data,
    comparison_scalers,
    comparison_model,
    model_source,
) = load_identical_training_data()

print("\n🎯 Phase 1.1 Complete - Identical data loaded successfully")
print("   Model source: {}".format(model_source))
print("   Data segment ready for controller comparison")