In [None]:
"""
Bayesian Optimization Pipeline - Phase 2
=========================================
Iterative Bayesian Optimization for Chemical Experiments
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
import json
import pickle
from datetime import datetime
from pathlib import Path
from dataclasses import dataclass
from typing import List, Dict, Optional, Tuple, Any, Union
from itertools import product

warnings.filterwarnings('ignore')

from sklearn.preprocessing import StandardScaler
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import ConstantKernel, Matern, WhiteKernel
from sklearn.metrics import r2_score, mean_squared_error
from scipy.stats import norm
from scipy.optimize import minimize

RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)

In [None]:
# ─────────────────────────────────────────────────────────────────────────────
# Configuration
# ─────────────────────────────────────────────────────────────────────────────

@dataclass
class Phase2Config:
    """Phase 2 configuration parameters.

    Only ``response_column`` is required; every other field has a default.
    Fields are grouped by the part of the workflow that consumes them.
    """
    
    # Data
    response_column: str  # name of the response (target) column; the only field without a default
    sheet_name: str = 'data'  # presumably forwarded to DataLoader.load_excel; confirm in the pipeline
    header_row: int = 5  # presumably the `header` row index given to pd.read_excel; confirm in the pipeline
    split_keyword: Optional[str] = "PREDICTED OPTIMUM RUNS"  # presumably the marker used by DataLoader.split_at_keyword
    stop_feature: Optional[str] = "Batch ID"  # columns before this one are treated as features (see DataLoader.get_feature_columns)
    
    # Optimization
    maximize_response: bool = True  # True: larger responses are better; False: smaller are better
    n_suggestions: int = 5  # presumably the number of points requested per iteration; confirm in the pipeline
    selection_strategy: str = 'diverse'  # 'greedy' selects the greedy selector; any other value uses the diverse selector
    # Minimum spacing between a candidate and the points it must avoid in
    # AcquisitionOptimizer.optimize_single.
    # NOTE(review): intended as a normalized distance; confirm the optimizer
    # measures distance on a normalized scale.
    min_distance: float = 0.1
    
    # Acquisition function
    # EI chosen over UCB: naturally balances exploration/exploitation without tuning
    exploration_weight: float = 0.01  # passed as the `xi` argument of expected_improvement; small value favors exploitation
    
    # Search space
    # Fraction of each continuous feature's observed range added on both sides
    # of the bounds (SearchSpace); 0.1 allows mild extrapolation beyond observed data
    bounds_margin: float = 0.1
    
    # Optimizer
    # Number of random restarts per acquisition optimization; multi-start is
    # needed because the acquisition function is multimodal
    n_optimizer_restarts: int = 25
    
    # Stopping
    # ConvergenceTracker.should_stop becomes True once this many consecutive
    # iterations show no improvement
    patience: int = 3
    min_improvement: float = 0.01  # relative improvement threshold (0.01 = 1%)
    
    # Duplicate prevention
    # Candidates closer than this to an existing experiment are dropped by the
    # greedy selector
    duplicate_threshold: float = 0.05
    
    # Output
    output_dir: str = 'bo_phase2_output'  # directory for checkpoints, plots and suggestion CSVs
    fresh_start: bool = False  # when False, Phase2Pipeline.run resumes from an existing checkpoint

In [None]:
# ─────────────────────────────────────────────────────────────────────────────
# DataLoader (Same as Phase 1)
# ─────────────────────────────────────────────────────────────────────────────

class DataLoader:
    """Loads the experiment workbook and classifies its feature columns."""

    def load_excel(self, file_path: str, sheet_name: str, header_row: int) -> pd.DataFrame:
        """Read one sheet of an Excel workbook into a DataFrame.

        Args:
            file_path: Path of the workbook.
            sheet_name: Sheet to read.
            header_row: 0-based row index used as the column header.

        Returns:
            The sheet contents.
        """
        # The context manager closes the workbook handle even if parsing fails.
        with pd.ExcelFile(file_path, engine='openpyxl') as workbook:
            return pd.read_excel(workbook, sheet_name=sheet_name, header=header_row)

    def split_at_keyword(self, df: pd.DataFrame, keyword: str,
                         column: str = 'Run') -> Tuple[pd.DataFrame, pd.DataFrame]:
        """Split ``df`` at the first row whose ``column`` equals ``keyword``.

        The marker row itself is dropped. The split is positional, so it is
        correct for any index (filtered, shuffled or non-integer), not only
        for a default RangeIndex.

        Returns:
            ``(rows_before, rows_after)``. When the keyword is absent the
            first element is a copy of ``df`` and the second is empty.

        Raises:
            KeyError: If ``column`` is not a column of ``df``.
        """
        is_marker = (df[column] == keyword).fillna(False).to_numpy(dtype=bool)
        positions = np.flatnonzero(is_marker)
        if positions.size:
            pos = int(positions[0])
            return df.iloc[:pos], df.iloc[pos + 1:]
        return df.copy(), pd.DataFrame()

    def clean_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """Drop the row labelled 0 (if present) and all-empty rows, then renumber.

        NOTE(review): the row labelled 0 is presumably a units/description row
        directly under the header; confirm against the workbook layout.
        """
        df = df.drop(index=0, errors='ignore')
        df = df.dropna(how='all')
        return df.reset_index(drop=True)

    def classify_features(self, df: pd.DataFrame, feature_cols: List[str]
                         ) -> Tuple[List[str], List[str], Dict]:
        """Classify numeric features as binary (2 unique values) or continuous.

        Non-numeric columns in ``feature_cols`` are ignored.

        Side effect: binary columns of ``df`` are re-encoded **in place** to
        0/1, with the smaller original value mapped to 0.

        Returns:
            ``(binary_cols, continuous_cols, binary_mappings)`` where
            ``binary_mappings[col]`` maps each original value to 0 or 1.
        """
        numeric_features = df[feature_cols].select_dtypes(include=[np.number]).columns.tolist()

        binary_cols, continuous_cols, binary_mappings = [], [], {}

        for col in numeric_features:
            levels = sorted(df[col].dropna().unique())
            if len(levels) == 2:
                mapping = {levels[0]: 0, levels[1]: 1}
                df[col] = df[col].map(mapping)
                binary_cols.append(col)
                binary_mappings[col] = mapping
            else:
                continuous_cols.append(col)

        return binary_cols, continuous_cols, binary_mappings

    def get_feature_columns(self, df: pd.DataFrame, stop_feature: Optional[str],
                           response_column: str) -> List[str]:
        """Return the columns to be used as model features.

        When ``stop_feature`` is a column of ``df``, only the columns before it
        are considered; otherwise every column is. The response column and
        index-like columns ('run', 'index', 'unnamed: 0', case-insensitive)
        are always excluded, so the response cannot leak into the features
        even when it sits before ``stop_feature``.
        """
        columns = df.columns.tolist()

        if stop_feature and stop_feature in columns:
            candidates = columns[:columns.index(stop_feature)]
        else:
            candidates = columns

        index_like = {'run', 'index', 'unnamed: 0'}
        # str() guards against non-string headers (e.g. numeric column names).
        return [c for c in candidates
                if c != response_column and str(c).lower() not in index_like]

In [None]:
# ─────────────────────────────────────────────────────────────────────────────
# SearchSpace
# ─────────────────────────────────────────────────────────────────────────────

class SearchSpace:
    """
    Search-space description: per-feature bounds plus every binary setting.

    Binary features are enumerated rather than relaxed to a continuous range:
    - GPs struggle with discontinuous functions
    - Categorical boundaries are sharp in chemistry (catalyst A vs B)
    - Enumeration stays cheap for the usual 1-2 binary features
    """

    def __init__(self, X: pd.DataFrame, binary_features: List[str],
                 continuous_features: List[str], bounds_margin: float = 0.1):
        """
        Args:
            X: Observed feature values; only the continuous columns are read.
            binary_features: Names of the 0/1 features.
            continuous_features: Names of the continuous features.
            bounds_margin: Fraction of the observed range added below the
                minimum and above the maximum of each continuous feature.
        """
        self.binary_features = binary_features
        self.continuous_features = continuous_features

        # Continuous entries are inserted first, binary entries second; the
        # row order of get_bounds_df() follows this insertion order.
        self._bounds = {}
        for name in continuous_features:
            self._bounds[name] = self._padded_range(X[name], bounds_margin)
        for name in binary_features:
            self._bounds[name] = {
                'min': 0, 'max': 1,
                'observed_min': 0, 'observed_max': 1,
                'type': 'binary',
            }

        # All 2^n settings of the binary features. With no binary features
        # product() yields one empty tuple, i.e. the single "empty" setting {}.
        self._binary_combinations = [
            dict(zip(binary_features, setting))
            for setting in product([0, 1], repeat=len(binary_features))
        ]

    @staticmethod
    def _padded_range(values, fraction):
        """Bounds record for one continuous column, widened by ``fraction`` of its range."""
        col_min, col_max = values.min(), values.max()
        margin = (col_max - col_min) * fraction
        return {
            'min': col_min - margin, 'max': col_max + margin,
            'observed_min': col_min, 'observed_max': col_max,
            'type': 'continuous',
        }

    def get_continuous_bounds(self) -> List[Tuple[float, float]]:
        """(min, max) pairs for the continuous features, in their declared order."""
        pairs = []
        for name in self.continuous_features:
            entry = self._bounds[name]
            pairs.append((entry['min'], entry['max']))
        return pairs

    def get_bounds_df(self) -> pd.DataFrame:
        """One row per feature: its name followed by the fields of its bounds record."""
        records = []
        for name, entry in self._bounds.items():
            records.append({'feature': name, **entry})
        return pd.DataFrame(records)

    @property
    def bounds(self) -> Dict:
        """Mapping feature name -> bounds record (the internal dict, not a copy)."""
        return self._bounds

    @property
    def binary_combinations(self) -> List[Dict]:
        """Every assignment of 0/1 to the binary features, as dicts."""
        return self._binary_combinations

    @property
    def n_binary_combinations(self) -> int:
        """Number of enumerated binary assignments."""
        return len(self._binary_combinations)

In [None]:
# ─────────────────────────────────────────────────────────────────────────────
# SurrogateModel
# ─────────────────────────────────────────────────────────────────────────────

class SurrogateModel:
    """
    Gaussian Process surrogate wrapped with input/output standardization.

    Design choices:
    - Matern 5/2 kernel: twice differentiable, copes with non-smooth responses
      better than RBF while staying smooth enough for optimization
    - StandardScaler on X and y: better numerical stability and comparable
      length scales across features
    - WhiteKernel: models observation noise explicitly
    """

    def __init__(self):
        self.model = None
        self.scaler_X = StandardScaler()
        self.scaler_y = StandardScaler()
        self._is_fitted = False

    def fit(self, X: np.ndarray, y: np.ndarray):
        """Fit both scalers and the GP on (X, y); returns ``self``."""
        features = self.scaler_X.fit_transform(X)
        targets = self.scaler_y.fit_transform(y.reshape(-1, 1)).ravel()

        # signal = amplitude * Matern 5/2 (one length scale per feature),
        # plus a white-noise term for observation noise
        amplitude = ConstantKernel(1.0, (1e-3, 1e3))
        matern = Matern(length_scale=np.ones(X.shape[1]),
                        length_scale_bounds=(1e-3, 1e3), nu=2.5)
        noise = WhiteKernel(noise_level=0.1, noise_level_bounds=(1e-5, 1e1))

        self.model = GaussianProcessRegressor(
            kernel=amplitude * matern + noise,
            n_restarts_optimizer=10,
            normalize_y=False,
            random_state=RANDOM_STATE,
        )
        self.model.fit(features, targets)
        self._is_fitted = True
        return self

    def _to_response_units(self, scaled_mean: np.ndarray) -> np.ndarray:
        """Undo the y-standardization for a vector of predicted means."""
        return self.scaler_y.inverse_transform(scaled_mean.reshape(-1, 1)).ravel()

    def predict(self, X: np.ndarray, return_std: bool = True):
        """Predict in original response units.

        Returns ``(mean, std)`` when ``return_std`` is true, otherwise only the mean.
        """
        features = self.scaler_X.transform(X)

        if not return_std:
            return self._to_response_units(self.model.predict(features))

        scaled_mean, scaled_std = self.model.predict(features, return_std=True)
        mean = self._to_response_units(scaled_mean)
        # A standard deviation only needs the scale factor, not the offset.
        std = scaled_std * self.scaler_y.scale_[0]
        return mean, std

    def score(self, X: np.ndarray, y: np.ndarray) -> Dict[str, float]:
        """R^2 and RMSE of the mean prediction against ``y``."""
        predicted = self.predict(X, return_std=False)
        r2 = r2_score(y, predicted)
        rmse = np.sqrt(mean_squared_error(y, predicted))
        return {'r2': r2, 'rmse': rmse}

    @property
    def is_fitted(self) -> bool:
        """True once fit() has completed."""
        return self._is_fitted

In [None]:
# ─────────────────────────────────────────────────────────────────────────────
# AcquisitionOptimizer
# ─────────────────────────────────────────────────────────────────────────────

class AcquisitionOptimizer:
    """
    Optimizes the Expected Improvement (EI) acquisition function.

    EI chosen because:
    - Balances exploration/exploitation without explicit weighting
    - Well-understood theoretical properties
    - Works well empirically for chemical optimization

    Points are laid out as ``continuous_features + binary_features``.
    NOTE(review): this assumes the surrogate was fitted on columns in that same
    order; confirm against the code that builds X.

    All spacing thresholds (``config.min_distance``, ``config.duplicate_threshold``)
    are compared against *normalized* distances: each coordinate difference is
    divided by that feature's search-space range before the Euclidean norm is
    taken, so the thresholds mean the same thing for every feature scale.
    """

    def __init__(self, surrogate: "SurrogateModel", search_space: "SearchSpace",
                 config: "Phase2Config"):
        self.surrogate = surrogate
        self.search_space = search_space
        self.config = config
        self._best_y = None

    def set_best_y(self, best_y: float):
        """Set the incumbent (best observed response) that EI is measured against."""
        self._best_y = best_y

    def _require_best_y(self) -> float:
        """Return the incumbent, failing loudly when it has not been set."""
        if self._best_y is None:
            raise RuntimeError("best_y is not set; call set_best_y() before optimizing")
        return self._best_y

    def expected_improvement(self, X: np.ndarray, xi: float = 0.01) -> np.ndarray:
        """
        EI(x) = (mu - f_best - xi) * Phi(Z) + sigma * phi(Z)

        xi > 0 encourages exploration; xi ≈ 0 favors exploitation

        Args:
            X: Points to evaluate, one row per point.
            xi: Exploration offset, in response units.

        Returns:
            EI value for each row of ``X``.

        Raises:
            RuntimeError: If set_best_y() has not been called.
        """
        best_y = self._require_best_y()
        mu, sigma = self.surrogate.predict(X, return_std=True)
        # Floor sigma so Z stays finite; for sigma -> 0 the formula tends to
        # max(improvement, 0).
        sigma = np.maximum(sigma, 1e-9)

        if self.config.maximize_response:
            improvement = mu - best_y - xi
        else:
            improvement = best_y - mu - xi

        Z = improvement / sigma
        return improvement * norm.cdf(Z) + sigma * norm.pdf(Z)

    def _feature_order(self) -> List[str]:
        """Column order of a full point: continuous features, then binary features."""
        return list(self.search_space.continuous_features) + list(self.search_space.binary_features)

    def _feature_scales(self) -> np.ndarray:
        """Per-feature search range (max - min); degenerate ranges fall back to 1."""
        bounds = self.search_space.bounds
        scales = []
        for col in self._feature_order():
            span = float(bounds[col]['max'] - bounds[col]['min'])
            scales.append(span if span > 0 else 1.0)
        return np.array(scales, dtype=float)

    def _min_distance_to(self, point: np.ndarray, others: Optional[np.ndarray]) -> float:
        """Smallest normalized distance from ``point`` to any row of ``others`` (inf if none)."""
        if others is None or len(others) == 0:
            return float('inf')
        diffs = np.atleast_2d(np.asarray(others, dtype=float)) - np.asarray(point, dtype=float)
        return float(np.min(np.linalg.norm(diffs / self._feature_scales(), axis=1)))

    def _construct_full_x(self, continuous_x: np.ndarray, binary_values: Dict) -> np.ndarray:
        """Combine continuous values with fixed binary values."""
        binary = set(self.search_space.binary_features)
        remaining = iter(continuous_x)
        return np.array([binary_values[col] if col in binary else next(remaining)
                         for col in self._feature_order()])

    def _negative_ei(self, x: np.ndarray, binary_values: Dict) -> float:
        """Objective for the minimizer: -EI at the point built from ``x``."""
        full_x = self._construct_full_x(x, binary_values)
        ei = self.expected_improvement(full_x.reshape(1, -1), xi=self.config.exploration_weight)
        return -ei[0]

    def _local_optima(self, binary_values: Dict) -> List[Tuple[np.ndarray, float]]:
        """Run the multi-start search once and return every end point as (x, EI).

        With no continuous features the only candidate is the binary point itself.
        """
        # Checked outside the try-block below so a missing incumbent is never
        # mistaken for a numerical failure and silently discarded.
        self._require_best_y()
        bounds = self.search_space.get_continuous_bounds()

        if not bounds:
            full_x = self._construct_full_x(np.array([]), binary_values)
            ei = self.expected_improvement(full_x.reshape(1, -1),
                                           xi=self.config.exploration_weight)[0]
            return [(full_x, float(ei))]

        optima = []
        # Multi-start: acquisition function is often multimodal
        for _ in range(self.config.n_optimizer_restarts):
            x0 = np.array([np.random.uniform(lo, hi) for lo, hi in bounds])
            try:
                result = minimize(self._negative_ei, x0, args=(binary_values,),
                                  method='L-BFGS-B', bounds=bounds)
            except Exception:
                # Best effort: a single failed restart must not abort the search.
                continue
            optima.append((self._construct_full_x(result.x, binary_values), float(-result.fun)))
        return optima

    def optimize_single(self, binary_values: Dict,
                        existing_points: Optional[np.ndarray] = None) -> Tuple[np.ndarray, float]:
        """Find best point for given binary combination using multi-start L-BFGS-B.

        Candidates closer than ``config.min_distance`` (normalized) to any row
        of ``existing_points`` are rejected; this also applies when the search
        space has only binary features.

        Returns:
            ``(x, ei)`` for the best admissible candidate, or ``(None, -inf)``
            when no candidate is admissible.

        Raises:
            RuntimeError: If set_best_y() has not been called.
        """
        best_x, best_ei = None, -np.inf
        for candidate_x, candidate_ei in self._local_optima(binary_values):
            if not candidate_ei > best_ei:  # also skips NaN
                continue
            # Enforce diversity constraint
            if self._min_distance_to(candidate_x, existing_points) < self.config.min_distance:
                continue
            best_x, best_ei = candidate_x, candidate_ei
        return best_x, best_ei

    def find_next_points(self, n_points: int, existing_X: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """Find n_points suggestions using configured strategy.

        Returns:
            ``(points, ei_values)``; fewer than ``n_points`` rows are returned
            when the acquisition surface does not offer enough admissible
            candidates.

        Raises:
            RuntimeError: If set_best_y() has not been called.
        """
        self._require_best_y()
        known = None
        if existing_X is not None and len(existing_X) > 0:
            known = np.array(existing_X, dtype=float)  # private copy

        if self.config.selection_strategy == 'greedy':
            return self._greedy_selection(n_points, known)
        return self._diverse_selection(n_points, known)

    def _greedy_selection(self, n_points: int,
                          existing_points: Optional[np.ndarray]) -> Tuple[np.ndarray, np.ndarray]:
        """Select top n points by EI, regardless of spread.

        Every local optimum found for every binary combination is a candidate.
        Candidates are taken in decreasing EI order, skipping those within
        ``config.duplicate_threshold`` (normalized) of an existing experiment
        or of a candidate already taken.
        """
        candidates = []
        for binary_combo in self.search_space.binary_combinations:
            candidates.extend(c for c in self._local_optima(binary_combo) if np.isfinite(c[1]))
        candidates.sort(key=lambda c: c[1], reverse=True)

        avoid = None
        if existing_points is not None and len(existing_points) > 0:
            avoid = np.array(existing_points, dtype=float)

        suggestions, ei_values = [], []
        for x, ei in candidates:
            if len(suggestions) >= n_points:
                break
            if self._min_distance_to(x, avoid) < self.config.duplicate_threshold:
                continue
            suggestions.append(x)
            ei_values.append(ei)
            avoid = x.reshape(1, -1) if avoid is None else np.vstack([avoid, x])

        return np.array(suggestions), np.array(ei_values)

    def _diverse_selection(self, n_points: int,
                           existing_points: Optional[np.ndarray]) -> Tuple[np.ndarray, np.ndarray]:
        """Distribute suggestions across binary combinations and enforce spacing."""
        suggestions, ei_values = [], []
        avoid = None
        if existing_points is not None and len(existing_points) > 0:
            avoid = np.array(existing_points, dtype=float)

        combos = self.search_space.binary_combinations
        # Suggestions taken so far per combination; a combination is removed
        # once it can no longer supply an admissible point.
        usage = {idx: 0 for idx in range(len(combos))}

        while len(suggestions) < n_points and usage:
            # Round-robin across binary combinations
            combo_idx = min(usage, key=usage.get)
            best_x, best_ei = self.optimize_single(combos[combo_idx], avoid)

            if best_x is None or not best_ei > 0:
                # Exhausted: drop it without using up a suggestion slot.
                del usage[combo_idx]
                continue

            suggestions.append(best_x)
            ei_values.append(best_ei)
            usage[combo_idx] += 1
            # Add to avoidance set for diversity
            avoid = best_x.reshape(1, -1) if avoid is None else np.vstack([avoid, best_x])

        return np.array(suggestions), np.array(ei_values)

In [None]:
# ─────────────────────────────────────────────────────────────────────────────
# ConvergenceTracker
# ─────────────────────────────────────────────────────────────────────────────

class ConvergenceTracker:
    """
    Tracks optimization progress with patience-based stopping.

    Patience approach chosen over fixed iterations because:
    - Adapts to problem difficulty
    - Clear interpretation for experimenters
    - Avoids premature stopping or wasted experiments

    An iteration counts as an improvement when its best response beats the
    previous iteration's best response by more than ``config.min_improvement``
    (relative). The same rule is used when recording live and when replaying a
    loaded history, so resuming from a checkpoint yields the same patience
    counter as an uninterrupted run.
    """

    def __init__(self, config: "Phase2Config"):
        self.config = config
        self.history = []
        self.no_improvement_count = 0

    def _relative_gain(self, reference: float, value: float) -> float:
        """Signed gain of ``value`` over ``reference``, positive when better.

        The gain is divided by ``abs(reference)``, so it is sign-safe for
        negative responses. A zero reference has no relative scale, so the
        absolute gain is returned instead.
        """
        if self.config.maximize_response:
            gain = value - reference
        else:
            gain = reference - value
        scale = abs(reference)
        return float(gain / scale) if scale > 0 else float(gain)

    def load_history(self, history: List[Dict]):
        """Replace the history (copied, so the caller's list is never mutated)
        and recompute the patience counter from it."""
        self.history = list(history)
        self._update_no_improvement_count()

    def _update_no_improvement_count(self):
        """Replay the history to count trailing iterations without improvement."""
        count = 0
        for previous, current in zip(self.history, self.history[1:]):
            gain = self._relative_gain(previous['best_response'], current['best_response'])
            count = 0 if gain > self.config.min_improvement else count + 1
        self.no_improvement_count = count

    def record_iteration(self, iteration: int, best_response: float,
                         n_experiments: int, suggestions: pd.DataFrame):
        """Append one iteration record and update the patience counter.

        From the second record on, the record also stores ``improvement``
        (relative gain over the previous record) and ``improved`` (whether the
        gain exceeds ``config.min_improvement``).
        """
        record = {
            'iteration': iteration,
            'timestamp': datetime.now().isoformat(),
            'best_response': best_response,
            'n_experiments': n_experiments,
            'n_suggestions': len(suggestions) if suggestions is not None else 0
        }

        if self.history:
            prev_best = self.history[-1]['best_response']
            improvement = self._relative_gain(prev_best, best_response)
            improved = bool(improvement > self.config.min_improvement)

            record['improvement'] = improvement
            record['improved'] = improved

            self.no_improvement_count = 0 if improved else self.no_improvement_count + 1

        self.history.append(record)

    @property
    def should_stop(self) -> bool:
        """True once ``config.patience`` consecutive iterations showed no improvement."""
        return self.no_improvement_count >= self.config.patience

    @property
    def current_iteration(self) -> int:
        """Number of recorded iterations."""
        return len(self.history)

    @property
    def best_response(self) -> Optional[float]:
        """Best recorded response (max or min per config), or None with no history."""
        if not self.history:
            return None
        pick = max if self.config.maximize_response else min
        return pick(h['best_response'] for h in self.history)

    def get_history_df(self) -> pd.DataFrame:
        """History as a DataFrame, one row per iteration."""
        return pd.DataFrame(self.history)


In [None]:
# ─────────────────────────────────────────────────────────────────────────────
# Checkpoint Manager
# ─────────────────────────────────────────────────────────────────────────────

class Phase2Checkpoint:
    """Persists state between iterations (days/weeks apart in practice).

    Two files are kept in ``output_dir``: a pickle holding the full state and
    a JSON summary for human inspection.
    """

    def __init__(self, output_dir: str):
        self.output_dir = Path(output_dir)
        # parents=True so a nested output directory can be created in one go.
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.checkpoint_file = self.output_dir / 'phase2_checkpoint.pkl'
        self.json_file = self.output_dir / 'phase2_checkpoint.json'

    @staticmethod
    def _write_atomically(target: Path, mode: str, writer) -> None:
        """Write via a sibling temp file, then rename it over ``target``.

        An interrupted write therefore leaves the previous checkpoint intact
        instead of a truncated file.
        """
        tmp = target.with_name(target.name + '.tmp')
        try:
            with open(tmp, mode) as f:
                writer(f)
            tmp.replace(target)
        finally:
            # Only present if the write or rename failed.
            if tmp.exists():
                tmp.unlink()

    def save(self, state: Dict[str, Any]):
        """Write the full state (pickle) and a readable summary (JSON).

        The JSON summary omits the ``'history'`` entry; values JSON cannot
        encode are stringified.
        """
        self._write_atomically(self.checkpoint_file, 'wb',
                               lambda f: pickle.dump(state, f))

        # Human-readable summary
        json_state = {k: v for k, v in state.items()
                      if k not in ['history']}  # Exclude large objects
        self._write_atomically(self.json_file, 'w',
                               lambda f: json.dump(json_state, f, indent=2, default=str))

    def load(self) -> Optional[Dict[str, Any]]:
        """Return the saved state, or None when no checkpoint exists.

        SECURITY: unpickling executes code embedded in the file; only load
        checkpoints that this pipeline wrote itself.
        """
        if not self.checkpoint_file.exists():
            return None
        with open(self.checkpoint_file, 'rb') as f:
            return pickle.load(f)

    def exists(self) -> bool:
        """True when a pickle checkpoint is present."""
        return self.checkpoint_file.exists()

    def clear(self):
        """Delete both checkpoint files if they exist."""
        for path in (self.checkpoint_file, self.json_file):
            if path.exists():
                path.unlink()



In [None]:
# ─────────────────────────────────────────────────────────────────────────────
# Plotter
# ─────────────────────────────────────────────────────────────────────────────

class Phase2Plotter:
    """Generates diagnostic visualizations.

    Every plot is saved as a PNG in ``output_dir`` and then shown.
    """

    def __init__(self, output_dir: str):
        self.output_dir = Path(output_dir)
        # parents=True so a nested output directory can be created in one go.
        self.output_dir.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _safe_filename(name) -> str:
        """Replace characters that are invalid in file names (e.g. '/') with '_'."""
        return "".join('_' if ch in '/\\:*?"<>|' else ch for ch in str(name))

    @staticmethod
    def _reference_point(X: np.ndarray, feature_names: List[str],
                         search_space: "SearchSpace") -> np.ndarray:
        """Point at which the features not being plotted are held fixed.

        Continuous features use their mean. Binary features are snapped to the
        nearest level, because a fractional mean such as 0.6 is not a setting
        that can be run.
        NOTE(review): assumes binary columns of X are encoded as 0/1.
        """
        reference = np.asarray(X, dtype=float).mean(axis=0)
        names = list(feature_names)
        for col in search_space.binary_features:
            if col in names:
                idx = names.index(col)
                reference[idx] = np.round(reference[idx])
        return reference

    def _save_and_show(self, fig, filename: str) -> None:
        """Save ``fig`` into the output directory, display it, then release it."""
        fig.tight_layout()
        fig.savefig(self.output_dir / filename, dpi=150, bbox_inches='tight')
        plt.show()
        # Without an interactive front end nothing else will close the figure,
        # so repeated calls would accumulate open figures. In interactive mode
        # the figure is left open so the window/widget stays usable.
        if not plt.isinteractive():
            plt.close(fig)

    def plot_convergence(self, tracker: "ConvergenceTracker", config: "Phase2Config"):
        """Best response per iteration; does nothing when the history is empty."""
        history_df = tracker.get_history_df()
        if len(history_df) < 1:
            return

        fig, ax = plt.subplots(figsize=(8, 5))
        ax.plot(history_df['iteration'], history_df['best_response'], 'bo-', linewidth=2, markersize=8)
        ax.set_xlabel('Iteration')
        ax.set_ylabel('Best Response')
        ax.set_title(f'Optimization Progress ({"Max" if config.maximize_response else "Min"})')
        ax.grid(True, alpha=0.3)

        self._save_and_show(fig, 'convergence_plot.png')

    def plot_gp_1d(self, surrogate: "SurrogateModel", X: np.ndarray, y: np.ndarray,
                   feature_names: List[str], feature_idx: int, search_space: "SearchSpace"):
        """1D slice through GP (other features at their reference value).

        Does nothing when the selected feature is not continuous.
        """
        feature_name = feature_names[feature_idx]
        if feature_name not in search_space.continuous_features:
            return

        n_test = 100
        bounds = search_space.bounds[feature_name]
        test_values = np.linspace(bounds['min'], bounds['max'], n_test)

        X_test = np.tile(self._reference_point(X, feature_names, search_space), (n_test, 1))
        X_test[:, feature_idx] = test_values

        mu, sigma = surrogate.predict(X_test, return_std=True)

        fig, ax = plt.subplots(figsize=(8, 5))
        # +/- 2 sigma band, labelled as an (approximate) 95% interval
        ax.fill_between(test_values, mu - 2*sigma, mu + 2*sigma, alpha=0.3, label='95% CI')
        ax.plot(test_values, mu, 'b-', linewidth=2, label='GP Mean')
        ax.scatter(X[:, feature_idx], y, c='red', s=50, zorder=5, edgecolors='black', label='Observed')
        ax.set_xlabel(feature_name)
        ax.set_ylabel('Response')
        ax.legend()
        ax.grid(True, alpha=0.3)

        self._save_and_show(fig, f'gp_1d_{self._safe_filename(feature_name)}.png')

    def plot_gp_2d(self, surrogate: "SurrogateModel", X: np.ndarray, y: np.ndarray,
                   feature_names: List[str], idx1: int, idx2: int, search_space: "SearchSpace"):
        """2D contour of GP surface (mean and uncertainty side by side).

        Does nothing unless both selected features are continuous.
        """
        feat_1, feat_2 = feature_names[idx1], feature_names[idx2]

        if feat_1 not in search_space.continuous_features or \
           feat_2 not in search_space.continuous_features:
            return

        n_grid = 50
        x1_range = np.linspace(search_space.bounds[feat_1]['min'],
                               search_space.bounds[feat_1]['max'], n_grid)
        x2_range = np.linspace(search_space.bounds[feat_2]['min'],
                               search_space.bounds[feat_2]['max'], n_grid)
        X1, X2 = np.meshgrid(x1_range, x2_range)

        X_test = np.tile(self._reference_point(X, feature_names, search_space),
                         (n_grid * n_grid, 1))
        X_test[:, idx1] = X1.ravel()
        X_test[:, idx2] = X2.ravel()

        mu, sigma = surrogate.predict(X_test, return_std=True)
        Mu = mu.reshape(n_grid, n_grid)
        Sigma = sigma.reshape(n_grid, n_grid)

        fig, axes = plt.subplots(1, 2, figsize=(12, 5))
        panels = (
            (axes[0], Mu, 'viridis', 'red', 'GP Mean'),
            (axes[1], Sigma, 'YlOrRd', 'blue', 'GP Uncertainty'),
        )
        for ax, surface, cmap, marker_color, title in panels:
            contours = ax.contourf(X1, X2, surface, levels=20, cmap=cmap)
            ax.scatter(X[:, idx1], X[:, idx2], c=marker_color, s=50, edgecolors='white')
            ax.set_xlabel(feat_1)
            ax.set_ylabel(feat_2)
            ax.set_title(title)
            fig.colorbar(contours, ax=ax)

        self._save_and_show(
            fig, f'gp_2d_{self._safe_filename(feat_1)}_{self._safe_filename(feat_2)}.png')

In [None]:
# ─────────────────────────────────────────────────────────────────────────────
# ExperimentSuggester
# ─────────────────────────────────────────────────────────────────────────────

class ExperimentSuggester:
    """Formats and exports experiment suggestions."""

    def __init__(self, search_space: "SearchSpace", config: "Phase2Config"):
        self.search_space = search_space
        self.config = config

    def format_suggestions(self, suggested_X: np.ndarray, ei_values: np.ndarray,
                           feature_names: List[str], surrogate: "SurrogateModel",
                           binary_mappings: Dict) -> pd.DataFrame:
        """Build a ranked table of suggestions.

        Args:
            suggested_X: Suggested points, one row per suggestion (a single
                1-D point is accepted too). Columns must follow ``feature_names``.
            ei_values: Acquisition value of each suggestion (array or list).
            feature_names: Column names for ``suggested_X``.
            surrogate: Fitted model used for the predicted response/uncertainty.
            binary_mappings: ``{column: {original_value: 0 or 1}}``; used to
                report binary features in their original values. May be None.

        Returns:
            Table sorted by decreasing acquisition value with a 1-based
            ``rank`` column; an empty DataFrame when there are no suggestions.
        """
        if suggested_X is None or len(suggested_X) == 0:
            return pd.DataFrame()

        suggested_X = np.atleast_2d(np.asarray(suggested_X, dtype=float))
        ei_values = np.asarray(ei_values, dtype=float).ravel()
        binary_mappings = binary_mappings or {}

        mu, sigma = surrogate.predict(suggested_X, return_std=True)

        df = pd.DataFrame(suggested_X, columns=list(feature_names))

        # Reverse binary mappings to original values
        for col in self.search_space.binary_features:
            encoded = df[col].round().astype(int)
            if col in binary_mappings:
                reverse_map = {v: k for k, v in binary_mappings[col].items()}
                df[col] = encoded.map(reverse_map)
            else:
                df[col] = encoded

        # Round continuous features
        for col in self.search_space.continuous_features:
            df[col] = df[col].round(4)

        df['predicted_response'] = np.round(mu, 4)
        df['uncertainty'] = np.round(sigma, 4)
        df['acquisition_value'] = np.round(ei_values, 6)

        # Stable sort: suggestions whose rounded acquisition values tie keep
        # their incoming order.
        df = df.sort_values('acquisition_value', ascending=False,
                            kind='mergesort').reset_index(drop=True)
        df.insert(0, 'rank', range(1, len(df) + 1))

        return df

    def export_csv(self, suggestions_df: pd.DataFrame, iteration: int) -> str:
        """Write the table to ``<output_dir>/suggestions_iter<iteration>.csv``.

        Returns:
            The path of the written file.
        """
        output_path = Path(self.config.output_dir)
        # parents=True so a nested output directory can be created in one go.
        output_path.mkdir(parents=True, exist_ok=True)
        filename = output_path / f'suggestions_iter{iteration}.csv'
        suggestions_df.to_csv(filename, index=False)
        return str(filename)

In [None]:
# ─────────────────────────────────────────────────────────────────────────────
# Results Container
# ─────────────────────────────────────────────────────────────────────────────

@dataclass
class Phase2Results:
    """Container for pipeline outputs.

    NOTE(review): the field descriptions below are inferred from the field
    names and types; the code that fills this container is outside this cell,
    so confirm them against Phase2Pipeline.run.
    """
    iteration: int  # presumably the 1-based iteration number of this run
    suggestions: pd.DataFrame  # presumably the ranked suggestion table
    current_best: float  # presumably the best response in the current data
    previous_best: Optional[float]  # presumably the best response from the loaded checkpoint; None on a first run
    improved: bool  # presumably whether this iteration counted as an improvement
    should_continue: bool  # presumably False once the stopping rule has triggered
    no_improvement_count: int  # presumably the tracker's count of consecutive iterations without improvement
    model_metrics: Dict[str, float]  # presumably the surrogate's fit metrics (e.g. 'r2', 'rmse')
    suggestions_file: str  # presumably the path of the exported suggestions CSV


In [None]:
# ─────────────────────────────────────────────────────────────────────────────
# Main Pipeline
# ─────────────────────────────────────────────────────────────────────────────

class Phase2Pipeline:
    """
    Bayesian Optimization pipeline for chemical experiments.
    
    Workflow:
    1. Load experimental data from Excel
    2. Fit GP surrogate model
    3. Optimize EI acquisition to suggest next experiments
    4. Save checkpoint for next iteration
    5. Repeat after lab runs suggested experiments
    """
    
    def __init__(self, config: "Phase2Config"):
        """Create the pipeline and its helpers.

        Side effect: creates ``config.output_dir`` (with any missing parents).

        Args:
            config: Settings for data loading, optimization and output.
        """
        self.config = config
        self._output_dir = Path(config.output_dir)
        # parents=True so nested output directories do not raise FileNotFoundError.
        self._output_dir.mkdir(parents=True, exist_ok=True)
        
        self._loader = DataLoader()
        self._checkpoint_mgr = Phase2Checkpoint(config.output_dir)
        self._plotter = Phase2Plotter(config.output_dir)
        self._tracker = ConvergenceTracker(config)
        
        self._X = None
        self._y = None
        self._feature_names = []
        self._binary_features = []
        self._continuous_features = []
        self._binary_mappings = {}
        self._search_space = None
        self._surrogate = None
        self._iteration = 0
    
    def run(self, data_file: str) -> "Phase2Results":
        """Run one BO iteration.

        Loads the checkpoint (unless ``config.fresh_start`` is set or none
        exists), reads the data, fits the surrogate, generates and exports
        suggestions, updates the tracker, saves a checkpoint and draws plots.

        Args:
            data_file: Path of the Excel workbook holding the experiments.

        Returns:
            Phase2Results describing this iteration.

        Raises:
            ValueError: If no row has a non-missing response value.
        """
        print("=" * 60)
        print("Phase 2: Bayesian Optimization")
        print("=" * 60)
        
        # Load checkpoint if continuing
        previous_best = None
        if not self.config.fresh_start and self._checkpoint_mgr.exists():
            self._load_checkpoint()
            previous_best = self._tracker.best_response
            print(f"  Loaded checkpoint (iteration {self._iteration})")
        
        self._load_data(data_file)
        self._iteration += 1
        
        print(f"\n  Iteration: {self._iteration}")
        print(f"  Experiments: {len(self._X)}")
        
        # Build search space
        self._search_space = SearchSpace(
            self._X, self._binary_features, self._continuous_features,
            self.config.bounds_margin
        )
        
        # Fit surrogate
        self._fit_surrogate()
        
        # Find current best
        if self.config.maximize_response:
            current_best = self._y.max()
        else:
            current_best = self._y.min()
        print(f"  Current best: {current_best:.4f}")
        
        # Generate suggestions
        suggestions_df = self._generate_suggestions(current_best)
        
        # Export
        suggester = ExperimentSuggester(self._search_space, self.config)
        suggestions_file = suggester.export_csv(suggestions_df, self._iteration)
        print(f"  Suggestions saved: {suggestions_file}")
        
        # Update tracker
        self._tracker.record_iteration(self._iteration, current_best, len(self._X), suggestions_df)
        
        # Check improvement against the best value from the previous checkpoint
        improved = self._is_improvement(current_best, previous_best)
        
        # Save checkpoint
        self._save_checkpoint(current_best)
        
        # Generate plots
        self._generate_plots()
        
        # Print status
        self._print_status(current_best, previous_best, improved)
        
        return Phase2Results(
            iteration=self._iteration,
            suggestions=suggestions_df,
            current_best=current_best,
            previous_best=previous_best,
            improved=improved,
            should_continue=not self._tracker.should_stop,
            no_improvement_count=self._tracker.no_improvement_count,
            model_metrics=self._surrogate.score(self._X.values, self._y.values),
            suggestions_file=suggestions_file
        )
    
    def _is_improvement(self, current_best: float, previous_best: Optional[float]) -> bool:
        """Decide whether ``current_best`` beats ``previous_best`` by the relative margin.

        The required margin is ``abs(previous_best) * config.min_improvement``.
        Using the absolute value keeps the threshold on the correct side of a
        negative baseline: multiplying a negative value by ``1 + min_improvement``
        would lower the bar and report a worse result as an improvement.

        NOTE(review): ConvergenceTracker maintains its own no-improvement
        counter; confirm it applies the same criterion.

        Args:
            current_best: Best response observed in the current data.
            previous_best: Best response from the previous iteration, or None.

        Returns:
            True when the margin is exceeded in the optimization direction;
            False otherwise, and always False when ``previous_best`` is None.
        """
        if previous_best is None:
            return False
        margin = abs(previous_best) * self.config.min_improvement
        if self.config.maximize_response:
            return bool(current_best > previous_best + margin)
        return bool(current_best < previous_best - margin)
    
    def _load_data(self, data_file: str):
        """Read the workbook and populate features, mappings, ``_X`` and ``_y``.

        Rows whose response is missing are dropped.

        Raises:
            ValueError: If no row with a non-missing response remains.
        """
        print(f"\n  Loading: {data_file}")
        
        df = self._loader.load_excel(data_file, self.config.sheet_name, self.config.header_row)
        
        if self.config.split_keyword:
            df, _ = self._loader.split_at_keyword(df, self.config.split_keyword)
        
        df = self._loader.clean_data(df)
        
        feature_cols = self._loader.get_feature_columns(
            df, self.config.stop_feature, self.config.response_column
        )
        
        self._binary_features, self._continuous_features, self._binary_mappings = \
            self._loader.classify_features(df, feature_cols)
        
        # Column order used everywhere downstream: continuous first, then binary.
        self._feature_names = self._continuous_features + self._binary_features
        self._X = df[self._feature_names].copy()
        self._y = df[self.config.response_column].copy()
        
        # Drop missing response
        valid = ~self._y.isnull()
        self._X = self._X[valid].reset_index(drop=True)
        self._y = self._y[valid].reset_index(drop=True)
        
        # Fail early with a clear message instead of a NaN "best" value or an
        # obscure model-fitting error further down the pipeline.
        if len(self._y) == 0:
            raise ValueError(
                f"No rows with a non-missing '{self.config.response_column}' "
                f"value found in {data_file}"
            )
        
        print(f"  Features: {len(self._feature_names)} "
              f"({len(self._continuous_features)} cont, {len(self._binary_features)} bin)")
    
    def _fit_surrogate(self):
        """Fit a new surrogate on the loaded data and print its training-set R² and RMSE."""
        print("\n  Fitting GP model...")
        self._surrogate = SurrogateModel()
        self._surrogate.fit(self._X.values, self._y.values)
        metrics = self._surrogate.score(self._X.values, self._y.values)
        print(f"  R²: {metrics['r2']:.4f}, RMSE: {metrics['rmse']:.4f}")
    
    def _generate_suggestions(self, current_best: float) -> pd.DataFrame:
        """Optimize the acquisition function and return the formatted suggestions.

        Args:
            current_best: Incumbent best response handed to the optimizer.

        Returns:
            DataFrame produced by ``ExperimentSuggester.format_suggestions``.
        """
        print(f"\n  Generating {self.config.n_suggestions} suggestions ({self.config.selection_strategy})...")
        
        optimizer = AcquisitionOptimizer(self._surrogate, self._search_space, self.config)
        optimizer.set_best_y(current_best)
        
        suggested_X, ei_values = optimizer.find_next_points(
            self.config.n_suggestions, self._X.values
        )
        
        suggester = ExperimentSuggester(self._search_space, self.config)
        return suggester.format_suggestions(
            suggested_X, ei_values, self._feature_names,
            self._surrogate, self._binary_mappings
        )
    
    def _save_checkpoint(self, current_best: float):
        """Persist iteration number, best response, tracker state and feature metadata."""
        state = {
            'iteration': self._iteration,
            'timestamp': datetime.now().isoformat(),
            'n_experiments': len(self._X),
            'best_response': current_best,
            'no_improvement_count': self._tracker.no_improvement_count,
            'should_stop': self._tracker.should_stop,
            'features': self._feature_names,
            'binary_features': self._binary_features,
            'continuous_features': self._continuous_features,
            'binary_mappings': self._binary_mappings,
            'history': self._tracker.history
        }
        self._checkpoint_mgr.save(state)
    
    def _load_checkpoint(self):
        """Restore the iteration counter and tracker history from the checkpoint, if any."""
        state = self._checkpoint_mgr.load()
        if state:
            self._iteration = state['iteration']
            self._tracker.load_history(state.get('history', []))
    
    def _generate_plots(self):
        """Draw the convergence plot plus 1D/2D surrogate plots for the leading continuous features."""
        print("\n  Generating plots...")
        
        if len(self._tracker.history) > 0:
            self._plotter.plot_convergence(self._tracker, self.config)
        
        # 1D plots for first 3 continuous features
        for feat in self._continuous_features[:3]:
            feat_idx = self._feature_names.index(feat)
            self._plotter.plot_gp_1d(
                self._surrogate, self._X.values, self._y.values,
                self._feature_names, feat_idx, self._search_space
            )
        
        # 2D plot if 2+ continuous features
        if len(self._continuous_features) >= 2:
            idx1 = self._feature_names.index(self._continuous_features[0])
            idx2 = self._feature_names.index(self._continuous_features[1])
            self._plotter.plot_gp_2d(
                self._surrogate, self._X.values, self._y.values,
                self._feature_names, idx1, idx2, self._search_space
            )
    
    def _print_status(self, current_best: float, previous_best: Optional[float], improved: bool):
        """Print a summary of this iteration and whether the stopping criterion is met."""
        print("\n" + "-" * 60)
        print("Status")
        print("-" * 60)
        
        if previous_best is not None:
            arrow = "↑" if improved else "→"
            print(f"  Best: {previous_best:.4f} {arrow} {current_best:.4f}")
            print(f"  Improved: {'Yes' if improved else 'No'}")
        else:
            print(f"  Best: {current_best:.4f} (first iteration)")
        
        print(f"  No improvement: {self._tracker.no_improvement_count}/{self.config.patience}")
        
        if self._tracker.should_stop:
            print("\n  ⚠ STOPPING CRITERION MET")
        else:
            print("\n  ✓ Continue with suggested experiments")
        print("-" * 60)
    
    def reset(self):
        """Clear checkpoint and start fresh."""
        self._checkpoint_mgr.clear()
        self._tracker = ConvergenceTracker(self.config)
        self._iteration = 0
        print("  Pipeline reset.")
    
    @property
    def iteration(self) -> int:
        """Current iteration counter."""
        return self._iteration
    
    @property
    def history(self) -> pd.DataFrame:
        """Tracker history as a DataFrame."""
        return self._tracker.get_history_df()
    
    @property
    def bounds(self) -> pd.DataFrame:
        """Search-space bounds, or an empty DataFrame before the first run."""
        # Explicit None check: do not depend on the search-space object's truthiness.
        if self._search_space is not None:
            return self._search_space.get_bounds_df()
        return pd.DataFrame()


In [None]:
# ─────────────────────────────────────────────────────────────────────────────
# Convenience Function
# ─────────────────────────────────────────────────────────────────────────────

def run_bo_iteration(data_file: str, response_column: str, 
                     n_suggestions: int = 5, maximize: bool = True,
                     strategy: str = 'diverse', output_dir: str = 'bo_results',
                     **kwargs) -> Phase2Results:
    """Build a config and pipeline, then run exactly one BO iteration.

    Args:
        data_file: Path of the workbook passed to ``Phase2Pipeline.run``.
        response_column: Name of the response column.
        n_suggestions: Number of experiments to suggest.
        maximize: Forwarded as ``maximize_response``.
        strategy: Forwarded as ``selection_strategy``.
        output_dir: Directory for the pipeline outputs.
        **kwargs: Additional ``Phase2Config`` fields.

    Returns:
        The Phase2Results of the iteration.
    """
    named_settings = {
        'response_column': response_column,
        'n_suggestions': n_suggestions,
        'maximize_response': maximize,
        'selection_strategy': strategy,
        'output_dir': output_dir,
    }
    # Both mappings are unpacked into the call, so a field repeated in kwargs
    # still raises TypeError rather than silently overriding a named argument.
    pipeline_config = Phase2Config(**named_settings, **kwargs)
    pipeline = Phase2Pipeline(pipeline_config)
    return pipeline.run(data_file)

In [None]:
# ─────────────────────────────────────────────────────────────────────────────
# Example Usage
# ─────────────────────────────────────────────────────────────────────────────

# Settings for this run; fields not listed keep their Phase2Config defaults.
config = Phase2Config(
    response_column='Yield',
    sheet_name='data',
    header_row=5,
    stop_feature='Batch ID',
    maximize_response=True,
    n_suggestions=5,
    selection_strategy='diverse',
    # Shown as the denominator of the "No improvement" counter in the status
    # output; presumably the stop threshold — confirm in ConvergenceTracker.
    patience=3,
    output_dir='bo_phase2_output'
)

# Constructing the pipeline creates the output directory as a side effect.
pipeline = Phase2Pipeline(config)
# One iteration: load data, fit the surrogate, suggest experiments, export the
# CSV, save a checkpoint and generate plots.
results = pipeline.run('experiments.xlsx')

# Show the suggestion table and whether another iteration is recommended.
print("\nSuggested Experiments:")
print(results.suggestions.to_string(index=False))
print(f"\nContinue: {results.should_continue}")