In [1]:
%matplotlib ipympl
%load_ext autoreload
%autoreload 2
import os
import sys
import ipywidgets as widgets
import plotly.express as px
import plotly.graph_objects as go
from IPython.display import display, Markdown, HTML
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

sys.path.append(os.path.dirname(os.getcwd()))
from api_calls import get_ids_in_batch, get_sample_description, get_all_eqe as get_all_ijp
import batch_selection
import access_token

# Base address of the NOMAD Oasis deployment and the root of its v1 API.
url_base ="https://nomad-hzb-se.de"
url = f"{url_base}/nomad-oasis/api/v1"
# `url` and `token` are module-level values that get_ijp_data passes to the
# api_calls helpers. access_token is a project-local module; presumably
# get_token authenticates against `url` - TODO confirm how credentials are obtained.
token = access_token.get_token(url)
# NOTE(review): side effect at import/run time; what is logged and where is
# defined in access_token, not visible here.
access_token.log_notebook_usage()

In [2]:
def get_ijp_data(try_sample_ids, variation):
    """Fetch inkjet-printing (IJP) entries and JV results and flatten them into one table.

    One row is built per IJP entry. A row is kept only if
    (a) its first layer, when present, has "absorber" in its layer type, and
    (b) at least one JV measurement exists for the sample.
    JV quantities are stored as per-sample lists (one value per JV curve,
    accumulated over all JV measurements of the sample).

    Args:
        try_sample_ids: Sized collection of sample ids to query.
        variation: Mapping of sample id -> variation label; ids that are
            missing from the mapping get an empty string.

    Returns:
        pandas.DataFrame with one row per retained IJP entry, or None when
        no IJP data exists or no entry survives the filters above.
    """
    print(f"Fetching data for {len(try_sample_ids)} samples")

    # `get_all_ijp` is the generic entry fetcher (imported under an alias);
    # `eqe_type` selects which entry type is returned.
    all_ijp = get_all_ijp(url, token, try_sample_ids, eqe_type="HySprint_Inkjet_Printing")

    # Without IJP data there is nothing to build, so skip the second request.
    if not all_ijp:
        return None

    all_jv = get_all_ijp(url, token, try_sample_ids, eqe_type="HySprint_JVmeasurement")

    # JV quantities collected from every curve of every JV measurement.
    jv_keys = (
        'efficiency',
        'open_circuit_voltage',
        'fill_factor',
        'short_circuit_current_density',
        'series_resistance',
        'shunt_resistance',
    )

    # Flat dict per retained entry; turned into a DataFrame once at the end so
    # pandas infers each column's dtype from all rows together.
    records = []

    for sample_id, sample_entries in all_ijp.items():
        print(sample_id)
        if len(sample_entries) > 1:
            # The previous `assert "<message>"` was always true and therefore
            # silent. All entries are still processed; the user is just told.
            # print() is used because this notebook silences the warnings module.
            print(f"Warning: Multiple entries found for sample_id: {sample_id}")

        for entry in sample_entries:
            # Extract the data part from the entry (index 0 is data, index 1 is metadata)
            ijp_data = entry[0]

            # `or {}` also covers sections that are present but explicitly null.
            annealing = ijp_data.get('annealing') or {}
            atmosphere = ijp_data.get('atmosphere') or {}
            properties = ijp_data.get('properties') or {}
            print_head = properties.get('print_head_properties') or {}
            quenching = ijp_data.get('quenching') or {}

            row_data = {
                # Basic sample information
                'sample_id': sample_id,
                'variation': variation.get(sample_id, ''),
                'name': ijp_data.get('name', ''),
                'datetime': ijp_data.get('datetime', ''),
                'description': ijp_data.get('description', ''),
                'location': ijp_data.get('location', ''),

                # Annealing information
                'annealing_temperature': annealing.get('temperature', None),
                'annealing_time': annealing.get('time', None),
                'annealing_atmosphere': annealing.get('atmosphere', ''),

                # Atmosphere information
                'relative_humidity': atmosphere.get('relative_humidity', None),

                # Printing properties
                'cartridge_pressure': properties.get('cartridge_pressure', None),
                'drop_density': properties.get('drop_density', None),
                'printed_area': properties.get('printed_area', None),
                'substrate_temperature': properties.get('substrate_temperature', None),

                # Print head properties
                'print_head_name': print_head.get('print_head_name', ''),
                'print_head_temperature': print_head.get('print_head_temperature', None),
                'num_active_nozzles': print_head.get('number_of_active_print_nozzles', None),
                'nozzle_drop_frequency': print_head.get('print_nozzle_drop_frequency', None),
                'nozzle_drop_volume': print_head.get('print_nozzle_drop_volume', None),
            }

            # Quenching columns are only added when the section is non-empty.
            if quenching:
                vacuum_props = quenching.get('vacuum_properties') or {}
                if vacuum_props:
                    row_data.update({
                        'vacuum_pressure': vacuum_props.get('pressure', None),
                        'vacuum_start_time': vacuum_props.get('start_time', None),
                        'vacuum_duration': vacuum_props.get('duration', None),
                        'vacuum_temperature': vacuum_props.get('temperature', None),
                    })

                gas_props = quenching.get('gas_quenching_properties') or {}
                if gas_props:
                    row_data.update({
                        'quenching_gas': gas_props.get('gas', ''),
                        'quenching_duration': gas_props.get('duration', None),
                        'quenching_pressure': gas_props.get('pressure', None),
                    })

                row_data.update({
                    'quenching_comment': quenching.get('comment', ''),
                    # Last dotted component of the schema definition path.
                    'quenching_type': (quenching.get('m_def') or '').split('.')[-1],
                })

            # Only the first layer is inspected; entries whose first layer is
            # not an absorber are dropped entirely.
            layers = ijp_data.get('layer') or []
            if len(layers) > 0:
                layer = layers[0]
                if "absorber" not in (layer.get('layer_type') or '').lower():
                    continue
                row_data.update({
                    'layer_material': layer.get('layer_material', ''),
                    'layer_material_name': layer.get('layer_material_name', ''),
                    'layer_type': layer.get('layer_type', ''),
                })

            # Only the first solution is inspected.
            solutions = ijp_data.get('solution') or []
            if len(solutions) > 0:
                solution_details = solutions[0].get('solution_details') or {}

                # The chemical name becomes part of the column name; fall back
                # to a positional name when the chemical or its name is missing.
                for i, solvent in enumerate(solution_details.get('solvent') or []):
                    solvent_name = (solvent.get("chemical_2") or {}).get("name") or f'solvent{i+1}'
                    row_data.update({
                        f'solvent_amount_{solvent_name}': solvent.get('amount_relative', None),
                        f'solvent_volume_{solvent_name}': solvent.get('chemical_volume', None),
                    })

                for i, solute in enumerate(solution_details.get('solute') or []):
                    if 'name' in solute:  # Only process if name exists
                        solute_name = (solute.get("chemical_2") or {}).get("name") or f'solute{i+1}'
                        row_data.update({
                            f'solute_concentration_{solute_name}': solute.get('concentration_mol', None),
                        })

            # Entries without any JV measurement are not emitted.
            jv_entries = all_jv.get(sample_id)
            if not jv_entries:
                continue

            for jv_entry in jv_entries:
                jv_curves = jv_entry[0]["jv_curve"]  # index 0 is the data part
                for key in jv_keys:
                    row_data.setdefault(key, []).extend(curve[key] for curve in jv_curves)

            records.append(row_data)

    if not records:
        return None
    return pd.DataFrame(records)

In [3]:
# Unicode WARNING SIGN, used as a prefix for user-facing error messages.
warning_sign = "\u26A0"

# Output widgets for different sections
out = widgets.Output()
dynamic_content = widgets.Output()
# Results area is capped at 1000px and scrolls beyond that.
results_content = widgets.Output(layout={
    'max_height': '1000px',
    'overflow': 'scroll',
})

# Global variables
# `data` is read by create_parameter_selection_widgets (None means "nothing
# loaded yet"). The remaining names are shared notebook state; where they are
# written is not visible in this part of the notebook.
data = None
original_data = None
optimization_history = []
current_optimizer = None
parameter_widgets = {}
target_widget = None

# Bayesian optimization imports (add these to your imports in cell 1)
# The optimisation stack is optional: BAYESIAN_AVAILABLE records whether the
# imports succeeded. When they fail, none of the names imported below exist,
# so any code using them must tolerate their absence.
try:
    from skopt import gp_minimize
    from skopt.space import Real, Integer, Categorical
    from skopt.utils import use_named_args
    from skopt.acquisition import gaussian_ei
    from sklearn.preprocessing import StandardScaler
    from sklearn.gaussian_process import GaussianProcessRegressor
    from sklearn.gaussian_process.kernels import RBF, Matern
    import warnings
    # NOTE(review): this silences every warning category for the whole kernel,
    # and only when the optional imports succeed.
    warnings.filterwarnings('ignore')
    BAYESIAN_AVAILABLE = True
except ImportError:
    BAYESIAN_AVAILABLE = False

class BayesianOptimizationAnalyzer:
    """Prepares tabular experiment data for Bayesian optimization.

    Responsibilities: drop incomplete rows, collapse list-valued JV columns to
    their mean, rate whether the sample count is adequate for the number of
    parameters, build a scikit-optimize search space, and estimate an
    iteration budget.
    """

    # JV result columns that may hold a list of values per row (one per curve)
    # and are reduced to their mean before analysis.
    JV_LIST_PARAMS = (
        'efficiency', 'open_circuit_voltage', 'fill_factor',
        'short_circuit_current_density', 'series_resistance', 'shunt_resistance',
    )

    def __init__(self):
        self.data = None
        self.clean_data = None
        self.parameter_columns = []
        self.target_column = None
        # StandardScaler comes from an optional import; when that import
        # failed the name does not exist. The data-preparation methods do not
        # need it, so the analyzer stays usable without a scaler.
        try:
            self.scaler = StandardScaler()
        except NameError:
            self.scaler = None
        self.optimization_space = []
        self.adequacy_status = None
        self.data_metrics = {}

    @staticmethod
    def _holds_lists(series):
        """Return True when at least one value of `series` is a list."""
        return bool(series.map(lambda value: isinstance(value, list)).any())

    @staticmethod
    def _mean_of_list(value):
        """Collapse a list to the mean of its non-None items (NaN if none); pass other values through."""
        if not isinstance(value, list):
            return value
        numeric = [item for item in value if item is not None]
        return float(np.mean(numeric)) if numeric else np.nan

    def validate_dataset(self, df, target_column, parameter_columns):
        """Drop rows with a missing target or parameter value.

        Args:
            df: Input DataFrame (not modified).
            target_column: Name of the target column.
            parameter_columns: List of parameter column names.

        Returns:
            Tuple ``(df_clean, metrics)`` where ``metrics`` has the keys
            ``initial_samples``, ``valid_samples``, ``removal_rate`` (fraction
            of rows dropped) and ``samples_per_parameter``.
        """
        initial_samples = len(df)

        # Remove rows with missing target values
        df_clean = df.dropna(subset=[target_column])

        # Remove rows with missing parameter values
        df_clean = df_clean.dropna(subset=parameter_columns)

        valid_samples = len(df_clean)
        removal_rate = (initial_samples - valid_samples) / initial_samples if initial_samples > 0 else 0

        return df_clean, {
            'initial_samples': initial_samples,
            'valid_samples': valid_samples,
            'removal_rate': removal_rate,
            'samples_per_parameter': valid_samples / len(parameter_columns) if parameter_columns else 0
        }

    def assess_sample_adequacy(self, n_samples, n_parameters):
        """Rate the samples-per-parameter ratio.

        Returns:
            A message of the form ``"<LEVEL>: <text>"`` where LEVEL is one of
            ERROR, CRITICAL (<5), WARNING (<10), CAUTION (<20),
            ACCEPTABLE (<50) or GOOD.
        """
        if n_parameters == 0:
            return "ERROR: No parameters selected"

        ratio = n_samples / n_parameters

        if ratio < 5:
            return "CRITICAL: Insufficient data - optimization not recommended"
        elif ratio < 10:
            return "WARNING: Very limited data - high risk of overfitting"
        elif ratio < 20:
            return "CAUTION: Limited data - use simple models only"
        elif ratio < 50:
            return "ACCEPTABLE: Adequate for basic optimization"
        else:
            return "GOOD: Sufficient data for robust optimization"

    def prepare_optimization_data(self, df, target_col, param_cols):
        """Complete preprocessing pipeline.

        List-valued JV columns are reduced to their mean *before* rows are
        validated, so rows whose list is empty count as missing and both the
        metrics and the adequacy rating describe the data that is returned.

        Args:
            df: Input DataFrame (not modified).
            target_col: Name of the target column.
            param_cols: Parameter column names.

        Returns:
            Tuple ``(df_clean, metrics, adequacy)``. ``df_clean`` is None and
            ``adequacy`` is an ``"ERROR: ..."`` message when no row survives.
            Otherwise ``metrics`` additionally contains
            ``final_valid_samples`` and ``final_samples_per_parameter``.
        """
        param_cols = list(param_cols)

        # Work on a copy so neither the caller's frame nor a dropna() result
        # is written to.
        df_work = df.copy()
        for column in param_cols + [target_col]:
            if column in self.JV_LIST_PARAMS and self._holds_lists(df_work[column]):
                df_work[column] = df_work[column].apply(self._mean_of_list)

        df_clean, metrics = self.validate_dataset(df_work, target_col, param_cols)

        if metrics['valid_samples'] == 0:
            return None, metrics, "ERROR: No valid samples after cleaning"

        adequacy = self.assess_sample_adequacy(metrics['valid_samples'], len(param_cols))

        # Kept for callers that read the "final_*" keys.
        metrics['final_valid_samples'] = len(df_clean)
        metrics['final_samples_per_parameter'] = len(df_clean) / len(param_cols) if param_cols else 0

        return df_clean, metrics, adequacy

    def create_optimization_space(self, df, param_cols):
        """Create a scikit-optimize search space from the observed data.

        Numeric columns (any integer or float width, booleans excluded) become
        ``Integer``/``Real`` dimensions spanning the observed range padded by
        10%; every other column becomes a ``Categorical`` over its non-null
        unique values.

        Returns:
            List of skopt dimensions, one per entry of ``param_cols``.
        """
        space = []

        for param in param_cols:
            column = df[param]
            is_numeric = (pd.api.types.is_numeric_dtype(column)
                          and not pd.api.types.is_bool_dtype(column))

            if is_numeric:
                min_val = float(column.min())
                max_val = float(column.max())

                # Add some padding to avoid boundary issues
                range_pad = (max_val - min_val) * 0.1
                if range_pad == 0:
                    # Constant column: skopt rejects low == high, so open a
                    # small window around the single observed value.
                    range_pad = abs(min_val) * 0.1 or 1.0
                min_val -= range_pad
                max_val += range_pad

                if pd.api.types.is_integer_dtype(column):
                    space.append(Integer(int(min_val), int(max_val), name=param))
                else:
                    space.append(Real(min_val, max_val, name=param))
            else:
                # Categorical parameter; NaN is not a usable category.
                unique_values = column.dropna().unique().tolist()
                space.append(Categorical(unique_values, name=param))

        return space

    def estimate_convergence_budget(self, n_parameters, complexity_factor=1.0):
        """Estimate the iterations needed for convergence.

        The base budget is 5/10/5 iterations per parameter for the
        exploration, exploitation and buffer phases, scaled by
        ``complexity_factor``.

        Returns:
            Dict with ``minimum_budget`` (50%), ``recommended_budget`` (100%),
            ``maximum_useful`` (150%) and the unscaled ``phase_breakdown``.
        """
        base_iterations = {
            'exploration_phase': 5 * n_parameters,
            'exploitation_phase': 10 * n_parameters,
            'convergence_buffer': 5 * n_parameters
        }

        total_estimate = sum(base_iterations.values()) * complexity_factor

        return {
            'minimum_budget': int(total_estimate * 0.5),
            'recommended_budget': int(total_estimate),
            'maximum_useful': int(total_estimate * 1.5),
            'phase_breakdown': base_iterations
        }


def create_parameter_selection_widgets():
    """Creates dynamic parameter selection interface"""
    if data is None:
        return widgets.HTML("No data loaded. Please load data first.")
    
    # Get numeric and categorical columns
    numeric_cols = data.select_dtypes(include=[np.number]).columns.tolist()
    categorical_cols = data.select_dtypes(include=['object']).columns.tolist()
    
    # JV parameters (these can be lists but we'll take means)
    jv_params = ['efficiency', 'open_circuit_voltage', 'fill_factor', 
                'short_circuit_current_density', 'series_resistance', 'shunt_resistance']
    
    # All available parameters
    all_params = numeric_cols + categorical_cols + [p for p in jv_params if p in data.columns]
    
    # Remove non-parameter columns
    excluded_cols = ['sample_id', 'name', 'datetime', 'description', 'location', 'variation']
    available_params = [col for col in all_params if col not in excluded_cols]
    
    # Create parameter variation analysis
    def analyze_parameter_variations(data, available_params):
        """Summarise how much each candidate parameter varies in `data`.

        For every name in `available_params` that is a column of `data`, the
        result holds the number of distinct non-null values, the value range,
        the standard deviation and a coarse type label. JV columns that store
        a list per row are summarised via the per-row mean. Range and standard
        deviation are 0 for columns that are neither int64 nor float64.

        Returns:
            Dict mapping parameter name -> dict with the keys
            ``unique_count``, ``range``, ``std_dev`` and ``data_type``.
        """
        param_variations = {}

        for param in available_params:
            if param not in data.columns:
                continue

            column = data[param]
            is_plain_numeric = column.dtype in ['int64', 'float64']

            # A JV column counts as list-valued when its first row is a list.
            # An empty frame has no first row and falls through to the generic
            # branch, so the statistics are always defined for this parameter.
            holds_lists = (
                param in jv_params
                and not data.empty
                and isinstance(column.iloc[0], list)
            )

            if holds_lists:
                # For list parameters, calculate variation based on means
                means = column.apply(lambda x: np.mean(x) if isinstance(x, list) and len(x) > 0 else np.nan)
                all_missing = means.isna().all()
                unique_count = len(means.dropna().unique())
                param_range = 0 if all_missing else means.max() - means.min()
                std_dev = 0 if all_missing else means.std()
            else:
                unique_count = len(column.dropna().unique())
                param_range = column.max() - column.min() if is_plain_numeric else 0
                std_dev = column.std() if is_plain_numeric else 0

            param_variations[param] = {
                'unique_count': unique_count,
                'range': param_range,
                'std_dev': std_dev,
                'data_type': 'Continuous' if is_plain_numeric or param in jv_params else 'Categorical'
            }

        return param_variations
    
    # Analyze parameter variations
    param_variations = analyze_parameter_variations(data, available_params)
    
    # Filter parameters with 4+ variations and sort by variation count
    varied_params = {k: v for k, v in param_variations.items() if v['unique_count'] >= 4}
    sorted_params = sorted(varied_params.items(), key=lambda x: x[1]['unique_count'], reverse=True)
    
    # Create parameter recommendations widget (collapsed by default)
    recommendations_output = widgets.Output()
    
    def create_recommendations_content():
        """Creates the parameter recommendations content"""
        with recommendations_output:
            recommendations_output.clear_output(wait=True)
            
            if not sorted_params:
                display(Markdown("❌ No parameters found with sufficient variation for optimization (need >3 unique values)."))
                return
            
            display(Markdown("### 📊 Parameter Variation Analysis"))
            display(Markdown("Parameters are ranked by the number of unique values (only showing parameters with 4+ unique values):"))
            
            # Create recommendations table
            recommendations_data = []
            for param, info in sorted_params:
                recommendation = "🟢 Excellent" if info['unique_count'] > 10 else \
                               "🟡 Good" if info['unique_count'] > 5 else \
                               "🟠 Fair"  # All displayed parameters have at least 4 unique values
                
                range_info = f"{info['range']:.3f}" if info['data_type'] == 'Continuous' and info['range'] > 0 else "N/A"
                std_info = f"{info['std_dev']:.3f}" if info['data_type'] == 'Continuous' and info['std_dev'] > 0 else "N/A"
                
                recommendations_data.append({
                    'Parameter': param,
                    'Unique Values': info['unique_count'],
                    'Type': info['data_type'],
                    'Range': range_info,
                    'Std Dev': std_info,
                    'Recommendation': recommendation
                })
            
            recommendations_df = pd.DataFrame(recommendations_data)
            display(recommendations_df)
            
            # Add interpretation guide
            display(Markdown("""
            ### 🎯 How to Use This Analysis:
            
            **Recommendation Colors:**
            - 🟢 **Excellent (>10 unique values):** Perfect for optimization, provides fine-grained control
            - 🟡 **Good (6-10 unique values):** Very suitable for optimization
            - 🟠 **Fair (4-5 unique values):** Usable but limited resolution
            
            **Note:** Parameters with ≤3 unique values are excluded as they provide insufficient variation for effective optimization.
            
            **Selection Tips:**
            - **Start with top-ranked parameters** (highest unique values)
            - **Mix continuous and categorical** parameters for comprehensive optimization
            - **Consider parameter importance** in your experimental process
            - **Begin with 2-5 parameters** and expand as you collect more data
            """))
            
            # Show top recommendations
            top_params = [param for param, _ in sorted_params[:5]]
            display(Markdown("### 💡 **Top 5 Recommended Parameters:**"))
            for i, param in enumerate(top_params, 1):
                info = varied_params[param]
                print(f"{i}. {param} ({info['unique_count']} unique values, {info['data_type']})")
    
    # Create toggle button for recommendations
    toggle_button = widgets.Button(
        description='📊 Show Parameter Recommendations',
        button_style='info',
        icon='chart-bar',
        tooltip='Click to see which parameters are best for optimization'
    )
    
    recommendations_visible = [False]  # Use list to make it mutable in nested function
    
    def toggle_recommendations(b):
        """Toggles the recommendations section"""
        if recommendations_visible[0]:
            # Hide recommendations
            recommendations_output.clear_output()
            toggle_button.description = '📊 Show Parameter Recommendations'
            toggle_button.icon = 'chart-bar'
            recommendations_visible[0] = False
        else:
            # Show recommendations
            create_recommendations_content()
            toggle_button.description = '📊 Hide Parameter Recommendations'
            toggle_button.icon = 'eye-slash'
            recommendations_visible[0] = True
    
    toggle_button.on_click(toggle_recommendations)
    
    # Container for parameter widgets
    parameter_container = widgets.VBox()
    
    # Target variable selection
    target_dropdown = widgets.Dropdown(
        options=available_params,
        value='efficiency' if 'efficiency' in available_params else available_params[0],
        description='Target Variable:',
        style={'description_width': '150px'},
        layout={'width': '400px'},
        tooltip='The variable you want to optimize (maximize)'
    )
    
    # Initial parameter selection - prefer parameters with more variation
    initial_params = [param for param, _ in sorted_params if param != target_dropdown.value][:3]
    if len(initial_params) < 3:
        # Fill with any available parameters if not enough varied ones
        remaining_params = [p for p in available_params if p != target_dropdown.value and p not in initial_params]
        initial_params.extend(remaining_params[:3-len(initial_params)])
    
    # Parameter selection widgets
    param_widgets = []
    for i in range(3):  # Start with 3 parameters
        param_widget = widgets.Dropdown(
            options=available_params,
            value=initial_params[i] if i < len(initial_params) else available_params[0],
            description=f'Parameter {i+1}:',
            style={'description_width': '150px'},
            layout={'width': '400px'}
        )
        param_widgets.append(param_widget)
    
    # Buttons to add/remove parameters
    add_param_button = widgets.Button(
        description='Add Parameter',
        button_style='success',
        icon='plus',
        tooltip='Add another parameter to optimize'
    )
    
    remove_param_button = widgets.Button(
        description='Remove Parameter',
        button_style='warning',
        icon='minus',
        tooltip='Remove the last parameter'
    )
    
    # Analysis button
    analyze_button = widgets.Button(
        description='Analyze Data for Optimization',
        button_style='primary',
        icon='chart-line',
        tooltip='Analyze selected parameters and prepare for optimization'
    )
    
    # Output for analysis results
    analysis_output = widgets.Output()
    
    def update_parameter_container():
        """Updates the parameter container with current widgets"""
        param_box = widgets.VBox(param_widgets)
        button_box = widgets.HBox([add_param_button, remove_param_button])
        parameter_container.children = [param_box, button_box]
    
    def on_add_parameter(b):
        """Adds a new parameter widget"""
        if len(param_widgets) < 15:  # Limit to 15 parameters
            new_param = widgets.Dropdown(
                options=available_params,
                value=available_params[0],
                description=f'Parameter {len(param_widgets)+1}:',
                style={'description_width': '150px'},
                layout={'width': '400px'}
            )
            param_widgets.append(new_param)
            update_parameter_container()
    
    def on_remove_parameter(b):
        """Removes the last parameter widget"""
        if len(param_widgets) > 1:  # Keep at least 1 parameter
            param_widgets.pop()
            update_parameter_container()
    
    # Create explanations toggle for analysis section
    analysis_explanations_toggle = widgets.Checkbox(
        value=False,
        description='Show detailed analysis explanations',
        style={'description_width': 'initial'},
        tooltip='Toggle to show/hide detailed explanations in data analysis'
    )
    
    def on_analyze_data(b):
        """Analyzes the selected parameters for optimization"""
        with analysis_output:
            analysis_output.clear_output(wait=True)
            
            # Get selected parameters
            selected_params = [w.value for w in param_widgets]
            target_param = target_dropdown.value
            
            # Check for duplicates
            if len(set(selected_params)) != len(selected_params):
                print(f"{warning_sign} ERROR: Duplicate parameters selected!")
                return
            
            if target_param in selected_params:
                print(f"{warning_sign} ERROR: Target variable cannot be a parameter!")
                return
            
            # Create analyzer
            analyzer = BayesianOptimizationAnalyzer()
            
            # Prepare optimization data
            clean_data, metrics, adequacy = analyzer.prepare_optimization_data(
                data, target_param, selected_params
            )
            
            if clean_data is None:
                print(f"{warning_sign} {adequacy}")
                return
            
            # Display results
            display(Markdown("## 📊 Data Analysis Results"))
            
            # Data quality metrics
            display(Markdown("### Data Quality"))
            display(Markdown(f"Initial samples: {metrics['initial_samples']}"))
            display(Markdown(f"Valid samples after cleaning: {metrics['final_valid_samples']}"))
            display(Markdown(f"Data removal rate: {metrics['removal_rate']:.1%}"))
            display(Markdown(f"Samples per parameter: {metrics['final_samples_per_parameter']:.1f}"))
            
            # Adequacy assessment
            adequacy_color = {
                'GOOD': '🟢',
                'ACCEPTABLE': '🟡', 
                'CAUTION': '🟠',
                'WARNING': '🟠',
                'CRITICAL': '🔴',
                'ERROR': '🔴'
            }
            
            adequacy_key = adequacy.split(':')[0]
            color = adequacy_color.get(adequacy_key, '⚪')
            
            display(Markdown("### 🎯 Data Adequacy Assessment"))
            display(Markdown(f"{color} {adequacy}"))
            
            # Show detailed explanations based on toggle
            if analysis_explanations_toggle.value:
                # Add detailed recommendations based on adequacy
                if 'GOOD' in adequacy:
                    display(Markdown("""
                    ✅ **Excellent data quality!** You can proceed with confidence using any optimization strategy.
                    Recommended approach: Start with 'balanced' strategy for best results.
                    """))
                elif 'ACCEPTABLE' in adequacy:
                    display(Markdown("""
                    ✅ **Good data quality.** Bayesian optimization should work well.
                    Recommended approach: Use 'balanced' strategy and monitor convergence carefully.
                    """))
                elif 'CAUTION' in adequacy:
                    display(Markdown("""
                    ⚠️ **Proceed with caution.** Limited data may affect optimization quality.
                    Recommended approach: Use 'explorative' strategy and smaller batch sizes.
                    """))
                elif 'WARNING' in adequacy:
                    display(Markdown("""
                    ⚠️ **Data quality concerns.** Consider collecting more data before optimization.
                    If proceeding: Use 'explorative' strategy, small batches, and validate results carefully.
                    """))
            
            # Convergence budget estimation
            if metrics['final_valid_samples'] > 0:
                budget = analyzer.estimate_convergence_budget(len(selected_params))
                display(Markdown("### 📈 Estimated Optimization Budget"))
                
                if analysis_explanations_toggle.value:
                    budget_explanation = f"""
                    Based on {len(selected_params)} parameters and {metrics['final_valid_samples']} samples:
                    
                    - **Exploration Phase:** {budget['phase_breakdown']['exploration_phase']} iterations
                      - Initial parameter space mapping
                      - Identify promising regions
                      
                    - **Exploitation Phase:** {budget['phase_breakdown']['exploitation_phase']} iterations  
                      - Focus on best regions
                      - Fine-tune optimal parameters
                      
                    - **Convergence Buffer:** {budget['phase_breakdown']['convergence_buffer']} iterations
                      - Ensure robust convergence
                      - Validation experiments
                    """
                    
                    display(Markdown(budget_explanation))
                
                display(Markdown(f"💡 **Recommended total budget:** {budget['recommended_budget']} iterations"))
                display(Markdown(f"⚡ **Minimum effective budget:** {budget['minimum_budget']} iterations"))
                display(Markdown(f"🔝 **Maximum useful budget:** {budget['maximum_useful']} iterations"))
                
                if analysis_explanations_toggle.value:
                    # Cost estimation
                    display(Markdown("### 💰 Resource Planning"))
                    cost_per_exp = 2  # Assume 2 hours per experiment
                    total_cost = budget['recommended_budget'] * cost_per_exp
                    
                    display(Markdown(f"""
                    **Estimated Resource Requirements:**
                    - Time per experiment: ~{cost_per_exp} hours
                    - Total optimization time: ~{total_cost} hours
                    - Recommended batch size: {min(max(3, metrics['final_valid_samples'] // 20), 10)} experiments
                    - Number of optimization rounds: ~{budget['recommended_budget'] // min(max(3, metrics['final_valid_samples'] // 20), 10)}
                    """))
            
            # Parameter statistics
            display(Markdown("### 📊 Parameter Statistics"))
            param_stats = clean_data[selected_params].describe()
            display(param_stats)
            
            if analysis_explanations_toggle.value:
                # Show parameter ranges and types
                display(Markdown("### 🎚️ Parameter Ranges"))
                for param in selected_params:
                    param_type = "Continuous" if clean_data[param].dtype in ['float64', 'int64'] else "Categorical"
                    if param_type == "Continuous":
                        param_range = f"{clean_data[param].min():.3f} to {clean_data[param].max():.3f}"
                    else:
                        param_range = f"{len(clean_data[param].unique())} categories: {list(clean_data[param].unique())}"
                    display(Markdown(f"• **{param}** ({param_type}): {param_range}"))
            
            # Target variable statistics
            display(Markdown(f"### 🎯 Target Variable Statistics ({target_param})"))
            target_stats = clean_data[target_param].describe()
            display(target_stats)
            
            if analysis_explanations_toggle.value:
                # Show target variable distribution info
                target_range = clean_data[target_param].max() - clean_data[target_param].min()
                target_std = clean_data[target_param].std()
                display(Markdown(f"📈 **Range:** {target_range:.3f}"))
                display(Markdown(f"📊 **Standard Deviation:** {target_std:.3f}"))
                display(Markdown(f"🎯 **Optimization Goal:** Maximize {target_param}"))
                
                # Show correlations
                display(Markdown("### 🔗 Parameter Correlations"))
                correlation_matrix = clean_data[selected_params + [target_param]].corr()
                
                # Find strongest correlations with target
                target_corr = correlation_matrix[target_param].abs().sort_values(ascending=False)[1:]  # Exclude self-correlation
                
                display(Markdown("**Strongest correlations with target variable:**"))
                for param, corr in target_corr.head(3).items():
                    strength = "Strong" if corr > 0.7 else "Moderate" if corr > 0.4 else "Weak"
                    display(Markdown(f"• **{param}**: {corr:.3f} ({strength})"))
                
                if target_corr.max() < 0.3:
                    display(Markdown("⚠️ **Note:** Low correlations detected. This suggests complex, non-linear relationships that Bayesian optimization can help uncover."))
            
            # Store results globally for optimization
            global current_analyzer, optimization_ready
            current_analyzer = analyzer
            current_analyzer.clean_data = clean_data
            current_analyzer.parameter_columns = selected_params
            current_analyzer.target_column = target_param
            current_analyzer.data_metrics = metrics
            current_analyzer.adequacy_status = adequacy
            
            # Create optimization space
            current_analyzer.optimization_space = analyzer.create_optimization_space(
                clean_data, selected_params
            )
            
            optimization_ready = True
            
            # Show optimization interface if data is adequate
            if 'CRITICAL' not in adequacy and 'ERROR' not in adequacy:
                display(Markdown("---"))
                display(create_optimization_interface())
            else:
                display(Markdown("### ⚠️ Optimization Not Recommended"))
                display(Markdown("Please collect more data or select fewer parameters before proceeding."))
    
    # Connect event handlers
    add_param_button.on_click(on_add_parameter)
    remove_param_button.on_click(on_remove_parameter)
    analyze_button.on_click(on_analyze_data)
    
    # Initialize parameter container
    update_parameter_container()
    
    # Create main layout
    main_layout = widgets.VBox([
        widgets.HTML("""
        <div style='background-color: #f0f8ff; padding: 20px; border-radius: 10px; margin-bottom: 20px;'>
            <h2>🎯 Bayesian Optimization Setup</h2>
            <p><strong>What is Bayesian Optimization?</strong></p>
            <p>Bayesian optimization is an intelligent approach to finding optimal experimental parameters. 
            Unlike random testing, it learns from each experiment to suggest the most promising parameters 
            for your next tests.</p>
            
            <p><strong>How it works:</strong></p>
            <ol>
                <li><strong>Learn:</strong> Analyzes your existing experimental data</li>
                <li><strong>Predict:</strong> Builds a model of how parameters affect your target variable</li>
                <li><strong>Optimize:</strong> Suggests new experiments most likely to improve results</li>
                <li><strong>Adapt:</strong> Updates predictions based on new experimental results</li>
            </ol>
            
            <p><strong>Benefits:</strong></p>
            <ul>
                <li>🎯 <strong>Efficient:</strong> Finds optimal parameters with fewer experiments</li>
                <li>📊 <strong>Smart:</strong> Balances exploration of new regions with exploitation of promising areas</li>
                <li>🔄 <strong>Adaptive:</strong> Improves suggestions as you collect more data</li>
                <li>💡 <strong>Insightful:</strong> Reveals complex relationships between parameters</li>
            </ul>
        </div>
        """),
        
        widgets.HTML("""
        <h3>Step 1: Select Variables</h3>
        <p>Choose the target variable you want to optimize (maximize) and the parameters you want to vary:</p>
        """),
        
        widgets.HTML("""
        <div style='background-color: #fff3e0; padding: 15px; border-radius: 8px; margin-bottom: 15px;'>
            <h4>💡 Selection Tips:</h4>
            <ul>
                <li><strong>Target Variable:</strong> Choose the most important outcome (e.g., efficiency, performance)</li>
                <li><strong>Parameters:</strong> Select variables you can control in experiments</li>
                <li><strong>Parameter Count:</strong> Start with 2-5 parameters, add more as you collect data</li>
                <li><strong>Data Quality:</strong> Ensure parameters have meaningful variation in your dataset</li>
            </ul>
        </div>
        """),
        
        target_dropdown,
        widgets.HTML("<br>"),
        toggle_button,
        recommendations_output,
        widgets.HTML("<br><b>Optimization Parameters:</b>"),
        widgets.HTML("""
        <p><em>Add/remove parameters dynamically. Start with the most important ones.</em></p>
        """),
        parameter_container,
        widgets.HTML("<br>"),
        analysis_explanations_toggle,
        widgets.HTML("<br>"),
        analyze_button,
        analysis_output
    ])
    
    return main_layout


def create_optimization_interface():
    """Build the widget panel that configures and launches an optimization run.

    Reads the module-level ``BAYESIAN_AVAILABLE`` flag and ``current_analyzer``
    object. From the analyzer it uses ``data_metrics['final_valid_samples']``,
    ``parameter_columns`` and ``estimate_convergence_budget`` while building the
    controls and, once the button is clicked, ``clean_data``,
    ``target_column``, ``optimization_space`` and ``adequacy_status``.

    Returns:
        A ``widgets.HTML`` installation hint when ``BAYESIAN_AVAILABLE`` is
        falsy; otherwise a ``widgets.VBox`` holding the configuration controls
        followed by the output area that receives the results.
    """
    if not BAYESIAN_AVAILABLE:
        return widgets.HTML("""
        <div style='color: red; font-weight: bold;'>
        ⚠️ Bayesian optimization libraries not available. 
        Please install: pip install scikit-optimize
        </div>
        """)
    
    # Get metrics for recommendations
    n_samples = current_analyzer.data_metrics['final_valid_samples']
    n_params = len(current_analyzer.parameter_columns)
    # Dict with 'minimum_budget', 'recommended_budget' and 'maximum_useful' entries
    max_iterations = current_analyzer.estimate_convergence_budget(n_params)
    
    # Batch size recommendations
    recommended_batch = min(max(3, n_samples // 20), 10)  # 3-10 based on data size
    
    # Optimization strategy selection
    strategy_dropdown = widgets.Dropdown(
        options=['balanced', 'explorative', 'exploitative'],
        value='balanced',
        description='Strategy:',
        style={'description_width': '150px'},
        layout={'width': '300px'}
    )
    
    # Batch size selection
    batch_size = widgets.IntSlider(
        min=1,
        max=min(20, max(5, n_samples // 5)),
        value=recommended_batch,
        description='Batch Size:',
        style={'description_width': '150px'},
        layout={'width': '400px'}
    )
    
    # Number of iterations
    iterations_slider = widgets.IntSlider(
        min=max_iterations['minimum_budget'],
        max=max_iterations['maximum_useful'],
        value=max_iterations['recommended_budget'],
        description='Max Iterations:',
        style={'description_width': '150px'},
        layout={'width': '400px'}
    )
    
    # Create explanations toggle functionality
    def create_explanations_toggle():
        """Creates checkbox to toggle detailed explanations"""
        show_explanations = widgets.Checkbox(
            value=False,
            description='Show detailed explanations',
            style={'description_width': 'initial'},
            tooltip='Toggle to show/hide detailed explanations and tips'
        )
        
        def toggle_explanations(change):
            """Toggles visibility of explanation sections"""
            show = change['new']
            
            # The info widgets are created further down in the enclosing
            # function; they exist by the time this callback can fire.
            conditional_strategy_info.layout.display = 'block' if show else 'none'
            conditional_batch_info.layout.display = 'block' if show else 'none'
            conditional_iterations_info.layout.display = 'block' if show else 'none'
            conditional_advanced_options.layout.display = 'block' if show else 'none'
        
        show_explanations.observe(toggle_explanations, names='value')
        return show_explanations
    
    # Create conditional explanation widgets (hidden by default)
    conditional_strategy_info = widgets.HTML("""
    <div style='background-color: #f8f9fa; padding: 15px; border-radius: 8px; margin-bottom: 15px;'>
        <h4>🎯 Optimization Strategies</h4>
        <ul>
            <li><strong>Balanced (Recommended):</strong> Uses Expected Improvement (EI) acquisition function. 
                Balances exploration of unknown regions with exploitation of promising areas. 
                Best for most experimental scenarios.</li>
            <li><strong>Explorative:</strong> Uses Upper Confidence Bound (UCB) with high exploration parameter. 
                Prioritizes exploring the entire parameter space. Good when you suspect multiple optima 
                or have limited prior knowledge.</li>
            <li><strong>Exploitative:</strong> Uses Probability of Improvement (PI) acquisition function. 
                Focuses on refining around the best known regions. Use when you're confident about 
                the general location of the optimum.</li>
        </ul>
    </div>
    """)
    conditional_strategy_info.layout.display = 'none'  # Hidden by default
    
    conditional_batch_info = widgets.HTML(f"""
    <div style='background-color: #e8f5e8; padding: 12px; border-radius: 6px; margin-bottom: 10px;'>
        <h4>📊 Batch Size Recommendations</h4>
        <p><strong>Recommended for your data: {recommended_batch} experiments</strong></p>
        <ul>
            <li><strong>Small batches (1-3):</strong> More sequential learning, better for expensive experiments</li>
            <li><strong>Medium batches (4-8):</strong> Good balance of efficiency and learning (recommended)</li>
            <li><strong>Large batches (9+):</strong> Faster screening, but less adaptive learning</li>
        </ul>
        <p><em>Rule of thumb: Use ~5% of your total samples as batch size, minimum 3, maximum 10.</em></p>
    </div>
    """)
    conditional_batch_info.layout.display = 'none'  # Hidden by default
    
    conditional_iterations_info = widgets.HTML(f"""
    <div style='background-color: #fff3e0; padding: 12px; border-radius: 6px; margin-bottom: 10px;'>
        <h4>🔄 Iteration Budget Guidelines</h4>
        <p><strong>For {n_params} parameters:</strong></p>
        <ul>
            <li><strong>Minimum effective:</strong> {max_iterations['minimum_budget']} iterations 
                (basic parameter space coverage)</li>
            <li><strong>Recommended:</strong> {max_iterations['recommended_budget']} iterations 
                (good balance of exploration and convergence)</li>
            <li><strong>Maximum useful:</strong> {max_iterations['maximum_useful']} iterations 
                (diminishing returns beyond this point)</li>
        </ul>
        <p><em>Remember: You can always run optimization in stages and evaluate progress!</em></p>
    </div>
    """)
    conditional_iterations_info.layout.display = 'none'  # Hidden by default
    
    conditional_advanced_options = widgets.HTML("""
    <div style='background-color: #f0f8ff; padding: 12px; border-radius: 6px; margin-bottom: 15px;'>
        <h4>⚙️ Advanced Tips</h4>
        <ul>
            <li><strong>Start Conservative:</strong> Begin with fewer iterations and expand based on results</li>
            <li><strong>Resource Planning:</strong> Each iteration = batch_size × experimental_cost</li>
            <li><strong>Stopping Criteria:</strong> Monitor improvement rate - stop if no significant gains</li>
            <li><strong>Validation:</strong> Reserve 10-20% of suggested experiments for validation</li>
        </ul>
    </div>
    """)
    conditional_advanced_options.layout.display = 'none'  # Hidden by default
    
    # Optimization button
    optimize_button = widgets.Button(
        description='🚀 Generate Experiment Suggestions',
        button_style='primary',
        icon='flask',
        tooltip='Generate optimized experimental design based on your settings',
        layout={'width': '300px', 'height': '40px'}
    )
    
    # Results output
    optimization_output = widgets.Output()
    
    def on_optimize_clicked(b):
        """Runs the optimization with the current widget settings and shows the results"""
        with optimization_output:
            optimization_output.clear_output(wait=True)
            
            display(Markdown("# 🧪 Experimental Design Generation"))
            
            # Display selected configuration.
            # The text is assembled line by line WITHOUT leading indentation:
            # Markdown renders lines indented by four or more spaces as a code
            # block, which would show the bullets and bold markers verbatim.
            display(Markdown("## 📋 Configuration Summary"))
            config_summary = "\n".join([
                f"- **Strategy:** {strategy_dropdown.value.title()}",
                f"- **Batch Size:** {batch_size.value} experiments",
                f"- **Max Iterations:** {iterations_slider.value}",
                f"- **Total Experimental Budget:** {batch_size.value * iterations_slider.value} experiments",
                f"- **Data Quality:** {current_analyzer.adequacy_status}",
            ])
            display(Markdown(config_summary))
            
            # Show progress
            display(Markdown("## 🔄 Generating Suggestions..."))
            display(Markdown("Initializing Bayesian optimization..."))
            
            try:
                # Prepare data for optimization
                X = current_analyzer.clean_data[current_analyzer.parameter_columns].values
                y = current_analyzer.clean_data[current_analyzer.target_column].values
                
                display(Markdown(f"Training on {len(X)} samples with {len(current_analyzer.parameter_columns)} parameters"))
                display(Markdown(f"Target variable range: {y.min():.3f} to {y.max():.3f}"))
                
                # Generate suggestions using Bayesian optimization
                suggestions = generate_experiment_suggestions(
                    current_analyzer.optimization_space, 
                    X, 
                    y, 
                    batch_size.value, 
                    strategy_dropdown.value
                )
                
                # Find single optimum
                optimal_params, predicted_performance = find_single_optimum(
                    current_analyzer.optimization_space,
                    X,
                    y,
                    strategy_dropdown.value
                )
                
                display(Markdown("✅ Optimization complete!"))
                
                # Display results
                display_experiment_suggestions(suggestions, strategy_dropdown.value)
                
                # Display single optimum
                display_single_optimum(optimal_params, predicted_performance)
                
            except Exception as e:
                # Deliberately broad: any failure is reported inside the output
                # area instead of surfacing as a traceback in the notebook.
                display(Markdown(f"❌ Error during optimization: {str(e)}"))
                display(Markdown("\n".join([
                    "### Troubleshooting Tips:",
                    "- Check that all selected parameters have valid numeric ranges",
                    "- Ensure target variable has sufficient variation",
                    "- Try reducing the number of parameters if you have limited data",
                ])))
    
    optimize_button.on_click(on_optimize_clicked)
    
    # Create interface layout
    controls = widgets.VBox([
        widgets.HTML("<h3>⚙️ Optimization Configuration</h3>"),
        
        # Add explanations toggle
        widgets.HTML("<div style='margin-bottom: 15px;'>"),
        create_explanations_toggle(),
        widgets.HTML("</div>"),
        
        # Conditional strategy info
        conditional_strategy_info,
        widgets.HTML("<h4>Select Strategy:</h4>"),
        strategy_dropdown,
        widgets.HTML("<br>"),
        
        # Conditional batch info
        conditional_batch_info,
        widgets.HTML("<h4>Configure Batch Size:</h4>"),
        batch_size,
        widgets.HTML("<br>"),
        
        # Conditional iterations info
        conditional_iterations_info,
        widgets.HTML("<h4>Set Iteration Budget:</h4>"),
        iterations_slider,
        widgets.HTML("<br>"),
        
        # Conditional advanced options
        conditional_advanced_options,
        optimize_button
    ])
    
    return widgets.VBox([controls, optimization_output])


def find_single_optimum(space, X, y, strategy='balanced'):
    """Search the optimization space for the single best-predicted parameter set.

    The objective handed to ``gp_minimize`` is a surrogate built from the
    existing measurements: the inverse-distance-weighted average of the target
    values of the candidate's nearest neighbours (at most five) in ``X``. The
    prediction is negated because ``gp_minimize`` minimizes.

    Args:
        space: Sequence of skopt dimensions; each must expose ``name``.
        X: 2-D array of observed parameter values whose columns follow the
            order of ``space`` (candidates are assembled in that order).
        y: Observed target values, one per row of ``X``.
        strategy: 'balanced' (EI), 'explorative' (LCB) or 'exploitative' (PI).
            Any other value falls back to EI.

    Returns:
        Tuple ``(optimal_params, predicted_performance)``: a dict mapping
        dimension names to the best point found, and the surrogate's prediction
        at that point.
    """
    from skopt import gp_minimize
    from skopt.utils import use_named_args
    from sklearn.neighbors import NearestNeighbors
    
    # Neighbour indices are positional; an ndarray guarantees positional lookup
    # even when a pandas Series with a non-default index is passed in.
    y = np.asarray(y)
    dim_names = [dim.name for dim in space]
    
    # Use same prediction method as batch optimization
    nn_model = NearestNeighbors(n_neighbors=min(5, len(X)), metric='euclidean')
    nn_model.fit(X)
    
    @use_named_args(space)
    def objective(**params):
        param_array = np.array([params[name] for name in dim_names]).reshape(1, -1)
        distances, indices = nn_model.kneighbors(param_array)
        # Small epsilon keeps the weight finite when the candidate coincides
        # with an observed point.
        weights = 1 / (distances[0] + 1e-6)
        weights = weights / weights.sum()
        predicted_performance = np.average(y[indices[0]], weights=weights)
        return -predicted_performance
    
    # Configure acquisition function.
    # scikit-optimize has no 'UCB' option; because the objective is the negated
    # performance, its lower confidence bound ('LCB') is the matching criterion.
    acq_func_map = {
        'balanced': 'EI',
        'explorative': 'LCB',
        'exploitative': 'PI'
    }
    
    # Run optimization
    result = gp_minimize(
        func=objective,
        dimensions=space,
        n_calls=50,
        n_initial_points=min(10, len(X)),
        acq_func=acq_func_map.get(strategy, 'EI'),
        random_state=42
    )
    
    # Return best parameters
    optimal_params = dict(zip(dim_names, result.x))
    predicted_performance = -result.fun
    
    return optimal_params, predicted_performance


def generate_experiment_suggestions(space, X, y, batch_size, strategy, min_distance=0.1):
    """Propose a batch of diverse, well-predicted experiments.

    ``gp_minimize`` is run against a surrogate objective built from the
    existing measurements (inverse-distance-weighted average of the target
    values of the candidate's nearest neighbours, at most five). Every point it
    evaluated is then ranked by predicted performance and the best ones are
    kept, skipping any point closer than ``min_distance`` (Euclidean, in raw
    parameter units) to an already selected one.

    Args:
        space: Sequence of skopt dimensions; each must expose ``name``.
        X: 2-D array of observed parameter values whose columns follow the
            order of ``space``.
        y: Observed target values, one per row of ``X``.
        batch_size: Maximum number of suggestions to return.
        strategy: 'balanced' (EI), 'explorative' (LCB) or 'exploitative' (PI).
            Any other value falls back to EI.
        min_distance: Minimum distance between two selected points.

    Returns:
        List of at most ``batch_size`` dicts with the keys ``experiment_id``,
        ``parameters``, ``expected_improvement`` (the surrogate's predicted
        performance for the point), ``confidence`` and ``priority``. Fewer
        entries are returned when the diversity filter rejects candidates;
        an empty list when ``batch_size`` is not positive.
    """
    from skopt import gp_minimize
    from skopt.utils import use_named_args
    from sklearn.neighbors import NearestNeighbors
    
    if batch_size <= 0:
        return []
    
    # Neighbour indices are positional; an ndarray guarantees positional lookup
    # even when a pandas Series with a non-default index is passed in.
    y = np.asarray(y)
    dim_names = [dim.name for dim in space]
    
    # Create a more sophisticated objective function based on existing data
    # Using k-nearest neighbors to predict performance of new parameter combinations
    nn_model = NearestNeighbors(n_neighbors=min(5, len(X)), metric='euclidean')
    nn_model.fit(X)
    
    @use_named_args(space)
    def objective(**params):
        # Convert parameters to array
        param_array = np.array([params[name] for name in dim_names]).reshape(1, -1)
        
        # Find nearest neighbors and predict performance
        distances, indices = nn_model.kneighbors(param_array)
        
        # Weight by inverse distance (closer neighbors have more influence)
        weights = 1 / (distances[0] + 1e-6)  # Add small epsilon to avoid division by zero
        weights = weights / weights.sum()
        
        # Predict performance as weighted average of neighbors
        predicted_performance = np.average(y[indices[0]], weights=weights)
        
        # Return negative because skopt minimizes
        return -predicted_performance
    
    # Configure acquisition function based on strategy.
    # scikit-optimize has no 'UCB' option; because the objective is the negated
    # performance, its lower confidence bound ('LCB') is the matching criterion.
    acq_func_map = {
        'balanced': 'EI',      # Expected Improvement
        'explorative': 'LCB',  # Lower Confidence Bound of the negated target
        'exploitative': 'PI'   # Probability of Improvement
    }
    
    acq_func = acq_func_map.get(strategy, 'EI')
    
    # Run optimization to get suggestions
    n_calls = min(batch_size * 3, 50)  # Generate more points to select diverse batch
    # gp_minimize rejects n_calls < n_initial_points, which used to happen for
    # batch sizes of 3 or less whenever ten or more samples were available.
    n_initial_points = min(10, len(X), n_calls)
    
    result = gp_minimize(
        func=objective,
        dimensions=space,
        n_calls=n_calls,
        n_initial_points=n_initial_points,
        acq_func=acq_func,
        random_state=42
    )
    
    # Undo the sign flip so everything below works on predicted performance.
    predicted = -np.asarray(result.func_vals, dtype=float)
    high_cut = np.percentile(predicted, 75)
    medium_cut = np.percentile(predicted, 50)
    
    # Select diverse batch from results
    suggestions = []
    selected_points = []
    
    # Smallest objective value first, i.e. best predicted performance first
    for idx in np.argsort(result.func_vals):
        if len(suggestions) >= batch_size:
            break
            
        params = result.x_iters[idx]
        
        # Check diversity (avoid points too close to already selected ones)
        candidate = np.array(params)
        too_close = any(
            np.linalg.norm(candidate - np.array(chosen)) < min_distance
            for chosen in selected_points
        )
        if too_close:
            continue
        
        selected_points.append(params)
        
        # Confidence reflects where the prediction ranks among all evaluated
        # points (top quarter / top half / rest); it is not a model variance.
        score = predicted[idx]
        if score > high_cut:
            confidence = 'High'
        elif score > medium_cut:
            confidence = 'Medium'
        else:
            confidence = 'Low'
        
        suggestion = {
            'experiment_id': f'exp_{len(suggestions)+1:03d}',
            'parameters': dict(zip(dim_names, params)),
            'expected_improvement': float(score),
            'confidence': confidence,
            'priority': len(suggestions) + 1
        }
        suggestions.append(suggestion)
    
    return suggestions


def display_experiment_suggestions(suggestions, strategy):
    """Render the suggested experiments as a table with optional explanations.

    Displays, in order: a results heading, the suggestions table, a checkbox
    with its output area, and an export / next-steps box. Ticking the checkbox
    fills the output area with the strategy description, an interpretation
    guide, success estimates (derived from the module-level
    ``current_analyzer.data_metrics['samples_per_parameter']``), resource
    planning and workflow guidance; unticking clears it.

    Args:
        suggestions: List of dicts with the keys ``experiment_id``,
            ``priority``, ``expected_improvement``, ``parameters`` (a dict that
            is expanded into one column per parameter) and optionally
            ``confidence`` (defaults to 'Medium').
        strategy: Strategy name used to pick the explanatory sentence. Names
            other than 'balanced', 'explorative' and 'exploitative' are
            described as the EI default instead of raising ``KeyError``.
    """
    def show_markdown(*lines):
        # Lines are joined WITHOUT leading indentation: Markdown renders lines
        # indented by four or more spaces as a code block, which would show
        # headings and bold markers verbatim.
        display(Markdown("\n".join(lines)))
    
    # Create DataFrame for display
    exp_data = []
    for exp in suggestions:
        row = {
            'Experiment_ID': exp['experiment_id'],
            'Priority': exp['priority'],
            'Expected_Improvement': f"{exp['expected_improvement']:.4f}",
            'Confidence': exp.get('confidence', 'Medium'),
            **exp['parameters']
        }
        exp_data.append(row)
    
    df_suggestions = pd.DataFrame(exp_data)
    
    # Create results explanations toggle
    results_explanations_toggle = widgets.Checkbox(
        value=False,
        description='Show detailed results explanations',
        style={'description_width': 'initial'},
        tooltip='Toggle to show/hide detailed explanations for optimization results'
    )
    
    results_explanations_output = widgets.Output()
    
    def toggle_results_explanations(change):
        """Toggles detailed results explanations"""
        with results_explanations_output:
            results_explanations_output.clear_output(wait=True)
            
            if change['new']:  # Show explanations
                # Display strategy explanation
                strategy_explanations = {
                    'balanced': "Using Expected Improvement (EI) - balancing exploration and exploitation",
                    'explorative': "Using Upper Confidence Bound (UCB) - focusing on unexplored regions",
                    'exploitative': "Using Probability of Improvement (PI) - focusing on promising areas"
                }
                # The optimizers in this file fall back to EI for unknown names.
                strategy_text = strategy_explanations.get(
                    strategy,
                    f"Using Expected Improvement (EI) - default for unrecognized strategy '{strategy}'"
                )
                
                show_markdown(f"**Strategy Applied:** {strategy_text}")
                
                # Add interpretation guide
                show_markdown(
                    "### 📖 How to Interpret Results:",
                    "",
                    "**Priority:** Lower numbers = higher priority. Run experiments in this order for best results.",
                    "",
                    "**Expected Improvement:** Higher values indicate experiments more likely to improve your target variable.",
                    "",
                    "**Parameter Values:** Specific settings for each experiment. These are optimized based on your historical data.",
                    "",
                    "**Confidence Levels:**",
                    "- **High:** Strong evidence this experiment will be valuable",
                    "- **Medium:** Moderate confidence based on available data",
                    "- **Low:** Exploratory experiment in uncertain regions",
                )
                
                # Success probability estimates
                show_markdown("### 🎯 Success Predictions")
                samples_per_parameter = current_analyzer.data_metrics['samples_per_parameter']
                # Clamped to 30-80 and rounded so a fractional ratio does not
                # print as a long float.
                success_estimate = int(round(min(80, max(30, 60 + (samples_per_parameter * 2)))))
                
                show_markdown(
                    f"- **High Priority Experiments (Top 3):** ~{success_estimate}% chance of improvement",
                    f"- **Medium Priority Experiments:** ~{success_estimate-15}% chance of improvement",
                    f"- **Lower Priority Experiments:** ~{success_estimate-30}% chance of improvement",
                    "",
                    "*Success rates estimated based on your data quality and historical performance*",
                )
                
                # Resource planning
                show_markdown("### 📊 Resource Planning")
                total_experiments = len(suggestions)
                estimated_time = total_experiments * 2  # Assume 2 hours per experiment
                
                # Two trailing spaces before the newline are Markdown hard line
                # breaks; joining with them keeps one statement per line.
                display(Markdown("  \n".join([
                    f"**Current Batch:** {total_experiments} experiments",
                    f"**Estimated Time:** ~{estimated_time} hours (assuming 2h per experiment)",
                    f"**Material Requirements:** Plan reagents for {total_experiments} samples plus 10% buffer",
                    f"**Documentation:** Prepare {total_experiments} experiment sheets with parameter settings",
                ])))
                
                # Add workflow guidance
                show_markdown(
                    "### 🔄 Recommended Workflow",
                    "1. **Start with high-priority experiments** (Priority 1-3) to get quick wins",
                    "2. **Record all results** including failed experiments (crucial for learning!)",
                    "3. **Update your dataset** with new experimental results",
                    "4. **Re-run optimization** after every 3-5 experiments for adaptive learning",
                    "5. **Monitor convergence** - stop when improvements become minimal",
                    "",
                    "### ⚠️ Important Notes",
                    "- Don't skip experiments - each one provides valuable information",
                    "- Document experimental conditions carefully",
                    "- Consider running duplicate experiments for validation",
                    "- Update the optimization model regularly as you collect more data",
                )
    
    results_explanations_toggle.observe(toggle_results_explanations, names='value')
    
    # Display main results
    display(Markdown("## 📊 Optimization Results"))
    
    # Display suggestions table
    display(Markdown("### 📋 Suggested Experiments (In Order of Priority)"))
    display(df_suggestions)
    
    # Display toggle and explanations
    display(results_explanations_toggle)
    display(results_explanations_output)
    
    # Add download link functionality
    display(Markdown("### 💾 Export and Next Steps"))
    display(widgets.HTML("""
    <div style='background-color: #e8f5e8; padding: 15px; border-radius: 8px;'>
        <h4>📁 Export Options</h4>
        <p>Copy the table above to your experimental protocol or save as CSV for further analysis.</p>
        
        <h4>🔬 Quick Start Guide</h4>
        <ol>
            <li><strong>Copy parameter values</strong> from the table above</li>
            <li><strong>Set up experiments</strong> using the exact parameter combinations</li>
            <li><strong>Run experiments in priority order</strong> for best results</li>
            <li><strong>Record all outcomes</strong> and add to your dataset</li>
            <li><strong>Return for next batch</strong> after completing 3-5 experiments</li>
        </ol>
    </div>
    """))


def display_single_optimum(optimal_params, predicted_performance):
    """Render the single best parameter set with optional usage guidance.

    Displays, in order: a heading, the predicted performance, a two-column
    table of parameters and values, a checkbox with its output area (ticking it
    shows guidance on using the result, unticking clears it) and a one-line
    ``name=value`` summary for copying.

    Args:
        optimal_params: Mapping of parameter name to optimal value. Float
            values (including NumPy floating scalars) are shown with four
            decimals; everything else via ``str``.
        predicted_performance: Predicted target value for the parameter set;
            must support the ``.4f`` format.
    """
    from html import escape
    
    def show_markdown(*lines):
        # Lines are joined WITHOUT leading indentation: Markdown renders lines
        # indented by four or more spaces as a code block, which would show
        # headings and bold markers verbatim.
        display(Markdown("\n".join(lines)))
    
    def format_value(value):
        # np.float64 subclasses float, but other NumPy float widths do not.
        if isinstance(value, (float, np.floating)):
            return f"{value:.4f}"
        return str(value)
    
    display(Markdown("## 🎯 Single Optimal Parameter Set"))
    
    # Create single optimum explanations toggle
    single_optimum_explanations_toggle = widgets.Checkbox(
        value=False,
        description='Show single optimum explanations',
        style={'description_width': 'initial'},
        tooltip='Toggle to show/hide detailed explanations for single optimum approach'
    )
    
    single_optimum_explanations_output = widgets.Output()
    
    def toggle_single_optimum_explanations(change):
        """Toggles single optimum explanations"""
        with single_optimum_explanations_output:
            single_optimum_explanations_output.clear_output(wait=True)
            
            if change['new']:  # Show explanations
                show_markdown(
                    "### 🔬 How to Use This Result:",
                    "",
                    "**Traditional Bayesian Optimization Approach:**",
                    "- Use these exact parameter values for your next experiment",
                    "- This represents the algorithm's best guess for optimal performance",
                    "- Run this experiment and add the result to your dataset",
                    "- Re-run optimization to get the next optimal point",
                    "",
                    "**Comparison with Batch Approach:**",
                    "- **Single Optimum:** Conservative, sequential approach",
                    "- **Batch Suggestions:** Efficient, parallel approach for faster learning",
                    "- **Recommendation:** Use batch approach for faster optimization, single optimum for validation",
                    "",
                    "### 🎯 When to Use Single Optimum:",
                    "- **Limited resources:** When you can only run one experiment at a time",
                    "- **High-cost experiments:** When each experiment is expensive",
                    "- **Validation:** To confirm batch optimization results",
                    "- **Conservative approach:** When you want to minimize experimental risk",
                )
    
    single_optimum_explanations_toggle.observe(toggle_single_optimum_explanations, names='value')
    
    show_markdown(
        f"**Predicted Performance:** {predicted_performance:.4f}",
        "",
        "This represents the single best parameter combination based on Bayesian optimization analysis.",
    )
    
    # Create optimal parameters table
    optimal_data = [
        {'Parameter': param, 'Optimal Value': format_value(value)}
        for param, value in optimal_params.items()
    ]
    
    optimal_df = pd.DataFrame(optimal_data)
    display(optimal_df)
    
    # Display toggle and explanations
    display(single_optimum_explanations_toggle)
    display(single_optimum_explanations_output)
    
    # Add copy-friendly format
    display(Markdown("### 📋 Copy-Friendly Format:"))
    copy_text = ", ".join(f"{param}={format_value(value)}" for param, value in optimal_params.items())
    # Names and values originate from the loaded data, so escape them before
    # embedding them in HTML.
    display(widgets.HTML(f"<code>{escape(copy_text)}</code>"))


def on_load_data_clicked(batch_ids_selector):
    """Load the selected batches and render the optimization interface.

    Fetches the sample ids for the selected batches, their descriptions and
    the table returned by ``get_ijp_data``. On success the result is stored in
    the module-level ``data`` / ``original_data`` globals and
    ``optimization_ready`` is reset to ``False``.

    Status messages are printed into the ``out`` widget. The data overview and
    the parameter selection widgets are rendered into ``dynamic_content``; if
    the selected batches yield no data, the troubleshooting help is rendered
    there instead.

    Args:
        batch_ids_selector: Widget whose ``.value`` holds the selected batch
            ids; it is passed straight to ``get_ids_in_batch``.

    Returns:
        None. If loading raises, the error is printed into ``out``, nothing is
        rendered into ``dynamic_content`` and the globals keep whatever value
        they had before the failing statement.
    """
    global data, original_data, optimization_ready

    # Function-scope import so the block stays self-contained.
    import textwrap

    dynamic_content.clear_output()

    with out:
        out.clear_output()
        print("Loading Data...")

        try:
            try_sample_ids = get_ids_in_batch(url, token, batch_ids_selector.value)
            identifiers = get_sample_description(url, token, list(try_sample_ids))
            data = get_ijp_data(try_sample_ids, identifiers)

            out.clear_output()
            if data is None:
                # No early return: fall through so the troubleshooting help
                # below is actually shown (it used to be unreachable).
                print("The batches selected don't contain any relevant measurements")
            else:
                # Keep an untouched copy next to the working table
                original_data = data.copy()
                optimization_ready = False

                print("Data Loaded Successfully!")
                print(f"Loaded {len(data)} samples with {len(data.columns)} parameters")

        except Exception as e:
            # Deliberately broad: any failure while loading is reported in the
            # status area instead of surfacing as a traceback in the callback.
            out.clear_output()
            print(f"Error loading data: {str(e)}")
            return

    # Create optimization interface
    with dynamic_content:
        dynamic_content.clear_output(wait=True)

        if data is not None:
            # Display comprehensive data overview
            display(Markdown("# 🔬 Bayesian Optimization Interface"))

            # Data summary with insights
            display(Markdown("## 📊 Data Overview"))

            # Calculate data insights
            numeric_cols = len(data.select_dtypes(include=[np.number]).columns)
            categorical_cols = len(data.select_dtypes(include=['object']).columns)
            jv_params = ['efficiency', 'open_circuit_voltage', 'fill_factor',
                         'short_circuit_current_density', 'series_resistance', 'shunt_resistance']
            jv_available = len([col for col in jv_params if col in data.columns])

            # Data quality assessment. An empty table has zero cells; report it
            # as 0% complete instead of dividing by zero and printing "nan%".
            total_cells = len(data) * len(data.columns)
            if total_cells:
                missing_data_pct = (data.isnull().sum().sum() / total_cells) * 100
            else:
                missing_data_pct = 100.0

            # dedent strips the source indentation; Markdown would otherwise
            # render lines indented by four or more spaces as a code block.
            data_summary = textwrap.dedent(f"""
            **Dataset Characteristics:**
            - 📈 **Total Samples:** {len(data)}
            - 🔢 **Numeric Parameters:** {numeric_cols}
            - 📝 **Categorical Parameters:** {categorical_cols}
            - ⚡ **JV Performance Metrics:** {jv_available} available
            - 🎯 **Data Completeness:** {100-missing_data_pct:.1f}%
            
            **Optimization Readiness:**
            - ✅ **Ready for optimization** if you have >10 samples per parameter
            - ⚠️ **Proceed with caution** if you have 5-10 samples per parameter
            - ❌ **Collect more data** if you have <5 samples per parameter
            """)

            display(Markdown(data_summary))

            # Show parameter selection interface
            display(create_parameter_selection_widgets())
        else:
            display(Markdown("❌ No data available. Please select valid batches containing experimental data."))

            # Help section for data loading (dedented for the same reason as
            # the summary above)
            display(Markdown(textwrap.dedent("""
            ### 🔍 Troubleshooting Data Loading
            
            If no data is showing:
            1. **Check batch selection:** Ensure selected batches contain experimental data
            2. **Verify data format:** Data should include both process parameters and performance metrics
            3. **Check data completeness:** Samples with missing critical data are automatically filtered
            
            **Required data structure:**
            - Process parameters (temperature, pressure, concentrations, etc.)
            - Performance metrics (efficiency, voltage, current, etc.)
            - Sample identifiers for tracking
            """)))


# Module-level state, reset every time this cell runs
current_analyzer = None
optimization_ready = False

# Show an install hint when BAYESIAN_AVAILABLE (set elsewhere in the notebook) is falsy
if not BAYESIAN_AVAILABLE:
    display(widgets.HTML("""
    <div style='background-color: #fff3cd; border: 1px solid #ffeaa7; padding: 15px; border-radius: 5px;'>
        <h3>⚠️ Missing Dependencies</h3>
        <p>To use Bayesian optimization, please install the required packages:</p>
        <code>pip install scikit-optimize</code>
    </div>
    """))

# Render the batch picker (wired to on_load_data_clicked), then the status
# output and the dynamic content area, in that order
display(
    batch_selection.create_batch_selection(url, token, on_load_data_clicked),
    out,
    dynamic_content,
)

VBox(children=(Text(value='', description='Search Batch'), SelectMultiple(description='Batches', layout=Layout…

Output()

Output()