# Hyperparameter Optimization (HPO) Notebook

This notebook provides comprehensive hyperparameter optimization using Optuna with auto-resume capabilities,
enhanced progress tracking, and visualization. It replaces the original hpo.py script.

## Setup and Imports

Import all necessary libraries for HPO.

In [None]:
import os
import sys
import json
import warnings
from pathlib import Path
from typing import Dict, Any, Optional, List, Tuple
from dataclasses import asdict
from datetime import datetime

import optuna
from optuna.samplers import TPESampler
from optuna.pruners import MedianPruner
import mlflow
import mlflow.pytorch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from tqdm.auto import tqdm
from IPython.display import display, HTML, clear_output
import ipywidgets as widgets

# Keep the notebook output quiet: Optuna only reports WARNING and above,
# and Python warnings are hidden for the whole session.
optuna.logging.set_verbosity(optuna.logging.WARNING)
warnings.filterwarnings('ignore')

# Put the 'src' directory on the import path, but only once per kernel.
if not any(entry == 'src' for entry in sys.path):
    sys.path.append('src')

print("‚úÖ All imports successful!")
print(f"Optuna version: {optuna.__version__}")

## Load Dependencies

Load configuration and checkpoint managers from other notebooks.

In [None]:
# Execute the companion notebooks so the names they define are available here.
# NOTE(review): ExperimentConfig, HPOCheckpointManager, config_manager and
# hpo_checkpoint_manager are used later in this notebook but never defined in
# it — presumably they come from these two notebooks; confirm.
%run 01_Configuration_Management.ipynb
%run 02_Enhanced_Checkpoint_System.ipynb

# Import the training entry point that every HPO trial calls.
# It is used exactly as imported from src.train; no simplified version is
# defined in this notebook.
from src.train import train_loop

print("‚úÖ Dependencies loaded!")

## HPO Configuration and Search Space

Define the search space and HPO configuration.

In [None]:
class HPOSearchSpace:
    """Defines the hyperparameter search space for optimization.

    Every tunable value is registered with Optuna under a dotted name that
    mirrors its attribute path on the configuration object, for example
    ``training.optimizer.learning_rate``.
    """

    # Pretrained checkpoint selected for each encoder family in the search space.
    _ENCODER_CHECKPOINTS = {
        "roberta": "roberta-base",
        "bert": "bert-base-uncased",
        "deberta": "microsoft/deberta-base",
    }

    @staticmethod
    def suggest_hyperparameters(trial: "optuna.Trial", base_config: "ExperimentConfig") -> "ExperimentConfig":
        """Build a trial-specific configuration from sampled hyperparameters.

        Args:
            trial: Optuna trial used to sample every value.
            base_config: Configuration supplying all non-tuned settings. It is
                never modified.

        Returns:
            An independent deep copy of ``base_config`` with the sampled values
            written into it.
        """
        import copy

        # A deep copy is required: the nested data/model/training objects are
        # mutated below (and by callers that further tweak the returned config),
        # so sharing them with base_config would leak one trial's values into
        # the next and race when trials run in parallel.
        config = copy.deepcopy(base_config)

        encoder = config.model.encoder
        training = config.training
        head = config.model.heads.symptom_labels

        # NOTE: the order of the suggest_* calls is kept stable on purpose so a
        # seeded sampler draws the same sequence of values.

        # Model hyperparameters
        encoder.type = trial.suggest_categorical(
            "model.encoder.type", ["roberta", "bert", "deberta"]
        )
        # The checkpoint name is derived from the encoder family, not sampled.
        encoder.pretrained_model_name_or_path = HPOSearchSpace._ENCODER_CHECKPOINTS[encoder.type]

        encoder.freeze_encoder = trial.suggest_categorical(
            "model.encoder.freeze_encoder", [False, True]
        )
        encoder.pooling = trial.suggest_categorical(
            "model.encoder.pooling", ["cls", "mean"]
        )
        encoder.output_dropout = trial.suggest_float(
            "model.encoder.output_dropout", 0.0, 0.5
        )
        encoder.gradient_checkpointing = trial.suggest_categorical(
            "model.encoder.gradient_checkpointing", [False, True]
        )

        # LoRA hyperparameters (rank/alpha/dropout are only sampled when enabled)
        lora = encoder.lora
        lora.enabled = trial.suggest_categorical(
            "model.encoder.lora.enabled", [False, True]
        )
        if lora.enabled:
            lora.r = trial.suggest_categorical(
                "model.encoder.lora.r", [8, 16, 32]
            )
            lora.alpha = trial.suggest_categorical(
                "model.encoder.lora.alpha", [16, 32, 64]
            )
            lora.dropout = trial.suggest_float(
                "model.encoder.lora.dropout", 0.0, 0.2
            )

        # Training hyperparameters
        training.batch_size = trial.suggest_categorical(
            "training.batch_size", [8, 16, 32, 64]
        )
        training.gradient_accumulation_steps = trial.suggest_categorical(
            "training.gradient_accumulation_steps", [1, 2, 4]
        )
        training.max_grad_norm = trial.suggest_float(
            "training.max_grad_norm", 0.5, 5.0
        )

        # Optimizer hyperparameters
        optimizer = training.optimizer
        optimizer.name = trial.suggest_categorical(
            "training.optimizer.name", ["adamw", "lamb", "adafactor"]
        )
        optimizer.learning_rate = trial.suggest_float(
            "training.optimizer.learning_rate", 1e-6, 5e-5, log=True
        )
        optimizer.weight_decay = trial.suggest_float(
            "training.optimizer.weight_decay", 1e-5, 1e-1, log=True
        )
        optimizer.layerwise_lr_decay = trial.suggest_float(
            "training.optimizer.layerwise_lr_decay", 0.8, 1.0
        )

        # Scheduler hyperparameters (extra knobs depend on the chosen scheduler)
        scheduler = training.scheduler
        scheduler.name = trial.suggest_categorical(
            "training.scheduler.name", ["linear", "cosine", "onecycle", "plateau"]
        )
        scheduler.warmup_ratio = trial.suggest_float(
            "training.scheduler.warmup_ratio", 0.0, 0.2
        )
        if scheduler.name == "cosine":
            scheduler.cosine_cycles = trial.suggest_float(
                "training.scheduler.cosine_cycles", 0.25, 1.0
            )
        elif scheduler.name == "onecycle":
            scheduler.onecycle_max_lr = trial.suggest_float(
                "training.scheduler.onecycle_max_lr", 1e-5, 1e-4, log=True
            )
            scheduler.onecycle_pct_start = trial.suggest_float(
                "training.scheduler.onecycle_pct_start", 0.1, 0.5
            )
        elif scheduler.name == "plateau":
            scheduler.plateau_patience = trial.suggest_categorical(
                "training.scheduler.plateau_patience", [1, 2, 3, 5]
            )

        # Focal loss hyperparameters
        training.focal.initial_gamma = trial.suggest_float(
            "training.focal.initial_gamma", 1.0, 5.0
        )
        training.focal.alpha = trial.suggest_float(
            "training.focal.alpha", 0.1, 0.5
        )

        # EMA hyperparameter
        training.ema_decay = trial.suggest_float(
            "training.ema_decay", 0.0, 0.9999
        )

        # Data hyperparameters
        config.data.max_length = trial.suggest_categorical(
            "data.max_length", [128, 256, 384, 512]
        )

        # Model head hyperparameters
        head.layers.activation = trial.suggest_categorical(
            "model.heads.symptom_labels.layers.activation",
            ["tanh", "gelu", "leakyrelu", "relu", "silu", "mish", "elu"]
        )
        head.layers.dropout = trial.suggest_float(
            "model.heads.symptom_labels.layers.dropout", 0.0, 0.5
        )
        head.classifier_dropout = trial.suggest_float(
            "model.heads.symptom_labels.classifier_dropout", 0.0, 0.5
        )
        head.label_smoothing = trial.suggest_float(
            "model.heads.symptom_labels.label_smoothing", 0.0, 0.2
        )

        # One decision threshold per entry of data.multi_label_fields
        for symptom in config.data.multi_label_fields:
            head.thresholds[symptom] = trial.suggest_float(
                f"model.heads.symptom_labels.thresholds.{symptom}", 0.2, 0.8
            )

        return config

print("‚úÖ HPO search space defined!")

## Enhanced HPO Manager

Comprehensive HPO management with auto-resume and progress tracking.

In [None]:
class EnhancedHPOManager:
    """Enhanced HPO manager with auto-resume and comprehensive tracking.

    Wraps an Optuna study (TPE sampler + median pruner), runs every trial
    through ``train_loop`` inside a nested MLflow run, and saves the study
    state through an ``HPOCheckpointManager`` when a run ends.
    """

    def __init__(self, base_config: ExperimentConfig, study_name: Optional[str] = None):
        """Store the base configuration and resolve the study name.

        Args:
            base_config: Configuration providing the HPO, MLflow and training
                settings shared by all trials.
            study_name: Name of the Optuna study; falls back to
                ``base_config.hpo.study_name`` when omitted or empty.
        """
        self.base_config = base_config
        self.study_name = study_name or base_config.hpo.study_name
        self.hpo_checkpoint_manager = HPOCheckpointManager()
        self.study = None
        self.progress_callback = None

    def create_or_load_study(self) -> optuna.Study:
        """Create a new study or load the existing one with the same name.

        Returns:
            The study, which is also stored on ``self.study``.
        """
        # The saved HPO state is only used to report progress here; the trials
        # themselves are restored by Optuna through ``load_if_exists=True``.
        if self.base_config.hpo.auto_resume:
            if self.hpo_checkpoint_manager.should_resume_hpo(self.study_name, asdict(self.base_config)):
                print(f"üîÑ Resuming HPO study: {self.study_name}")
                try:
                    hpo_state, _ = self.hpo_checkpoint_manager.load_hpo_state(self.study_name)
                    print(f"   Resuming from {hpo_state.n_trials_completed} completed trials")
                except Exception as e:
                    print(f"   Warning: Could not load HPO state: {e}")
                    print(f"   Starting fresh study")

        # Create sampler and pruner
        sampler = TPESampler(
            seed=self.base_config.hpo.sampler.seed,
            multivariate=self.base_config.hpo.sampler.multivariate
        )

        pruner = MedianPruner(
            n_startup_trials=self.base_config.hpo.pruner.n_startup_trials,
            n_warmup_steps=self.base_config.hpo.pruner.n_warmup_steps
        )

        # Create or load study
        self.study = optuna.create_study(
            study_name=self.study_name,
            storage=self.base_config.hpo.storage,
            load_if_exists=True,
            direction=self.base_config.hpo.direction,
            sampler=sampler,
            pruner=pruner,
        )

        print(f"üìä Study: {self.study_name}")
        print(f"   Direction: {self.base_config.hpo.direction}")
        print(f"   Storage: {self.base_config.hpo.storage}")
        print(f"   Existing trials: {len(self.study.trials)}")

        return self.study

    def objective(self, trial: optuna.Trial) -> float:
        """Train one trial configuration and return its objective value.

        Args:
            trial: Optuna trial used to sample the hyperparameters.

        Returns:
            ``result["best_metric"]`` from ``train_loop``. If training fails,
            a fixed poor score is returned instead (-1.0 when maximizing,
            1.0 when minimizing).

        Raises:
            optuna.TrialPruned: When the pruner decides to stop the trial.
        """
        # Generate trial configuration
        trial_config = HPOSearchSpace.suggest_hyperparameters(trial, self.base_config)

        # Adjust settings for HPO (shorter training)
        trial_config.mlflow.nested = True
        trial_config.training.early_stopping.patience = max(
            2, trial_config.training.early_stopping.patience // 2
        )
        trial_config.training.max_epochs = min(
            trial_config.training.max_epochs, 20  # Limit epochs for HPO
        )

        # Disable auto-resume for individual trials
        trial_config.training.auto_resume = False

        # Run training with trial configuration
        with mlflow.start_run(run_name=f"trial_{trial.number}", nested=True):
            try:
                result = train_loop(trial_config)
                metric = result["best_metric"]

                # Log trial results
                mlflow.log_metric("objective", metric, step=trial.number)
                mlflow.log_params({f"trial_{k}": v for k, v in trial.params.items()})

                # The metric is only known once training has finished, so it is
                # reported a single time at the final epoch index.
                trial.report(metric, step=trial_config.training.max_epochs)

                if trial.should_prune():
                    raise optuna.TrialPruned()

                return metric

            except optuna.TrialPruned:
                # TrialPruned derives from Exception; without this clause the
                # handler below would swallow it and record the pruned trial as
                # a completed one with a fake score.
                raise
            except Exception as e:
                print(f"Trial {trial.number} failed: {e}")
                # Return a poor score for failed trials so the study keeps going.
                # NOTE(review): -1.0 / 1.0 is only "poor" if the metric lives in
                # a range such as [0, 1] — confirm against train_loop.
                return -1.0 if self.base_config.hpo.direction == "maximize" else 1.0

    def run_optimization(self, n_trials: Optional[int] = None, timeout: Optional[int] = None) -> optuna.Study:
        """Run the optimization until the trial budget or timeout is reached.

        Args:
            n_trials: Total trial budget for the study; defaults to
                ``base_config.hpo.n_trials``. Trials already stored in the
                study (in any state) count against it.
            timeout: Time limit in seconds; defaults to
                ``base_config.hpo.timeout``. A falsy value means no limit.

        Returns:
            The study. Interruptions and errors raised during optimization are
            printed rather than propagated, and the HPO state is saved either way.
        """
        if self.study is None:
            self.create_or_load_study()

        n_trials = n_trials or self.base_config.hpo.n_trials
        timeout = timeout or self.base_config.hpo.timeout

        # Calculate remaining trials
        completed_trials = len(self.study.trials)
        remaining_trials = max(0, n_trials - completed_trials)

        if remaining_trials == 0:
            print(f"‚úÖ Study already completed ({completed_trials}/{n_trials} trials)")
            return self.study

        print(f"üöÄ Starting HPO optimization")
        print(f"   Remaining trials: {remaining_trials}")
        print(f"   Timeout: {timeout} seconds" if timeout else "   No timeout")

        # Setup MLflow
        mlflow.set_tracking_uri(self.base_config.mlflow.tracking_uri)
        mlflow.set_experiment(self.base_config.mlflow.experiment_name)

        # Progress tracking
        self.progress_callback = TqdmCallback(
            n_trials=remaining_trials,
            desc=f"HPO ({self.study_name})"
        )

        try:
            with mlflow.start_run(run_name=f"hpo_{self.study_name}"):
                # Log HPO configuration
                hpo_params = {
                    "study_name": self.study_name,
                    "n_trials": n_trials,
                    "direction": self.base_config.hpo.direction,
                    "sampler": "TPESampler",
                    "pruner": "MedianPruner"
                }
                mlflow.log_params(hpo_params)

                # Run optimization
                self.study.optimize(
                    self.objective,
                    n_trials=remaining_trials,
                    timeout=timeout,
                    n_jobs=self.base_config.hpo.n_jobs,
                    callbacks=[self.progress_callback],
                )

                # Log final results. ``best_trial`` raises ValueError (it is not
                # falsy) when no trial has completed, e.g. if all were pruned.
                try:
                    best_trial = self.study.best_trial
                except ValueError:
                    best_trial = None

                if best_trial is not None:
                    mlflow.log_metric("best_value", best_trial.value)
                    mlflow.log_params({f"best_{k}": v for k, v in best_trial.params.items()})

        except KeyboardInterrupt:
            print("\n‚èπÔ∏è  HPO interrupted by user")
        except Exception as e:
            print(f"\n‚ùå HPO failed: {e}")
        finally:
            if self.progress_callback:
                self.progress_callback.close()

            # Save HPO state
            self.hpo_checkpoint_manager.save_hpo_state(
                self.study, asdict(self.base_config),
                notes=f"HPO run completed with {len(self.study.trials)} trials"
            )

        return self.study

print("‚úÖ Enhanced HPO manager defined!")

## Progress Tracking

Enhanced progress tracking for HPO with real-time updates.

In [None]:
class TqdmCallback:
    """Callback for progress tracking with tqdm.

    An instance is passed in the ``callbacks`` list of ``study.optimize`` and
    is invoked once after every finished trial.
    """

    def __init__(self, n_trials: int, desc: str = "HPO"):
        """Remember the bar settings; the bar itself is created lazily.

        Args:
            n_trials: Number of trials the bar should count up to.
            desc: Label shown in front of the bar.
        """
        self.n_trials = n_trials
        self.desc = desc
        self.pbar = None
        self.best_value = None

    def __call__(self, study: "optuna.Study", trial: "optuna.trial.FrozenTrial") -> None:
        """Advance the bar by one trial and refresh the status fields.

        Args:
            study: The study being optimized.
            trial: The trial that has just finished.
        """
        if self.pbar is None:
            self.pbar = tqdm(total=self.n_trials, desc=self.desc)

        # Update progress
        self.pbar.update(1)

        # ``best_value`` raises ValueError while no trial has completed (for
        # example when the first trials are pruned). An exception escaping a
        # callback would abort the whole optimization, so it is handled here.
        try:
            self.best_value = study.best_value
        except ValueError:
            pass

        status = {"trial": trial.number}
        if self.best_value is not None:
            status["best"] = f"{self.best_value:.4f}"
        status["state"] = trial.state.name
        if trial.value is not None:
            status["current"] = f"{trial.value:.4f}"

        self.pbar.set_postfix(status)

    def close(self):
        """Close the progress bar if one was created."""
        # Compare against None explicitly: the truth value of a tqdm bar
        # depends on its total, so a plain truthiness test could skip the close.
        if self.pbar is not None:
            self.pbar.close()

print("‚úÖ Progress tracking defined!")

## HPO Configuration Selection

Interactive selection of HPO configuration.

In [None]:
def create_hpo_config_selector():
    """Create an interactive HPO configuration selector.

    Returns:
        A widget box with the selection controls, or ``None`` when
        ``config_manager.list_configs()`` reports no configurations.

    Clicking "Setup HPO" loads the chosen base configuration, applies the
    widget values to its ``hpo`` section and publishes the result through the
    globals ``current_hpo_config`` and ``current_hpo_manager``.
    """

    available_configs = config_manager.list_configs()

    if not available_configs:
        print("No configurations found. Please run the Configuration Management notebook first.")
        return None

    config_dropdown = widgets.Dropdown(
        options=available_configs,
        value=available_configs[0],  # non-empty is guaranteed by the guard above
        description='Base Config:'
    )

    study_name = widgets.Text(
        value='hpo_study',
        description='Study Name:'
    )

    n_trials = widgets.IntSlider(
        value=100,
        min=10,
        max=1000,
        step=10,
        description='Trials:'
    )

    n_jobs = widgets.IntSlider(
        value=1,
        min=1,
        max=8,
        step=1,
        description='Parallel Jobs:'
    )

    # A falsy value (0) is turned into "no timeout" by the click handler.
    timeout = widgets.IntText(
        value=None,
        description='Timeout (s):',
        placeholder='None'
    )

    auto_resume = widgets.Checkbox(
        value=True,
        description='Auto Resume'
    )

    load_button = widgets.Button(
        description='Setup HPO',
        button_style='success'
    )

    output = widgets.Output()

    def on_load_clicked(b):
        """Apply the widget values and build the HPO manager."""
        # The result is published through module-level names so other cells
        # can pick it up.
        global current_hpo_config, current_hpo_manager

        with output:
            output.clear_output()

            try:
                # Load base configuration
                base_config = config_manager.load_config(config_dropdown.value)

                # Update HPO settings
                base_config.hpo.study_name = study_name.value
                base_config.hpo.n_trials = n_trials.value
                base_config.hpo.n_jobs = n_jobs.value
                base_config.hpo.timeout = timeout.value if timeout.value else None
                base_config.hpo.auto_resume = auto_resume.value

                current_hpo_config = base_config
                current_hpo_manager = EnhancedHPOManager(base_config, study_name.value)

                print(f"‚úÖ HPO configuration setup complete!")
                print(f"   Base config: {config_dropdown.value}")
                print(f"   Study name: {study_name.value}")
                print(f"   Trials: {n_trials.value}")
                print(f"   Parallel jobs: {n_jobs.value}")
                print(f"   Auto-resume: {auto_resume.value}")

                # Check for existing study
                existing_studies = hpo_checkpoint_manager.list_hpo_studies()
                if not existing_studies.empty:
                    matching_studies = existing_studies[existing_studies['study_name'] == study_name.value]
                    if not matching_studies.empty:
                        study_info = matching_studies.iloc[0]
                        print(f"\nüîÑ Found existing study:")
                        print(f"   Completed trials: {study_info['n_trials']}")
                        # Test for "missing" explicitly: a plain truthiness
                        # check hides a legitimate best value of 0.0 and lets
                        # NaN through as if it were a number.
                        best_value = study_info['best_value']
                        if pd.notna(best_value):
                            print(f"   Best value: {best_value:.4f}")
                        else:
                            print("   No best value yet")
                        print(f"   Last updated: {study_info['last_updated']}")

            except Exception as e:
                print(f"‚ùå Error setting up HPO: {e}")

    load_button.on_click(on_load_clicked)

    layout = widgets.VBox([
        widgets.HTML("<h3>HPO Configuration</h3>"),
        config_dropdown,
        study_name,
        widgets.HBox([n_trials, n_jobs]),
        widgets.HBox([timeout, auto_resume]),
        load_button,
        output
    ])

    return layout

# Display HPO configuration selector
hpo_config_selector = create_hpo_config_selector()
if hpo_config_selector:
    display(hpo_config_selector)
else:
    # Fallback when no saved configuration exists: use a default configuration
    current_hpo_config = ExperimentConfig()
    current_hpo_manager = EnhancedHPOManager(current_hpo_config)
    print("Using default HPO configuration")

## HPO Execution

Execute the hyperparameter optimization.

In [None]:
def start_hpo_optimization():
    """Start the HPO optimization process and print a summary.

    Uses the global ``current_hpo_manager`` created by the configuration
    selection cell.

    Returns:
        The study after optimization, or ``None`` when no manager is loaded,
        the run is interrupted, or an error occurs (the error is printed
        together with its traceback).
    """
    from collections import Counter

    # Check if HPO configuration is loaded
    manager = globals().get('current_hpo_manager')
    if manager is None:
        print("‚ùå No HPO configuration loaded. Please run the HPO configuration selection cell first.")
        return

    try:
        print(f"\nüöÄ Starting HPO optimization")
        print("=" * 60)

        # Create or load study
        manager.create_or_load_study()

        # Run optimization
        study = manager.run_optimization()

        print("\n" + "=" * 60)
        print("üéâ HPO optimization completed!")

        # ``best_trial`` raises ValueError (it is not falsy) when no trial has
        # completed; handle it so that case is reported instead of being
        # treated as a failed run.
        try:
            best_trial = study.best_trial
        except ValueError:
            best_trial = None

        # Display results
        if best_trial is not None:
            print(f"\nüèÜ Best Trial Results:")
            print(f"   Trial number: {best_trial.number}")
            print(f"   Best value: {study.best_value:.4f}")
            print(f"   Best parameters:")
            for key, value in study.best_params.items():
                print(f"     {key}: {value}")
        else:
            print("\n‚ö†Ô∏è  No successful trials completed")

        # Display study statistics
        print(f"\nüìä Study Statistics:")
        print(f"   Total trials: {len(study.trials)}")

        # Count trials by state
        state_counts = Counter(trial.state for trial in study.trials)
        for state, count in state_counts.items():
            print(f"   {state.name}: {count}")

        return study

    except KeyboardInterrupt:
        print("\n‚èπÔ∏è  HPO interrupted by user")
        print("   Study state has been saved and can be resumed")
    except Exception as e:
        print(f"\n‚ùå HPO failed with error: {e}")
        import traceback
        traceback.print_exc()

def on_hpo_clicked(b):
    """Click handler for the start button; the button argument is not used."""
    start_hpo_optimization()


# Button that launches the optimization when pressed
hpo_button = widgets.Button(
    layout=widgets.Layout(height='40px', width='200px'),
    button_style='warning',
    description='üîç Start HPO',
)
hpo_button.on_click(on_hpo_clicked)

print("Click the button below to start HPO:")
display(hpo_button)

## HPO Visualization and Analysis

Comprehensive visualization and analysis of HPO results.

In [None]:
def create_hpo_analysis_dashboard():
    """Create an interactive HPO analysis dashboard.

    Returns:
        A widget box with a study selector and three actions (show results,
        export best configuration, list studies), or ``None`` when
        ``hpo_checkpoint_manager.list_hpo_studies()`` returns no studies.
    """

    # Get available HPO studies. This snapshot is taken once, when the
    # dashboard is built; the dropdown and the "Show Studies" table use it.
    studies_df = hpo_checkpoint_manager.list_hpo_studies()

    if studies_df.empty:
        print("No HPO studies found. Run HPO optimization first.")
        return

    study_names = studies_df['study_name'].tolist()

    study_dropdown = widgets.Dropdown(
        options=study_names,
        value=study_names[0],
        description='Study:'
    )

    plot_button = widgets.Button(
        description='üìä Show Results',
        button_style='info'
    )

    export_button = widgets.Button(
        description='üíæ Export Best Config',
        button_style='success'
    )

    studies_button = widgets.Button(
        description='üìã Show Studies',
        button_style='info'
    )

    output = widgets.Output()

    def on_plot_clicked(b):
        """Print a text summary of the selected study."""
        with output:
            output.clear_output()
            try:
                hpo_state, config = hpo_checkpoint_manager.load_hpo_state(study_dropdown.value)

                print(f"üìä HPO Results for {study_dropdown.value}:")
                print(f"   Total trials: {hpo_state.n_trials_completed}")
                # Compare against None: a best value of exactly 0.0 is valid
                # and must not be reported as missing.
                if hpo_state.best_value is not None:
                    print(f"   Best value: {hpo_state.best_value:.4f}")
                else:
                    print("   No best value")

                if hpo_state.best_trial:
                    print(f"\nüèÜ Best Trial Parameters:")
                    best_params = hpo_state.best_trial.get('params', {})
                    for key, value in best_params.items():
                        print(f"   {key}: {value}")

                # Show trial statistics
                if hpo_state.trials_history:
                    completed_trials = [t for t in hpo_state.trials_history if t.get('value') is not None]
                    failed_trials = [t for t in hpo_state.trials_history if t.get('state') == 'FAIL']
                    pruned_trials = [t for t in hpo_state.trials_history if t.get('state') == 'PRUNED']

                    print(f"\nüìà Trial Statistics:")
                    print(f"   Completed: {len(completed_trials)}")
                    print(f"   Failed: {len(failed_trials)}")
                    print(f"   Pruned: {len(pruned_trials)}")

                    if completed_trials:
                        values = [t['value'] for t in completed_trials]
                        # NOTE(review): "Best" = max and "Worst" = min assumes a
                        # maximize study; the labels are swapped for minimize.
                        print(f"   Best value: {max(values):.4f}")
                        print(f"   Worst value: {min(values):.4f}")
                        print(f"   Mean value: {np.mean(values):.4f}")
                        print(f"   Std value: {np.std(values):.4f}")

            except Exception as e:
                print(f"Error loading HPO results: {e}")

    def on_export_clicked(b):
        """Save the selected study's stored configuration under a new name."""
        with output:
            output.clear_output()
            try:
                hpo_state, config = hpo_checkpoint_manager.load_hpo_state(study_dropdown.value)

                if hpo_state.best_trial:
                    # NOTE(review): this rebuilds the configuration stored with
                    # the study; the best trial's parameters are printed below
                    # but are not written into the saved configuration.
                    best_config = ExperimentConfig(**config)

                    print(f"‚úÖ Best configuration from {study_dropdown.value}:")
                    # Guard the format spec so a missing value cannot abort the
                    # export before the configuration is saved.
                    if hpo_state.best_value is not None:
                        print(f"   Best value: {hpo_state.best_value:.4f}")

                    # Save as new configuration
                    config_name = f"hpo_best_{study_dropdown.value}"
                    config_manager.save_config(best_config, config_name)
                    print(f"\nüíæ Saved as configuration: {config_name}")

                    # Display best parameters
                    best_params = hpo_state.best_trial.get('params', {})
                    print(f"\nüèÜ Best parameters:")
                    for key, value in best_params.items():
                        print(f"   {key}: {value}")
                else:
                    print("No best trial found in study")

            except Exception as e:
                print(f"Error exporting configuration: {e}")

    def on_studies_clicked(b):
        """Show the table of studies captured when the dashboard was built."""
        with output:
            output.clear_output()
            print("Available HPO Studies:")
            display(studies_df)

    plot_button.on_click(on_plot_clicked)
    export_button.on_click(on_export_clicked)
    studies_button.on_click(on_studies_clicked)

    dashboard = widgets.VBox([
        widgets.HTML("<h3>HPO Analysis Dashboard</h3>"),
        study_dropdown,
        widgets.HBox([plot_button, export_button, studies_button]),
        output
    ])

    return dashboard

# Display HPO analysis dashboard
print("\nHPO Analysis Dashboard:")
hpo_analysis_dashboard = create_hpo_analysis_dashboard()
if hpo_analysis_dashboard:
    display(hpo_analysis_dashboard)

## HPO Utilities

Additional utilities for HPO management.

In [None]:
def compare_hpo_studies(study_names: List[str], direction: str = "maximize"):
    """Compare multiple HPO studies in one table, best study first.

    Args:
        study_names: Names of the studies to load and compare.
        direction: ``"maximize"`` (default) ranks the highest ``best_value``
            first; ``"minimize"`` ranks the lowest first.

    Returns:
        A DataFrame with one row per study that could be loaded, or an empty
        DataFrame when none could. Studies that fail to load are reported and
        skipped.

    Raises:
        ValueError: If ``direction`` is not ``"maximize"`` or ``"minimize"``.
    """
    if direction not in ("maximize", "minimize"):
        raise ValueError(f"direction must be 'maximize' or 'minimize', got {direction!r}")

    comparison_data = []

    for study_name in study_names:
        try:
            hpo_state, _ = hpo_checkpoint_manager.load_hpo_state(study_name)

            comparison_data.append({
                'study_name': study_name,
                'n_trials': hpo_state.n_trials_completed,
                'best_value': hpo_state.best_value,
                'created_at': hpo_state.created_at,
                'last_updated': hpo_state.last_updated
            })
        except Exception as e:
            # Best effort: one unreadable study must not hide the others.
            print(f"Error loading study {study_name}: {e}")

    if not comparison_data:
        print("No valid studies found for comparison")
        return pd.DataFrame()

    # Rows without a best value sort last in either direction (pandas default).
    df = pd.DataFrame(comparison_data).sort_values(
        'best_value', ascending=(direction == "minimize")
    )

    print("üèÜ HPO Studies Comparison:")
    display(df)

    return df

def cleanup_hpo_studies(keep_best_n: int = 3, direction: str = "maximize"):
    """Delete stored HPO studies, keeping only the best N.

    Args:
        keep_best_n: Number of studies to keep; must not be negative.
        direction: ``"maximize"`` (default) treats the highest ``best_value``
            as best; ``"minimize"`` treats the lowest as best. Pass the
            direction the studies were optimized with, otherwise the best
            studies are the ones that get deleted.

    Raises:
        ValueError: If ``keep_best_n`` is negative or ``direction`` is not
            ``"maximize"`` or ``"minimize"``.
    """
    # Validate before touching anything: deletion cannot be undone here.
    if keep_best_n < 0:
        raise ValueError(f"keep_best_n must be >= 0, got {keep_best_n}")
    if direction not in ("maximize", "minimize"):
        raise ValueError(f"direction must be 'maximize' or 'minimize', got {direction!r}")

    studies_df = hpo_checkpoint_manager.list_hpo_studies()

    if studies_df.empty:
        print("No HPO studies to clean up")
        return

    # Best study first; studies without a best value sort last (pandas
    # default) and are therefore the first candidates for deletion.
    ranked = studies_df.sort_values('best_value', ascending=(direction == "minimize"))

    if len(ranked) <= keep_best_n:
        print(f"Only {len(ranked)} studies found, no cleanup needed")
        return

    print(f"\nüßπ Cleaning up HPO studies (keeping best {keep_best_n}):")
    for study_name in ranked['study_name'].iloc[keep_best_n:]:
        hpo_checkpoint_manager.delete_hpo_study(study_name)
        print(f"   Deleted: {study_name}")

# Closing usage instructions, written with a single print call.
print(
    "\n‚úÖ HPO notebook setup complete!",
    "\nTo run HPO:",
    "1. Select a base configuration using the HPO configuration selector above",
    "2. Click the 'Start HPO' button",
    "3. Monitor progress and analyze results using the analysis dashboard",
    "\nHPO will automatically resume from previous studies if interrupted.",
    sep="\n",
)