# In [None]:
"""
BatteryMind - Reinforcement Learning Hyperparameter Tuning

Advanced hyperparameter optimization for battery charging optimization RL agents.
This notebook implements comprehensive hyperparameter tuning for PPO, DDPG, SAC, and DQN 
algorithms using Optuna for automated optimization.

Features:
- Multi-algorithm hyperparameter optimization
- Bayesian optimization with Optuna
- Parallel optimization across multiple trials
- Performance tracking and visualization
- Automated model selection based on performance metrics
- Physics-informed constraint validation

Author: BatteryMind Development Team
Version: 1.0.0
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import optuna
from optuna.visualization import plot_optimization_history, plot_param_importances
import logging
import warnings
from typing import Dict, List, Tuple, Any
import json
import pickle
from datetime import datetime
import multiprocessing as mp
from concurrent.futures import ThreadPoolExecutor

# BatteryMind imports
import sys
sys.path.append('../../')
from reinforcement_learning.agents.charging_agent import ChargingAgent
from reinforcement_learning.agents.thermal_agent import ThermalAgent
from reinforcement_learning.environments.battery_env import BatteryEnvironment
from reinforcement_learning.environments.charging_env import ChargingEnvironment
from reinforcement_learning.algorithms.ppo import PPOAlgorithm
from reinforcement_learning.algorithms.ddpg import DDPGAlgorithm
from reinforcement_learning.algorithms.sac import SACAlgorithm
from reinforcement_learning.algorithms.dqn import DQNAlgorithm
from reinforcement_learning.training.rl_trainer import RLTrainer
from training_data.generators.synthetic_generator import SyntheticDataGenerator
from training_data.generators.physics_simulator import BatteryPhysicsSimulator

# Suppress every Python warning for the remainder of the session.
warnings.filterwarnings('ignore')

# Root logging at INFO; `logger` is the module-level logger used by the
# optimizer class further down.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Plot appearance: apply the matplotlib style sheet first, then the seaborn
# palette (both touch the colour cycle, so the order is kept as-is).
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Start-up banner, followed by one blank line.
print(
    "🔋 BatteryMind RL Hyperparameter Tuning Notebook",
    "=" * 60,
    f"Notebook initialized at: {datetime.now()}",
    "",
    sep="\n",
)

# Run-level settings handed to RLHyperparameterOptimizer.
# NOTE: 'pruner' and 'sampler' are descriptive labels only; the optimizer's
# optimize() method constructs its sampler and pruner directly.
OPTIMIZATION_CONFIG = dict(
    n_trials=100,                                   # trials passed to study.optimize
    n_jobs=mp.cpu_count(),                          # one worker per reported CPU
    study_name='batterymind_rl_optimization',
    optimization_direction='maximize',
    pruner='MedianPruner',
    sampler='TPESampler',
    algorithms=['PPO', 'DDPG', 'SAC', 'DQN'],       # choices for the 'algorithm' parameter
    environments=['BatteryEnvironment', 'ChargingEnvironment'],
    evaluation_episodes=10,                         # episodes per evaluate_agent call
    max_training_steps=50000,
    early_stopping_patience=10,
)

# Per-algorithm hyperparameter search spaces.
# Value conventions, as interpreted by suggest_hyperparameters():
#   tuple  -> numeric (low, high) range
#   list   -> categorical choices
#   'auto' -> forwarded to the algorithm unchanged
HYPERPARAMETER_SPACES = dict(
    PPO=dict(
        learning_rate=(1e-5, 1e-2),
        batch_size=[32, 64, 128, 256],
        n_epochs=[3, 5, 10, 20],
        gamma=(0.9, 0.999),
        gae_lambda=(0.9, 0.99),
        clip_range=(0.1, 0.4),
        entropy_coef=(0.0, 0.1),
        vf_coef=(0.1, 1.0),
        max_grad_norm=(0.3, 2.0),
        n_steps=[1024, 2048, 4096],
        target_kl=(0.01, 0.1),
    ),
    DDPG=dict(
        learning_rate=(1e-5, 1e-2),
        batch_size=[64, 128, 256, 512],
        gamma=(0.9, 0.999),
        tau=(0.001, 0.1),
        buffer_size=[100000, 500000, 1000000],
        exploration_noise=(0.1, 0.5),
        policy_noise=(0.1, 0.5),
        noise_clip=(0.1, 1.0),
        policy_freq=[1, 2, 4],
        learning_starts=[1000, 5000, 10000],
    ),
    SAC=dict(
        learning_rate=(1e-5, 1e-2),
        batch_size=[64, 128, 256, 512],
        gamma=(0.9, 0.999),
        tau=(0.001, 0.1),
        buffer_size=[100000, 500000, 1000000],
        target_update_interval=[1, 2, 4],
        target_entropy='auto',
        ent_coef='auto',
        learning_starts=[1000, 5000, 10000],
        train_freq=[1, 4, 8],
    ),
    DQN=dict(
        learning_rate=(1e-5, 1e-2),
        batch_size=[32, 64, 128, 256],
        gamma=(0.9, 0.999),
        buffer_size=[50000, 100000, 500000],
        learning_starts=[1000, 5000, 10000],
        target_update_interval=[1000, 5000, 10000],
        train_freq=[1, 4, 8],
        gradient_steps=[1, 2, 4],
        exploration_fraction=(0.1, 0.5),
        exploration_initial_eps=(0.5, 1.0),
        exploration_final_eps=(0.01, 0.2),
    ),
)

# Search space for the policy/value network shape, shared by all algorithms.
# Lists are categorical choices; the 'dropout' tuple is a (low, high) range.
NETWORK_ARCHITECTURES = dict(
    hidden_layers=[2, 3, 4],
    hidden_units=[64, 128, 256, 512],
    activation=['relu', 'tanh', 'elu'],
    layer_norm=[True, False],
    dropout=(0.0, 0.5),
)

class RLHyperparameterOptimizer:
    """
    Advanced hyperparameter optimizer for RL algorithms in battery management.
    """
    
    def __init__(self, config: Dict[str, Any]):
        """
        Store the run configuration and prepare empty result containers.

        Args:
            config: Optimization settings. Other methods of this class read
                keys such as 'algorithms', 'environments', 'n_trials',
                'n_jobs' and 'evaluation_episodes' from it.
        """
        self.config = config

        # Filled in by optimize(); None / empty until a study has run.
        self.study = None
        self.best_params = {}

        # Bookkeeping containers, created empty here.
        self.optimization_history = []
        self.performance_metrics = {}

        # Zero-argument factories keyed by environment name, so each trial
        # can build its own environment instance on demand.
        self.env_generators = {
            'BatteryEnvironment': lambda: BatteryEnvironment(),
            'ChargingEnvironment': lambda: ChargingEnvironment()
        }

        # Physics simulator instance kept on the optimizer.
        self.physics_sim = BatteryPhysicsSimulator()

        # Accumulated output: one entry per successful trial plus the
        # per-algorithm summaries built after optimization.
        self.results = dict(
            trials=[],
            best_params_per_algorithm={},
            performance_comparison={},
            optimization_plots={},
        )
    
    def suggest_hyperparameters(self, trial: optuna.Trial, algorithm: str) -> Dict[str, Any]:
        """
        Suggest hyperparameters for a given algorithm using Optuna.

        Algorithm-specific parameters are registered with Optuna under the
        name ``"<algorithm>_<param>"``. Several parameters (for example
        ``batch_size`` and ``buffer_size``) have different categorical choices
        per algorithm, and Optuna rejects a parameter name whose categorical
        choices change between trials of one study. The returned dictionary
        still uses the plain parameter names, so ``create_algorithm`` is
        unaffected.

        Args:
            trial: Optuna trial object
            algorithm: RL algorithm name (a key of HYPERPARAMETER_SPACES)

        Returns:
            Dictionary of suggested hyperparameters keyed by plain parameter
            name, plus a 'network_arch' sub-dictionary.

        Raises:
            KeyError: If ``algorithm`` has no entry in HYPERPARAMETER_SPACES.
        """
        space = HYPERPARAMETER_SPACES[algorithm]
        params = {}

        for param_name, param_range in space.items():
            # Namespaced so each algorithm owns its own Optuna distribution.
            optuna_name = f"{algorithm}_{param_name}"

            if isinstance(param_range, tuple):
                # (low, high) range: float bounds -> continuous, else integer.
                if isinstance(param_range[0], float):
                    params[param_name] = trial.suggest_float(optuna_name, *param_range)
                else:
                    params[param_name] = trial.suggest_int(optuna_name, *param_range)
            elif isinstance(param_range, list):
                params[param_name] = trial.suggest_categorical(optuna_name, param_range)
            elif param_range == 'auto':
                # Not searched; forwarded to the algorithm as-is.
                params[param_name] = 'auto'

        # Network architecture uses one search space for every algorithm, so
        # these names can safely stay un-namespaced.
        arch_space = NETWORK_ARCHITECTURES
        params['network_arch'] = {
            'hidden_layers': trial.suggest_categorical('hidden_layers', arch_space['hidden_layers']),
            'hidden_units': trial.suggest_categorical('hidden_units', arch_space['hidden_units']),
            'activation': trial.suggest_categorical('activation', arch_space['activation']),
            'layer_norm': trial.suggest_categorical('layer_norm', arch_space['layer_norm']),
            'dropout': trial.suggest_float('dropout', *arch_space['dropout'])
        }

        return params
    
    def create_algorithm(self, algorithm_name: str, params: Dict[str, Any], env: Any) -> Any:
        """
        Create RL algorithm instance with given hyperparameters.
        
        Args:
            algorithm_name: Name of the algorithm
            params: Hyperparameters
            env: Environment instance
            
        Returns:
            Algorithm instance
        """
        if algorithm_name == 'PPO':
            return PPOAlgorithm(
                env=env,
                learning_rate=params['learning_rate'],
                batch_size=params['batch_size'],
                n_epochs=params['n_epochs'],
                gamma=params['gamma'],
                gae_lambda=params['gae_lambda'],
                clip_range=params['clip_range'],
                entropy_coef=params['entropy_coef'],
                vf_coef=params['vf_coef'],
                max_grad_norm=params['max_grad_norm'],
                n_steps=params['n_steps'],
                target_kl=params['target_kl'],
                network_arch=params['network_arch']
            )
        elif algorithm_name == 'DDPG':
            return DDPGAlgorithm(
                env=env,
                learning_rate=params['learning_rate'],
                batch_size=params['batch_size'],
                gamma=params['gamma'],
                tau=params['tau'],
                buffer_size=params['buffer_size'],
                exploration_noise=params['exploration_noise'],
                policy_noise=params['policy_noise'],
                noise_clip=params['noise_clip'],
                policy_freq=params['policy_freq'],
                learning_starts=params['learning_starts'],
                network_arch=params['network_arch']
            )
        elif algorithm_name == 'SAC':
            return SACAlgorithm(
                env=env,
                learning_rate=params['learning_rate'],
                batch_size=params['batch_size'],
                gamma=params['gamma'],
                tau=params['tau'],
                buffer_size=params['buffer_size'],
                target_update_interval=params['target_update_interval'],
                target_entropy=params['target_entropy'],
                ent_coef=params['ent_coef'],
                learning_starts=params['learning_starts'],
                train_freq=params['train_freq'],
                network_arch=params['network_arch']
            )
        elif algorithm_name == 'DQN':
            return DQNAlgorithm(
                env=env,
                learning_rate=params['learning_rate'],
                batch_size=params['batch_size'],
                gamma=params['gamma'],
                buffer_size=params['buffer_size'],
                learning_starts=params['learning_starts'],
                target_update_interval=params['target_update_interval'],
                train_freq=params['train_freq'],
                gradient_steps=params['gradient_steps'],
                exploration_fraction=params['exploration_fraction'],
                exploration_initial_eps=params['exploration_initial_eps'],
                exploration_final_eps=params['exploration_final_eps'],
                network_arch=params['network_arch']
            )
        else:
            raise ValueError(f"Unknown algorithm: {algorithm_name}")
    
    def evaluate_agent(self, agent: Any, env: Any, n_episodes: int = 10) -> Dict[str, float]:
        """
        Evaluate RL agent performance.
        
        Args:
            agent: Trained RL agent
            env: Environment
            n_episodes: Number of evaluation episodes
            
        Returns:
            Dictionary of performance metrics
        """
        episode_rewards = []
        episode_lengths = []
        battery_health_improvements = []
        energy_efficiency_scores = []
        safety_violations = []
        
        for episode in range(n_episodes):
            obs = env.reset()
            total_reward = 0
            episode_length = 0
            initial_soh = obs.get('soh', 1.0)
            safety_violation_count = 0
            
            while True:
                action = agent.predict(obs)
                obs, reward, done, info = env.step(action)
                
                total_reward += reward
                episode_length += 1
                
                # Check for safety violations
                if info.get('safety_violation', False):
                    safety_violation_count += 1
                
                if done:
                    break
            
            episode_rewards.append(total_reward)
            episode_lengths.append(episode_length)
            
            # Calculate battery health improvement
            final_soh = obs.get('soh', 1.0)
            health_improvement = final_soh - initial_soh
            battery_health_improvements.append(health_improvement)
            
            # Calculate energy efficiency
            energy_efficiency = info.get('energy_efficiency', 0.0)
            energy_efficiency_scores.append(energy_efficiency)
            
            safety_violations.append(safety_violation_count)
        
        return {
            'mean_reward': np.mean(episode_rewards),
            'std_reward': np.std(episode_rewards),
            'mean_episode_length': np.mean(episode_lengths),
            'mean_battery_health_improvement': np.mean(battery_health_improvements),
            'mean_energy_efficiency': np.mean(energy_efficiency_scores),
            'safety_violation_rate': np.mean(safety_violations) / np.mean(episode_lengths),
            'success_rate': np.mean([r > 0 for r in episode_rewards])
        }
    
    def objective(self, trial: optuna.Trial) -> float:
        """
        Objective function for hyperparameter optimization.

        One trial picks an algorithm and an environment, samples
        hyperparameters, trains the agent, evaluates it and returns the
        composite score. Successful trials are appended to
        ``self.results['trials']``.

        Args:
            trial: Optuna trial object

        Returns:
            Objective value (performance metric), or ``-inf`` when the trial
            raised an error.

        Raises:
            optuna.TrialPruned: Propagated untouched so Optuna records the
                trial as pruned instead of as a completed ``-inf`` result.
        """
        try:
            # Select algorithm and environment
            algorithm = trial.suggest_categorical('algorithm', self.config['algorithms'])
            env_name = trial.suggest_categorical('environment', self.config['environments'])

            # Create environment
            env = self.env_generators[env_name]()

            # Suggest hyperparameters
            params = self.suggest_hyperparameters(trial, algorithm)

            # Create algorithm instance
            agent = self.create_algorithm(algorithm, params, env)

            # Train agent
            trainer = RLTrainer(agent, env)
            training_metrics = trainer.train(
                max_steps=self.config['max_training_steps'],
                early_stopping_patience=self.config['early_stopping_patience']
            )

            # Evaluate agent
            eval_metrics = self.evaluate_agent(agent, env, self.config['evaluation_episodes'])

            # Calculate composite objective score
            objective_score = self._calculate_objective_score(eval_metrics, training_metrics)

            # Store trial results
            self.results['trials'].append({
                'trial_number': trial.number,
                'algorithm': algorithm,
                'environment': env_name,
                'params': params,
                'eval_metrics': eval_metrics,
                'training_metrics': training_metrics,
                'objective_score': objective_score
            })

            return objective_score

        except optuna.TrialPruned:
            # Pruning is Optuna control flow, not a failure of the trial.
            raise
        except Exception as e:
            # Keep the study running, but log the full traceback: a bare
            # message is rarely enough to tell why a configuration failed.
            logger.exception("Trial %s failed: %s", trial.number, e)
            return -np.inf
    
    def _calculate_objective_score(self, eval_metrics: Dict[str, float], 
                                 training_metrics: Dict[str, float]) -> float:
        """
        Calculate composite objective score from evaluation and training metrics.
        
        Args:
            eval_metrics: Evaluation metrics
            training_metrics: Training metrics
            
        Returns:
            Composite objective score
        """
        # Weights for different metrics
        weights = {
            'mean_reward': 0.3,
            'battery_health_improvement': 0.25,
            'energy_efficiency': 0.2,
            'safety_violation_rate': -0.15,  # Negative weight (penalty)
            'success_rate': 0.1,
            'training_stability': 0.1
        }
        
        # Normalize metrics to [0, 1] range
        normalized_metrics = {}
        
        # Reward normalization (assuming rewards are in [-1000, 1000] range)
        normalized_metrics['mean_reward'] = (eval_metrics['mean_reward'] + 1000) / 2000
        
        # Battery health improvement (assuming [-0.1, 0.1] range)
        normalized_metrics['battery_health_improvement'] = (
            eval_metrics['mean_battery_health_improvement'] + 0.1
        ) / 0.2
        
        # Energy efficiency (already in [0, 1] range)
        normalized_metrics['energy_efficiency'] = eval_metrics['mean_energy_efficiency']
        
        # Safety violation rate (invert and normalize)
        normalized_metrics['safety_violation_rate'] = 1.0 - min(1.0, eval_metrics['safety_violation_rate'])
        
        # Success rate (already in [0, 1] range)
        normalized_metrics['success_rate'] = eval_metrics['success_rate']
        
        # Training stability (based on reward variance)
        reward_stability = 1.0 / (1.0 + eval_metrics['std_reward'])
        normalized_metrics['training_stability'] = reward_stability
        
        # Calculate weighted sum
        objective_score = sum(
            weights[metric] * normalized_metrics[metric] 
            for metric in weights.keys()
        )
        
        return objective_score
    
    def optimize(self) -> Dict[str, Any]:
        """
        Create an Optuna study, run all trials and summarise the outcome.

        The study uses a TPE sampler seeded with 42 and a median pruner;
        direction, study name, trial count and worker count come from
        ``self.config``.

        Returns:
            The ``self.results`` dictionary after post-processing
        """
        print("🚀 Starting RL Hyperparameter Optimization")
        print(f"Configuration: {self.config}")
        print()

        settings = self.config

        # Build the study.
        direction = settings['optimization_direction']
        study_name = settings['study_name']
        tpe_sampler = optuna.samplers.TPESampler(seed=42)
        median_pruner = optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=10)
        new_study = optuna.create_study(
            direction=direction,
            study_name=study_name,
            sampler=tpe_sampler,
            pruner=median_pruner,
        )

        # Execute the trials.
        new_study.optimize(
            self.objective,
            n_trials=settings['n_trials'],
            n_jobs=settings['n_jobs'],
            show_progress_bar=True,
        )

        # Keep the study and its best parameter set on the instance.
        self.study = new_study
        self.best_params = new_study.best_params

        # Derive the per-algorithm summaries.
        self._process_optimization_results()

        return self.results
    
    def _process_optimization_results(self):
        """Process optimization results and generate insights."""
        # Group results by algorithm
        algorithm_results = {}
        for trial in self.results['trials']:
            algorithm = trial['algorithm']
            if algorithm not in algorithm_results:
                algorithm_results[algorithm] = []
            algorithm_results[algorithm].append(trial)
        
        # Find best parameters for each algorithm
        for algorithm, trials in algorithm_results.items():
            if trials:
                best_trial = max(trials, key=lambda x: x['objective_score'])
                self.results['best_params_per_algorithm'][algorithm] = {
                    'params': best_trial['params'],
                    'performance': best_trial['eval_metrics'],
                    'objective_score': best_trial['objective_score']
                }
        
        # Create performance comparison
        self.results['performance_comparison'] = {
            algorithm: {
                'mean_objective_score': np.mean([t['objective_score'] for t in trials]),
                'std_objective_score': np.std([t['objective_score'] for t in trials]),
                'best_objective_score': max([t['objective_score'] for t in trials]),
                'num_trials': len(trials)
            }
            for algorithm, trials in algorithm_results.items() if trials
        }
    
    def visualize_results(self):
        """Create comprehensive visualization of optimization results."""
        if not self.study:
            print("❌ No optimization results to visualize. Run optimization first.")
            return
        
        # Create figure with subplots
        fig, axes = plt.subplots(2, 3, figsize=(20, 12))
        fig.suptitle('🔋 BatteryMind RL Hyperparameter Optimization Results', fontsize=16)
        
        # Plot 1: Optimization history
        ax1 = axes[0, 0]
        plot_optimization_history(self.study, ax=ax1)
        ax1.set_title('Optimization History')
        ax1.set_xlabel('Trial')
        ax1.set_ylabel('Objective Value')
        
        # Plot 2: Parameter importance
        ax2 = axes[0, 1]
        plot_param_importances(self.study, ax=ax2)
        ax2.set_title('Parameter Importance')
        
        # Plot 3: Algorithm comparison
        ax3 = axes[0, 2]
        algorithms = list(self.results['performance_comparison'].keys())
        scores = [self.results['performance_comparison'][alg]['mean_objective_score'] 
                 for alg in algorithms]
        errors = [self.results['performance_comparison'][alg]['std_objective_score'] 
                 for alg in algorithms]
        
        bars = ax3.bar(algorithms, scores, yerr=errors, capsize=5)
        ax3.set_title('Algorithm Performance Comparison')
        ax3.set_ylabel('Mean Objective Score')
        ax3.tick_params(axis='x', rotation=45)
        
        # Add value labels on bars
        for bar, score in zip(bars, scores):
            height = bar.get_height()
            ax3.text(bar.get_x() + bar.get_width()/2., height,
                    f'{score:.3f}', ha='center', va='bottom')
        
        # Plot 4: Trial distribution by algorithm
        ax4 = axes[1, 0]
        trial_counts = [self.results['performance_comparison'][alg]['num_trials'] 
                       for alg in algorithms]
        
        ax4.pie(trial_counts, labels=algorithms, autopct='%1.1f%%', startangle=90)
        ax4.set_title('Trial Distribution by Algorithm')
        
        # Plot 5: Performance metrics heatmap
        ax5 = axes[1, 1]
        metrics_data = []
        metric_names = ['mean_reward', 'battery_health_improvement', 'energy_efficiency', 
                       'safety_violation_rate', 'success_rate']
        
        for algorithm in algorithms:
            best_params = self.results['best_params_per_algorithm'][algorithm]
            metrics_row = []
            for metric in metric_names:
                if metric == 'safety_violation_rate':
                    # Invert safety violation rate for better visualization
                    metrics_row.append(1.0 - best_params['performance'][metric])
                else:
                    metrics_row.append(best_params['performance'][metric])
            metrics_data.append(metrics_row)
        
        im = ax5.imshow(metrics_data, cmap='RdYlGn', aspect='auto')
        ax5.set_xticks(range(len(metric_names)))
        ax5.set_xticklabels(metric_names, rotation=45, ha='right')
        ax5.set_yticks(range(len(algorithms)))
        ax5.set_yticklabels(algorithms)
        ax5.set_title('Performance Metrics Heatmap')
        
        # Add colorbar
        plt.colorbar(im, ax=ax5, shrink=0.8)
        
        # Plot 6: Hyperparameter correlation
        ax6 = axes[1, 2]
        # Extract numerical hyperparameters for correlation analysis
        param_data = []
        param_names = []
        
        for trial in self.results['trials']:
            if trial['objective_score'] > -np.inf:
                row = []
                for param, value in trial['params'].items():
                    if isinstance(value, (int, float)) and param != 'network_arch':
                        if param not in param_names:
                            param_names.append(param)
                        row.append(value)
                
                if len(row) == len(param_names):
                    param_data.append(row)
        
        if param_data and len(param_names) > 1:
            param_df = pd.DataFrame(param_data, columns=param_names)
            corr_matrix = param_df.corr()
            
            im = ax6.imshow(corr_matrix, cmap='coolwarm', vmin=-1, vmax=1)
            ax6.set_xticks(range(len(param_names)))
            ax6.set_xticklabels(param_names, rotation=45, ha='right')
            ax6.set_yticks(range(len(param_names)))
            ax6.set_yticklabels(param_names)
            ax6.set_title('Hyperparameter Correlation')
            
            # Add correlation values
            for i in range(len(param_names)):
                for j in range(len(param_names)):
                    text = ax6.text(j, i, f'{corr_matrix.iloc[i, j]:.2f}',
                                   ha="center", va="center", color="black")
            
            plt.colorbar(im, ax=ax6, shrink=0.8)
        else:
            ax6.text(0.5, 0.5, 'Insufficient data for correlation analysis', 
                    ha='center', va='center', transform=ax6.transAxes)
            ax6.set_title('Hyperparameter Correlation')
        
        plt.tight_layout()
        plt.show()
    
    def generate_report(self) -> str:
        """Generate comprehensive optimization report."""
        if not self.results['trials']:
            return "❌ No optimization results available. Run optimization first."
        
        report = []
        report.append("🔋 BatteryMind RL Hyperparameter Optimization Report")
        report.append("=" * 60)
        report.append(f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        report.append(f"Total trials: {len(self.results['trials'])}")
        report.append(f"Best objective score: {self.study.best_value:.4f}")
        report.append()
        
        # Algorithm performance summary
        report.append("📊 Algorithm Performance Summary")
        report.append("-" * 40)
        performance_data = []
        
        for algorithm, perf in self.results['performance_comparison'].items():
            performance_data.append([
                algorithm,
                f"{perf['mean_objective_score']:.4f}",
                f"{perf['std_objective_score']:.4f}",
                f"{perf['best_objective_score']:.4f}",
                str(perf['num_trials'])
            ])
        
        # Create performance table
        headers = ['Algorithm', 'Mean Score', 'Std Score', 'Best Score', 'Trials']
        col_widths = [max(len(str(row[i])) for row in [headers] + performance_data) 
                     for i in range(len(headers))]
        
        # Print table header
        header_row = " | ".join(headers[i].ljust(col_widths[i]) for i in range(len(headers)))
        report.append(header_row)
        report.append("-" * len(header_row))
        
        # Print table rows
        for row in performance_data:
            data_row = " | ".join(str(row[i]).ljust(col_widths[i]) for i in range(len(row)))
            report.append(data_row)
        
        report.append()
        
        # Best parameters for each algorithm
        report.append("🏆 Best Parameters by Algorithm")
        report.append("-" * 40)
        
        for algorithm, best_data in self.results['best_params_per_algorithm'].items():
            report.append(f"\n{algorithm}:")
            report.append(f"  Objective Score: {best_data['objective_score']:.4f}")
            report.append(f"  Mean Reward: {best_data['performance']['mean_reward']:.2f}")
            report.append(f"  Battery Health Improvement: {best_data['performance']['mean_battery_health_improvement']:.4f}")
            report.append(f"  Energy Efficiency: {best_data['performance']['mean_energy_efficiency']:.4f}")
            report.append(f"  Safety Violation Rate: {best_data['performance']['safety_violation_rate']:.4f}")
            report.append(f"  Success Rate: {best_data['performance']['success_rate']:.4f}")
            
            report.append("  Key Hyperparameters:")
            for param, value in best_data['params'].items():
                if param != 'network_arch':
                    report.append(f"    {param}: {value}")
        
        report.append()
        
        # Optimization insights
        report.append("💡 Optimization Insights")
        report.append("-" * 40)
        
        # Best performing algorithm
        best_algorithm = max(self.results['performance_comparison'].keys(),
                           key=lambda x: self.results['performance_comparison'][x]['best_objective_score'])
        report.append(f"• Best performing algorithm: {best_algorithm}")
        
        # Parameter importance (if available)
        if self.study:
            importances = optuna.importance.get_param_importances(self.study)
            if importances:
                report.append("• Most important hyperparameters:")
                for param, importance in list(importances.items())[:5]:
                    report.append(f"  - {param}: {importance:.4f}")
        
        report.append()
        
        # Recommendations
        report.append("🎯 Recommendations")
        report.append("-" * 40)
        report.append(f"• Use {best_algorithm} algorithm for best performance")
        report.append("• Focus on tuning the most important hyperparameters")
        report.append("• Consider ensemble methods combining top algorithms")
        report.append("• Validate results on real-world battery data")
        report.append("• Monitor safety constraints in production deployment")
        
        return "\n".join(report)
    
    def save_results(self, filepath: str):
        """Save optimization results to file."""
        # Prepare serializable results
        serializable_results = {
            'config': self.config,
            'best_params': self.best_params,
            'results': self.results,
            'study_trials': [
                {
                    'number': trial.number,
                    'value': trial.value,
                    'params': trial.params,
                    'state': trial.state.name
                }
                for trial in self.study.trials
            ] if self.study else []
        }
        
        # Save to JSON
        with open(filepath, 'w') as f:
            json.dump(serializable_results, f, indent=2, default=str)
        
        print(f"✅ Results saved to {filepath}")

# Initialize and run optimization
print("🔧 Initializing RL Hyperparameter Optimizer...")
optimizer = RLHyperparameterOptimizer(OPTIMIZATION_CONFIG)

# Run optimization
print("🚀 Starting optimization process...")
results = optimizer.optimize()

# Generate and display report
print("\n" + "="*60)
print(optimizer.generate_report())
print("="*60)

# Visualize results
print("\n📊 Generating visualization...")
optimizer.visualize_results()

# Save results
results_filename = f"rl_hyperparameter_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
optimizer.save_results(results_filename)

print("\n✅ Hyperparameter optimization completed!")
print(f"📁 Results saved to: {results_filename}")

# The per-algorithm comparison is empty when every trial failed; max() over
# an empty mapping would raise, so only report a winner when there is one.
performance_comparison = results['performance_comparison']
if performance_comparison:
    best_algorithm = max(
        performance_comparison,
        key=lambda name: performance_comparison[name]['best_objective_score'],
    )
    print(f"🏆 Best algorithm: {best_algorithm}")
    print(f"🎯 Best objective score: {optimizer.study.best_value:.4f}")
else:
    print("⚠️ No trial completed successfully; no best algorithm to report.")
