# In [None]:
"""
BatteryMind - Transformer Hyperparameter Tuning

Advanced hyperparameter optimization for transformer-based battery health prediction models.
This notebook provides comprehensive tuning capabilities using Optuna, Ray Tune, and custom 
optimization strategies specifically designed for battery domain applications.

Features:
- Multi-objective optimization for accuracy and efficiency
- Bayesian optimization with domain-specific priors
- Automated hyperparameter search with early stopping
- Resource-aware optimization for different hardware constraints
- Integration with distributed training frameworks
- Custom battery-specific evaluation metrics

Author: BatteryMind Development Team
Version: 1.0.0
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Dict, List, Optional, Tuple, Any
import logging
import json
import yaml
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# Optimization libraries
import optuna
from optuna.pruners import MedianPruner, HyperbandPruner
from optuna.samplers import TPESampler, CmaEsSampler
from optuna.integration import PyTorchLightningPruningCallback
import ray
from ray import tune
from ray.tune.schedulers import ASHAScheduler, PopulationBasedTraining
from ray.tune.suggest.optuna import OptunaSearch

# ML libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger, WandbLogger

# Transformers and custom modules
from transformers import AutoTokenizer, AutoModel, AutoConfig
from transformers import TrainingArguments, Trainer
from transformers.optimization import get_linear_schedule_with_warmup

# BatteryMind specific imports
import sys
sys.path.append('../../')
from transformers.battery_health_predictor.model import BatteryHealthTransformer
from transformers.battery_health_predictor.trainer import BatteryTransformerTrainer
from transformers.battery_health_predictor.data_loader import BatteryDataLoader
from transformers.battery_health_predictor.preprocessing import BatteryDataPreprocessor
from training_data.synthetic_datasets import generate_battery_telemetry_data
from evaluation.metrics.accuracy_metrics import BatteryHealthMetrics
from utils.config_parser import ConfigParser
from utils.logging_utils import setup_logging

# Setup logging
logger = setup_logging(__name__)

class BatteryTransformerOptimizer:
    """
    Advanced hyperparameter optimizer for battery transformer models.
    """
    
    def __init__(
        self,
        data_config: Dict[str, Any],
        base_model_config: Dict[str, Any],
        optimization_config: Dict[str, Any],
    ):
        """
        Store the configurations, build the data helpers, load the dataset
        splits and reset the search bookkeeping.

        Args:
            data_config: Settings handed to the data loader and preprocessor;
                also read when generating data and sizing the model
            base_model_config: Baseline model settings that trial parameters
                are layered on top of
            optimization_config: Search settings (e.g. 'max_epochs',
                'timeout_hours')
        """
        # Search bookkeeping: nothing has been evaluated yet.
        self.optimization_history = []
        self.best_score = float('-inf')
        self.best_params = None

        # Keep the raw configuration dicts around for later stages.
        self.optimization_config = optimization_config
        self.base_model_config = base_model_config
        self.data_config = data_config

        # Data helpers; the preprocessor must exist before _load_datasets runs.
        self.data_loader = BatteryDataLoader(data_config)
        self.preprocessor = BatteryDataPreprocessor(data_config)
        self.metrics = BatteryHealthMetrics()

        splits = self._load_datasets()
        self.train_data, self.val_data, self.test_data = splits

        logger.info("BatteryTransformerOptimizer initialized")
    
    def _load_datasets(self) -> Tuple[Dataset, Dataset, Dataset]:
        """
        Generate synthetic telemetry, preprocess it and cut it into splits.

        The preprocessed data is sliced positionally (no shuffling happens
        here): the first 70% is training, the next 20% validation and
        whatever remains is the test split.

        Returns:
            Tuple of (train, validation, test) slices of the preprocessed data
        """
        logger.info("Loading datasets for hyperparameter optimization...")

        # Generation settings fall back to 100 batteries over 30 days.
        battery_count = self.data_config.get('num_batteries', 100)
        day_count = self.data_config.get('duration_days', 30)
        raw_frame = generate_battery_telemetry_data(
            num_batteries=battery_count,
            duration_days=day_count
        )

        processed = self.preprocessor.preprocess(raw_frame)

        # Split boundaries expressed as absolute positions.
        total = len(processed)
        train_end = int(0.7 * total)
        val_end = train_end + int(0.2 * total)

        return processed[:train_end], processed[train_end:val_end], processed[val_end:]
    
    def _create_model_with_params(self, trial_params: Dict[str, Any]) -> BatteryHealthTransformer:
        """
        Build a model from the base configuration overlaid with trial values.

        Args:
            trial_params: Values that override or extend the base model
                configuration (the base dict itself is left untouched)

        Returns:
            A BatteryHealthTransformer built from the merged configuration
        """
        # Work on a shallow copy so repeated trials never leak into the base config.
        merged_config = dict(self.base_model_config)
        merged_config.update(trial_params)

        feature_count = self.data_config['num_features']
        class_count = self.data_config['num_classes']
        return BatteryHealthTransformer(
            config=merged_config,
            num_features=feature_count,
            num_classes=class_count
        )
    
    def _objective_function(self, trial) -> float:
        """
        Objective function for Optuna optimization.

        Samples one hyperparameter set, trains and validates a model with it,
        records the outcome in ``self.optimization_history`` and updates the
        best-so-far state (saving the model when a new best is found).

        Args:
            trial: Optuna trial object

        Returns:
            float: Composite score to maximize, or ``-inf`` when the trial
            fails with an unexpected error

        Raises:
            optuna.TrialPruned: Propagated unchanged so the study records the
                trial as pruned instead of as a failed/complete trial
        """
        # Sample hyperparameters
        params = {
            # Architecture parameters
            'hidden_size': trial.suggest_categorical('hidden_size', [256, 512, 768, 1024]),
            'num_attention_heads': trial.suggest_categorical('num_attention_heads', [4, 8, 12, 16]),
            'num_hidden_layers': trial.suggest_int('num_hidden_layers', 6, 24),
            'intermediate_size': trial.suggest_categorical('intermediate_size', [1024, 2048, 3072, 4096]),

            # Training parameters
            'learning_rate': trial.suggest_float('learning_rate', 1e-6, 1e-3, log=True),
            'weight_decay': trial.suggest_float('weight_decay', 1e-6, 1e-1, log=True),
            'dropout_rate': trial.suggest_float('dropout_rate', 0.0, 0.5),
            'attention_dropout': trial.suggest_float('attention_dropout', 0.0, 0.3),

            # Regularization parameters
            'label_smoothing': trial.suggest_float('label_smoothing', 0.0, 0.2),
            'warmup_steps': trial.suggest_int('warmup_steps', 100, 2000),

            # Optimization parameters
            'batch_size': trial.suggest_categorical('batch_size', [16, 32, 64, 128]),
            'gradient_accumulation_steps': trial.suggest_categorical('gradient_accumulation_steps', [1, 2, 4, 8]),
            'max_grad_norm': trial.suggest_float('max_grad_norm', 0.1, 2.0),

            # Scheduler parameters
            'scheduler_type': trial.suggest_categorical('scheduler_type', ['linear', 'cosine', 'polynomial']),
        }

        # num_cycles is only searched for the cosine scheduler. It has to be
        # sampled after the dict exists: reading `params` from inside its own
        # literal raises UnboundLocalError.
        if params['scheduler_type'] == 'cosine':
            params['num_cycles'] = trial.suggest_int('num_cycles', 1, 3)
        else:
            params['num_cycles'] = 1

        # Ensure architectural constraints: hidden size must split evenly
        # across the attention heads.
        if params['hidden_size'] % params['num_attention_heads'] != 0:
            params['num_attention_heads'] = self._find_valid_heads(params['hidden_size'])

        try:
            # Create model
            model = self._create_model_with_params(params)
            model_size_mb = self._calculate_model_size(model)

            # Setup trainer
            trainer = BatteryTransformerTrainer(
                model=model,
                train_data=self.train_data,
                val_data=self.val_data,
                training_config=params,
                callbacks=[
                    PyTorchLightningPruningCallback(trial, monitor='val_loss')
                ]
            )

            # Train model
            trainer.fit(max_epochs=self.optimization_config.get('max_epochs', 50))

            # Evaluate model
            val_metrics = trainer.validate()

            # The scorer reads 'model_size_mb' from the dict it is given; the
            # sampled hyperparameters never contain it, so supply the measured
            # size here (otherwise the size term is always zero).
            composite_score = self._calculate_composite_score(
                val_metrics, {**params, 'model_size_mb': model_size_mb}
            )

            # Store trial results
            trial_result = {
                'trial_number': trial.number,
                'params': params,
                'score': composite_score,
                'metrics': val_metrics,
                'model_size_mb': model_size_mb,
                'training_time_minutes': trainer.get_training_time()
            }

            self.optimization_history.append(trial_result)

            # Update best parameters
            if composite_score > self.best_score:
                self.best_score = composite_score
                self.best_params = params.copy()

                # Save best model
                self._save_best_model(model, params, val_metrics)

            return composite_score

        except optuna.TrialPruned:
            # Pruning is a control-flow signal for the study, not a failure.
            raise
        except Exception as e:
            # Best effort: a single broken configuration must not end the study.
            logger.error(f"Trial {trial.number} failed: {str(e)}")
            return float('-inf')
    
    def _find_valid_heads(self, hidden_size: int) -> int:
        """Find valid number of attention heads for given hidden size."""
        valid_heads = [h for h in [4, 8, 12, 16] if hidden_size % h == 0]
        return valid_heads[0] if valid_heads else 8
    
    def _calculate_composite_score(self, metrics: Dict[str, float], params: Dict[str, Any]) -> float:
        """
        Calculate composite score considering multiple objectives.
        
        Args:
            metrics: Validation metrics
            params: Model parameters
            
        Returns:
            float: Composite score
        """
        # Primary metrics (accuracy-based)
        accuracy_score = metrics.get('soh_accuracy', 0) * 0.3
        mae_score = max(0, 1 - metrics.get('soh_mae', 1)) * 0.25
        rul_accuracy = metrics.get('rul_accuracy_within_10_percent', 0) * 0.2
        anomaly_f1 = metrics.get('anomaly_f1', 0) * 0.15
        
        # Secondary metrics (efficiency-based)
        model_size_penalty = max(0, 1 - params.get('model_size_mb', 1000) / 1000) * 0.05
        inference_speed_bonus = max(0, 1 - metrics.get('inference_time_ms', 100) / 100) * 0.05
        
        composite_score = (
            accuracy_score + mae_score + rul_accuracy + anomaly_f1 + 
            model_size_penalty + inference_speed_bonus
        )
        
        return composite_score
    
    def _calculate_model_size(self, model) -> float:
        """Calculate model size in MB."""
        param_size = sum(p.numel() for p in model.parameters())
        buffer_size = sum(b.numel() for b in model.buffers())
        size_mb = (param_size + buffer_size) * 4 / (1024 * 1024)  # 4 bytes per float32
        return size_mb
    
    def _save_best_model(self, model, params: Dict[str, Any], metrics: Dict[str, float]):
        """
        Persist the current best model next to a JSON summary of its run.

        Writes ``best_model.pt`` (the state dict) and ``best_config.json``
        (parameters, metrics, size and timestamp) into the tuning artifact
        directory, creating that directory when needed.

        Args:
            model: Model whose ``state_dict()`` is saved
            params: Hyperparameters that produced the model
            metrics: Validation metrics for the model
        """
        output_dir = Path("../../model-artifacts/hyperparameter_tuning/transformer_best")
        output_dir.mkdir(parents=True, exist_ok=True)
        weights_path = output_dir / "best_model.pt"
        summary_path = output_dir / "best_config.json"

        # Weights first, then the human-readable summary.
        torch.save(model.state_dict(), weights_path)

        summary = dict(
            parameters=params,
            metrics=metrics,
            model_size_mb=self._calculate_model_size(model),
            optimization_timestamp=pd.Timestamp.now().isoformat(),
        )
        with open(summary_path, 'w') as handle:
            json.dump(summary, handle, indent=2)

        logger.info(f"Best model saved with score: {self.best_score:.4f}")
    
    def optimize_with_optuna(self, n_trials: int = 100) -> Dict[str, Any]:
        """
        Run the hyperparameter search with Optuna.

        Uses a seeded TPE sampler with median pruning, queues one known
        baseline configuration as the first suggestion, and stops after
        ``n_trials`` trials or the configured timeout, whichever comes first.

        Args:
            n_trials: Number of optimization trials

        Returns:
            Dict with the best parameters and score, the trial count, the
            recorded optimization history and study statistics
        """
        logger.info(f"Starting Optuna optimization with {n_trials} trials")

        sampler = TPESampler(seed=42)
        pruner = MedianPruner(n_startup_trials=5, n_warmup_steps=10)
        study = optuna.create_study(
            direction='maximize',
            sampler=sampler,
            pruner=pruner
        )

        # Warm start: evaluate a known baseline configuration first.
        baseline = {
            'hidden_size': 768,
            'num_attention_heads': 12,
            'num_hidden_layers': 12,
            'learning_rate': 2e-5,
            'batch_size': 32
        }
        study.enqueue_trial(baseline)

        # The timeout is configured in hours; Optuna expects seconds.
        timeout_seconds = self.optimization_config.get('timeout_hours', 24) * 3600
        study.optimize(
            self._objective_function,
            n_trials=n_trials,
            timeout=timeout_seconds,
            callbacks=[self._optuna_callback]
        )

        results = {
            'best_params': study.best_params,
            'best_score': study.best_value,
            'n_trials': len(study.trials),
            'optimization_history': self.optimization_history,
            'study_statistics': self._get_study_statistics(study)
        }

        logger.info(f"Optuna optimization completed. Best score: {study.best_value:.4f}")
        return results
    
    def _optuna_callback(self, study, trial):
        """Log the study's best score after every tenth trial (0, 10, 20, ...)."""
        if trial.number % 10 != 0:
            return
        logger.info(f"Trial {trial.number}: Current best score = {study.best_value:.4f}")
    
    def _get_study_statistics(self, study) -> Dict[str, Any]:
        """
        Summarize a finished Optuna study.

        Args:
            study: Optuna study to summarize

        Returns:
            Dict with trial counts by state, the best trial number, the
            wall-clock span between the first trial's start and the last
            trial's completion (hours; 0.0 when either timestamp is missing)
            and parameter importances (only computed for more than 10 trials)
        """
        # `study.trials` builds a fresh copy of every trial on each access,
        # so take one snapshot and reuse it.
        trials = study.trials
        states = [t.state for t in trials]
        trial_state = optuna.trial.TrialState

        started = trials[0].datetime_start if trials else None
        finished = trials[-1].datetime_complete if trials else None
        if started is not None and finished is not None:
            duration_hours = (finished - started).total_seconds() / 3600
        else:
            # The last trial may never have completed (e.g. interrupted run).
            duration_hours = 0.0

        return {
            'total_trials': len(trials),
            'completed_trials': states.count(trial_state.COMPLETE),
            'failed_trials': states.count(trial_state.FAIL),
            'pruned_trials': states.count(trial_state.PRUNED),
            'best_trial_number': study.best_trial.number,
            'optimization_duration_hours': duration_hours,
            'parameter_importance': optuna.importance.get_param_importances(study) if len(trials) > 10 else {}
        }
    
    def optimize_with_ray_tune(self, n_trials: int = 50) -> Dict[str, Any]:
        """
        Optimize hyperparameters using Ray Tune.

        Runs an ASHA-scheduled search driven by an Optuna TPE sampler, then
        records the winning configuration on this instance so the analysis
        and reporting methods work after a Ray Tune run as well.

        Args:
            n_trials: Number of optimization trials

        Returns:
            Dict containing the best parameters and score, the trial count,
            the best trial's checkpoint value and the Ray analysis object
        """
        logger.info(f"Starting Ray Tune optimization with {n_trials} trials")

        # Initialize Ray
        ray.init(ignore_reinit_error=True)

        try:
            # Define search space. tune.randint excludes its upper bound, so
            # the bounds are one past the inclusive ranges used by the Optuna
            # search (6..24 layers, 100..2000 warmup steps).
            search_space = {
                'hidden_size': tune.choice([256, 512, 768, 1024]),
                'num_attention_heads': tune.choice([4, 8, 12, 16]),
                'num_hidden_layers': tune.randint(6, 25),
                'learning_rate': tune.loguniform(1e-6, 1e-3),
                'weight_decay': tune.loguniform(1e-6, 1e-1),
                'dropout_rate': tune.uniform(0.0, 0.5),
                'batch_size': tune.choice([16, 32, 64, 128]),
                'warmup_steps': tune.randint(100, 2001)
            }

            # Setup scheduler
            scheduler = ASHAScheduler(
                max_t=self.optimization_config.get('max_epochs', 50),
                grace_period=5,
                reduction_factor=2
            )

            # Setup search algorithm
            search_alg = OptunaSearch(
                sampler=TPESampler(seed=42),
                metric='score',
                mode='max'
            )

            # Run optimization
            analysis = tune.run(
                self._ray_tune_trainable,
                config=search_space,
                num_samples=n_trials,
                scheduler=scheduler,
                search_alg=search_alg,
                resources_per_trial={'cpu': 4, 'gpu': 1},
                local_dir='./ray_results',
                name='battery_transformer_tuning'
            )

            # Get best result
            best_trial = analysis.get_best_trial('score', 'max')

            results = {
                'best_params': best_trial.config,
                'best_score': best_trial.metric_analysis['score']['last'],
                'n_trials': n_trials,
                'best_trial_path': best_trial.checkpoint.value,
                'analysis': analysis
            }
        finally:
            # Release the Ray runtime even when the search fails part-way.
            ray.shutdown()

        # Track the winner on the instance: the analysis, recommendation and
        # report methods all read self.best_params / self.best_score.
        if results['best_score'] > self.best_score:
            self.best_score = results['best_score']
            self.best_params = dict(results['best_params'])

        logger.info(f"Ray Tune optimization completed. Best score: {results['best_score']:.4f}")
        return results
    
    def _ray_tune_trainable(self, config):
        """
        Trainable function for Ray Tune.

        Trains one configuration epoch by epoch and reports the composite
        score together with the validation metrics after every epoch.

        Args:
            config: Hyperparameter values sampled by Ray Tune for this trial
        """
        # Work on a copy so the dict owned by Ray is never modified.
        trial_config = dict(config)

        # Same architectural constraint as the Optuna objective: the hidden
        # size must split evenly across the attention heads.
        hidden_size = trial_config.get('hidden_size')
        num_heads = trial_config.get('num_attention_heads')
        if hidden_size is not None and num_heads is not None and hidden_size % num_heads != 0:
            trial_config['num_attention_heads'] = self._find_valid_heads(hidden_size)

        # Create model with config
        model = self._create_model_with_params(trial_config)

        # The scorer reads 'model_size_mb' from the dict it is given; sampled
        # configurations never contain it, so supply the measured size.
        scoring_params = {**trial_config, 'model_size_mb': self._calculate_model_size(model)}

        # Setup trainer
        trainer = BatteryTransformerTrainer(
            model=model,
            train_data=self.train_data,
            val_data=self.val_data,
            training_config=trial_config
        )

        # Train with reporting
        for _ in range(self.optimization_config.get('max_epochs', 50)):
            trainer.train_epoch()
            val_metrics = trainer.validate()

            score = self._calculate_composite_score(val_metrics, scoring_params)

            # Report to Ray Tune
            tune.report(score=score, **val_metrics)
    
    def analyze_optimization_results(self, results: Dict[str, Any]) -> Dict[str, Any]:
        """
        Analyze optimization results and provide insights.

        The best configuration is taken from this instance's tracked state;
        when nothing was tracked (``self.best_params`` is None) it falls back
        to the 'best_params' / 'best_score' entries of ``results``.

        Args:
            results: Optimization results as returned by one of the
                ``optimize_with_*`` methods

        Returns:
            Dict containing parameter importance, performance trends,
            resource analysis, recommendations and the best configuration
        """
        logger.info("Analyzing optimization results...")

        results = results or {}

        # Parameter importance analysis
        param_importance = self._analyze_parameter_importance()

        # Performance trends
        performance_trends = self._analyze_performance_trends()

        # Resource utilization
        resource_analysis = self._analyze_resource_utilization()

        # Recommendations
        recommendations = self._generate_recommendations()

        # Resolve the best configuration without assuming a tracked best exists.
        if self.best_params is not None:
            best_params = self.best_params
            best_score = self.best_score
        else:
            best_params = results.get('best_params')
            best_score = results.get('best_score', self.best_score)

        expected_performance = {}
        if best_params:
            try:
                expected_performance = self._predict_performance(best_params)
            except KeyError as missing:
                # The heuristic needs specific keys; skip it rather than abort.
                logger.warning(f"Cannot predict performance, missing parameter: {missing}")

        analysis = {
            'parameter_importance': param_importance,
            'performance_trends': performance_trends,
            'resource_analysis': resource_analysis,
            'recommendations': recommendations,
            'best_configuration': {
                'parameters': best_params,
                'score': best_score,
                'expected_performance': expected_performance
            }
        }

        return analysis
    
    def _analyze_parameter_importance(self) -> Dict[str, float]:
        """Analyze parameter importance from optimization history."""
        if not self.optimization_history:
            return {}
        
        # Convert to DataFrame
        df = pd.DataFrame(self.optimization_history)
        
        # Calculate correlation with score
        param_importance = {}
        for param in ['hidden_size', 'num_attention_heads', 'learning_rate', 'dropout_rate']:
            if param in df.columns:
                correlation = df[param].corr(df['score'])
                param_importance[param] = abs(correlation)
        
        return param_importance
    
    def _analyze_performance_trends(self) -> Dict[str, Any]:
        """Analyze performance trends across trials."""
        if not self.optimization_history:
            return {}
        
        df = pd.DataFrame(self.optimization_history)
        
        return {
            'score_progression': df['score'].tolist(),
            'best_score_progression': df['score'].cummax().tolist(),
            'convergence_rate': self._calculate_convergence_rate(df['score']),
            'optimization_efficiency': len(df[df['score'] > df['score'].quantile(0.9)]) / len(df)
        }
    
    def _analyze_resource_utilization(self) -> Dict[str, Any]:
        """Analyze resource utilization patterns."""
        if not self.optimization_history:
            return {}
        
        df = pd.DataFrame(self.optimization_history)
        
        return {
            'average_model_size_mb': df['model_size_mb'].mean(),
            'average_training_time_minutes': df['training_time_minutes'].mean(),
            'size_performance_tradeoff': df['model_size_mb'].corr(df['score']),
            'time_performance_tradeoff': df['training_time_minutes'].corr(df['score'])
        }
    
    def _generate_recommendations(self) -> List[str]:
        """Generate optimization recommendations."""
        recommendations = []
        
        if self.best_params:
            # Architecture recommendations
            if self.best_params['hidden_size'] >= 768:
                recommendations.append("Large hidden size (â‰¥768) performs well for battery data")
            
            if self.best_params['num_hidden_layers'] >= 12:
                recommendations.append("Deep architectures (â‰¥12 layers) capture complex temporal patterns")
            
            # Training recommendations
            if self.best_params['learning_rate'] <= 5e-5:
                recommendations.append("Lower learning rates (â‰¤5e-5) provide better stability")
            
            if self.best_params['dropout_rate'] <= 0.2:
                recommendations.append("Conservative dropout (â‰¤0.2) prevents overfitting")
        
        return recommendations
    
    def _calculate_convergence_rate(self, scores: pd.Series) -> float:
        """Calculate convergence rate of optimization."""
        if len(scores) < 10:
            return 0.0
        
        # Calculate moving average
        window = min(10, len(scores) // 4)
        moving_avg = scores.rolling(window=window).mean()
        
        # Calculate convergence as stability of moving average
        convergence_rate = 1.0 - moving_avg.std() / moving_avg.mean()
        return max(0.0, convergence_rate)
    
    def _predict_performance(self, params: Dict[str, Any]) -> Dict[str, float]:
        """Predict performance metrics for given parameters."""
        # Simple heuristic-based prediction
        base_accuracy = 0.85
        
        # Adjust based on architecture
        if params['hidden_size'] >= 768:
            base_accuracy += 0.05
        if params['num_hidden_layers'] >= 12:
            base_accuracy += 0.03
        
        # Adjust based on training params
        if params['learning_rate'] <= 5e-5:
            base_accuracy += 0.02
        if params['dropout_rate'] <= 0.2:
            base_accuracy += 0.01
        
        return {
            'expected_accuracy': min(0.99, base_accuracy),
            'expected_mae': max(0.01, 0.15 - (base_accuracy - 0.85)),
            'expected_training_time_hours': params['num_hidden_layers'] * 0.5,
            'expected_model_size_mb': params['hidden_size'] * params['num_hidden_layers'] * 0.01
        }
    
    def visualize_optimization_results(self, results: Dict[str, Any]) -> None:
        """
        Create a 2x3 dashboard of the optimization history.

        Panels: score progression, parameter importance, score vs model size,
        learning-rate distribution, mean score per hidden size and score vs
        training time. The figure is written to
        'transformer_optimization_results.png' and then shown.

        Args:
            results: Optimization results (not read here; the plots are built
                from ``self.optimization_history``)
        """
        if not self.optimization_history:
            logger.warning("No optimization history available for visualization")
            return

        df = pd.DataFrame(self.optimization_history)

        # History entries keep hyperparameters in a nested 'params' dict.
        # Expand them into columns so the per-parameter panels can find them.
        if 'params' in df.columns:
            params_df = pd.DataFrame(
                [entry.get('params') or {} for entry in self.optimization_history],
                index=df.index
            )
            extra_columns = [col for col in params_df.columns if col not in df.columns]
            df = df.join(params_df[extra_columns])

        # Create subplots
        fig, axes = plt.subplots(2, 3, figsize=(18, 12))
        fig.suptitle('Transformer Hyperparameter Optimization Results', fontsize=16)

        # 1. Score progression
        axes[0, 0].plot(df['trial_number'], df['score'], 'b-', alpha=0.7, label='Trial Score')
        axes[0, 0].plot(df['trial_number'], df['score'].cummax(), 'r-', linewidth=2, label='Best Score')
        axes[0, 0].set_xlabel('Trial Number')
        axes[0, 0].set_ylabel('Composite Score')
        axes[0, 0].set_title('Optimization Progress')
        axes[0, 0].legend()
        axes[0, 0].grid(True, alpha=0.3)

        # 2. Parameter importance
        param_importance = self._analyze_parameter_importance()
        if param_importance:
            params = list(param_importance.keys())
            importance = list(param_importance.values())
            axes[0, 1].bar(params, importance, color='skyblue')
            axes[0, 1].set_xlabel('Parameters')
            axes[0, 1].set_ylabel('Importance Score')
            axes[0, 1].set_title('Parameter Importance')
            axes[0, 1].tick_params(axis='x', rotation=45)

        # 3. Score vs Model Size
        axes[0, 2].scatter(df['model_size_mb'], df['score'], alpha=0.6)
        axes[0, 2].set_xlabel('Model Size (MB)')
        axes[0, 2].set_ylabel('Score')
        axes[0, 2].set_title('Score vs Model Size')
        axes[0, 2].grid(True, alpha=0.3)

        # 4. Learning rate distribution
        if 'learning_rate' in df.columns:
            axes[1, 0].hist(df['learning_rate'].dropna(), bins=20, alpha=0.7, color='green')
            axes[1, 0].set_xlabel('Learning Rate')
            axes[1, 0].set_ylabel('Frequency')
            axes[1, 0].set_title('Learning Rate Distribution')
            axes[1, 0].set_xscale('log')

        # 5. Hidden size vs Score
        if 'hidden_size' in df.columns:
            mean_by_size = df.groupby('hidden_size')['score'].mean()
            # Categorical labels: numeric positions (256..1024) with default
            # bar widths would draw bars too thin to see.
            size_labels = [str(size) for size in mean_by_size.index]
            axes[1, 1].bar(size_labels, mean_by_size.values, color='orange')
            axes[1, 1].set_xlabel('Hidden Size')
            axes[1, 1].set_ylabel('Average Score')
            axes[1, 1].set_title('Hidden Size vs Performance')

        # 6. Training time vs Score
        if 'training_time_minutes' in df.columns:
            axes[1, 2].scatter(df['training_time_minutes'], df['score'], alpha=0.6, color='red')
            axes[1, 2].set_xlabel('Training Time (minutes)')
            axes[1, 2].set_ylabel('Score')
            axes[1, 2].set_title('Training Time vs Performance')
            axes[1, 2].grid(True, alpha=0.3)

        plt.tight_layout()
        plt.savefig('transformer_optimization_results.png', dpi=300, bbox_inches='tight')
        plt.show()
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
    
    def export_optimization_report(self, results: Dict[str, Any],
                                  filename: str = "transformer_optimization_report.html") -> str:
        """
        Export a comprehensive optimization report as a standalone HTML file.

        Args:
            results: Optimization results, forwarded to
                ``analyze_optimization_results``
            filename: Path of the HTML file to write

        Returns:
            str: The filename the report was written to
        """
        logger.info(f"Exporting optimization report to {filename}")

        # Create HTML report. The template is filled with str.format, so the
        # literal CSS braces must be doubled ({{ }}); single braces would be
        # parsed as replacement fields and make format() raise.
        html_template = """
        <!DOCTYPE html>
        <html>
        <head>
            <meta charset="utf-8">
            <title>BatteryMind Transformer Optimization Report</title>
            <style>
                body {{ font-family: Arial, sans-serif; margin: 20px; }}
                .header {{ background-color: #f0f0f0; padding: 20px; border-radius: 5px; }}
                .section {{ margin: 20px 0; padding: 15px; border-left: 4px solid #007acc; }}
                .metric {{ display: inline-block; margin: 10px; padding: 10px; background-color: #f9f9f9; border-radius: 3px; }}
                .recommendation {{ background-color: #e8f5e8; padding: 10px; margin: 5px 0; border-radius: 3px; }}
                table {{ border-collapse: collapse; width: 100%; }}
                th, td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
                th {{ background-color: #f2f2f2; }}
            </style>
        </head>
        <body>
            <div class="header">
                <h1>BatteryMind Transformer Optimization Report</h1>
                <p>Generated on: {timestamp}</p>
                <p>Total Trials: {total_trials}</p>
                <p>Best Score: {best_score:.4f}</p>
            </div>
            
            <div class="section">
                <h2>Best Configuration</h2>
                <table>
                    <tr><th>Parameter</th><th>Value</th></tr>
                    {best_params_table}
                </table>
            </div>
            
            <div class="section">
                <h2>Performance Analysis</h2>
                <div class="metric">
                    <strong>Convergence Rate:</strong> {convergence_rate:.3f}
                </div>
                <div class="metric">
                    <strong>Optimization Efficiency:</strong> {optimization_efficiency:.3f}
                </div>
                <div class="metric">
                    <strong>Best Model Size:</strong> {best_model_size:.1f} MB
                </div>
            </div>
            
            <div class="section">
                <h2>Recommendations</h2>
                {recommendations_html}
            </div>
            
            <div class="section">
                <h2>Parameter Importance</h2>
                <table>
                    <tr><th>Parameter</th><th>Importance</th></tr>
                    {param_importance_table}
                </table>
            </div>
        </body>
        </html>
        """

        # Prepare data for template
        analysis = self.analyze_optimization_results(results)
        performance_trends = analysis.get('performance_trends', {})

        # Best parameters table
        best_params_rows = "".join(
            f"<tr><td>{param}</td><td>{value}</td></tr>"
            for param, value in (self.best_params or {}).items()
        )

        # Recommendations HTML
        recommendations_html = "".join(
            f'<div class="recommendation">{rec}</div>'
            for rec in analysis.get('recommendations', [])
        )

        # Parameter importance table
        param_importance_rows = "".join(
            f"<tr><td>{param}</td><td>{importance:.3f}</td></tr>"
            for param, importance in analysis.get('parameter_importance', {}).items()
        )

        # "Best Model Size" is the size of the best-scoring recorded trial,
        # not the average over all trials.
        best_model_size = 0
        if self.optimization_history:
            best_entry = max(
                self.optimization_history,
                key=lambda entry: entry.get('score', float('-inf'))
            )
            best_model_size = best_entry.get('model_size_mb', 0)

        # Fill template
        html_content = html_template.format(
            timestamp=pd.Timestamp.now().isoformat(),
            total_trials=len(self.optimization_history),
            best_score=self.best_score,
            best_params_table=best_params_rows,
            convergence_rate=performance_trends.get('convergence_rate', 0),
            optimization_efficiency=performance_trends.get('optimization_efficiency', 0),
            best_model_size=best_model_size,
            recommendations_html=recommendations_html,
            param_importance_table=param_importance_rows
        )

        # Save report; UTF-8 matches the declared charset and keeps any
        # non-ASCII characters in the recommendations intact on every platform.
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(html_content)

        logger.info(f"Optimization report saved to {filename}")
        return filename

# Main execution
if __name__ == "__main__":
    # Script entry point: configure, run the search, then analyze, plot and
    # export the results.

    # Configuration
    data_config = {
        'num_batteries': 200,
        'duration_days': 60,
        'num_features': 23,
        'num_classes': 4,
        'sequence_length': 256,
        'batch_size': 32
    }
    
    base_model_config = {
        'hidden_size': 768,
        'num_attention_heads': 12,
        'num_hidden_layers': 12,
        'max_position_embeddings': 512,
        'type_vocab_size': 2
    }
    
    optimization_config = {
        'max_epochs': 30,
        'timeout_hours': 12,
        'n_trials': 100,
        'optimization_method': 'optuna'  # or 'ray_tune'
    }
    
    # Initialize optimizer
    optimizer = BatteryTransformerOptimizer(
        data_config=data_config,
        base_model_config=base_model_config,
        optimization_config=optimization_config
    )
    
    # Run optimization
    print("🚀 Starting Transformer Hyperparameter Optimization...")
    print(f"Configuration: {optimization_config['n_trials']} trials, {optimization_config['max_epochs']} epochs")
    
    if optimization_config['optimization_method'] == 'optuna':
        results = optimizer.optimize_with_optuna(n_trials=optimization_config['n_trials'])
    else:
        results = optimizer.optimize_with_ray_tune(n_trials=optimization_config['n_trials'])
    
    # Analyze results
    print("\n📊 Analyzing optimization results...")
    analysis = optimizer.analyze_optimization_results(results)
    
    # Display results
    print(f"\n🎯 Best Score: {optimizer.best_score:.4f}")
    print("📋 Best Parameters:")
    # best_params stays None when no trial finished successfully; fall back
    # to whatever the optimization run itself reported.
    best_params = optimizer.best_params or results.get('best_params') or {}
    for param, value in best_params.items():
        print(f"  {param}: {value}")
    
    print("\n💡 Recommendations:")
    for rec in analysis['recommendations']:
        print(f"  • {rec}")
    
    # Create visualizations
    print("\n📈 Creating visualizations...")
    optimizer.visualize_optimization_results(results)
    
    # Export report
    print("\n📄 Exporting optimization report...")
    report_path = optimizer.export_optimization_report(results)
    print(f"Report saved to: {report_path}")
    
    print("\n✅ Transformer hyperparameter optimization completed successfully!")
