In [85]:
import gymnasium as gym
from gymnasium import spaces
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from stable_baselines3 import PPO, A2C, DDPG, SAC, DQN
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize, VecMonitor
from stable_baselines3.common.vec_env.subproc_vec_env import SubprocVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.callbacks import BaseCallback
import torch
import optuna
from IPython.display import display
import ipywidgets as widgets
from ipywidgets import IntProgress, HTML, VBox
import math
import os

In [86]:
# Pick the compute device once: prefer a CUDA GPU, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

if device == "cuda":
    print(f"CUDA GPU is available. Using the GPU for training and evaluation: {torch.cuda.get_device_name(0)}")
else:
    print("CUDA GPU not found. Using the CPU for training and evaluation.")

# --- Environment for Continuous Action Spaces (PPO, A2C, DDPG, SAC) ---
# NOTE: `reset` returns `(observation, info)` and `step` returns the 5-tuple
# `(observation, reward, terminated, truncated, info)`, as required by Gym v0.26+.
class TEGEnvironment(gym.Env):
    """
    A custom Gym environment for a Thermoelectric Generator (TEG) system with a continuous action space.

    Action: ``Box(3,)`` with each component in [0, 1] -- the fractions of the
    available energy routed to battery charging, buffer storage and the load
    (the "idle" component). Fractions summing to more than 1 are rescaled to
    sum to 1.

    Observation: ``Box(7,)`` -- voltage, current, battery level, buffer level,
    temperature gradient, energy demand, battery health.

    All stochasticity is drawn from Python's global ``random`` module, which
    ``reset(seed=...)`` reseeds.
    """
    def __init__(self):
        super(TEGEnvironment, self).__init__()

        # Action space: Charge rate, Store rate, Idle rate (continuous, proportions)
        self.action_space = spaces.Box(low=0, high=1, shape=(3,), dtype=np.float32)

        # State space: Voltage, Current, Battery Level, Buffer Level, Temp Gradient,
        # Energy Demand, Battery Health.
        # The bounds are created as float32 so they already match the declared dtype.
        self.observation_space = spaces.Box(
            low=np.zeros(7, dtype=np.float32),
            high=np.array([10, 10, 100, 100, 100, 50, 100], dtype=np.float32),
            dtype=np.float32,
        )
        self.logs = []
        self.np_random = np.random.RandomState()
        self.max_steps = 1000  # Define a maximum number of steps for truncation

    def reset(self, seed=None, options=None):
        """
        Start a new episode and clear the per-episode logs.

        Args:
            seed: Optional seed applied to both ``self.np_random`` and the
                global ``random`` module.
            options: Accepted for API compatibility; unused.

        Returns:
            ``(observation, info)`` where ``info`` is an empty dict.
        """
        if seed is not None:
            self.np_random.seed(seed)
            random.seed(seed)

        self.voltage = random.uniform(5, 7)
        self.current = self.voltage / 10
        self.battery_level = 50
        self.buffer_level = 10
        self.temperature_gradient = random.uniform(40, 60)
        self.energy_demand = 10
        self.battery_health = 100
        self.current_step = 0
        self.logs = []

        # Return observation and an empty info dictionary
        return self._get_observation(), {}

    def step(self, action):
        """
        Advance the simulation by one step.

        Args:
            action: Array-like of three non-negative fractions
                ``(charge, store, idle)``.

        Returns:
            ``(observation, reward, terminated, truncated, info)``. ``reward``
            is a Python float, ``terminated``/``truncated`` are Python bools and
            ``info`` is an empty dict.
        """
        self.current_step += 1

        # Accept any array-like; rescale only when the fractions over-allocate.
        action = np.asarray(action)
        total_action = np.sum(action)
        if total_action > 1:
            action = action / total_action

        charge_action, store_action, idle_action = action
        # NOTE(review): self.current is set in reset() and never refreshed here, so the
        # available energy keeps using the episode's initial current -- confirm intended.
        available_energy = self.voltage * self.current

        # Loss factors and efficiencies
        charge_efficiency = 0.85
        store_efficiency = 0.80
        load_efficiency = 0.75
        system_losses = 0.1

        # Calculate energy allocations
        energy_to_charge = available_energy * charge_action * charge_efficiency
        energy_to_store = available_energy * store_action * store_efficiency
        delivered_energy = available_energy * idle_action * load_efficiency

        # Update battery and buffer levels
        self.battery_level += energy_to_charge
        self.battery_level = np.clip(self.battery_level, 0, 100)

        self.buffer_level += energy_to_store
        self.buffer_level = np.clip(self.buffer_level, 0, 100)

        # Update battery health (charging wears the battery)
        self.battery_health -= charge_action * 0.2
        self.battery_health = np.clip(self.battery_health, 0, 100)

        # Calculate efficiency (Carnot limit)
        thot = self.temperature_gradient
        tcold = thot - 5
        # The gradient is clamped at 0 when re-sampled below, so guard the division.
        carnot_efficiency = (thot - tcold) / thot if thot > 0 else 0.0
        net_efficiency = carnot_efficiency * load_efficiency * (1 - system_losses)

        # Reward: efficiency bonus minus unmet demand minus an over-charge penalty above 95.
        unmet_demand = max(self.energy_demand - delivered_energy, 0)
        reward = -unmet_demand
        reward += net_efficiency * 10
        reward -= max(0, self.battery_level - 95)
        reward = float(reward)

        # Best achievable reward this step: no unmet demand and no over-charge penalty.
        max_possible_reward = 10 * net_efficiency

        # Calculate regret (difference between max possible and actual reward)
        regret = max_possible_reward - reward

        # Log BEFORE re-sampling the conditions so that the logged demand, Thot and power
        # are the ones this step's delivered energy, efficiency and reward were computed from.
        self._log_data(net_efficiency, reward, regret, delivered_energy)

        # Sample the conditions for the next step
        self.voltage = max(5 + random.gauss(0, 0.5), 0)
        self.temperature_gradient = max(50 + random.gauss(0, 5), 0)
        self.energy_demand = max(10 + random.gauss(0, 2), 0)

        # Check termination conditions (cast: the comparisons yield numpy bools after np.clip).
        # NOTE(review): nothing in step() lowers battery_level, so the second condition
        # looks unreachable -- confirm whether a discharge term is missing.
        terminated = bool(self.battery_health <= 0 or self.battery_level <= 0)
        truncated = self.current_step >= self.max_steps

        # Return observation, reward, terminated, truncated, and an empty info dictionary
        return self._get_observation(), reward, terminated, truncated, {}

    def _get_observation(self):
        """Return the current 7-element state vector as a float32 array."""
        return np.array([
            self.voltage, self.current, self.battery_level,
            self.buffer_level, self.temperature_gradient,
            self.energy_demand, self.battery_health
        ], dtype=np.float32)

    def _log_data(self, efficiency, reward, regret, delivered_energy):
        """
        Append one row describing the current step to ``self.logs``.

        ``"Qhot"`` and ``"Qcold"`` hold the battery and buffer levels;
        ``"Efficiency"`` is stored in percent.
        """
        log_entry = {
            "Thot": self.temperature_gradient,
            "Power": self.voltage * self.current,
            "Qhot": self.battery_level,
            "Qcold": self.buffer_level,
            "Efficiency": efficiency * 100,
            "Reward": reward,
            "Regret": regret,
            "Battery Health": self.battery_health,
            "Energy Demand": self.energy_demand, # Log energy demand to calculate fulfillment rate per episode
            "Energy Demand Fulfilled": delivered_energy
        }
        self.logs.append(log_entry)

    def get_logs(self):
        """Return the rows logged since the last ``reset`` as a DataFrame."""
        return pd.DataFrame(self.logs)

# --- Environment for Discrete Action Space (DQN) ---
# Discrete counterpart of the continuous TEG environment: the same dynamics,
# driven by three preset allocation profiles instead of free fractions.
class TEGDiscreteEnvironment(gym.Env):
    """
    A custom Gym environment for the TEG system with a discrete action space,
    designed to be compatible with DQN.

    Action: ``Discrete(3)``; each index selects a fixed
    ``(charge, store, idle)`` allocation from ``self.action_map``.

    Observation: ``Box(7,)`` -- voltage, current, battery level, buffer level,
    temperature gradient, energy demand, battery health.

    All stochasticity is drawn from Python's global ``random`` module, which
    ``reset(seed=...)`` reseeds.
    """
    def __init__(self):
        super(TEGDiscreteEnvironment, self).__init__()
        # The bounds are created as float32 so they already match the declared dtype.
        self.observation_space = spaces.Box(
            low=np.zeros(7, dtype=np.float32),
            high=np.array([10, 10, 100, 100, 100, 50, 100], dtype=np.float32),
            dtype=np.float32,
        )
        # Discrete action space: 0 = high charge, 1 = low charge, 2 = idle
        self.action_space = spaces.Discrete(3)
        self.logs = []
        self.np_random = np.random.RandomState()
        self.max_steps = 1000

        # Map discrete actions to (charge, store, idle) fractions.
        self.action_map = {
            0: [0.8, 0.1, 0.1],  # High charge rate
            1: [0.3, 0.4, 0.3],  # Low charge rate
            2: [0.1, 0.1, 0.8]   # Idle: most energy goes to the load (see delivered_energy in step)
        }
        self.reset()


    def reset(self, seed=None, options=None):
        """
        Start a new episode and clear the per-episode logs.

        Args:
            seed: Optional seed applied to both ``self.np_random`` and the
                global ``random`` module.
            options: Accepted for API compatibility; unused.

        Returns:
            ``(observation, info)`` where ``info`` is an empty dict.
        """
        if seed is not None:
            self.np_random.seed(seed)
            random.seed(seed)

        self.voltage = random.uniform(5, 7)
        self.current = self.voltage / 10
        self.battery_level = 50
        self.buffer_level = 10
        self.temperature_gradient = random.uniform(40, 60)
        self.energy_demand = 10
        self.battery_health = 100
        self.current_step = 0
        self.logs = []

        return self._get_observation(), {}


    def step(self, action):
        """
        Advance the simulation by one step.

        Args:
            action: Index into ``self.action_map`` (0, 1 or 2). Python ints,
                numpy integer scalars and size-1 arrays are accepted.

        Returns:
            ``(observation, reward, terminated, truncated, info)``. ``reward``
            is a Python float, ``terminated``/``truncated`` are Python bools and
            ``info`` is an empty dict.

        Raises:
            KeyError: If ``action`` is not a key of ``self.action_map``.
        """
        self.current_step += 1

        # Normalise to a plain int so size-1 arrays (unhashable) can be used as the key.
        action_index = int(np.asarray(action).item())
        charge_action, store_action, idle_action = self.action_map[action_index]
        # NOTE(review): self.current is set in reset() and never refreshed here, so the
        # available energy keeps using the episode's initial current -- confirm intended.
        available_energy = self.voltage * self.current

        # Loss factors and efficiencies
        charge_efficiency = 0.85
        store_efficiency = 0.80
        load_efficiency = 0.75
        system_losses = 0.1

        # Calculate energy allocations
        energy_to_charge = available_energy * charge_action * charge_efficiency
        energy_to_store = available_energy * store_action * store_efficiency
        delivered_energy = available_energy * idle_action * load_efficiency

        # Update battery and buffer levels
        self.battery_level += energy_to_charge
        self.battery_level = np.clip(self.battery_level, 0, 100)

        self.buffer_level += energy_to_store
        self.buffer_level = np.clip(self.buffer_level, 0, 100)

        # Update battery health (charging wears the battery)
        self.battery_health -= charge_action * 0.2
        self.battery_health = np.clip(self.battery_health, 0, 100)

        # Calculate efficiency (Carnot limit)
        thot = self.temperature_gradient
        tcold = thot - 5
        # The gradient is clamped at 0 when re-sampled below, so guard the division.
        carnot_efficiency = (thot - tcold) / thot if thot > 0 else 0.0
        net_efficiency = carnot_efficiency * load_efficiency * (1 - system_losses)

        # Reward: efficiency bonus minus unmet demand minus an over-charge penalty above 95.
        unmet_demand = max(self.energy_demand - delivered_energy, 0)
        reward = -unmet_demand
        reward += net_efficiency * 10
        reward -= max(0, self.battery_level - 95)
        reward = float(reward)

        # Best achievable reward this step: no unmet demand and no over-charge penalty.
        max_possible_reward = 10 * net_efficiency

        # Calculate regret (difference between max possible and actual reward)
        regret = max_possible_reward - reward

        # Log BEFORE re-sampling the conditions so that the logged demand, Thot and power
        # are the ones this step's delivered energy, efficiency and reward were computed from.
        self._log_data(net_efficiency, reward, regret, delivered_energy)

        # Sample the conditions for the next step
        self.voltage = max(5 + random.gauss(0, 0.5), 0)
        self.temperature_gradient = max(50 + random.gauss(0, 5), 0)
        self.energy_demand = max(10 + random.gauss(0, 2), 0)

        # Cast: the comparisons yield numpy bools after np.clip.
        # NOTE(review): nothing in step() lowers battery_level, so the second condition
        # looks unreachable -- confirm whether a discharge term is missing.
        terminated = bool(self.battery_health <= 0 or self.battery_level <= 0)
        truncated = self.current_step >= self.max_steps

        return self._get_observation(), reward, terminated, truncated, {}


    def _get_observation(self):
        """Return the current 7-element state vector as a float32 array."""
        return np.array([
            self.voltage, self.current, self.battery_level,
            self.buffer_level, self.temperature_gradient,
            self.energy_demand, self.battery_health
        ], dtype=np.float32)

    def _log_data(self, efficiency, reward, regret, delivered_energy):
        """
        Append one row describing the current step to ``self.logs``.

        ``"Qhot"`` and ``"Qcold"`` hold the battery and buffer levels;
        ``"Efficiency"`` is stored in percent.
        """
        log_entry = {
            "Thot": self.temperature_gradient,
            "Power": self.voltage * self.current,
            "Qhot": self.battery_level,
            "Qcold": self.buffer_level,
            "Efficiency": efficiency * 100,
            "Reward": reward,
            "Regret": regret,
            "Battery Health": self.battery_health,
            "Energy Demand": self.energy_demand, # Log energy demand to calculate fulfillment rate per episode
            "Energy Demand Fulfilled": delivered_energy
        }
        self.logs.append(log_entry)

    def get_logs(self):
        """Return the rows logged since the last ``reset`` as a DataFrame."""
        return pd.DataFrame(self.logs)

CUDA GPU is available. Using the GPU for training and evaluation: NVIDIA GeForce RTX 5070


In [None]:
def calculate_metrics(df):
    """
    Summarise a DataFrame of evaluation logs into headline metrics.

    Each "Average ..." entry is the mean of the matching column, or None when
    that column is missing. "Energy Fulfillment Rate" is total delivered energy
    over total demand in percent, or None when either column is missing or the
    total demand is not positive.
    """
    available = df.columns

    def _mean_or_none(column):
        # A missing column yields None instead of raising a KeyError.
        if column in available:
            return df[column].mean()
        return None

    fulfillment_rate = None
    if "Energy Demand" in available and "Energy Demand Fulfilled" in available:
        total_demand = df["Energy Demand"].sum()
        if total_demand > 0:
            fulfillment_rate = (df["Energy Demand Fulfilled"].sum() / total_demand) * 100

    metrics = {}
    metrics["Average Reward"] = _mean_or_none("Reward")
    metrics["Average Battery Health"] = _mean_or_none("Battery Health")
    metrics["Average Efficiency"] = _mean_or_none("Efficiency")
    metrics["Average Regret"] = _mean_or_none("Regret")
    metrics["Energy Fulfillment Rate"] = fulfillment_rate
    return metrics

def _finalize_and_save(title, xlabel, ylabel, out_file, grid=False):
    """Apply the shared title/label/legend styling to the current figure, save it to `out_file` and close it."""
    plt.title(title, fontsize=16)
    plt.xlabel(xlabel, fontsize=14)
    plt.ylabel(ylabel, fontsize=14)
    plt.legend(fontsize=12)
    if grid:
        plt.grid(True, linestyle='--', alpha=0.6)
    plt.tight_layout()
    plt.savefig(out_file)
    plt.close()


def plot_results(df: pd.DataFrame, title_prefix: str, save_path: str, max_steps: int):
    """
    Generates a series of plots from evaluation logs and saves them.

    Args:
        df: Evaluation log with the columns "Efficiency", "Thot", "Power",
            "Qhot", "Qcold", "Reward", "Battery Health", "Regret",
            "Energy Demand" and "Energy Demand Fulfilled".
        title_prefix: Text placed in parentheses at the end of every title.
        save_path: Directory that receives the eight PNG files; it is created
            if it does not exist.
        max_steps: Number of leading rows shown in the single-episode plot.
    """
    plt.style.use('seaborn-v0_8-whitegrid')

    # savefig does not create directories, so make sure the target exists first.
    os.makedirs(save_path, exist_ok=True)

    x = list(range(len(df)))

    # Plot 1: Efficiency Over Evaluation Steps
    plt.figure(figsize=(12, 8))
    plt.plot(x, df["Efficiency"], label="Efficiency (%)", color='b', linewidth=0.5)
    _finalize_and_save(
        f"Efficiency Over Evaluation Steps ({title_prefix})",
        "Steps", "Efficiency (%)",
        os.path.join(save_path, "efficiency_plot.png"),
    )

    # Plot 2: Power vs. Temperature Gradient (remains a scatter plot)
    plt.figure(figsize=(12, 8))
    plt.scatter(df["Thot"], df["Power"], label="Power (W)", c="r", alpha=0.7, s=50)
    _finalize_and_save(
        f"Power vs. Temperature Gradient ({title_prefix})",
        "Temperature Gradient (Thot)", "Power (W)",
        os.path.join(save_path, "power_vs_temp_plot.png"),
    )

    # Plot 3: Battery and Buffer Levels Over Evaluation Steps
    plt.figure(figsize=(12, 8))
    plt.plot(x, df["Qhot"], label="Battery Level", color='g', linewidth=2)
    plt.plot(x, df["Qcold"], label="Buffer Level", color='orange', linewidth=2)
    _finalize_and_save(
        f"Battery and Buffer Levels Over Evaluation Steps ({title_prefix})",
        "Steps", "Energy Levels",
        os.path.join(save_path, "battery_buffer_levels_plot.png"),
    )

    # Plot 4: Cumulative Reward Over Evaluation Steps (remains cumulative)
    plt.figure(figsize=(12, 8))
    plt.plot(x, df["Reward"].cumsum(), label="Cumulative Reward", color='b', alpha=0.8, linewidth=2)
    _finalize_and_save(
        f"Cumulative Reward Over Evaluation Steps ({title_prefix})",
        "Steps", "Cumulative Reward",
        os.path.join(save_path, "cumulative_reward_plot.png"),
    )

    # Plot 5: Battery Health Over Evaluation Steps
    plt.figure(figsize=(12, 8))
    plt.plot(x, df["Battery Health"], label="Battery Health", color='purple', linewidth=2)
    _finalize_and_save(
        f"Battery Health Over Evaluation Steps ({title_prefix})",
        "Steps", "Battery Health (%)",
        os.path.join(save_path, "battery_health_plot.png"),
    )

    # Plot 6: Regret Over Evaluation Steps
    plt.figure(figsize=(12, 8))
    plt.plot(x, df["Regret"], label="Regret", color='r', linewidth=0.5)
    plt.fill_between(x, df["Regret"], color='red', alpha=0.4)
    _finalize_and_save(
        f"Regret Over Time ({title_prefix})",
        "Steps", "Regret",
        os.path.join(save_path, "regret_over_time_plot.png"),
        grid=True,
    )

    # Plot 7: Energy Demand Fulfillment Rate Over Evaluation Steps
    # Per-step rate; a zero demand produces inf/NaN, which is treated as 100% fulfilled.
    fulfillment_rate_per_step = (
        ((df["Energy Demand Fulfilled"] / df["Energy Demand"]) * 100)
        .replace([np.inf, -np.inf], np.nan)
        .fillna(100)
    )

    plt.figure(figsize=(12, 8))
    plt.plot(x, fulfillment_rate_per_step, color='cyan', label='Fulfillment Rate', linewidth=0.5)
    plt.fill_between(x, fulfillment_rate_per_step, color='cyan', alpha=0.4)
    plt.ylim(0)
    _finalize_and_save(
        f"Energy Demand Fulfillment Rate Over Evaluation Steps ({title_prefix})",
        "Steps", "Fulfillment Rate (%)",
        os.path.join(save_path, "demand_fulfillment_rate_plot.png"),
        grid=True,
    )

    # Plot 8: Battery Level vs. Time (Single Episode)
    # NOTE(review): this takes the first `max_steps` rows and plots them against the
    # frame's own index; it presumably expects the first episode to span exactly
    # `max_steps` rows with an increasing index -- confirm against the caller.
    single_episode_df = df.iloc[:max_steps]
    plt.figure(figsize=(12, 8))
    plt.plot(single_episode_df.index, single_episode_df["Qhot"], label="Battery Level", color='green', linewidth=2)
    _finalize_and_save(
        f"Battery Level Over a Single Episode ({title_prefix})",
        "Steps", "Battery Level",
        os.path.join(save_path, "single_episode_battery_level_plot.png"),
    )

In [None]:
def plot_comparison_results(df_tuned: pd.DataFrame, df_benchmark: pd.DataFrame, title_prefix: str, save_path: str):
    """
    Generates a series of comparison plots from evaluation logs and saves them.

    Args:
        df_tuned: Evaluation log of the tuned model.
        df_benchmark: Evaluation log of the benchmark model. Both frames need
            the columns "Reward", "Battery Health", "Efficiency",
            "Energy Demand" and "Energy Demand Fulfilled"; they may differ in
            length.
        title_prefix: Text placed in parentheses at the end of every title.
        save_path: Parent directory; the PNG files are written to its
            "comparison_plots" sub-directory, which is created if needed.
    """
    plt.style.use('seaborn-v0_8-whitegrid')

    # Ensure the save path for comparison plots exists
    comparison_save_path = os.path.join(save_path, "comparison_plots")
    os.makedirs(comparison_save_path, exist_ok=True)

    # Each model gets its own x-axis because the two logs may differ in length.
    x_tuned = range(len(df_tuned))
    x_benchmark = range(len(df_benchmark))

    def _fulfillment_pct(df):
        """Per-step delivered/demand in percent; steps with zero demand count as 100%."""
        ratio = df["Energy Demand Fulfilled"] / df["Energy Demand"]
        # x/0 gives +/-inf and 0/0 gives NaN; without the replace, a single inf would
        # poison every rolling-mean window that contains it.
        return ratio.replace([np.inf, -np.inf], np.nan).fillna(1) * 100

    def _save_pair(y_tuned, y_benchmark, colors, linewidth, title, ylabel, filename,
                   label_suffix="", ylim=None):
        """Draw the tuned (solid) and benchmark (dashed) curves on one figure and save it."""
        plt.figure(figsize=(12, 8))
        plt.plot(x_tuned, y_tuned, label=f"Tuned Model{label_suffix}",
                 color=colors[0], linewidth=linewidth)
        plt.plot(x_benchmark, y_benchmark, label=f"Benchmark Model{label_suffix}",
                 color=colors[1], linestyle='--', linewidth=linewidth)
        plt.title(f"Comparison: {title} ({title_prefix})", fontsize=16)
        plt.xlabel("Steps", fontsize=14)
        plt.ylabel(ylabel, fontsize=14)
        if ylim is not None:
            plt.ylim(*ylim)
        plt.legend(fontsize=12)
        plt.tight_layout()
        plt.savefig(os.path.join(comparison_save_path, filename))
        plt.close()

    # Plot 1: Cumulative Reward Over Evaluation Steps
    _save_pair(
        df_tuned["Reward"].cumsum(), df_benchmark["Reward"].cumsum(),
        ('blue', 'red'), 2,
        "Cumulative Reward", "Cumulative Reward",
        "comparison_cumulative_reward_plot.png",
    )

    # Plot 2: Battery Health Over Evaluation Steps
    _save_pair(
        df_tuned["Battery Health"], df_benchmark["Battery Health"],
        ('green', 'orange'), 1.5,
        "Battery Health Over Steps", "Battery Health (%)",
        "comparison_battery_health_plot.png",
    )

    # Plot 3: Efficiency Over Evaluation Steps
    _save_pair(
        df_tuned["Efficiency"], df_benchmark["Efficiency"],
        ('purple', 'brown'), 1,
        "Efficiency Over Steps", "Efficiency (%)",
        "comparison_efficiency_plot.png",
    )

    # Plot 4: Energy Demand Fulfillment Rate (smoothed with a 50-step rolling mean)
    _save_pair(
        _fulfillment_pct(df_tuned).rolling(window=50).mean(),
        _fulfillment_pct(df_benchmark).rolling(window=50).mean(),
        ('cyan', 'magenta'), 2,
        "Energy Demand Fulfillment Rate", "Fulfillment Rate (%)",
        "comparison_fulfillment_rate_plot.png",
        label_suffix=" (50-step avg)",
        ylim=(0, 105),
    )

    print(f"Comparison plots saved to: {comparison_save_path}")


In [88]:
def _ppo_space(trial):
    """Sample one PPO hyperparameter set from `trial`."""
    params = {}
    params['learning_rate'] = trial.suggest_float('learning_rate', 1e-5, 1e-3, log=True)
    params['n_steps'] = trial.suggest_categorical('n_steps', [512, 1024, 2048, 4096])
    params['batch_size'] = trial.suggest_categorical('batch_size', [64, 128, 256, 512])
    params['gamma'] = trial.suggest_float('gamma', 0.9, 0.999)
    params['gae_lambda'] = trial.suggest_float('gae_lambda', 0.9, 0.99)
    params['clip_range'] = trial.suggest_float('clip_range', 0.1, 0.4)
    params['n_epochs'] = trial.suggest_int('n_epochs', 5, 20)
    return params


def _a2c_space(trial):
    """Sample one A2C hyperparameter set from `trial`."""
    params = {}
    params['learning_rate'] = trial.suggest_float('learning_rate', 1e-5, 1e-3, log=True)
    params['n_steps'] = trial.suggest_int('n_steps', 5, 50)
    params['gamma'] = trial.suggest_float('gamma', 0.9, 0.999)
    params['gae_lambda'] = trial.suggest_float('gae_lambda', 0.9, 1.0)
    params['ent_coef'] = trial.suggest_float('ent_coef', 1e-8, 1e-1, log=True)
    params['vf_coef'] = trial.suggest_float('vf_coef', 0.1, 1.0)
    return params


def _ddpg_space(trial):
    """Sample one DDPG hyperparameter set from `trial`."""
    params = {}
    params['learning_rate'] = trial.suggest_float('learning_rate', 1e-5, 1e-3, log=True)
    params['buffer_size'] = trial.suggest_int('buffer_size', 10000, 100000)
    params['learning_starts'] = trial.suggest_int('learning_starts', 100, 1000)
    params['tau'] = trial.suggest_float('tau', 0.001, 0.01)
    params['gamma'] = trial.suggest_float('gamma', 0.9, 0.999)
    return params


def _sac_space(trial):
    """Sample one SAC hyperparameter set from `trial`."""
    params = {}
    params['learning_rate'] = trial.suggest_float('learning_rate', 1e-5, 1e-3, log=True)
    params['buffer_size'] = trial.suggest_int('buffer_size', 10000, 100000)
    params['learning_starts'] = trial.suggest_int('learning_starts', 100, 1000)
    params['gamma'] = trial.suggest_float('gamma', 0.9, 0.999)
    params['tau'] = trial.suggest_float('tau', 0.001, 0.01)
    params['ent_coef'] = trial.suggest_float('ent_coef', 1e-8, 1e-1, log=True)
    return params


def _dqn_space(trial):
    """Sample one DQN hyperparameter set from `trial`."""
    params = {}
    params['learning_rate'] = trial.suggest_float('learning_rate', 1e-5, 1e-3, log=True)
    params['buffer_size'] = trial.suggest_int('buffer_size', 10000, 100000)
    params['learning_starts'] = trial.suggest_int('learning_starts', 100, 1000)
    params['gamma'] = trial.suggest_float('gamma', 0.9, 0.999)
    params['exploration_fraction'] = trial.suggest_float('exploration_fraction', 0.1, 0.5)
    params['exploration_final_eps'] = trial.suggest_float('exploration_final_eps', 0.01, 0.1)
    params['train_freq'] = trial.suggest_int('train_freq', 1, 10)
    params['target_update_interval'] = trial.suggest_int('target_update_interval', 100, 1000)
    return params


# Algorithm name -> callable that takes a trial and returns the keyword
# arguments sampled for that algorithm's constructor.
HYPERPARAMETER_SPACES = {
    "PPO": _ppo_space,
    "A2C": _a2c_space,
    "DDPG": _ddpg_space,
    "SAC": _sac_space,
    "DQN": _dqn_space,
}

def objective(trial: optuna.Trial, model_name: str, timesteps: int) -> float:
    """
    Defines the objective function for Optuna to optimize a given RL model.
    It suggests hyperparameters, trains a model, and returns its average reward.

    Args:
        trial: The Optuna trial used to sample hyperparameters.
        model_name: Key into HYPERPARAMETER_SPACES and name of the algorithm
            class looked up in this module's globals (e.g. "PPO", "DQN").
        timesteps: Number of training timesteps for this trial.

    Returns:
        Mean reward over 10 evaluation episodes, or ``-inf`` when training or
        evaluation raised an exception.

    Raises:
        optuna.TrialPruned: For PPO, when ``batch_size`` does not divide
            ``n_steps``.
    """
    # DQN needs the discrete-action environment; every other algorithm uses the continuous one.
    env_class = TEGDiscreteEnvironment if model_name == "DQN" else TEGEnvironment
    policy_name = "MlpPolicy"

    hyperparams = HYPERPARAMETER_SPACES[model_name](trial)

    # PPO-specific check to ensure batch_size is a factor of n_steps.
    # Raising TrialPruned records the trial as pruned instead of as a completed
    # trial with a -inf score.
    if model_name == "PPO" and hyperparams['n_steps'] % hyperparams['batch_size'] != 0:
        raise optuna.TrialPruned()

    model_class = globals()[model_name]

    env = None
    eval_env = None
    try:
        env = make_vec_env(env_class, n_envs=1)

        model = model_class(
            policy_name,
            env,
            **hyperparams,
            verbose=0,
            device=device
        )

        model.learn(total_timesteps=timesteps)

        eval_env = make_vec_env(env_class, n_envs=1)
        mean_reward, _ = evaluate_policy(model, eval_env, n_eval_episodes=10)

    except Exception as e:
        # Deliberate best-effort: one bad hyperparameter draw must not abort the whole study.
        print(f"Trial for {model_name} failed with error: {e}")
        return -np.inf

    finally:
        # Close both vectorised envs whether or not the trial succeeded, so that
        # repeated trials do not accumulate open environments.
        for vec_env in (env, eval_env):
            if vec_env is not None:
                vec_env.close()

    return float(mean_reward)

In [None]:
# --- Default Hyperparameters for Benchmark Models ---
# One independent, empty override dict per algorithm: no keyword overrides are
# supplied for the benchmark models.
DEFAULT_HYPERPARAMS = {
    algorithm: {} for algorithm in ("PPO", "A2C", "DDPG", "SAC", "DQN")
}


In [89]:
class OptunaProgressCallback:
    """
    Study callback that mirrors Optuna's trial counter onto two widgets.

    `progress_bar.value` receives the 1-based number of the trial that just
    finished; `desc_widget.value` receives an HTML caption naming the model
    (the part of the study name before the first underscore) and the progress
    out of `total_trials`.
    """
    def __init__(self, progress_bar, desc_widget, total_trials):
        self.total_trials = total_trials
        self.desc_widget = desc_widget
        self.progress_bar = progress_bar

    def __call__(self, study, trial):
        # trial.number is 0-based; the widgets show a 1-based count.
        finished = trial.number + 1
        self.progress_bar.value = finished
        model_label = study.study_name.split("_")[0]
        caption = f'Tuning model: <b>{model_label}</b>, Trial {finished}/{self.total_trials}'
        self.desc_widget.value = caption

class TrainingProgressCallback(BaseCallback):
    """
    Stable-Baselines3 callback that drives a progress-bar widget during training.

    The bar's maximum is taken from the `total_timesteps` entry of the
    callback's `locals` when training starts; its value then tracks
    `num_timesteps` on every step.
    """
    def __init__(self, progress_bar, verbose=0):
        super().__init__(verbose)
        self.progress_bar = progress_bar

    def _on_training_start(self) -> None:
        """Size the bar for this run and sync it with the current timestep count."""
        bar = self.progress_bar
        bar.max = self.locals['total_timesteps']
        bar.value = self.num_timesteps

    def _on_step(self) -> bool:
        """Refresh the bar; always returns True so training continues."""
        self.progress_bar.value = self.num_timesteps
        return True

In [None]:
if __name__ == "__main__":
    models_to_tune = ['PPO', 'A2C', 'DDPG', 'SAC', 'DQN']
    n_trials = 50
    timesteps_per_trial = 10000
    evaluation_timesteps = 10000

    # Create the main output directory
    output_dir = "Output"
    os.makedirs(output_dir, exist_ok=True)

    # --- Setup Overall Progress Bar ---
    overall_progress = IntProgress(min=0, max=len(models_to_tune) * 2, description='Overall Progress:')
    overall_description = HTML('Overall Progress: <b>0</b>/10 models completed.')
    overall_vbox = VBox([overall_description, overall_progress])
    display(overall_vbox)

    all_metrics = {}
    all_dfs = {}

    for i, model_name in enumerate(models_to_tune):
        all_metrics[model_name] = {}
        all_dfs[model_name] = {}
        # Create a specific directory for the current model
        model_output_dir = os.path.join(output_dir, model_name)
        os.makedirs(model_output_dir, exist_ok=True)

        overall_description.value = f'Overall Progress: Currently processing <b>{model_name} (Tuned)</b> ({i*2+1}/{len(models_to_tune)*2})'
        
        print(f"--- Starting Optuna optimization for {model_name} with {n_trials} trials ---")

        optuna_progress = IntProgress(min=0, max=n_trials, description=f'Tuning {model_name}:')
        optuna_description = HTML(f'Tuning model: <b>{model_name}</b>, Trial 0/{n_trials}')
        optuna_vbox = VBox([optuna_description, optuna_progress])
        display(optuna_vbox)

        study = optuna.create_study(
            direction='maximize',
            study_name=f'{model_name}_optimization'
        )
        # Pass the timesteps variable to the objective function
        func = lambda trial: objective(trial, model_name, timesteps_per_trial)
        study.optimize(func, n_trials=n_trials, callbacks=[OptunaProgressCallback(optuna_progress, optuna_description, n_trials)])
        
        optuna_progress.value = n_trials
        optuna_description.value = f'Tuning model: <b>{model_name}</b>, Trial {n_trials}/{n_trials} - Complete!'

        print(f"\n--- {model_name} Optimization Finished ---")
        print("Number of finished trials: ", len(study.trials))
        best_trial = study.best_trial
        best_trial_params = best_trial.params

        print("Best trial:")
        print("  Value (Average Reward): ", best_trial.value)
        print("  Params: ")
        for key, value in best_trial_params.items():
            print(f"    {key}: {value}")

        print("\n" + "="*50 + "\n")

        print(f"--- Training the final {model_name} model with best hyperparameters ---")

        training_progress = IntProgress(min=0, max=evaluation_timesteps, description=f'Training {model_name}:')
        training_description = HTML(f'Training final model: <b>{model_name}</b>')
        training_vbox = VBox([training_description, training_progress])
        display(training_vbox)

        if model_name == "DQN":
            env_class = TEGDiscreteEnvironment
            policy_name = "MlpPolicy"
        else:
            env_class = TEGEnvironment
            policy_name = "MlpPolicy"

        model_class = globals()[model_name]

        final_env = make_vec_env(lambda: env_class(), n_envs=1)
        final_model = model_class(
            policy_name,
            final_env,
            **best_trial_params,
            verbose=0,
            device=device
        )

        final_model.learn(total_timesteps=evaluation_timesteps, callback=TrainingProgressCallback(training_progress))

        training_progress.value = evaluation_timesteps
        training_description.value = f'Training final model: <b>{model_name}</b> - Complete!'

        # Save the best model
        model_save_path = os.path.join(model_output_dir, "best_model.pt")
        final_model.save(model_save_path)
        print(f"Best model saved to: {model_save_path}")

        print(f"\n--- Evaluating the final {model_name} model ---")
        eval_env_final = make_vec_env(lambda: env_class(), n_envs=1)
        
        reset_output = eval_env_final.reset()
        if isinstance(reset_output, tuple):
            obs, info = reset_output
        else:
            obs = reset_output
            info = {}
        
        all_logs = []
        for _ in range(evaluation_timesteps):
            action, _states = final_model.predict(obs, deterministic=True)
            
            step_output = eval_env_final.step(action)
            
            if len(step_output) == 5:
                obs, reward, terminated, truncated, info = step_output
            else:
                obs, reward, done, info = step_output
                terminated = done
                truncated = False

            df_logs = eval_env_final.envs[0].env.get_logs()
            if not df_logs.empty:
                all_logs.append(df_logs.iloc[-1])
            
            if terminated or truncated:
                reset_output = eval_env_final.reset()
                if isinstance(reset_output, tuple):
                    obs, info = reset_output
                else:
                    obs = reset_output
                    info = {}
        
        if all_logs:
            df_final = pd.DataFrame(all_logs)
            all_dfs[model_name]["tuned"] = df_final

            # Save the final logs to a CSV file
            logs_save_path = os.path.join(model_output_dir, "evaluation_logs.csv")
            df_final.to_csv(logs_save_path, index=False)
            print(f"Evaluation logs saved to: {logs_save_path}")

            metrics_final = calculate_metrics(df_final)
            all_metrics[model_name]["tuned"] = metrics_final

            print(f"\nFinal {model_name} Metrics:")
            for key, value in metrics_final.items():
                if value is not None:
                    print(f"{key}: {value:.4f}")
            
            # Save the plots
            plot_results(df_final, f"Final Optimized {model_name}", model_output_dir, eval_env_final.envs[0].env.max_steps)
            print(f"Evaluation plots saved to: {model_output_dir}")
        else:
            print(f"No logs were collected for {model_name} due to early termination.")

        print("\n" + "="*50 + "\n")
        
        overall_progress.value += 1
        overall_description.value = f'Overall Progress: Finished <b>{model_name} (Tuned)</b> ({i*2+1}/{len(models_to_tune)*2})'

        # --- BENCHMARK MODEL ---
        overall_description.value = f'Overall Progress: Currently processing <b>{model_name} (Benchmark)</b> ({i*2+2}/{len(models_to_tune)*2})'
        
        print(f"--- Training the benchmark {model_name} model with default hyperparameters ---")
        
        benchmark_output_dir = os.path.join(output_dir, f"{model_name}_Benchmark")
        os.makedirs(benchmark_output_dir, exist_ok=True)

        training_progress = IntProgress(min=0, max=evaluation_timesteps, description=f'Training {model_name} (Benchmark):')
        training_description = HTML(f'Training benchmark model: <b>{model_name}</b>')
        training_vbox = VBox([training_description, training_progress])
        display(training_vbox)

        benchmark_env = make_vec_env(lambda: env_class(), n_envs=1)
        benchmark_model = model_class(
            policy_name,
            benchmark_env,
            **DEFAULT_HYPERPARAMS[model_name],
            verbose=0,
            device=device
        )

        benchmark_model.learn(total_timesteps=evaluation_timesteps, callback=TrainingProgressCallback(training_progress))

        training_progress.value = evaluation_timesteps
        training_description.value = f'Training benchmark model: <b>{model_name}</b> - Complete!'

        # Save the benchmark model
        model_save_path = os.path.join(benchmark_output_dir, "benchmark_model.pt")
        benchmark_model.save(model_save_path)
        print(f"Benchmark model saved to: {model_save_path}")

        print(f"\n--- Evaluating the benchmark {model_name} model ---")
        eval_env_benchmark = make_vec_env(lambda: env_class(), n_envs=1)
        
        reset_output = eval_env_benchmark.reset()
        if isinstance(reset_output, tuple):
            obs, info = reset_output
        else:
            obs = reset_output
            info = {}
            
        all_logs = []
        for _ in range(evaluation_timesteps):
            action, _states = benchmark_model.predict(obs, deterministic=True)
            
            step_output = eval_env_benchmark.step(action)
            
            if len(step_output) == 5:
                obs, reward, terminated, truncated, info = step_output
            else:
                obs, reward, done, info = step_output
                terminated = done
                truncated = False

            df_logs = eval_env_benchmark.envs[0].env.get_logs()
            if not df_logs.empty:
                all_logs.append(df_logs.iloc[-1])
            
            if terminated or truncated:
                reset_output = eval_env_benchmark.reset()
                if isinstance(reset_output, tuple):
                    obs, info = reset_output
                else:
                    obs = reset_output
                    info = {}

        if all_logs:
            df_benchmark = pd.DataFrame(all_logs)
            all_dfs[model_name]["benchmark"] = df_benchmark

            # Save the benchmark logs to a CSV file
            logs_save_path = os.path.join(benchmark_output_dir, "benchmark_evaluation_logs.csv")
            df_benchmark.to_csv(logs_save_path, index=False)
            print(f"Benchmark evaluation logs saved to: {logs_save_path}")

            metrics_benchmark = calculate_metrics(df_benchmark)
            all_metrics[model_name]["benchmark"] = metrics_benchmark

            print(f"\nBenchmark {model_name} Metrics:")
            for key, value in metrics_benchmark.items():
                if value is not None:
                    print(f"{key}: {value:.4f}")
            
            # Save the plots
            plot_results(df_benchmark, f"Benchmark {model_name}", benchmark_output_dir, eval_env_benchmark.envs[0].env.max_steps)
            print(f"Benchmark evaluation plots saved to: {benchmark_output_dir}")
        else:
            print(f"No logs were collected for benchmark {model_name} due to early termination.")

        print("\n" + "="*50 + "\n")
        
        overall_progress.value += 1
        overall_description.value = f'Overall Progress: Finished <b>{model_name} (Benchmark)</b> ({i*2+2}/{len(models_to_tune)*2})'

        # --- Generate Comparison Plots ---
        if "tuned" in all_dfs[model_name] and "benchmark" in all_dfs[model_name]:
            plot_comparison_results(
                all_dfs[model_name]["tuned"],
                all_dfs[model_name]["benchmark"],
                model_name,
                output_dir
            )

    overall_description.value = f'Overall Progress: All {len(models_to_tune)} models completed!'
    print("\n--- All models have been processed. The script has completed. ---")


VBox(children=(HTML(value='Overall Progress: <b>0</b>/5 models completed.'), IntProgress(value=0, description=…

--- Starting Optuna optimization for PPO with 50 trials ---


VBox(children=(HTML(value='Tuning model: <b>PPO</b>, Trial 0/50'), IntProgress(value=0, description='Tuning PP…

[I 2025-08-08 14:39:26,830] A new study created in memory with name: PPO_optimization
[I 2025-08-08 14:39:57,172] Trial 0 finished with value: -9311.428436499998 and parameters: {'learning_rate': 0.0003698918825149074, 'n_steps': 4096, 'batch_size': 64, 'gamma': 0.9345577526796096, 'gae_lambda': 0.9148641183683719, 'clip_range': 0.11266113105354143, 'n_epochs': 15}. Best is trial 0 with value: -9311.428436499998.
[I 2025-08-08 14:40:15,682] Trial 1 finished with value: -8994.1130931 and parameters: {'learning_rate': 0.000436938990553852, 'n_steps': 2048, 'batch_size': 64, 'gamma': 0.9466581992922739, 'gae_lambda': 0.9753799099343455, 'clip_range': 0.34568782800139725, 'n_epochs': 5}. Best is trial 1 with value: -8994.1130931.
[I 2025-08-08 14:40:33,629] Trial 2 finished with value: -9284.945012700002 and parameters: {'learning_rate': 1.4764379768804826e-05, 'n_steps': 4096, 'batch_size': 512, 'gamma': 0.953886759437041, 'gae_lambda': 0.9748548774041051, 'clip_range': 0.1787042804335597


--- PPO Optimization Finished ---
Number of finished trials:  50
Best trial:
  Value (Average Reward):  -7011.537005900001
  Params: 
    learning_rate: 0.0008604066316960873
    n_steps: 512
    batch_size: 256
    gamma: 0.9295150242395958
    gae_lambda: 0.9626841661543701
    clip_range: 0.3905623505793241
    n_epochs: 13


--- Training the final PPO model with best hyperparameters ---


VBox(children=(HTML(value='Training final model: <b>PPO</b>'), IntProgress(value=0, description='Training PPO:…

Best model saved to: Output/PPO/best_model.pt

--- Evaluating the final PPO model ---
Evaluation logs saved to: Output/PPO/evaluation_logs.csv

Final PPO Metrics:
Average Reward: -7.1392
Average Battery Health: 100.0000
Average Efficiency: 6.8251
Average Regret: 7.8217
Energy Fulfillment Rate: 21.8949
Evaluation plots saved to: Output/PPO


--- Starting Optuna optimization for A2C with 50 trials ---


VBox(children=(HTML(value='Tuning model: <b>A2C</b>, Trial 0/50'), IntProgress(value=0, description='Tuning A2…

[I 2025-08-08 14:55:48,471] A new study created in memory with name: A2C_optimization
[I 2025-08-08 14:56:05,052] Trial 0 finished with value: -9229.933847600001 and parameters: {'learning_rate': 3.433938326839536e-05, 'n_steps': 25, 'gamma': 0.9370882484703047, 'gae_lambda': 0.9713879162852699, 'ent_coef': 9.441063000227592e-06, 'vf_coef': 0.25770411864692044}. Best is trial 0 with value: -9229.933847600001.
[I 2025-08-08 14:56:20,252] Trial 1 finished with value: -9324.2141225 and parameters: {'learning_rate': 0.0001939414707220086, 'n_steps': 48, 'gamma': 0.9409891592899519, 'gae_lambda': 0.9786975529423152, 'ent_coef': 0.00010289443206054509, 'vf_coef': 0.9863923979481194}. Best is trial 0 with value: -9229.933847600001.
[I 2025-08-08 14:56:36,331] Trial 2 finished with value: -9312.53586 and parameters: {'learning_rate': 1.0930808230032253e-05, 'n_steps': 31, 'gamma': 0.9893242253455418, 'gae_lambda': 0.9010467521923611, 'ent_coef': 0.0014721191710618854, 'vf_coef': 0.669413440191


--- A2C Optimization Finished ---
Number of finished trials:  50
Best trial:
  Value (Average Reward):  -6974.2404259
  Params: 
    learning_rate: 0.0009822653834528215
    n_steps: 11
    gamma: 0.9018261059252334
    gae_lambda: 0.9207349091931982
    ent_coef: 1.3805247094965036e-07
    vf_coef: 0.769143360103185


--- Training the final A2C model with best hyperparameters ---


VBox(children=(HTML(value='Training final model: <b>A2C</b>'), IntProgress(value=0, description='Training A2C:…

Best model saved to: Output/A2C/best_model.pt

--- Evaluating the final A2C model ---
Evaluation logs saved to: Output/A2C/evaluation_logs.csv

Final A2C Metrics:
Average Reward: -7.1789
Average Battery Health: 100.0000
Average Efficiency: 6.8247
Average Regret: 7.8614
Energy Fulfillment Rate: 21.6044
Evaluation plots saved to: Output/A2C


--- Starting Optuna optimization for DDPG with 50 trials ---


VBox(children=(HTML(value='Tuning model: <b>DDPG</b>, Trial 0/50'), IntProgress(value=0, description='Tuning D…

[I 2025-08-08 15:10:29,329] A new study created in memory with name: DDPG_optimization
[I 2025-08-08 15:11:17,723] Trial 0 finished with value: -7080.514686499999 and parameters: {'learning_rate': 0.0006359545559868068, 'buffer_size': 59484, 'learning_starts': 601, 'tau': 0.005049254628082494, 'gamma': 0.9418483000241269}. Best is trial 0 with value: -7080.514686499999.
[I 2025-08-08 15:12:05,958] Trial 1 finished with value: -7016.016583499999 and parameters: {'learning_rate': 4.5241176846644624e-05, 'buffer_size': 36407, 'learning_starts': 238, 'tau': 0.0048649185482042175, 'gamma': 0.9188743930756865}. Best is trial 1 with value: -7016.016583499999.
[I 2025-08-08 15:12:52,867] Trial 2 finished with value: -7066.707883699999 and parameters: {'learning_rate': 1.0535119158558493e-05, 'buffer_size': 94451, 'learning_starts': 511, 'tau': 0.006663484686319702, 'gamma': 0.9127118753098777}. Best is trial 1 with value: -7016.016583499999.
[I 2025-08-08 15:13:43,217] Trial 3 finished with va


--- DDPG Optimization Finished ---
Number of finished trials:  50
Best trial:
  Value (Average Reward):  -6909.856274
  Params: 
    learning_rate: 9.412968774710501e-05
    buffer_size: 66311
    learning_starts: 625
    tau: 0.0073437024888160195
    gamma: 0.9375535753039177


--- Training the final DDPG model with best hyperparameters ---


VBox(children=(HTML(value='Training final model: <b>DDPG</b>'), IntProgress(value=0, description='Training DDP…

Best model saved to: Output/DDPG/best_model.pt

--- Evaluating the final DDPG model ---
Evaluation logs saved to: Output/DDPG/evaluation_logs.csv

Final DDPG Metrics:
Average Reward: -9.3090
Average Battery Health: 100.0000
Average Efficiency: 6.8060
Average Regret: 9.9896
Energy Fulfillment Rate: 0.0000
Evaluation plots saved to: Output/DDPG


--- Starting Optuna optimization for SAC with 50 trials ---


VBox(children=(HTML(value='Tuning model: <b>SAC</b>, Trial 0/50'), IntProgress(value=0, description='Tuning SA…

[I 2025-08-08 15:51:26,444] A new study created in memory with name: SAC_optimization
[I 2025-08-08 15:53:05,321] Trial 0 finished with value: -7110.785841900001 and parameters: {'learning_rate': 0.00029524309463140123, 'buffer_size': 36897, 'learning_starts': 288, 'gamma': 0.9018592218493237, 'tau': 0.008611970989570523, 'ent_coef': 9.295132565782162e-05}. Best is trial 0 with value: -7110.785841900001.
[I 2025-08-08 15:54:35,690] Trial 1 finished with value: -7090.2311755 and parameters: {'learning_rate': 0.0007180784114380303, 'buffer_size': 81137, 'learning_starts': 776, 'gamma': 0.9356968182604202, 'tau': 0.009724506074419052, 'ent_coef': 0.014324215057871901}. Best is trial 1 with value: -7090.2311755.
[I 2025-08-08 15:56:00,755] Trial 2 finished with value: -6990.517753 and parameters: {'learning_rate': 2.8353152937301472e-05, 'buffer_size': 17440, 'learning_starts': 649, 'gamma': 0.9547376545129562, 'tau': 0.005347052470720036, 'ent_coef': 2.942978160017717e-06}. Best is trial 


--- SAC Optimization Finished ---
Number of finished trials:  50
Best trial:
  Value (Average Reward):  -6950.158318900001
  Params: 
    learning_rate: 0.00021505121545666557
    buffer_size: 44746
    learning_starts: 219
    gamma: 0.960093224402277
    tau: 0.008823414463173434
    ent_coef: 7.849174917815664e-07


--- Training the final SAC model with best hyperparameters ---


VBox(children=(HTML(value='Training final model: <b>SAC</b>'), IntProgress(value=0, description='Training SAC:…

Best model saved to: Output/SAC/best_model.pt

--- Evaluating the final SAC model ---
Evaluation logs saved to: Output/SAC/evaluation_logs.csv

Final SAC Metrics:
Average Reward: -7.0480
Average Battery Health: 100.0000
Average Efficiency: 6.8174
Average Regret: 7.7297
Energy Fulfillment Rate: 22.8305
Evaluation plots saved to: Output/SAC


--- Starting Optuna optimization for DQN with 50 trials ---


VBox(children=(HTML(value='Tuning model: <b>DQN</b>, Trial 0/50'), IntProgress(value=0, description='Tuning DQ…

[I 2025-08-08 17:09:55,875] A new study created in memory with name: DQN_optimization
[I 2025-08-08 17:10:02,877] Trial 0 finished with value: -12462.0847219 and parameters: {'learning_rate': 3.8554966439221894e-05, 'buffer_size': 95838, 'learning_starts': 802, 'gamma': 0.9493648614126189, 'exploration_fraction': 0.24383315711118325, 'exploration_final_eps': 0.08596279723202192, 'train_freq': 10, 'target_update_interval': 654}. Best is trial 0 with value: -12462.0847219.
[I 2025-08-08 17:10:10,415] Trial 1 finished with value: -12121.6643026 and parameters: {'learning_rate': 1.5187998933053223e-05, 'buffer_size': 17854, 'learning_starts': 610, 'gamma': 0.9798725582002886, 'exploration_fraction': 0.3760887360627516, 'exploration_final_eps': 0.08753848931350781, 'train_freq': 9, 'target_update_interval': 796}. Best is trial 1 with value: -12121.6643026.
[I 2025-08-08 17:10:19,054] Trial 2 finished with value: -12974.8621457 and parameters: {'learning_rate': 0.00012313943472490777, 'buffe


--- DQN Optimization Finished ---
Number of finished trials:  50
Best trial:
  Value (Average Reward):  -9983.8597712
  Params: 
    learning_rate: 0.00039846792501639156
    buffer_size: 30551
    learning_starts: 898
    gamma: 0.9704260371026998
    exploration_fraction: 0.42882447366004917
    exploration_final_eps: 0.06704927882225381
    train_freq: 6
    target_update_interval: 428


--- Training the final DQN model with best hyperparameters ---


VBox(children=(HTML(value='Training final model: <b>DQN</b>'), IntProgress(value=0, description='Training DQN:…

Best model saved to: Output/DQN/best_model.pt

--- Evaluating the final DQN model ---
Evaluation logs saved to: Output/DQN/evaluation_logs.csv

Final DQN Metrics:
Average Reward: -12.4476
Average Battery Health: 74.9739
Average Efficiency: 6.8221
Average Regret: 13.1298
Energy Fulfillment Rate: 9.9105
Evaluation plots saved to: Output/DQN



--- All models have been processed. The script has completed. ---


In [None]:
# --- Final Comparison of All Models ---

def generate_comparison_table(metrics_data):
    """Generates a formatted DataFrame to compare tuned and benchmark models."""
    comparison_list = []
    for model, types in metrics_data.items():
        if "tuned" in types and "benchmark" in types:
            tuned_metrics = types["tuned"]
            benchmark_metrics = types["benchmark"]
            
            for metric_name in tuned_metrics.keys():
                if tuned_metrics[metric_name] is not None and benchmark_metrics.get(metric_name) is not None:
                    improvement = ((tuned_metrics[metric_name] - benchmark_metrics[metric_name]) / abs(benchmark_metrics[metric_name])) * 100 if benchmark_metrics[metric_name] != 0 else float('inf')
                    comparison_list.append({
                        "Model": model,
                        "Metric": metric_name,
                        "Tuned": tuned_metrics[metric_name],
                        "Benchmark": benchmark_metrics[metric_name],
                        "Improvement (%)": improvement
                    })

    if not comparison_list:
        return pd.DataFrame()

    df_comparison = pd.DataFrame(comparison_list)
    
    # Pivot table for better readability
    df_pivot = df_comparison.pivot(index='Model', columns='Metric', values=['Tuned', 'Benchmark', 'Improvement (%)'])
    
    # Style the DataFrame for better visualization
    styled_df = df_pivot.style.background_gradient(
        cmap='RdYlGn',
        subset=[(col[0], col[1]) for col in df_pivot.columns if col[0] == 'Improvement (%)'],
        axis=1
    ).format("{:.2f}")
    
    return styled_df

# Render the tuned-vs-benchmark summary from the metrics collected in
# `all_metrics`. `generate_comparison_table` returns an empty DataFrame when
# no model has both a "tuned" and a "benchmark" entry; otherwise it returns a
# styled table. The result is kept in `comparison_table` and shown with
# IPython's rich `display`.
print("--- Comparison of Tuned vs. Benchmark Models ---")
comparison_table = generate_comparison_table(all_metrics)
display(comparison_table)
