In [16]:
%%bash
tensorboard --logdir ./sb3_logs/A2C_PongNoFrameskip-v4_10800/

TensorFlow installation not found - running with reduced feature set.
Serving TensorBoard on localhost; to expose to the network, use a proxy or pass --bind_all
TensorBoard 2.17.0 at http://localhost:6010/ (Press CTRL+C to quit)


Error while terminating subprocess (pid=19529): 


In [6]:
import pandas as pd
import numpy as np
import json
import glob
import os
from tensorboard.backend.event_processing import event_accumulator

In [7]:
def load_sb3_data(tensorboard_log_dir, json_path, algorithm_name, duration, color, algorithm_type):
    """
    Load one SB3 training run from TensorBoard logs into a single-row DataFrame.

    Scalar events tagged 'rollout/ep_rew_mean' and 'train/loss' are gathered from
    every 'events.*' file found recursively under `tensorboard_log_dir`. Events are
    ordered by wall time, so the resulting series are chronological no matter in
    which order the event files are discovered. Evaluation statistics are read
    from the JSON file at `json_path`.

    Args:
        tensorboard_log_dir: Directory searched recursively for 'events.*' files.
        json_path: Path to a JSON file; its 'mean_reward' and 'std_reward' keys
            (when present) are copied into the result.
        algorithm_name: Value stored in the 'Algorithm Name' column.
        duration: Duration label stored in the 'Duration' column. '3h', '6h' and
            '9h' additionally define an expected training time; any other label
            disables the snapping described below.
        color: Value stored in the 'Color' column.
        algorithm_type: Value stored in the 'Type' column.

    Returns:
        A pandas DataFrame with exactly one row. List-valued cells hold the
        reward values, their wall times and steps, and the loss values and steps.
        'Total Training Time' is the span in seconds between the earliest and
        latest collected event, replaced by the expected duration when the two
        differ by at most 60 seconds; it is None when no events were found.
        'Average Timesteps per Episode' is the mean difference between
        consecutive logged reward steps, or None when fewer than two reward
        entries exist. 'Total Episodes' is the number of logged reward entries.

    Raises:
        OSError: If `json_path` cannot be opened.
        json.JSONDecodeError: If `json_path` does not contain valid JSON.
    """
    # Expected durations in seconds, and how far the measured span may deviate
    # from them before it is reported as-is.
    duration_map = {'3h': 3 * 3600, '6h': 6 * 3600, '9h': 9 * 3600}
    expected_duration = duration_map.get(duration)
    tolerance_seconds = 60

    # glob makes no ordering promise; sort for a deterministic read order.
    event_files = sorted(
        glob.glob(os.path.join(tensorboard_log_dir, '**', 'events.*'), recursive=True)
    )

    # Load evaluation data first so a bad path fails before parsing event files.
    with open(json_path, 'r') as f:
        data = json.load(f)

    reward_events = []
    loss_events = []
    for event_file in event_files:
        # A size guidance of 0 keeps every scalar; the default caps the number
        # of stored scalars per tag and silently down-samples long runs.
        ea = event_accumulator.EventAccumulator(
            event_file, size_guidance={event_accumulator.SCALARS: 0}
        )
        ea.Reload()

        scalar_tags = ea.Tags().get('scalars', [])
        if 'rollout/ep_rew_mean' in scalar_tags:
            reward_events.extend(ea.Scalars('rollout/ep_rew_mean'))
        if 'train/loss' in scalar_tags:
            loss_events.extend(ea.Scalars('train/loss'))

    # Stable sort by wall time: keeps the in-file order and puts events coming
    # from different files into chronological order.
    reward_events.sort(key=lambda e: e.wall_time)
    loss_events.sort(key=lambda e: e.wall_time)

    rewards = [e.value for e in reward_events]
    rewards_timesteps = [e.step for e in reward_events]
    rewards_times = [e.wall_time for e in reward_events]
    losses = [e.value for e in loss_events]
    losses_timesteps = [e.step for e in loss_events]
    all_timestamps = rewards_times + [e.wall_time for e in loss_events]

    # Total training time spans the earliest to the latest event across all files.
    if all_timestamps:
        calculated_training_time = max(all_timestamps) - min(all_timestamps)
        if expected_duration and abs(calculated_training_time - expected_duration) <= tolerance_seconds:
            total_training_time = expected_duration  # close enough: report the nominal budget
        else:
            total_training_time = calculated_training_time
    else:
        total_training_time = None

    # Mean step distance between consecutive reward log entries. None (not an
    # empty list) when undefined, so the column holds a number or None only.
    if len(rewards_timesteps) > 1:
        avg_timesteps = float(np.mean(np.diff(rewards_timesteps)))
    else:
        avg_timesteps = None

    df = pd.DataFrame({
        'Algorithm Name': [algorithm_name],
        'Duration': [duration],
        'Type': [algorithm_type],
        'Color': [color],
        'Episode Rewards': [rewards],
        'Episode Times': [rewards_times],  # wall time of each reward entry
        'Episode Timesteps': [rewards_timesteps],
        'Losses': [losses],
        'Loss Times': [None],  # loss wall times are not collected
        'Loss Timesteps': [losses_timesteps],
        'Total Training Time': [total_training_time],  # seconds, possibly snapped to the budget
        'Total Episodes': [len(rewards)],
        'Average Timesteps per Episode': [avg_timesteps],
        'Mean Evaluation Reward': [data.get('mean_reward', None)],
        'Std Evaluation Reward': [data.get('std_reward', None)]
    })

    return df



In [9]:
# One single-row DataFrame per training run is collected here
dfs = []

# Runs to load: (algorithm, duration label, training budget in seconds, plot colour).
# The budget in seconds is part of both the log directory and the metrics directory name.
run_specs = [
    ('DQN', '3h', 10800, '#80DEEA'),
    ('A2C', '3h', 10800, '#F8BBD0'),
    ('PPO', '3h', 10800, '#C5E1A5'),
    ('DQN', '6h', 21600, '#00ACC1'),
    ('A2C', '6h', 21600, '#EC407A'),
    ('PPO', '6h', 21600, '#7CB342'),
    ('DQN', '9h', 32400, '#00838F'),
    ('A2C', '9h', 32400, '#AD1457'),
    ('PPO', '9h', 32400, '#33691E'),
]

# Expand each spec into the full run description used by the loader
algorithms = [
    {
        'type': 'sb3',
        'name': f'{algo_id}_SB3_{label}',
        'log_dir': f'./sb3_logs/{algo_id}_PongNoFrameskip-v4_{seconds}/',
        'json_path': f'./metrics/sb3_eval/{algo_id.lower()}_model_{seconds}/metrics.json',
        'duration': label,
        'color': hex_color,
    }
    for algo_id, label, seconds, hex_color in run_specs
]

# Load every run, keeping the order of the table above
for algo in algorithms:
    df = load_sb3_data(
        tensorboard_log_dir=algo['log_dir'],
        json_path=algo['json_path'],
        algorithm_name=algo['name'],
        duration=algo['duration'],
        color=algo['color'],
        algorithm_type='SB3',
    )
    dfs.append(df)

# Stack the per-run rows into one DataFrame with a fresh 0..n-1 index
data = pd.concat(dfs, ignore_index=True)

TensorFlow installation not found - running with reduced feature set.
Serving TensorBoard on localhost; to expose to the network, use a proxy or pass --bind_all
TensorBoard 2.17.0 at http://localhost:6008/ (Press CTRL+C to quit)


Error while terminating subprocess (pid=18683): 


W1026 08:03:02.026597 6147665920 plugin_event_multiplexer.py:267] Deleting accumulator '.'
