In [None]:
import os
from collections import defaultdict

import matplotlib.pyplot as plt
from matplotlib import rcParams

import pandas as pd

from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

In [None]:
# Factor by which matplotlib's font size is enlarged for the figures below
scale_factor = 1.5

# Remember the font size that was in effect before this cell first ran, and
# always scale from that baseline. Multiplying rcParams in place would compound
# the factor (1.5x, 2.25x, ...) every time the cell is re-executed.
_base_font_size = globals().get("_base_font_size", rcParams['font.size'])
rcParams['font.size'] = _base_font_size * scale_factor       # Default font size

In [None]:
# Which training run the notebook reads its logs from
env_id = "CARLPendulum"
training_seed = 1

# Method key -> label shown in the plot legends
method_names = dict(
    vanilla="Vanilla",
    stacked="Stacked",
    oracle="Oracle",
    oracle_stacked="Oracle + Stacked",
    osi="OSI",
    dm="DM",
)

# Method keys in the same order as the label table above
methods = list(method_names)

In [None]:
env_config_id = "default"

# Step and value of every "charts/episodic_return" scalar event, keyed by method.
# Only methods for which at least one event was read end up as keys.
steps = defaultdict(list)
values = defaultdict(list)

for method in methods:
    checkpoint_directory = f"../runs/training/seed_{training_seed}/{env_id}/{env_config_id}/{method}"
    if not os.path.isdir(checkpoint_directory):
        continue  # no run directory for this method

    # Sorted so the choice is deterministic when the directory holds several
    # event files (os.listdir returns entries in arbitrary order).
    event_filenames = sorted(
        filename for filename in os.listdir(checkpoint_directory) if filename.startswith("event")
    )
    if not event_filenames:
        # Skip explicitly: otherwise the filename would be undefined, or would
        # silently be the one left over from the previous method.
        print(f"No event file found in {checkpoint_directory}; skipping '{method}'")
        continue
    event_filename = os.path.join(checkpoint_directory, event_filenames[0])

    event_acc = EventAccumulator(event_filename)
    event_acc.Reload()

    for e in event_acc.Scalars("charts/episodic_return"):
        steps[method].append(e.step)
        values[method].append(e.value)

# One column per method. Methods may have logged a different number of events,
# so each column is a Series that gets padded with NaN up to the longest run
# (a dict of plain lists would raise on unequal lengths).
n_rows = max((len(returns) for returns in values.values()), default=0)
data_dict = {method: pd.Series(returns, dtype=float) for method, returns in values.items()}
# Kept for reference: the steps of the longest run (the first method wins ties).
# The plot below uses each method's own steps instead of this shared column.
data_dict["training_steps"] = pd.Series(max(steps.values(), key=len, default=[]), dtype="int64")

# Convert dictionary to DataFrame
data = pd.DataFrame(data_dict, index=range(n_rows))

# Apply a rolling average to smooth the reward curves
window_size = 10  # Adjust the window size to your preference
for method in values:
    windows = data[method].rolling(window=window_size)
    # Compute both statistics from the raw returns before overwriting the
    # column; taking the std of the already smoothed curve would measure the
    # spread of the rolling mean, not of the returns.
    smoothed = windows.mean()
    spread = windows.std()
    data[method] = smoothed
    data[method + '_std'] = spread

# Plot setup
fig, ax = plt.subplots(figsize=(12, 6))

# Plot each method against its own logged steps
for method in values:
    method_steps = steps[method]
    n_points = len(method_steps)
    mean = data[method].iloc[:n_points]
    std = data[method + '_std'].iloc[:n_points]

    (line,) = ax.plot(method_steps, mean, label=method_names.get(method, method), alpha=0.8, linewidth=2)
    # Shaded region for variance, tied explicitly to the colour of its line
    ax.fill_between(
        method_steps,
        mean - std,
        mean + std,
        alpha=0.2,
        color=line.get_color()
    )

# Labels and Title
ax.set_xlabel('Training Steps')
ax.set_ylabel('Episode Reward')
ax.set_title('Training Reward')

# Legend (only when something was plotted, to avoid the empty-legend warning)
if values:
    ax.legend()

# Save and show plot
os.makedirs("output", exist_ok=True)
fig.savefig(f'output/{env_id}_{env_config_id}.pdf')
plt.show()

In [None]:
env_config_id = "train"

# Step and value of every "charts/episodic_return" scalar event, keyed by method.
# Only methods for which at least one event was read end up as keys.
steps = defaultdict(list)
values = defaultdict(list)

for method in methods:
    checkpoint_directory = f"../runs/training/seed_{training_seed}/{env_id}/{env_config_id}/{method}"
    if not os.path.isdir(checkpoint_directory):
        continue  # no run directory for this method

    # Sorted so the choice is deterministic when the directory holds several
    # event files (os.listdir returns entries in arbitrary order).
    event_filenames = sorted(
        filename for filename in os.listdir(checkpoint_directory) if filename.startswith("event")
    )
    if not event_filenames:
        # Skip explicitly: otherwise the filename would be undefined, or would
        # silently be the one left over from the previous method.
        print(f"No event file found in {checkpoint_directory}; skipping '{method}'")
        continue
    event_filename = os.path.join(checkpoint_directory, event_filenames[0])

    event_acc = EventAccumulator(event_filename)
    event_acc.Reload()

    for e in event_acc.Scalars("charts/episodic_return"):
        steps[method].append(e.step)
        values[method].append(e.value)

# One column per method. Methods may have logged a different number of events,
# so each column is a Series that gets padded with NaN up to the longest run
# (a dict of plain lists would raise on unequal lengths).
n_rows = max((len(returns) for returns in values.values()), default=0)
data_dict = {method: pd.Series(returns, dtype=float) for method, returns in values.items()}
# Kept for reference: the steps of the longest run (the first method wins ties).
# The plot below uses each method's own steps instead of this shared column.
data_dict["training_steps"] = pd.Series(max(steps.values(), key=len, default=[]), dtype="int64")

# Convert dictionary to DataFrame
data = pd.DataFrame(data_dict, index=range(n_rows))

# Apply a rolling average to smooth the reward curves
window_size = 10  # Adjust the window size to your preference
for method in values:
    windows = data[method].rolling(window=window_size)
    # Compute both statistics from the raw returns before overwriting the
    # column; taking the std of the already smoothed curve would measure the
    # spread of the rolling mean, not of the returns.
    smoothed = windows.mean()
    spread = windows.std()
    data[method] = smoothed
    data[method + '_std'] = spread

# Plot setup
fig, ax = plt.subplots(figsize=(12, 6))

# Plot each method against its own logged steps
for method in values:
    method_steps = steps[method]
    n_points = len(method_steps)
    mean = data[method].iloc[:n_points]
    std = data[method + '_std'].iloc[:n_points]

    (line,) = ax.plot(method_steps, mean, label=method_names.get(method, method), alpha=0.8, linewidth=2)
    # Shaded region for variance, tied explicitly to the colour of its line
    ax.fill_between(
        method_steps,
        mean - std,
        mean + std,
        alpha=0.2,
        color=line.get_color()
    )

# Labels and Title
ax.set_xlabel('Training Steps')
ax.set_ylabel('Episode Reward')
ax.set_title('Training Reward')

# Legend (only when something was plotted, to avoid the empty-legend warning)
if values:
    ax.legend()

# Save and show plot; create the output directory here so this cell does not
# depend on another cell having created it first
os.makedirs("output", exist_ok=True)
fig.savefig(f'output/{env_id}_{env_config_id}.pdf')
plt.show()