In [None]:
import os
from collections import defaultdict

import matplotlib.pyplot as plt
from matplotlib import rcParams, rcParamsOrig
from matplotlib.patches import Rectangle

import numpy as np
import pandas as pd

from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

# Scaling factor applied to Matplotlib's base font size for every figure in this notebook
scale_factor = 1.5

# Derive the size from the value Matplotlib captured at import time instead of
# multiplying in place: `*=` compounds the scaling each time this cell is re-run.
rcParams['font.size'] = rcParamsOrig['font.size'] * scale_factor       # Default font size

In [None]:
# --- Experiment selection: Pendulum ---------------------------------------
# Seeds used to locate the run directory and the evaluation scalar tag.
training_seed, test_seed = 1, 101
env_id = "CARLPendulum"

# Identifiers "test_1" ... "test_25" of the evaluated test configurations.
test_env_config_ids = list(map("test_{}".format, range(1, 26)))

# Not referenced by the cells visible here.
# NOTE(review): presumably the 5 x 5 grid of context values behind the 25 test configs - confirm.
c1_values = [0.1, 0.5, 1.0, 1.5, 2.0]
c2_values = list(c1_values)

In [None]:
# Per-method series with exactly one entry per evaluated checkpoint.
# Each maps a method id to a list that is converted to a float array at the end of the cell.
mean_er = defaultdict(list)
mean_er_std = defaultdict(list)
min_er = defaultdict(list)
worst_k_mean_er = defaultdict(list)
worst_k_mean_er_std = defaultdict(list)

method_ids = ["stacked", "stacked_bdr"]
env_config_id = "train"
k = 0.1  # fraction of lowest-return episodes used for the "worst k" statistics

for checkpoint_idx in range(5000, 50001, 5000):
    print(checkpoint_idx)

    # Values pooled over all test configurations for this checkpoint.
    return_means = defaultdict(list)
    returns = defaultdict(list)

    for method in method_ids:
        for test_env_config_id in test_env_config_ids:
            checkpoint_directory = f"../../runs/test/seed_{training_seed}/{env_id}/{test_env_config_id}/{env_config_id}/{method}/checkpoint_{checkpoint_idx}"
            if not os.path.isdir(checkpoint_directory):
                continue

            # Resolve the event file for THIS directory only. Reusing a loop variable
            # here would silently carry over the previous directory's file name when
            # the current directory contains no event file.
            event_basename = next(
                (name for name in os.listdir(checkpoint_directory) if name.startswith("event")),
                None,
            )
            if event_basename is None:
                print(f"No event file found in {checkpoint_directory}; skipping")
                continue
            event_filename = os.path.join(checkpoint_directory, event_basename)

            event_acc = EventAccumulator(event_filename)
            event_acc.Reload()

            for e in event_acc.Scalars(f"charts/checkpoint_{checkpoint_idx}/episodic_return"):
                returns[method].append(e.value)

            for e in event_acc.Scalars(f"evaluation/seed_{test_seed}/episodic_return_mean"):
                return_means[method].append(e.value)

    # Iterate over the fixed method list (not over the keys that happened to receive
    # data) so every series gets one entry per checkpoint and stays aligned with the
    # checkpoint index used as the x-axis when plotting.
    for method in method_ids:
        print(method)

        method_returns = returns[method]
        method_return_means = return_means[method]

        if not method_returns or not method_return_means:
            print('No evaluation data for this checkpoint; recording NaN')
            for series in (mean_er, mean_er_std, min_er, worst_k_mean_er, worst_k_mean_er_std):
                series[method].append(np.nan)
            print('='*40)
            continue

        # Keep at least one episode: int() truncates to 0 for fewer than 1/k episodes,
        # which would make the mean/std below operate on an empty list.
        worst_k = max(1, int(len(method_returns)*k))
        worst_k_returns = sorted(method_returns)[:worst_k]

        print(f'Mean Episodic Reward: {np.mean(method_return_means)} +- {np.std(method_return_means)}')
        print(f'Minimum Episodic Reward: {np.min(method_return_means)}')
        print(f'Mean Episodic Reward (Worst {int(k*100)}%): {np.mean(worst_k_returns)} +- {np.std(worst_k_returns)}')
        print('='*40)

        mean_er[method].append(np.mean(method_return_means))
        mean_er_std[method].append(np.std(method_return_means))
        min_er[method].append(np.min(method_return_means))
        worst_k_mean_er[method].append(np.mean(worst_k_returns))
        worst_k_mean_er_std[method].append(np.std(worst_k_returns))
    print('='*40)

# Convert to float arrays so downstream cells can slice and NaN-mask the series.
for method in mean_er:
    mean_er[method] = np.array(mean_er[method], dtype=float)
    mean_er_std[method] = np.array(mean_er_std[method], dtype=float)
    min_er[method] = np.array(min_er[method], dtype=float)
    worst_k_mean_er[method] = np.array(worst_k_mean_er[method], dtype=float)
    worst_k_mean_er_std[method] = np.array(worst_k_mean_er_std[method], dtype=float)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Internal method ids and their display names / markers (index-aligned).
method_ids = ['stacked', 'stacked_bdr']
methods = ['Baseline', 'BDR']
markers = ['o', 's']
# Panel titles, in the same left-to-right order as the series plotted below.
metrics = ['Minimum Episodic Return', 'Mean Episodic Return (Worst 10%)', 'Mean Episodic Return']

# Number of leading checkpoints hidden from the curves.
# NOTE(review): presumably tied to the dashed marker drawn at x=3 - confirm the intent.
n_masked = 2

fig, axs = plt.subplots(1, 3, figsize=(14, 6))

for j, method in enumerate(method_ids):
    # np.array(...) copies, so masking here leaves the aggregated results untouched
    # and this cell can be re-run or followed by other analyses of the raw series.
    panel_series = [
        np.array(min_er[method], dtype=float),
        np.array(worst_k_mean_er[method], dtype=float),
        np.array(mean_er[method], dtype=float),
    ]
    x = np.arange(1, len(mean_er[method])+1)

    for ax, series in zip(axs, panel_series):
        series[:n_masked] = np.nan
        ax.plot(x, series, marker=markers[j], label=methods[j])

# Format each axis exactly once, after all curves exist. Doing this inside the
# method loop stacks a duplicate vertical line on every axis per method.
for i, ax in enumerate(axs):
    ax.legend(loc='lower right')
    ax.set_xlabel('Training Time', fontsize=18)
    ax.set_ylabel('Episodic Return', fontsize=18)
    ax.set_title(metrics[i])
    ax.set_xlim(1-0.2, len(x)+0.2)
    if i == 2:
        ax.set_ylim(-850, -350)
    else:
        ax.set_ylim(-1550, -1050)
    ax.set_xticks(x)
    ax.axvline(x=3, linestyle='--', color='gray')

fig.suptitle('Pendulum - Checkpoint Evaluation Performance Comparison')
fig.tight_layout(rect=[0, 0.03, 1, 0.95])

# savefig does not create missing parent directories, so make sure it exists first.
output_path = 'output/CARLPendulum/checkpoint_evaluation.pdf'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
fig.savefig(output_path, bbox_inches='tight')
plt.show()

In [None]:
# --- Experiment selection: Walker -----------------------------------------
# Seeds used to locate the run directory and the evaluation scalar tag.
training_seed, test_seed = 1, 101
env_id = "CARLDmcWalkerEnv"

# Identifiers "test_1" ... "test_25" of the evaluated test configurations.
test_env_config_ids = list(map("test_{}".format, range(1, 26)))

# Not referenced by the cells visible here.
# NOTE(review): presumably the 5 x 5 grid of context values behind the 25 test configs - confirm.
c1_values = [0.1, 0.5, 1.0, 1.5, 2.0]
c2_values = [1, 5, 10, 15, 20]

In [None]:
# Per-method series with exactly one entry per evaluated checkpoint.
# Each maps a method id to a list that is converted to a float array at the end of the cell.
mean_er = defaultdict(list)
mean_er_std = defaultdict(list)
min_er = defaultdict(list)
worst_k_mean_er = defaultdict(list)
worst_k_mean_er_std = defaultdict(list)

method_ids = ["stacked", "stacked_bdr"]
env_config_id = "train"
k = 0.1  # fraction of lowest-return episodes used for the "worst k" statistics

for checkpoint_idx in range(50000, 500001, 50000):
    print(checkpoint_idx)

    # Values pooled over all test configurations for this checkpoint.
    return_means = defaultdict(list)
    returns = defaultdict(list)

    for method in method_ids:
        for test_env_config_id in test_env_config_ids:
            checkpoint_directory = f"../../runs/test/seed_{training_seed}/{env_id}/{test_env_config_id}/{env_config_id}/{method}/checkpoint_{checkpoint_idx}"
            if not os.path.isdir(checkpoint_directory):
                continue

            # Resolve the event file for THIS directory only. Reusing a loop variable
            # here would silently carry over the previous directory's file name when
            # the current directory contains no event file.
            event_basename = next(
                (name for name in os.listdir(checkpoint_directory) if name.startswith("event")),
                None,
            )
            if event_basename is None:
                print(f"No event file found in {checkpoint_directory}; skipping")
                continue
            event_filename = os.path.join(checkpoint_directory, event_basename)

            event_acc = EventAccumulator(event_filename)
            event_acc.Reload()

            for e in event_acc.Scalars(f"charts/checkpoint_{checkpoint_idx}/episodic_return"):
                returns[method].append(e.value)

            for e in event_acc.Scalars(f"evaluation/seed_{test_seed}/episodic_return_mean"):
                return_means[method].append(e.value)

    # Iterate over the fixed method list (not over the keys that happened to receive
    # data) so every series gets one entry per checkpoint and stays aligned with the
    # checkpoint index used as the x-axis when plotting.
    for method in method_ids:
        print(method)

        method_returns = returns[method]
        method_return_means = return_means[method]

        if not method_returns or not method_return_means:
            print('No evaluation data for this checkpoint; recording NaN')
            for series in (mean_er, mean_er_std, min_er, worst_k_mean_er, worst_k_mean_er_std):
                series[method].append(np.nan)
            print('='*40)
            continue

        # Keep at least one episode: int() truncates to 0 for fewer than 1/k episodes,
        # which would make the mean/std below operate on an empty list.
        worst_k = max(1, int(len(method_returns)*k))
        worst_k_returns = sorted(method_returns)[:worst_k]

        print(f'Mean Episodic Reward: {np.mean(method_return_means)} +- {np.std(method_return_means)}')
        print(f'Minimum Episodic Reward: {np.min(method_return_means)}')
        print(f'Mean Episodic Reward (Worst {int(k*100)}%): {np.mean(worst_k_returns)} +- {np.std(worst_k_returns)}')
        print('='*40)

        mean_er[method].append(np.mean(method_return_means))
        mean_er_std[method].append(np.std(method_return_means))
        min_er[method].append(np.min(method_return_means))
        worst_k_mean_er[method].append(np.mean(worst_k_returns))
        worst_k_mean_er_std[method].append(np.std(worst_k_returns))
    print('='*40)

# Convert to float arrays so downstream cells can slice and plot the series directly.
for method in mean_er:
    mean_er[method] = np.array(mean_er[method], dtype=float)
    mean_er_std[method] = np.array(mean_er_std[method], dtype=float)
    min_er[method] = np.array(min_er[method], dtype=float)
    worst_k_mean_er[method] = np.array(worst_k_mean_er[method], dtype=float)
    worst_k_mean_er_std[method] = np.array(worst_k_mean_er_std[method], dtype=float)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Internal method ids and their display names / markers (index-aligned).
method_ids = ['stacked', 'stacked_bdr']
methods = ['Baseline', 'BDR']
markers = ['o', 's']
# Panel titles, in the same left-to-right order as the series plotted below.
metrics = ['Minimum Episodic Return', 'Mean Episodic Return (Worst 10%)', 'Mean Episodic Return']

fig, axs = plt.subplots(1, 3, figsize=(14, 6))

for j, method in enumerate(method_ids):
    x = np.arange(1, len(mean_er[method])+1)

    # One curve per panel: minimum, worst-10% mean, overall mean.
    panel_series = [min_er[method], worst_k_mean_er[method], mean_er[method]]
    for ax, series in zip(axs, panel_series):
        ax.plot(x, series, marker=markers[j], label=methods[j])

# Format each axis exactly once, after all curves exist. Doing this inside the
# method loop stacks a duplicate vertical line on every axis per method.
for i, ax in enumerate(axs):
    ax.legend(loc='lower right')
    ax.set_xlabel('Training Time', fontsize=18)
    ax.set_ylabel('Episodic Return', fontsize=18)
    ax.set_title(metrics[i])
    ax.set_xlim(1-0.2, len(x)+0.2)
    # All three panels share the same y-range.
    ax.set_ylim(1, 950)
    ax.set_xticks(x)
    ax.axvline(x=3, linestyle='--', color='gray')

fig.suptitle('Walker - Checkpoint Evaluation Performance Comparison')
fig.tight_layout(rect=[0, 0.03, 1, 0.95])

# savefig does not create missing parent directories, so make sure it exists first.
output_path = 'output/CARLDmcWalkerEnv/checkpoint_evaluation.pdf'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
fig.savefig(output_path, bbox_inches='tight')
plt.show()