In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os

In [2]:
# Runs to load, in order: agents 1 through 6 from their own folders, followed by
# the 'agent1_stoch' run as the seventh (last) entry.
run_dirs = [f'agent{n}' for n in range(1, 7)] + ['agent1_stoch']

# Each entry is whatever np.load returns for that run's 'plot_arrays.npz' file.
plots_arrays = [np.load(f'plots/{run_dir}/plot_arrays.npz') for run_dir in run_dirs]

In [18]:
def aggregate_plots(iterations, array, agents, ylabel, title, name, log_scale=False, out_dir='plots'):
    """Plot the mean and min-max envelope of several agents and save it as a PNG.

    Each entry of ``array`` is reduced along axis 0 (minimum, maximum and mean).
    The min-max envelope is drawn as a half-transparent band and the mean as a
    line, both against ``iterations``. The figure is written to
    ``{out_dir}/{name}.png``, closed, and a confirmation line is printed.

    Parameters
    ----------
    iterations : sequence of numbers
        X coordinates shared by every agent.
    array : sequence of array-like
        One entry per agent. Each entry is reduced along axis 0, so the reduced
        result must provide one value per element of ``iterations``
        (presumably axis 0 indexes independent runs -- confirm with the
        code that produced the data).
    agents : sequence
        Legend labels, one per entry of ``array``.
    ylabel : str
        Label of the y axis. The x axis is always labelled 'Time Steps'.
    title : str
        Figure title.
    name : str
        File stem of the saved image (``.png`` is appended).
    log_scale : bool, optional
        When true the y axis uses a logarithmic scale.
    out_dir : str, optional
        Directory the image is written to; created if missing. Defaults to
        'plots'.

    Raises
    ------
    ValueError
        If ``array`` and ``agents`` differ in length, or if an agent's reduced
        series does not have the same length as ``iterations``.
    """
    if len(array) != len(agents):
        raise ValueError(
            f"Got {len(array)} data arrays for {len(agents)} agent labels; "
            "pass exactly one array per agent."
        )

    # Hold an explicit figure handle so the cleanup below closes this figure,
    # not whichever one happens to be current in the kernel.
    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        for label, runs in zip(agents, array):
            min_values = np.min(runs, axis=0)
            max_values = np.max(runs, axis=0)
            avg_values = np.mean(runs, axis=0)

            # Fail early with the offending agent named instead of letting
            # matplotlib raise an anonymous broadcasting error.
            if np.ndim(avg_values) > 0 and len(avg_values) != len(iterations):
                raise ValueError(
                    f"Agent {label}: series has {len(avg_values)} points but "
                    f"{len(iterations)} iterations were given."
                )

            ax.fill_between(iterations, min_values, max_values, alpha=0.5,
                            label=f'Agent {label} Min-Max Range')
            ax.plot(iterations, avg_values, label=f'Agent {label} Average')

        ax.set_xlabel('Time Steps')
        ax.set_ylabel(ylabel)
        if log_scale:
            ax.set_yscale('log')
        ax.set_title(title)
        ax.legend()
        ax.grid(True)

        os.makedirs(out_dir, exist_ok=True)
        filename = f'{out_dir}/{name}.png'
        fig.savefig(filename)
    finally:
        # Always release the figure, even when plotting or saving failed, so
        # repeated runs do not pile up open figures.
        plt.close(fig)

    print(f"Plot saved as {filename}")
    
# X-axis positions: training points are spaced 1000 steps apart and evaluation
# points 20000 steps apart. Each axis starts at its first interval and holds one
# entry fewer than the length of the first row of the first run's array.
tr_iterations = list(range(1000, 1000 * len(plots_arrays[0]['tr_returns'][0]), 1000))
eval_iterations = list(range(20000, 20000 * len(plots_arrays[0]['eval_returns'][0]), 20000))

# For every stored metric, collect the arrays of the seven loaded runs in order.
(
    tr_returns_arrays,
    eval_returns_arrays,
    eval_trajec_values_arrays,
    actor_arrays,
    critic_arrays,
) = (
    [plots_arrays[run_idx][metric] for run_idx in range(7)]
    for metric in ('tr_returns', 'eval_returns', 'eval_trajec_values', 'actor_losses', 'critic_losses')
)

# Both figures compare the last loaded run (labelled '1_stoch', with its final
# column dropped) against the runs at indices 1 to 3 (labelled '2', '3', '4').
aggregate_plots(
    iterations=eval_iterations,
    array=[eval_returns_arrays[-1][:, :-1], *eval_returns_arrays[1:4]],
    agents=['1_stoch', '2', '3', '4'],
    ylabel='Undiscounted Return',
    title='Evaluation episodic undiscounted return for agents 1 to 4 with reward masking',
    name='eval_returns',
    log_scale=False,
)

aggregate_plots(
    iterations=eval_iterations,
    array=[eval_trajec_values_arrays[-1][:, :-1], *eval_trajec_values_arrays[1:4]],
    agents=['1_stoch', '2', '3', '4'],
    ylabel='Critic value',
    title='Mean critic value during sampled evaluation trajectories for agents 1 to 4 with reward masking',
    name='eval_trajec_values',
    log_scale=False,
)

Plot saved as plots/eval_returns.png
Plot saved as plots/eval_trajec_values.png
