In [645]:
import numpy as np
import matplotlib.pyplot as plt
import os

In [646]:
# Agents 1-6 are stored under plots/agent{i}; the seventh entry is the
# 'agent1_stoch' run, so it ends up at index 6 (and -1) of plots_arrays.
run_dirs = [f'agent{i}' for i in range(1, 7)] + ['agent1_stoch']

plots_arrays = []
for run_dir in run_dirs:
    # np.load on an .npz returns a lazy NpzFile that keeps the file open.
    # Copy every array into a plain dict inside the context manager so the
    # handle is closed right away and later lookups do not re-read the file.
    with np.load(f'plots/{run_dir}/plot_arrays.npz') as archive:
        plots_arrays.append({key: archive[key] for key in archive.files})

In [647]:
def exponential_moving_average(data, alpha=0.3):
    """Smooth a sequence with an exponential moving average (EMA).

    The recurrence runs along the first axis of ``data``:
    ``ema[0] = data[0]`` and
    ``ema[t] = alpha * data[t] + (1 - alpha) * ema[t - 1]``.

    Args:
        data: Array-like of numbers.
        alpha: Weight given to the newest sample; ``1 - alpha`` is the weight
            given to the previous EMA value.

    Returns:
        A NumPy array with the same shape as ``data``. Floating-point (and
        complex) inputs keep their dtype; any other dtype, e.g. integers, is
        promoted to float64 so the weighted averages are not truncated.
        An empty input returns an empty array instead of raising.
    """
    values = np.asarray(data)
    if not np.issubdtype(values.dtype, np.inexact):
        # np.zeros_like on an integer array allocates an integer buffer, which
        # would silently truncate every averaged value on assignment.
        values = values.astype(np.float64)

    ema = np.zeros_like(values)
    if values.size == 0:
        # Nothing to seed the recurrence with.
        return ema

    ema[0] = values[0]
    for t in range(1, len(values)):
        ema[t] = alpha * values[t] + (1 - alpha) * ema[t - 1]
    return ema

In [648]:
def aggregate_plots(tr_iterations, eval_iterations, array, agents, ylabel, title, name,
                    log_scale=False, smoothing=False, plot_eval=False,
                    smoothing_alpha=0.1, output_dir='plots'):
    """Plot min/max/mean curves and save the figure as ``{output_dir}/{name}.png``.

    Every entry of ``array`` is reduced with min, max and mean over axis 0
    (presumably one row per run/seed and one column per logged step -- confirm
    against the code that writes ``plot_arrays.npz``).

    Two modes:

    * ``smoothing=False``: for each label in ``agents``, the matching entry of
      ``array`` is drawn as a min-max band plus its mean, against
      ``eval_iterations``.
    * ``smoothing=True``: only ``array[0]`` is drawn against ``tr_iterations``:
      the raw mean, plus an exponentially smoothed min-max band and smoothed
      mean. If ``plot_eval`` is also true, ``array[1]`` is overlaid unsmoothed
      against ``eval_iterations`` and labelled with ``agents[1]``.
      ``agents[0]`` is not used in this mode.

    Args:
        tr_iterations: x values for the smoothed (training) series.
        eval_iterations: x values for the unsmoothed series.
        array: Sequence of 2-D array-likes, one per plotted series.
        agents: Labels used in the legend.
        ylabel: y-axis label.
        title: Figure title; ``None`` leaves the figure untitled.
        name: Output file name without extension.
        log_scale: Use a logarithmic y axis.
        smoothing: Select the smoothed single-series mode described above.
        plot_eval: In smoothing mode, also draw ``array[1]``.
        smoothing_alpha: EMA weight passed to ``exponential_moving_average``.
        output_dir: Directory the PNG is written to; created if missing.
    """
    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        if smoothing:
            training = array[0]
            raw_avg = np.mean(training, axis=0)

            smoothed_min = exponential_moving_average(np.min(training, axis=0), smoothing_alpha)
            smoothed_max = exponential_moving_average(np.max(training, axis=0), smoothing_alpha)
            smoothed_avg = exponential_moving_average(raw_avg, smoothing_alpha)

            # Trim the x axis to the series length and use it for the raw AND
            # the smoothed curves, so an over-long tr_iterations cannot make
            # the raw plot fail while the smoothed ones succeed.
            adjusted_iterations = tr_iterations[:len(smoothed_avg)]

            ax.plot(adjusted_iterations, raw_avg, label='Average (Raw)', color='steelblue')
            ax.fill_between(adjusted_iterations, smoothed_min, smoothed_max, alpha=0.5,
                            color='lightsalmon', label='Min-Max Range (Smoothed)')
            ax.plot(adjusted_iterations, smoothed_avg, label='Average (Smoothed)', color='red')

            if plot_eval:
                evaluation = array[1]
                ax.fill_between(eval_iterations,
                                np.min(evaluation, axis=0), np.max(evaluation, axis=0),
                                alpha=0.5, label=f'Agent {agents[1]} Min-Max Range',
                                color='thistle')
                ax.plot(eval_iterations, np.mean(evaluation, axis=0),
                        label=f'Agent {agents[1]} Average', color='violet', linestyle='-.')
        else:
            for index, agent in enumerate(agents):
                runs = array[index]
                ax.fill_between(eval_iterations,
                                np.min(runs, axis=0), np.max(runs, axis=0),
                                alpha=0.5, label=f'Agent {agent} Min-Max Range')
                ax.plot(eval_iterations, np.mean(runs, axis=0), label=f'Agent {agent} Average')

        ax.set_xlabel('Time Steps')
        ax.set_ylabel(ylabel)
        if log_scale:
            ax.set_yscale('log')
        if title is not None:
            ax.set_title(title)
        ax.legend()
        ax.grid(True)

        os.makedirs(output_dir, exist_ok=True)
        filename = f'{output_dir}/{name}.png'
        fig.savefig(filename)
    finally:
        # Always release the figure, even when plotting or saving fails;
        # otherwise repeated calls accumulate open figures in the kernel.
        plt.close(fig)

    print(f"Plot saved as {filename}")

# x-axis positions derived from the first run's series lengths: multiples of
# 1000 (training) and 20000 (evaluation), starting at one step and stopping
# one entry short of the series length.
tr_iterations = list(range(1000, 1000 * len(plots_arrays[0]['tr_returns'][0]), 1000))
eval_iterations = list(range(20000, 20000 * len(plots_arrays[0]['eval_returns'][0]), 20000))

# One list per stored metric, each holding that metric for the 7 loaded runs
# in plots_arrays order.
(tr_returns_arrays,
 eval_returns_arrays,
 eval_trajec_values_arrays,
 actor_arrays,
 critic_arrays) = (
    [plots_arrays[run][key] for run in range(7)]
    for key in ('tr_returns', 'eval_returns', 'eval_trajec_values',
                'actor_losses', 'critic_losses')
)



In [649]:
# Evaluation-trajectory critic values: last loaded run (minus its final
# column) alongside the runs at indices 1, 2 and 3.
aggregate_plots(
    tr_iterations=tr_iterations,
    eval_iterations=eval_iterations,
    array=[
        eval_trajec_values_arrays[-1][:, :-1],
        eval_trajec_values_arrays[1],
        eval_trajec_values_arrays[2],
        eval_trajec_values_arrays[3],
    ],
    agents=['1_stoch', '2', '3', '4'],
    ylabel='Critic value',
    title=None,
    name='eval_trajec_values_1_2_3_4',
    log_scale=False,
)

Plot saved as plots/eval_trajec_values_1_2_3_4.png


In [650]:
# Evaluation-trajectory critic values for the runs at indices 4 (minus its
# final column) and 5.
aggregate_plots(
    tr_iterations=tr_iterations,
    eval_iterations=eval_iterations,
    array=[
        eval_trajec_values_arrays[4][:, :-1],
        eval_trajec_values_arrays[5],
    ],
    agents=['5', '6'],
    ylabel='Critic value',
    title=None,
    name='eval_trajec_values_5_6',
    log_scale=False,
)

Plot saved as plots/eval_trajec_values_5_6.png


In [651]:
# Evaluation-trajectory critic values for the run at index 0, first 24 columns.
aggregate_plots(
    tr_iterations=tr_iterations,
    eval_iterations=eval_iterations,
    array=[eval_trajec_values_arrays[0][:, :24]],
    agents=['1'],
    ylabel='Critic value',
    title=None,
    name='eval_trajec_values_1',
    log_scale=False,
)

Plot saved as plots/eval_trajec_values_1.png


In [652]:
# Smoothed actor loss for the run at index 0, first 499 columns.
aggregate_plots(
    tr_iterations=tr_iterations,
    eval_iterations=eval_iterations,
    array=[actor_arrays[0][:, :499]],
    agents=['1'],
    ylabel='Actor loss',
    title=None,
    name='actor_loss_1',
    log_scale=False,
    smoothing=True,
)

Plot saved as plots/actor_loss_1.png


In [653]:
# Smoothed actor loss for the last loaded run, first 499 columns.
aggregate_plots(
    tr_iterations=tr_iterations,
    eval_iterations=eval_iterations,
    array=[actor_arrays[-1][:, :499]],
    agents=['1 stoch'],
    ylabel='Actor loss',
    title=None,
    name='actor_loss_1_stoch',
    log_scale=False,
    smoothing=True,
)

Plot saved as plots/actor_loss_1_stoch.png


In [654]:
# Smoothed actor loss for the run at index 1, first 499 columns.
aggregate_plots(
    tr_iterations=tr_iterations,
    eval_iterations=eval_iterations,
    array=[actor_arrays[1][:, :499]],
    agents=['2'],
    ylabel='Actor loss',
    title=None,
    name='actor_loss_2',
    log_scale=False,
    smoothing=True,
)

Plot saved as plots/actor_loss_2.png


In [655]:
# Smoothed actor loss for the run at index 2, first 499 columns.
aggregate_plots(
    tr_iterations=tr_iterations,
    eval_iterations=eval_iterations,
    array=[actor_arrays[2][:, :499]],
    agents=['3'],
    ylabel='Actor loss',
    title=None,
    name='actor_loss_3',
    log_scale=False,
    smoothing=True,
)

Plot saved as plots/actor_loss_3.png


In [656]:
# Smoothed actor loss for the run at index 3, first 499 columns.
aggregate_plots(
    tr_iterations=tr_iterations,
    eval_iterations=eval_iterations,
    array=[actor_arrays[3][:, :499]],
    agents=['4'],
    ylabel='Actor loss',
    title=None,
    name='actor_loss_4',
    log_scale=False,
    smoothing=True,
)

Plot saved as plots/actor_loss_4.png


In [657]:
# Smoothed actor loss for the run at index 4, first 499 columns.
aggregate_plots(
    tr_iterations=tr_iterations,
    eval_iterations=eval_iterations,
    array=[actor_arrays[4][:, :499]],
    agents=['5'],
    ylabel='Actor loss',
    title=None,
    name='actor_loss_5',
    log_scale=False,
    smoothing=True,
)

Plot saved as plots/actor_loss_5.png


In [658]:
# Smoothed actor loss for the run at index 5, first 499 columns.
aggregate_plots(
    tr_iterations=tr_iterations,
    eval_iterations=eval_iterations,
    array=[actor_arrays[5][:, :499]],
    agents=['6'],
    ylabel='Actor loss',
    title=None,
    name='actor_loss_6',
    log_scale=False,
    smoothing=True,
)

Plot saved as plots/actor_loss_6.png


In [659]:
# Smoothed critic loss (log y axis) for the run at index 0, first 499 columns.
aggregate_plots(
    tr_iterations=tr_iterations,
    eval_iterations=eval_iterations,
    array=[critic_arrays[0][:, :499]],
    agents=['1'],
    ylabel='Critic loss',
    title=None,
    name='critic_loss_1',
    log_scale=True,
    smoothing=True,
)

Plot saved as plots/critic_loss_1.png


In [660]:
# Smoothed critic loss (log y axis) for the run at index 1, first 499 columns.
aggregate_plots(
    tr_iterations=tr_iterations,
    eval_iterations=eval_iterations,
    array=[critic_arrays[1][:, :499]],
    agents=['2'],
    ylabel='Critic loss',
    title=None,
    name='critic_loss_2',
    log_scale=True,
    smoothing=True,
)

Plot saved as plots/critic_loss_2.png


In [661]:
# Smoothed critic loss (log y axis) for the run at index 2, first 499 columns.
aggregate_plots(
    tr_iterations=tr_iterations,
    eval_iterations=eval_iterations,
    array=[critic_arrays[2][:, :499]],
    agents=['3'],
    ylabel='Critic loss',
    title=None,
    name='critic_loss_3',
    log_scale=True,
    smoothing=True,
)

Plot saved as plots/critic_loss_3.png


In [662]:
# Smoothed critic loss (log y axis) for the run at index 3, first 499 columns.
aggregate_plots(
    tr_iterations=tr_iterations,
    eval_iterations=eval_iterations,
    array=[critic_arrays[3][:, :499]],
    agents=['4'],
    ylabel='Critic loss',
    title=None,
    name='critic_loss_4',
    log_scale=True,
    smoothing=True,
)

Plot saved as plots/critic_loss_4.png


In [663]:
# Smoothed critic loss (log y axis) for the run at index 4, first 499 columns.
aggregate_plots(
    tr_iterations=tr_iterations,
    eval_iterations=eval_iterations,
    array=[critic_arrays[4][:, :499]],
    agents=['5'],
    ylabel='Critic loss',
    title=None,
    name='critic_loss_5',
    log_scale=True,
    smoothing=True,
)

Plot saved as plots/critic_loss_5.png


In [None]:
# Smoothed critic loss (log y axis) for the run at index 5, first 499 columns.
aggregate_plots(
    tr_iterations=tr_iterations,
    eval_iterations=eval_iterations,
    array=[critic_arrays[5][:, :499]],
    agents=['6'],
    ylabel='Critic loss',
    title=None,
    name='critic_loss_6',
    log_scale=True,
    smoothing=True,
)

In [None]:
# Smoothed critic loss (log y axis) for the run at index 6, first 499 columns.
aggregate_plots(
    tr_iterations=tr_iterations,
    eval_iterations=eval_iterations,
    array=[critic_arrays[6][:, :499]],
    agents=['1_stoch'],
    ylabel='Critic loss',
    title=None,
    name='critic_loss_1_stoch',
    log_scale=True,
    smoothing=True,
)

In [None]:
# Smoothed training returns (first 499 columns) with the evaluation returns
# (first 24 columns) overlaid, for the run at index 0.
aggregate_plots(
    tr_iterations=tr_iterations,
    eval_iterations=eval_iterations,
    array=[
        tr_returns_arrays[0][:, :499],
        eval_returns_arrays[0][:, :24],
    ],
    agents=['1 training', '1 evaluation'],
    ylabel='Undiscounted return',
    title=None,
    name='tr_return_1',
    log_scale=False,
    smoothing=True,
    plot_eval=True,
)

In [None]:
# Smoothed training returns (first 499 columns) with the evaluation returns
# (first 24 columns) overlaid, for the run at index 1.
aggregate_plots(
    tr_iterations=tr_iterations,
    eval_iterations=eval_iterations,
    array=[
        tr_returns_arrays[1][:, :499],
        eval_returns_arrays[1][:, :24],
    ],
    agents=['2 training', '2 evaluation'],
    ylabel='Undiscounted return',
    title=None,
    name='tr_return_2',
    log_scale=False,
    smoothing=True,
    plot_eval=True,
)

In [None]:
# Smoothed training returns (first 499 columns) with the evaluation returns
# (first 24 columns) overlaid, for the run at index 2.
# NOTE(review): the labels previously read '3 stoch ...'. Every other cell
# labels this run as plain '3' and only the separate agent1_stoch run carries
# the 'stoch' tag, so this looked like a copy-paste slip that mislabelled the
# evaluation legend entries. Confirm agent 3 is not a stochastic variant.
aggregate_plots(
    tr_iterations=tr_iterations,
    eval_iterations=eval_iterations,
    array=[
        tr_returns_arrays[2][:, :499],
        eval_returns_arrays[2][:, :24],
    ],
    agents=['3 training', '3 evaluation'],
    ylabel='Undiscounted return',
    title=None,
    name='tr_return_3',
    log_scale=False,
    smoothing=True,
    plot_eval=True,
)

In [None]:
# Smoothed training returns (first 499 columns) with the evaluation returns
# (first 24 columns) overlaid, for the run at index 3.
aggregate_plots(
    tr_iterations=tr_iterations,
    eval_iterations=eval_iterations,
    array=[
        tr_returns_arrays[3][:, :499],
        eval_returns_arrays[3][:, :24],
    ],
    agents=['4 training', '4 evaluation'],
    ylabel='Undiscounted return',
    title=None,
    name='tr_return_4',
    log_scale=False,
    smoothing=True,
    plot_eval=True,
)

In [None]:
# Smoothed training returns (first 499 columns) with the evaluation returns
# (first 24 columns) overlaid, for the run at index 4.
aggregate_plots(
    tr_iterations=tr_iterations,
    eval_iterations=eval_iterations,
    array=[
        tr_returns_arrays[4][:, :499],
        eval_returns_arrays[4][:, :24],
    ],
    agents=['5 training', '5 evaluation'],
    ylabel='Undiscounted return',
    title=None,
    name='tr_return_5',
    log_scale=False,
    smoothing=True,
    plot_eval=True,
)

In [None]:
# Smoothed training returns (first 499 columns) with the evaluation returns
# (first 24 columns) overlaid, for the run at index 5.
aggregate_plots(
    tr_iterations=tr_iterations,
    eval_iterations=eval_iterations,
    array=[
        tr_returns_arrays[5][:, :499],
        eval_returns_arrays[5][:, :24],
    ],
    agents=['6 training', '6 evaluation'],
    ylabel='Undiscounted return',
    title=None,
    name='tr_return_6',
    log_scale=False,
    smoothing=True,
    plot_eval=True,
)

In [None]:
# Smoothed training returns (first 499 columns) with the evaluation returns
# (first 24 columns) overlaid, for the run at index 6.
aggregate_plots(
    tr_iterations=tr_iterations,
    eval_iterations=eval_iterations,
    array=[
        tr_returns_arrays[6][:, :499],
        eval_returns_arrays[6][:, :24],
    ],
    agents=['1 stoch training', '1 stoch evaluation'],
    ylabel='Undiscounted return',
    title=None,
    name='tr_return_1_stoch',
    log_scale=False,
    smoothing=True,
    plot_eval=True,
)