# Imports

In [None]:
import os
import numpy as np
import scipy.stats as st
import matplotlib.pyplot as plt
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
from glob import glob

# Load Data

In [None]:
# Determine the run ids by the directories in the current location.
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# keep the legend and color assignment of the plots reproducible.
candidate_dirs = sorted(
    name for name in os.listdir("./") if os.path.isdir(os.path.join("./", name))
)
# Retrieve the paths to the tensorboard summaries (expected layout:
# <run id>/<training run>/<event file>).
data_paths = {}
for run_dir in candidate_dirs:
    summary_paths = sorted(glob(os.path.join(run_dir, "*", "*")))
    # Directories without any summary (e.g. ".ipynb_checkpoints" or
    # "__pycache__") are not run ids; keeping them would break the cells
    # below, which index into the first path and aggregate over all runs.
    if summary_paths:
        data_paths[run_dir] = summary_paths
# Only directories that actually contain summaries count as run ids
run_ids = list(data_paths)

In [None]:
# Check available keys: load the first summary of the first run id and
# print the scalar tags it contains
first_run_paths = data_paths[run_ids[0]]
summary = EventAccumulator(first_run_paths[0])
summary.Reload()
print(summary.scalars.Keys())

In [None]:
# Initialize Tensorboard EventAccumulators: one accumulator per summary path,
# grouped by the run id the path belongs to
data_summaries = {
    run_key: [EventAccumulator(event_path) for event_path in event_paths]
    for run_key, event_paths in data_paths.items()
}

In [None]:
# Load data into a nested dictionary: data[tag][run_id] is a 2-D array with
# one row per training run, steps[tag] holds the matching step indices.
desired_tags = summary.scalars.Keys()[:-3] # -3 removes the data for the decaying hyperparameters

# Reload every accumulator once up front instead of once per tag; this assumes
# the event files on disk do not change while this cell runs.
for runs in data_summaries.values():
    for run in runs:
        run.Reload()

steps = {} # steps have to be saved for each summary tag, because some might not be of the same length
data = {}
for tag in desired_tags:
    values_per_run_id = {}
    shortest_steps = None
    for run_id, runs in data_summaries.items():
        values_per_run_id[run_id] = []
        for run in runs:
            # Every scalar event unpacks into three fields; the first one is
            # not needed, the other two are the step and the logged value.
            _, t, values = zip(*run.Scalars(tag))
            values_per_run_id[run_id].append(values)
            if shortest_steps is None or len(t) < len(shortest_steps):
                shortest_steps = t

    # Runs of different length cannot be stacked into one rectangular array
    # (and would not match steps[tag] when plotting), so every run is cut to
    # the shortest one of this tag. Equal-length runs are left untouched.
    # NOTE(review): assumes all runs log this tag at the same steps - confirm.
    common_length = 0 if shortest_steps is None else len(shortest_steps)
    data[tag] = {
        run_id: np.asarray([series[:common_length] for series in all_series])
        for run_id, all_series in values_per_run_id.items()
    }
    # Without any run there are no steps to record for this tag
    if shortest_steps is not None:
        steps[tag] = np.asarray(shortest_steps)

# Process data

In [None]:
# Function to calculate the asymmetric standard deviation
def _one_sided_std(squared_dev, mask, axis):
    """Return the root mean square of the deviations selected by ``mask``."""
    count = np.sum(mask, axis=axis)
    total = np.sum(np.where(mask, squared_dev, 0.0), axis=axis)
    # np.maximum keeps the division defined; a side without any sample has no
    # spread and is reported as 0.0 instead of raising or yielding NaN.
    return np.sqrt(np.where(count > 0, total / np.maximum(count, 1), 0.0))


def asymmetric_std(data, axis=None):
    """Compute the standard deviation above and below the mean separately.

    Samples that are equal to the mean count towards both sides.

    Args:
        data: Array-like of numbers.
        axis: Axis along which the statistic is computed. ``None`` (default)
            treats all values of ``data`` as one flat sample, which is the
            previous behavior for 1-D input.

    Returns:
        Tuple ``(std_up, std_down)``: root mean square deviation from the mean
        of the samples ``>= mean`` and of the samples ``<= mean``. Scalars for
        ``axis=None``, otherwise arrays with ``axis`` reduced.

    Raises:
        ValueError: If ``data`` is empty.
    """
    data = np.asarray(data, dtype=float)
    if data.size == 0:
        raise ValueError("asymmetric_std() requires at least one value")

    # keepdims lets the mean broadcast against data for any axis
    mean = np.mean(data, axis=axis, keepdims=True)
    squared_dev = (data - mean) ** 2

    # Boolean masks instead of index lookups: this works for any number of
    # dimensions and tolerates a side with zero samples, which happens when
    # floating point rounding puts the mean above or below every value.
    std_up = _one_sided_std(squared_dev, data >= mean, axis)
    std_down = _one_sided_std(squared_dev, data <= mean, axis)

    return std_up, std_down

In [None]:
# Aggregate all training runs that belong to one run id
# For every tag and run id, each requested output is computed across the runs
# The leaves of the nested dictionary are numpy arrays now
desired_outputs = ["mean", "std"]

# Maps the name of an output to the function that computes it
aggregators = {
    "mean": lambda runs: np.mean(runs, axis=0),
    "std": lambda runs: np.std(runs, axis=0),
    "astd": lambda runs: tuple(asymmetric_std(runs)),
}

results = {
    tag: {
        run_id: {
            output: aggregators[output](data[tag][run_id])
            for output in desired_outputs
            if output in aggregators  # unknown outputs are skipped silently
        }
        for run_id in run_ids
    }
    for tag in desired_tags
}

# Plotting

In [None]:
def plot(data, steps, tag, run_ids, save=False, title="default"):
    """Plot the mean of one tag for several run ids with a std band.

    Args:
        data: Nested dictionary ``data[tag][run_id]`` providing the arrays
            ``"mean"`` and ``"std"``.
        steps: Dictionary mapping a tag to its x-axis values.
        tag: Tag to plot; also used as the y-axis label.
        run_ids: Run ids to draw, each as one line with a shaded band.
        save: If true, write the figure to ``title + ".pdf"`` instead of
            showing it.
        title: Title of the figure and, when saving, stem of the file name.
    """
    plt.rcParams.update({'font.size': 12})
    fig, ax = plt.subplots()
    fig.set_size_inches(7, 5.5)
    mean_lines = []
    for run_id in run_ids:
        mean = data[tag][run_id]["mean"]
        std = data[tag][run_id]["std"]
        line, = ax.plot(steps[tag], mean)
        mean_lines.append(line)
        ax.fill_between(steps[tag], mean - std, mean + std, alpha=0.3)
    ax.set_xlabel("PPO Updates")
    ax.set_ylabel(tag)
    ax.set_title(title)
    # Pair every label with its line explicitly. Passing only the labels
    # relies on the order of the artists on the axes, and the shaded bands are
    # artists too, so labels could end up on the wrong element.
    ax.legend(mean_lines, list(run_ids))
    if save:
        fig.savefig(title + ".pdf")
    else:
        plt.show()

# Plot Mean and Standard Deviation

## Mean Reward

In [None]:
# Show the mean episode reward of every run id together with its std band
plot(data=results, steps=steps, tag="episode/reward_mean", run_ids=run_ids, save=False, title="Training - Mean Reward and Std")

## Mean Length

In [None]:
# Show the mean episode length of every run id together with its std band
plot(data=results, steps=steps, tag="episode/length_mean", run_ids=run_ids, save=False, title="Training - Mean Length and Std")

## Plot all tags to file

In [None]:
# Write one figure per tag to disk; "/" is replaced because the title is also
# used as the file name
for tag in desired_tags:
    plot(results, steps, tag, run_ids, save=True, title=tag.replace("/", "-"))