In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torchvision
from tqdm.auto import tqdm

from read_all_as_df import read_all_as_df

In [None]:
# Turn off pandas' chained-assignment check for the whole session (None = no warning, no error).
pd.set_option("mode.chained_assignment", None)
# Apply seaborn's "darkgrid" style to the figures drawn afterwards.
sns.set_style(style="darkgrid")

In [None]:
# Folder with the logged runs, given relative to the current working directory.
PATH = os.path.join("runs", os.path.join("keep", "paper_runs"))

# Read the contents of PATH into df_runs (presumably one row per logged value —
# confirm against read_all_as_df) and show the frame as the cell output.
df_runs = read_all_as_df(PATH)
df_runs

In [None]:
# One comma-separated line per column: first the distinct run names, then the distinct metrics.
for column in ("run_name", "metric"):
    print(", ".join(map(str, df_runs[column].unique())))

In [None]:
# Keep the raw run identifier: "run_name" is overwritten with display labels further down,
# while the raw value is still needed (it is what parse_run_name and the step truncation read).
# The guard makes the cell safe to re-run: once "run_name" holds display labels, copying it
# again would replace the raw identifiers with those labels.
if "original_run_name" not in df_runs.columns:
    df_runs["original_run_name"] = df_runs["run_name"]

In [None]:
def parse_run_name(name: str):
    """Turn a raw, underscore-separated run identifier into a display label.

    The identifier is split on ``"_"`` and its last segment is discarded. The
    remaining segments are formatted as follows and joined with single spaces:

    * two or more segments: the first is upper-cased and the second gets an
      initial capital (the rest of the second segment is left untouched);
    * exactly one segment: it gets an initial capital;
    * a third segment, if present, is upper-cased;
    * any further segments are kept unchanged.

    Empty segments (e.g. from a double underscore) are tolerated and stay empty.

    Args:
        name: Raw identifier, e.g. ``"ddpg_distance_cpt_1000eps"``.

    Returns:
        The formatted label, e.g. ``"DDPG Distance CPT"``.

    Raises:
        ValueError: If nothing but empty text remains once the last segment is
            dropped (for instance when ``name`` contains no underscore).
    """
    parts = name.split("_")[:-1]
    if not any(parts):
        # Fail with a readable message instead of an IndexError from indexing
        # into an empty list or an empty string below.
        raise ValueError(
            f"Cannot derive a label from run name {name!r}: "
            "expected '<label parts>_<suffix>'"
        )

    def _initial_capital(text: str) -> str:
        # Slicing (rather than text[0]) keeps this safe for an empty segment.
        return text[:1].upper() + text[1:]

    if len(parts) > 1:
        parts[0] = parts[0].upper()
        parts[1] = _initial_capital(parts[1])
    else:
        parts[0] = _initial_capital(parts[0])
    if len(parts) > 2:
        parts[2] = parts[2].upper()
    return " ".join(parts)

In [None]:
# Replace the raw identifiers in "run_name" with display labels; the raw values
# are read from "original_run_name", so this cell can be re-run safely.
df_runs["run_name"] = df_runs["original_run_name"].map(parse_run_name)

# Split each label into its space-separated words.
name_words = df_runs["run_name"].str.split(" ")

# First word of the label.
df_runs["agent_type"] = name_words.apply(lambda words: words[0]).astype("category")
# Second word when the label has one, otherwise the label's only word.
df_runs["network_type"] = name_words.apply(
    lambda words: words[1] if len(words) > 1 else words[0]
).astype("category")
# True for every label that contains the text "LSTM".
df_runs["has_lstm"] = df_runs["run_name"].str.contains("LSTM")
# Rows of agent "pursuer_1" are tagged as using the reward-prioritized memory, all others as normal.
df_runs["memory"] = df_runs["agent"].map(
    lambda agent_id: "Reward Prioritized Memory" if agent_id == "pursuer_1" else "Normal Memory"
)

df_runs

In [None]:
# Truncate steps
# The last "_"-separated piece of every raw run name is read as an integer after
# cutting off its final three characters; the smallest of those integers becomes
# the common cut-off.
min_max_steps = min(
    int(run_id.rsplit("_", 1)[-1][:-3]) for run_id in df_runs["original_run_name"]
)
# Keep only rows whose step does not exceed the cut-off ("@" refers to the Python variable).
df_runs.query("step <= @min_max_steps", inplace=True)

In [None]:
# Show the distinct display labels on one comma-separated line.
print(", ".join(str(label) for label in df_runs["run_name"].unique()))

# QNN vs A2C vs DDPG vs Controls vs Random

In [None]:
# Test reward per agent type, leaving out everything related to CPT.
to_plot = (
    df_runs.query("metric == 'test_total_reward'")
    .query("agent_type != 'CPT'")
    .query("run_name != 'DDPG Distance CPT'")
    # Work on an independent copy: columns are added below and must neither
    # touch df_runs nor rely on the chained-assignment warning being disabled.
    .copy()
)
to_plot["agent_type"] = to_plot["agent_type"].cat.remove_unused_categories()

# Plain (unsmoothed) mean reward per agent type.
means = to_plot.groupby(by="agent_type")["value"].mean()
print(means)

# Smooth every logged series (one per run and agent) separately. A single EWM
# over the whole column would let the moving average carry over from the end of
# one run into the start of the next.
# Assumes the rows of each series are already in step order — TODO confirm.
to_plot["smoothed_value"] = (
    to_plot.groupby(["original_run_name", "agent"], dropna=False, observed=True)["value"]
    .transform(lambda series: series.ewm(alpha=1 - 0.65).mean())
)

# Draw on an explicit figure/axes instead of whatever the current axes happen to be.
fig, ax = plt.subplots()
plot = sns.lineplot(data=to_plot, x="step", y="smoothed_value", hue="agent_type", ax=ax)
plot.set_xlabel("Episode")
plot.set_ylabel("Reward")
plot.set_title("Rewards by Agent")
plot.legend(title="Agent Type")
# NOTE(review): EPS cannot store transparency; if a shaded error band is drawn it
# is written opaque — confirm the saved figure looks as intended.
fig.savefig("rewards-by-agent.eps", format="eps")

# RPM vs Normal Memory

In [None]:
# (Actor) loss of the learning agents, compared between the two memory variants.
to_plot = (
    df_runs.query("metric == 'loss' or metric == 'actor_loss'")
    .query("agent_type != 'CPT'")
    .query("run_name != 'DDPG Distance CPT'")
    # Independent copy: columns are added below and must not touch df_runs.
    .copy()
)
to_plot["agent_type"] = to_plot["agent_type"].cat.remove_unused_categories()

# Smooth every logged series (one per run, agent and metric) separately, so the
# moving average never mixes different runs that merely share an agent type and
# memory variant. transform() returns the values on to_plot's own index, which
# removes the need to re-align through the auto-generated "level_2" column (that
# name only exists when the frame's index is unnamed).
# Assumes the rows of each series are already in step order — TODO confirm.
to_plot["Smoothed Loss"] = (
    to_plot.groupby(
        ["original_run_name", "agent", "metric"], dropna=False, observed=True
    )["value"]
    .transform(lambda series: series.ewm(alpha=1 - 0.999).mean())
)

# One figure per agent type, one line per memory variant.
for type_ in to_plot["agent_type"].unique():
    fig, ax = plt.subplots()
    plot = sns.lineplot(
        data=to_plot.query("agent_type == @type_"),
        x="step",
        y="Smoothed Loss",
        hue="memory",
        ax=ax,
    )
    plot.set_xlabel("Episode")
    plot.set_ylabel("(Actor) Loss")
    plot.set_title("Loss by Memory Type")
    plot.legend(title="Memory Type")
    # NOTE(review): EPS cannot store transparency; a shaded error band would be
    # written opaque — confirm the saved figures look as intended.
    fig.savefig(f"loss-by-memory-{type_}.eps", format="eps")
    # Close this figure explicitly so figures do not pile up across iterations.
    plt.close(fig)

# Simple vs Distance vs Simple LSTM vs Distance LSTM

In [None]:
# Test reward of the learning agents only (no CPT, Controls or Random), split by architecture.
to_plot = (
    df_runs.query("metric == 'test_total_reward'")
    .query("agent_type != 'CPT'")
    .query("run_name != 'DDPG Distance CPT'")
    .query("agent_type != 'Controls'")
    .query("agent_type != 'Random'")
    # Independent copy: columns are added below and must not touch df_runs.
    .copy()
)
to_plot["agent_type"] = to_plot["agent_type"].cat.remove_unused_categories()
to_plot["network_type"] = to_plot["network_type"].cat.remove_unused_categories()

# Legend label: "<agent type> <network type>", followed by " LSTM" for LSTM runs.
to_plot["type"] = (
    to_plot["agent_type"].astype(str)
    + " "
    + to_plot["network_type"].astype(str)
    + to_plot["has_lstm"].map(lambda lstm: " LSTM" if lstm else "")
)

# Plain (unsmoothed) mean reward per architecture.
means = to_plot.groupby(by=["type"])["value"].mean()
print(means)

# Smooth every logged series (one per run and agent) separately. With a
# smoothing factor this small, a single EWM over the whole column would make each
# run's curve start from the values of the runs stored before it.
# Assumes the rows of each series are already in step order — TODO confirm.
to_plot["smoothed_value"] = (
    to_plot.groupby(["original_run_name", "agent"], dropna=False, observed=True)["value"]
    .transform(lambda series: series.ewm(alpha=1 - 0.999).mean())
)

# One figure per agent type, one line per architecture.
for type_ in to_plot["agent_type"].unique():
    fig, ax = plt.subplots()
    plot = sns.lineplot(
        data=to_plot.query("agent_type == @type_"),
        x="step",
        y="smoothed_value",
        hue="type",
        ax=ax,
    )
    plot.set_xlabel("Episode")
    plot.set_ylabel("Reward")
    plot.set_title("Rewards by Agent and Network Type")
    plot.legend(title="Architecture")
    # NOTE(review): EPS cannot store transparency; a shaded error band would be
    # written opaque — confirm the saved figures look as intended.
    fig.savefig(f"rewards-by-architecture-{type_}.eps", format="eps")
    # Close this figure explicitly so figures do not pile up across iterations.
    plt.close(fig)

# DDPG vs CPT DDPG

In [None]:
# Test reward of the two runs labelled "DDPG Distance CPT" and "DDPG Distance".
to_plot = (
    df_runs.query("metric == 'test_total_reward'")
    .query("run_name == 'DDPG Distance CPT' or run_name == 'DDPG Distance'")
    # Independent copy: columns are changed below and must not touch df_runs.
    .copy()
)
# Cast first: "run_name" comes out of a .map() call, which does not guarantee a
# categorical result, and the .cat accessor raises on any other dtype. For a
# column that is already categorical the cast changes nothing.
to_plot["run_name"] = to_plot["run_name"].astype("category").cat.remove_unused_categories()

# Plain (unsmoothed) mean reward per run label.
means = to_plot.groupby(by="run_name")["value"].mean()
print(means)

# Smooth every logged series (one per run and agent) separately, so the moving
# average of one run is not seeded with the values of the other.
# Assumes the rows of each series are already in step order — TODO confirm.
to_plot["smoothed_value"] = (
    to_plot.groupby(["original_run_name", "agent"], dropna=False, observed=True)["value"]
    .transform(lambda series: series.ewm(alpha=1 - 0.999).mean())
)

# Draw on an explicit figure/axes instead of whatever the current axes happen to be.
fig, ax = plt.subplots()
plot = sns.lineplot(data=to_plot, x="step", y="smoothed_value", hue="run_name", ax=ax)
plot.set_xlabel("Episode")
plot.set_ylabel("Reward")
plot.set_title("Controls Policy Trainer DDPG vs DDPG Reward")
plot.legend(title="Agent Type")
# NOTE(review): EPS cannot store transparency; a shaded error band would be
# written opaque — confirm the saved figure looks as intended.
fig.savefig("rewards-by-agent-cpt.eps", format="eps");

In [None]:
# Actor loss of the two runs labelled "DDPG Distance CPT" and "DDPG Distance".
to_plot = (
    df_runs.query("metric == 'actor_loss'")
    .query("run_name == 'DDPG Distance CPT' or run_name == 'DDPG Distance'")
    # Independent copy: columns are changed below and must not touch df_runs.
    .copy()
)
# Cast first: "run_name" comes out of a .map() call, which does not guarantee a
# categorical result, and the .cat accessor raises on any other dtype. For a
# column that is already categorical the cast changes nothing.
to_plot["run_name"] = to_plot["run_name"].astype("category").cat.remove_unused_categories()

# No smoothing is applied here: the column is a straight copy of the raw values.
to_plot["smoothed_value"] = to_plot["value"]

# Draw on an explicit figure/axes instead of whatever the current axes happen to be.
fig, ax = plt.subplots()
plot = sns.lineplot(data=to_plot, x="step", y="smoothed_value", hue="run_name", ax=ax)
plot.set_xlabel("Episode")
plot.set_ylabel("(Actor) Loss")
plot.set_title("Controls Policy Trainer DDPG vs DDPG Loss")
plot.legend(title="Agent Type")
# NOTE(review): EPS cannot store transparency; a shaded error band would be
# written opaque — confirm the saved figure looks as intended.
fig.savefig("loss-by-agent-cpt.eps", format="eps");