In [14]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import numpy as np

In [15]:
# Pick the run to inspect; the log file name is built from these two values.
env_name, model_type = "CartPole-v1", "parallel_ger"

# The first CSV column is used as the row index.
df = pd.read_csv(
    f"../logs/{env_name}_{model_type}.csv",
    index_col=0,
)

# Bare last expression so the notebook renders the frame.
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,5110,5111,5112,5113,5114,5115,5116,5117,5118,5119
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10.0,13.0,50.0,21.0,10.0,13.0,50.0,21.0,10.0,13.0
1,13.0,36.0,46.0,24.0,13.0,36.0,46.0,24.0,13.0,36.0,...,30.0,32.0,24.0,19.0,30.0,32.0,24.0,19.0,30.0,32.0
2,23.0,15.0,17.0,22.0,23.0,15.0,17.0,22.0,23.0,15.0,...,35.0,15.0,22.0,40.0,35.0,15.0,22.0,40.0,35.0,15.0
3,19.0,13.0,20.0,30.0,19.0,13.0,20.0,30.0,19.0,13.0,...,38.0,27.0,16.0,25.0,38.0,27.0,16.0,25.0,38.0,27.0
4,17.0,50.0,11.0,101.0,17.0,20.0,11.0,101.0,17.0,20.0,...,37.0,28.0,87.0,27.0,37.0,28.0,87.0,27.0,37.0,28.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
971,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0,...,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0
972,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0,...,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0
973,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0,...,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0
974,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0,...,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0


In [16]:
def plot_performance(
    env_name: str,
    model_type: str,
    n_steps_per_batch: int = 128,
    n_parallel_runs_per_batch: int = 4,
) -> None:
    """Plot the mean return per episode with a +/- 1 standard-deviation band.

    Reads ``../logs/{env_name}_{model_type}.csv`` (first column used as the
    index), reduces every row across all columns to its mean and standard
    deviation, and shows a Plotly figure containing the mean curve and a
    shaded ``mean +/- std`` region.

    Args:
        env_name: Environment identifier used to build the log file name.
        model_type: Model identifier used to build the log file name.
        n_steps_per_batch: Together with ``n_parallel_runs_per_batch``, the
            number of columns attributed to one agent. Only used to derive
            the agent count reported in the figure title.
        n_parallel_runs_per_batch: See ``n_steps_per_batch``.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    df = pd.read_csv(f"../logs/{env_name}_{model_type}.csv", index_col=0)

    # Agent count for the title: total columns / columns attributed to one agent.
    columns_per_agent = n_steps_per_batch * n_parallel_runs_per_batch
    n_agents = df.shape[1] // columns_per_agent

    # Row-wise statistics over all columns.
    avg_rewards = df.mean(axis=1)
    std_rewards = df.std(axis=1)
    upper_bound = (avg_rewards + std_rewards).to_numpy()
    lower_bound = (avg_rewards - std_rewards).to_numpy()

    x = df.index

    fig = go.Figure()

    # Mean curve.
    fig.add_trace(
        go.Scatter(
            x=x,
            y=avg_rewards,
            mode="lines",
            name="Average return per episode",
        )
    )

    # Shaded band drawn as one closed polygon: walk the upper bound left to
    # right, then the lower bound right to left, and fill the enclosed area.
    fig.add_trace(
        go.Scatter(
            x=np.concatenate([x, x[::-1]]),
            y=np.concatenate([upper_bound, lower_bound[::-1]]),
            fill="toself",
            fillcolor="rgba(141,20,198,0.2)",
            line=dict(color="rgba(255,255,255,0)"),  # fully transparent outline
            hoverinfo="skip",
            showlegend=False,
        )
    )

    fig.update_layout(
        title=f"Returns over {df.shape[0]} episodes, averaged across {n_agents} agents",
        xaxis_title="Episode",
        yaxis_title="Return",
        legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
    )

    fig.show()

In [17]:
def compare_models(
    env_name: str,
    model_1: str,
    model_2: str = "base",
    n_steps_per_batch: int = 128,
    n_parallel_runs_per_batch: int = 4,
) -> None:
    """Overlay the mean return curves of two models for one environment.

    Reads ``../logs/{env_name}_{model_1}.csv`` and
    ``../logs/{env_name}_{model_2}.csv`` (first column used as the index),
    averages every row across all columns, and shows both curves in one
    Plotly figure.

    The episode count and agent count in the title are taken from the
    ``model_1`` log loaded here, not from any notebook-global frame, so the
    function works on a fresh kernel.
    NOTE(review): this assumes both logs have the same shape; confirm, or
    report each model's figures separately if they can differ.

    Args:
        env_name: Environment identifier used to build the log file names.
        model_1: First model identifier; its log also drives the title.
        model_2: Second model identifier, ``"base"`` by default.
        n_steps_per_batch: Together with ``n_parallel_runs_per_batch``, the
            number of columns attributed to one agent. Only used to derive
            the agent count reported in the figure title.
        n_parallel_runs_per_batch: See ``n_steps_per_batch``.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    # A list of pairs (not a dict) so two traces are drawn even if both
    # model names are equal.
    runs = [
        (name, pd.read_csv(f"../logs/{env_name}_{name}.csv", index_col=0))
        for name in (model_1, model_2)
    ]

    # Title statistics come from the first model's own log.
    reference_df = runs[0][1]
    columns_per_agent = n_steps_per_batch * n_parallel_runs_per_batch
    n_agents = reference_df.shape[1] // columns_per_agent

    fig = go.Figure()

    for name, run_df in runs:
        fig.add_trace(
            go.Scatter(
                x=run_df.index,
                y=run_df.mean(axis=1),  # row-wise mean over all columns
                mode="lines",
                name=str(name),
            )
        )

    fig.update_layout(
        title=(
            f"Returns over {reference_df.shape[0]} episodes, "
            f"averaged across {n_agents} agents"
        ),
        xaxis_title="Episode",
        yaxis_title="Return",
        legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
    )

    fig.show()

In [18]:
# Mean return with its standard-deviation band for the parallel_ger log.
plot_performance(env_name="CartPole-v1", model_type="parallel_ger")

In [19]:
# Same plot for the base log.
plot_performance(env_name="CartPole-v1", model_type="base")

In [20]:
# Overlay parallel_ger against the default second model ("base").
compare_models(env_name="CartPole-v1", model_1="parallel_ger")