In [1]:
import pandas as pd
from pathlib import Path
import plotly.express as px

from plotly.subplots import make_subplots

In [19]:
csv_dir = Path("./results")
csv_files = list(csv_dir.glob("*.csv"))


dataframes = []
for file in csv_files:
    name = str(file).split("/")[-1].split(".")[0]
    df = pd.read_csv(file)
    df['id'] = name
    dataframes.append(df)

df = pd.concat(dataframes, ignore_index=True)

df['reward_100ma'] = df.groupby('id')['reward'].transform(lambda x: x.rolling(window=100).mean())
df['cumls_steps'] = df.groupby('id')['steps'].transform(lambda x: x.cumsum())
df['cumls_time'] = df.groupby('id')['time'].transform(lambda x: x.cumsum())
df['variance'] = df.groupby('id')['reward'].transform(lambda x: x.rolling(window=500).var())
df = df.dropna()
data = df

df

Unnamed: 0,episode,reward,time,steps,id,reward_100ma,cumls_steps,cumls_time,variance
499,500,290.143197,0.209580,262,duel_dqn,224.076115,332190,282.118121,29911.260306
500,501,284.665734,0.218758,265,duel_dqn,224.383592,332455,282.336879,29788.582836
501,502,247.114973,0.275629,330,duel_dqn,224.394233,332785,282.612508,29535.912099
502,503,309.163070,0.190214,231,duel_dqn,225.080038,333016,282.802722,29339.347040
503,504,254.971172,0.248549,312,duel_dqn,225.424684,333328,283.051270,28896.696053
...,...,...,...,...,...,...,...,...,...
17995,2996,248.036122,0.121844,234,dqn,267.421074,1418299,816.721640,844.541590
17996,2997,294.541150,0.121728,217,dqn,267.466951,1418516,816.843368,844.470135
17997,2998,284.795915,0.147319,275,dqn,267.448142,1418791,816.990687,843.109674
17998,2999,270.240625,0.131607,243,dqn,267.222778,1419034,817.122294,843.017440


In [20]:
# px.line(df, x='episode', y='reward_100ma', color='id', facet_col='id', facet_col_wrap=3, width=1600, height=800)


In [23]:
df = data.copy()

final_values = df.groupby('id')['reward_100ma'].last().sort_values(ascending=False)
category_orders = {'id': final_values.index.tolist()}

fig = px.line(df, x='episode', y='variance', color='id', title='Variance (500ma)', category_orders=category_orders, width=1300, height=650)
x_min = df['episode'].min()
x_max = df['episode'].max()


fig.add_annotation(
    x=x_max, y=200, text="Human Level", showarrow=False,
    xanchor="left", xshift=5, yshift=10,  
    font=dict(color="black", size=12),
    row=1, col=1
)

fig.add_hline(
    y=200,
    line=dict(color="black", width=1.5, dash="dash"),
    row=1, col=1
)

In [22]:
df = data.copy()

final_values = df.groupby('id')['reward_100ma'].last().sort_values(ascending=False)
category_orders = {'id': final_values.index.tolist()}

fig = px.line(df, x='episode', y='reward_100ma', color='id', title='Reward (100ma)', category_orders=category_orders, width=1300, height=650)
x_min = df['episode'].min()
x_max = df['episode'].max()


fig.add_annotation(
    x=x_max, y=200, text="Human Level", showarrow=False,
    xanchor="left", xshift=5, yshift=10,  
    font=dict(color="black", size=12),
    row=1, col=1
)

fig.add_hline(
    y=200,
    line=dict(color="black", width=1.5, dash="dash"),
    row=1, col=1
)

In [11]:
df = data.copy()

model_name = 'PPO'
filer = 'ppo'

df = df[df['id'].str.contains(filer)]
x_min = df[df.id.str.contains(filer)]['episode'].min()
x_max = df[df.id.str.contains(filer)]['episode'].max()

final_values = df.groupby('id')['reward_100ma'].last().sort_values(ascending=False)
category_orders = {'id': final_values.index.tolist()}

dqn_reward = px.line(df, x='episode', y='reward_100ma', color='id', 
               title="Model Performance Comparison", width=1200, height=600,
               category_orders=category_orders)



fig = make_subplots(rows=2, cols=2, 
                   subplot_titles=("Reward (100ma)", "Cumulative Steps", "Cumulative Time"),
                   vertical_spacing=0.1,
                   column_widths=[0.5, 0.5],
                   row_heights=[0.65, 0.35],  
                   specs=[[{"colspan": 2}, None], [{}, {}]])

# Fix the typo in the filter: 'dnq' should be 'dqn'
fig_ppo_reward = px.line(df, x='episode', y='reward_100ma', color='id', title=f'{model_name} Reward (100ma)', width=1000, height=500)
fig_ppo_cumls_steps = px.line(df, x='episode', y='cumls_steps', color='id', title=f'{model_name} Cumulative Steps', width=1000, height=500)
fig_ppo_cumls_time = px.line(df, x='episode', y='cumls_time', color='id', title=f'{model_name} Cumulative Time', width=1000, height=500)

used_names = set()  
for trace in dqn_reward.data:
    if trace.name in used_names:
        trace.showlegend = False
    else:
        used_names.add(trace.name)
    fig.add_trace(trace, row=1, col=1)
    
for trace in fig_ppo_cumls_steps.data:
    trace.showlegend = False
    fig.add_trace(trace, row=2, col=1)
    
for trace in fig_ppo_cumls_time.data:
    trace.showlegend = False
    fig.add_trace(trace, row=2, col=2)


fig.add_annotation(
    x=x_max, y=200, text="Human Level", showarrow=False,
    xanchor="left", xshift=5, yshift=10,  
    font=dict(color="black", size=12),
    row=1, col=1
)

fig.add_hline(
    y=200,
    line=dict(color="black", width=1.5, dash="dash"),
    row=1, col=1
)

# Update axis titles for better clarity
fig.update_xaxes(title_text="Episode", row=2, col=1)
fig.update_xaxes(title_text="Episode", row=2, col=2)
fig.update_yaxes(title_text="Cumulative Steps", row=2, col=1)
fig.update_yaxes(title_text="Cumulative Time (s)", row=2, col=2)

fig.update_layout(
    height=850,
    width=1300,
    title_text=f"{model_name} Performance Report",
    hovermode="x unified", 
    margin=dict(t=80, l=50, r=50, b=50)
)