In [1]:
import plotly.graph_objects as go
import pandas as pd
import numpy as np

In [None]:
from package.dyna_q_agent import Dyna_Q_Agent
from package.dyna_q_plus_agent import Dyna_Q_plus_Agent
from package.q_learning_agent import Q_learning_Agent

In [32]:
def plot_steps_per_episode(*agents):
    """Plot the mean number of steps per episode for each given agent.

    For every agent, ``results/<agent.name>_results.csv`` is read with the
    ``Run`` and ``episode`` columns as index, the numeric columns are averaged
    over all runs for each episode, and the resulting ``steps`` series is drawn
    as one line on a shared Plotly figure. A dotted purple vertical marker is
    added at episode 100 before the figure is displayed.

    Args:
        *agents: Objects exposing a ``name`` attribute; the name selects the
            results file and labels the trace in the legend.

    Returns:
        None. The figure is displayed with ``show()``.
    """
    figure = go.Figure()

    for current_agent in agents:
        label = current_agent.name
        per_run_results = pd.read_csv(
            f"results/{label}_results.csv", index_col=["Run", "episode"]
        )

        # Collapse the run level: one row per episode holding the mean of every
        # numeric column, from which only the step counts are kept.
        mean_steps = per_run_results.groupby("episode").mean(numeric_only=True)[
            "steps"
        ]

        trace = go.Scatter(
            x=mean_steps.index, y=mean_steps, name=label, mode="lines"
        )
        figure.add_trace(trace)

    # A log-scaled y-axis can be enabled by adding "yaxis_type": "log" here.
    layout_options = {
        "title": "Steps per Episode (Averaged over 100 runs)",
        "xaxis_title": "Episode",
        "yaxis_title": "Steps",
        "legend_title": "Agent",
    }
    figure.update_layout(**layout_options)

    # Vertical marker at episode 100, drawn from y=1 to y=300 in data units.
    # NOTE(review): presumably the episode at which the environment changes --
    # confirm against the experiment setup.
    marker_style = {"color": "purple", "width": 1, "dash": "dot"}
    figure.add_shape(type="line", x0=100, x1=100, y0=1, y1=300, line=marker_style)

    figure.show()

# Draw the steps-per-episode comparison for the three agents. Each agent is
# constructed with no arguments; the plotting function only reads its `name`
# to locate the matching results CSV and to label the trace.
plot_steps_per_episode(Q_learning_Agent(), Dyna_Q_Agent(), Dyna_Q_plus_Agent())

In [36]:
def plot_average_reward(*agents, results_dir="results"):
    """Plot each agent's reward per episode, averaged across its runs.

    For every agent, ``<results_dir>/<agent.name>_results.csv`` is read with
    the ``Run`` and ``episode`` columns as index. The ``reward`` column is
    averaged over all runs for each episode and drawn as one line on a shared
    Plotly figure, which is then displayed.

    The per-episode mean is plotted as-is. An earlier version rescaled it by
    ``100 / (num_runs + 1)``, which made the curve disagree with the
    "Average Reward" title and silently changed with the number of runs.

    Args:
        *agents: Objects exposing a ``name`` attribute; the name selects the
            results file and labels the trace.
        results_dir: Directory that holds the ``<name>_results.csv`` files.
            Defaults to ``"results"`` (relative to the working directory).

    Returns:
        None. The figure is displayed with ``fig.show()``.

    Raises:
        FileNotFoundError: If an agent's results file does not exist.
        KeyError: If a results file has no ``reward`` column.
    """
    fig = go.Figure()
    run_counts = set()

    for agent in agents:
        results_file = f"{results_dir}/{agent.name}_results.csv"
        agent_results = pd.read_csv(results_file, index_col=["Run", "episode"])

        # Remember how many runs back this curve so the title can report it.
        run_counts.add(agent_results.index.get_level_values("Run").nunique())

        # Grouping by episode and taking the mean already averages over runs,
        # so no further normalisation is applied.
        mean_reward = agent_results.groupby("episode")["reward"].mean()

        fig.add_trace(
            go.Scatter(
                x=mean_reward.index,
                y=mean_reward,
                mode="lines",
                name=agent.name,
            )
        )

    if len(run_counts) == 1:
        (num_runs,) = run_counts
        averaging_note = f"Averaged over {num_runs} runs"
    else:
        # No agents, or agents with differing run counts: quoting a single
        # number would be misleading, so leave it out.
        averaging_note = "Averaged over runs"

    fig.update_layout(
        title=f"Average Reward per Episode ({averaging_note})",
        xaxis_title="Episode",
        yaxis_title="Average Reward",
    )
    fig.show()

# Draw the reward-per-episode comparison for the same three agents. Each agent
# is constructed with no arguments; the plotting function only reads its
# `name` to locate the matching results CSV and to label the trace.
plot_average_reward(Q_learning_Agent(), Dyna_Q_Agent(), Dyna_Q_plus_Agent())