In [17]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from datetime import datetime
from scipy import stats

In [18]:
def find_latest_run(base_path):
    """Return the most recent ``run_<timestamp>`` directory under ``base_path``.

    Directory names are expected to look like ``run_YYYY-MM-DD_HH-MM-SS``.
    Entries that match ``run_*`` but whose suffix is not a valid timestamp
    (for example ``run_backup``) are ignored instead of aborting the search.

    Args:
        base_path: Directory (str or Path) that contains the run directories.

    Returns:
        Path of the run directory whose name carries the latest timestamp.

    Raises:
        FileNotFoundError: If no directory with a parseable timestamp exists.
    """
    base_path = Path(base_path)
    prefix = "run_"
    stamped_runs = []
    for d in base_path.glob(f"{prefix}*"):
        if not d.is_dir():
            continue
        try:
            # Strip only the leading prefix; str.replace would also remove
            # any later occurrence of "run_" inside the name.
            stamp = datetime.strptime(d.name[len(prefix):], "%Y-%m-%d_%H-%M-%S")
        except ValueError:
            # Not a timestamped run directory; leave it out of the ranking.
            continue
        stamped_runs.append((stamp, d))
    if not stamped_runs:
        raise FileNotFoundError(f"No run directories found in {base_path}")
    return max(stamped_runs, key=lambda pair: pair[0])[1]

def load_monitor_file(monitor_dir):
    """Load the episode log CSV found in ``monitor_dir`` into a DataFrame.

    The first line of the file is skipped before parsing, so the second line
    supplies the column names (presumably the first line is a metadata
    header written by the logger - confirm against the file producer).

    When the directory holds several CSV files, the one that sorts first by
    path is read, so the choice does not depend on filesystem listing order.

    Args:
        monitor_dir: Directory (str or Path) containing the CSV log.

    Returns:
        pandas.DataFrame parsed from the selected CSV file.

    Raises:
        FileNotFoundError: If the directory contains no ``*.csv`` file.
    """
    monitor_dir = Path(monitor_dir)
    # glob() yields entries in arbitrary order; sort for a reproducible pick.
    csv_files = sorted(monitor_dir.glob("*.csv"))
    if not csv_files:
        raise FileNotFoundError(f"No CSV files found in {monitor_dir}")
    return pd.read_csv(csv_files[0], skiprows=1)

def calculate_metrics(df, success_threshold=475):
    """Summarise the reward column of an episode log.

    Args:
        df: DataFrame with an 'r' column (per-row reward) and an 'l' column
            (summed here as the total timestep count; presumably episode
            lengths - confirm against the log producer).
        success_threshold: Minimum reward for a row to count as a success.
            Defaults to 475, the value previously hard-coded here.

    Returns:
        dict with keys 'mean', 'std', 'median' and 'max' of the rewards,
        'success' (percentage of rows with reward >= ``success_threshold``)
        and 'timesteps' (sum of the 'l' column).
    """
    rewards = df['r']
    total_timesteps = df['l'].sum()
    # Mean of a boolean mask is the fraction of True entries.
    success_rate = np.mean(rewards >= success_threshold) * 100
    return {
        'mean': rewards.mean(),
        'std': rewards.std(),
        'median': rewards.median(),
        'max': rewards.max(),
        'success': success_rate,
        'timesteps': total_timesteps,
    }

In [19]:
# Locate the project root: two levels above this file when __file__ is
# defined, otherwise two levels above the current working directory.
if '__file__' in globals():
    root = Path(__file__).resolve().parents[2]
else:
    root = Path.cwd().parents[1]
docs = root.joinpath("documentation", "cartpole")

# Newest timestamped run of each agent.
dqn_run = find_latest_run(docs.joinpath("dqn-cartpole"))
base_run = find_latest_run(docs.joinpath("random-baseline"))

# Episode logs of both runs.
dqn_df = load_monitor_file(dqn_run.joinpath("monitor"))
base_df = load_monitor_file(base_run.joinpath("monitor"))

# Fresh, timestamped output directory for this comparison and its figures.
comparison_dir = docs.joinpath(
    "comparison",
    f"dqn_vs_baseline_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}",
)
graphs_dir = comparison_dir.joinpath("graphs")
graphs_dir.mkdir(parents=True, exist_ok=True)

In [20]:
# Summary statistics for each agent's episode log.
dqn_metrics = calculate_metrics(dqn_df)
base_metrics = calculate_metrics(base_df)

# Percentage change of the DQN mean reward relative to the baseline mean.
improvement = (
    (dqn_metrics["mean"] - base_metrics["mean"]) / base_metrics["mean"] * 100
)

# equal_var=False: the two reward samples are not assumed to share a variance.
t_stat, p_val = stats.ttest_ind(dqn_df["r"], base_df["r"], equal_var=False)


In [21]:
def plot_metrics(dqn_metrics, base_metrics, out):
    """Save a grouped bar chart comparing the headline metrics of both agents.

    Args:
        dqn_metrics: Mapping with 'mean', 'median', 'max' and 'success' entries.
        base_metrics: Mapping with the same entries for the baseline.
        out: Path-like directory supporting ``/``; ``metrics.png`` is written
            into it.
    """
    metric_keys = ('mean', 'median', 'max', 'success')
    labels = ["Mean", "Median", "Max", "Success Rate (%)"]
    dqn_vals = [dqn_metrics[key] for key in metric_keys]
    base_vals = [base_metrics[key] for key in metric_keys]

    # One pair of bars per metric, centred on integer positions.
    positions = np.arange(len(labels))
    bar_width = 0.35

    fig, ax = plt.subplots(figsize=(8, 5))
    ax.bar(positions - bar_width / 2, dqn_vals, bar_width, label='DQN', color='tab:blue')
    ax.bar(positions + bar_width / 2, base_vals, bar_width, label='Baseline', color='tab:red')
    ax.set_xticks(positions)
    ax.set_xticklabels(labels)
    ax.set_ylabel("Value")
    ax.set_title("Performance Metrics Comparison")
    ax.legend()
    ax.grid(alpha=0.3, axis='y')
    fig.savefig(out / "metrics.png", dpi=300, bbox_inches='tight')
    plt.close(fig)

def plot_cumulative(dqn, base, out):
    """Save a line chart of the running reward total of both agents.

    Args:
        dqn: DataFrame whose 'r' column holds the DQN rewards.
        base: DataFrame whose 'r' column holds the baseline rewards.
        out: Path-like directory supporting ``/``; ``cumulative.png`` is
            written into it.
    """
    curves = (
        (dqn, "DQN (Cumulative)", 'tab:blue'),
        (base, "Baseline (Cumulative)", 'tab:red'),
    )

    fig, ax = plt.subplots(figsize=(10, 5))
    for frame, label, color in curves:
        ax.plot(np.cumsum(frame['r']), label=label, color=color)
    ax.set_title("Cumulative Rewards Over Episodes")
    ax.set_xlabel("Episode")
    ax.set_ylabel("Total Reward")
    ax.legend()
    ax.grid(alpha=0.3)
    fig.tight_layout()
    fig.savefig(out / "cumulative.png", dpi=300, bbox_inches='tight')
    plt.close(fig)

def plot_overlay_with_mean(dqn, base, out, window=10):
    """Save moving-average reward curves with each agent's overall mean level.

    Args:
        dqn: DataFrame whose 'r' column holds the DQN rewards.
        base: DataFrame whose 'r' column holds the baseline rewards.
        out: Path-like directory supporting ``/``; ``overlay_with_mean.png``
            is written into it.
        window: Number of rows in the rolling mean. Defaults to 10, the value
            previously hard-coded in both the computation and the labels.
    """
    dqn_rewards, base_rewards = dqn['r'], base['r']
    # Computed once and reused for both the line position and its label.
    dqn_mean, base_mean = dqn_rewards.mean(), base_rewards.mean()

    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        ax.plot(dqn_rewards.rolling(window).mean(), label=f"DQN (MA-{window})", color='tab:blue')
        ax.plot(base_rewards.rolling(window).mean(), label=f"Baseline (MA-{window})", color='tab:red')

        ax.axhline(dqn_mean, color='blue', linestyle='--', alpha=0.5, label=f"DQN Mean ({dqn_mean:.1f})")
        ax.axhline(base_mean, color='red', linestyle='--', alpha=0.5, label=f"Baseline Mean ({base_mean:.1f})")

        ax.set_title("Moving Averages + Mean Reward Levels")
        ax.set_xlabel("Episode")
        ax.set_ylabel("Reward")
        ax.legend()
        ax.grid(alpha=0.3)
        fig.tight_layout()
        fig.savefig(out / "overlay_with_mean.png", dpi=300, bbox_inches='tight')
    finally:
        # Release the figure even if plotting or saving fails.
        plt.close(fig)

In [22]:
# Render every comparison figure into the graphs directory.
plot_metrics(dqn_metrics=dqn_metrics, base_metrics=base_metrics, out=graphs_dir)
plot_cumulative(dqn=dqn_df, base=base_df, out=graphs_dir)
plot_overlay_with_mean(dqn=dqn_df, base=base_df, out=graphs_dir)

In [23]:
# Console report: one statistic per line, emitted with a single print call.
print(
    "\n*DQN v Random Baseline Comparison*",
    f"DQN Mean Reward: {dqn_metrics['mean']:.2f}",
    f"Baseline Mean Reward: {base_metrics['mean']:.2f}",
    f"Improvement: {improvement:.2f}%",
    f"t-stat: {t_stat:.3f}, p-value: {p_val:.7f}",
    f"DQN total timesteps: {dqn_metrics['timesteps']:,}",
    f"Baseline total timesteps: {base_metrics['timesteps']:,}",
    sep="\n",
)



*DQN v Random Baseline Comparison*
DQN Mean Reward: 69.37
Baseline Mean Reward: 22.91
Improvement: 202.72%
t-stat: 16.277, p-value: 0.0000000
DQN total timesteps: 100,100
Baseline total timesteps: 4,583


In [24]:
# Persist the comparison as a Markdown report next to the generated graphs.
summary_file = comparison_dir / "summary.md"
# Explicit UTF-8 so the report is written the same way on every platform,
# whatever characters the run paths contain.
with open(summary_file, "w", encoding="utf-8") as f:
    f.write(f"# DQN vs Random Baseline Comparison ({datetime.now():%Y-%m-%d %H:%M:%S})\n\n")
    f.write(f"- DQN Mean Reward: **{dqn_metrics['mean']:.2f}**\n")
    f.write(f"- Baseline Mean Reward: **{base_metrics['mean']:.2f}**\n")
    f.write(f"- Improvement: **{improvement:.2f}%**\n")
    f.write(f"- t-stat: {t_stat:.3f}\n")
    # Scientific notation: fixed-point formatting rounds a very small p-value
    # to 0.000000000, which misreports it as exactly zero.
    f.write(f"- p-value: {p_val:.3e}\n\n")
    f.write("# Uses these runs:\n")
    f.write(f"- dqn route: {dqn_run}\n")
    f.write(f"- baseline route: {base_run}\n")