# 03 — Agent Benchmark

Quick benchmark of PPO vs MCP-PPO vs DQN vs MAPPO on a short run.

In [None]:
import sys
from pathlib import Path
import asyncio
import numpy as np
import matplotlib.pyplot as plt

# Resolve the project root: when launched from the `notebooks` directory the
# root is its parent; otherwise the current working directory is the root.
PROJECT_ROOT = Path.cwd()
if PROJECT_ROOT.name == 'notebooks':
    PROJECT_ROOT = PROJECT_ROOT.parent

# Put the root first on the import path so the project packages imported
# below (config, simulation, rl_agents) resolve from it.
sys.path.insert(0, str(PROJECT_ROOT))

from config.simulation_config import SimulationConfig
from simulation.environment import SwarmEnvironment
from rl_agents.ppo_agent import PPOAgent
from rl_agents.context_aware_agent import ContextAwareAgent

In [None]:
async def run_ppo_baseline(episodes=5):
    """Roll out one PPO agent per UAV and collect the reward of each episode.

    A ``SwarmEnvironment`` is created with rendering disabled and no MCP
    server URL, and one ``PPOAgent`` is built per UAV. The flat observation
    is split evenly between the agents and their actions are concatenated
    into a single action vector for ``env.step``. Only ``select_action`` is
    called on the agents here; this function performs no update step.

    The function contains no ``await``; it is declared ``async`` so callers
    can drive it with ``asyncio.run`` or ``await``.

    Args:
        episodes: Number of episodes to run.

    Returns:
        A list with one accumulated reward per episode, in episode order.
    """
    config = SimulationConfig()
    config.render = False
    env = SwarmEnvironment(config, mcp_server_url=None)
    # Always release the environment, even if agent construction or a
    # rollout step raises part-way through.
    try:
        env.mcp_connected = False
        # Per-agent slice sizes of the flat observation/action spaces.
        state_dim = env.observation_space.shape[0] // config.num_uavs
        action_dim = env.action_space.shape[0] // config.num_uavs
        agents = [PPOAgent(f'ppo_{i}', state_dim, action_dim, {
            'learning_rate': 3e-4, 'gamma': 0.99, 'batch_size': 64,
            'buffer_size': 10000, 'action_scale': 2.0
        }) for i in range(config.num_uavs)]
        rewards = []
        for _ in range(episodes):
            obs, _info = env.reset()
            ep_rew = 0
            while True:
                # Width of each agent's observation slice; identical for
                # every agent within a step, so compute it once per step.
                sd = len(obs) // len(agents)
                acts = []
                for i, agent in enumerate(agents):
                    acts.extend(agent.select_action(obs[i * sd:(i + 1) * sd]))
                obs, r, term, trunc, _info = env.step(np.array(acts))
                ep_rew += r
                if term or trunc:
                    break
            rewards.append(ep_rew)
    finally:
        env.close()
    return rewards

# asyncio.run() raises RuntimeError when an event loop is already running in
# this thread (the usual situation inside a Jupyter kernel). In that case run
# the coroutine on a worker thread, where asyncio.run() can own a fresh loop.
try:
    asyncio.get_running_loop()
except RuntimeError:
    # No running loop (plain script execution): run directly.
    rewards = asyncio.run(run_ppo_baseline(5))
else:
    from concurrent.futures import ThreadPoolExecutor

    with ThreadPoolExecutor(max_workers=1) as _pool:
        # .result() blocks until done and re-raises any exception.
        rewards = _pool.submit(asyncio.run, run_ppo_baseline(5)).result()
print('PPO baseline rewards:', rewards)

In [None]:
# Plot the per-episode rewards of the PPO baseline run.
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(rewards, 'o-', label='PPO Baseline')
ax.set_xlabel('Episode')
ax.set_ylabel('Reward')
ax.set_title('Agent Benchmark (PPO)')
ax.legend()
ax.grid(True, alpha=0.3)
plt.tight_layout()

# savefig does not create missing directories, so make sure the output
# folder exists before writing the figure.
results_dir = PROJECT_ROOT / 'results'
results_dir.mkdir(parents=True, exist_ok=True)
plt.savefig(results_dir / '03_agent_benchmark.png', dpi=150)
plt.show()