# Baseline Agent Testing

This notebook tests the two baseline agents:
1. **Shortest Path (A\*)**: Deterministic agent that follows the shortest path to the food
2. **Random Agent**: Uniformly random action selection

These baselines establish reference points for evaluating RL agents: random play as a performance floor and shortest-path search as a strong heuristic target.

In [None]:
import sys
sys.path.append('..')

import numpy as np
import matplotlib.pyplot as plt
from core.environment import SnakeEnv
from scripts.baselines.shortest_path import ShortestPathAgent
from scripts.baselines.random_agent import RandomAgent

## Test Configuration

In [None]:
# Shared settings for every baseline run in this notebook.
GRID_SIZE = 10      # passed to SnakeEnv as grid_size
N_EPISODES = 100    # episodes evaluated per agent / action-space pairing
MAX_STEPS = 1_000   # step cap given to both SnakeEnv and the evaluation loop
SEED = 42           # base seed for env.reset() and for RandomAgent

## Helper Functions

In [None]:
def test_agent(env, agent, n_episodes=100, max_steps=1000, seed=42):
    """
    Run an agent for several episodes and collect per-episode statistics.

    Each episode starts with ``env.reset(seed=seed + episode)`` so that every
    episode gets its own seed while the whole run stays reproducible. The
    agent is then queried via ``agent.get_action(env)`` until the environment
    reports ``terminated`` or ``truncated``, or ``max_steps`` steps were taken.

    Args:
        env: Environment exposing ``reset(seed=...) -> (obs, info)`` and
            ``step(action) -> (obs, reward, terminated, truncated, info)``.
        agent: Object exposing ``get_action(env)``.
        n_episodes: Number of episodes to run.
        max_steps: Maximum number of steps per episode. A value of 0 (or
            less) yields episodes of length 0 instead of raising.
        seed: Base seed; episode ``i`` is reset with ``seed + i``.

    Returns:
        Tuple ``(scores, rewards, lengths)`` of NumPy arrays with one entry
        per episode: the final ``info['score']``, the summed reward, and the
        number of steps actually taken.
    """
    scores = []
    rewards = []
    lengths = []

    for episode in range(n_episodes):
        _, info = env.reset(seed=seed + episode)
        total_reward = 0
        # Count steps explicitly: relying on the loop variable after the loop
        # fails when the loop body never runs (max_steps <= 0).
        steps_taken = 0

        for _ in range(max_steps):
            action = agent.get_action(env)
            _, reward, terminated, truncated, info = env.step(action)
            total_reward += reward
            steps_taken += 1

            if terminated or truncated:
                break

        # With a zero step budget only the reset info is available, and it may
        # not carry a score. NOTE(review): confirm the keys of the reset info.
        final_score = info['score'] if steps_taken else info.get('score', 0)

        scores.append(final_score)
        rewards.append(total_reward)
        lengths.append(steps_taken)

    return np.array(scores), np.array(rewards), np.array(lengths)


# The name starts with "test_", but this is an evaluation helper that needs
# explicit arguments; opt out of pytest-style test collection.
test_agent.__test__ = False

## Test 1: Shortest Path Agent (Absolute Actions)

In [None]:
print("Testing Shortest Path Agent with Absolute Actions...")

# Environment and agent are both configured for the absolute action space.
env_astar_abs = SnakeEnv(
    grid_size=GRID_SIZE,
    action_space_type='absolute',
    state_representation='feature',
    max_steps=MAX_STEPS,
)
agent_astar_abs = ShortestPathAgent(action_space_type='absolute')

# Evaluate with the shared notebook settings.
scores_astar_abs, rewards_astar_abs, lengths_astar_abs = test_agent(
    env_astar_abs,
    agent_astar_abs,
    n_episodes=N_EPISODES,
    max_steps=MAX_STEPS,
    seed=SEED,
)

# Summary report (mean +/- standard deviation over all episodes).
print(
    "\nA* (Absolute) Results:\n"
    f"  Avg Score: {scores_astar_abs.mean():.2f} +/- {scores_astar_abs.std():.2f}\n"
    f"  Max Score: {scores_astar_abs.max()}\n"
    f"  Avg Reward: {rewards_astar_abs.mean():.2f} +/- {rewards_astar_abs.std():.2f}\n"
    f"  Avg Length: {lengths_astar_abs.mean():.2f} +/- {lengths_astar_abs.std():.2f}"
)

## Test 2: Shortest Path Agent (Relative Actions)

In [None]:
print("Testing Shortest Path Agent with Relative Actions...")

# Environment and agent are both configured for the relative action space.
env_astar_rel = SnakeEnv(
    grid_size=GRID_SIZE,
    action_space_type='relative',
    state_representation='feature',
    max_steps=MAX_STEPS,
)
agent_astar_rel = ShortestPathAgent(action_space_type='relative')

# Evaluate with the shared notebook settings.
scores_astar_rel, rewards_astar_rel, lengths_astar_rel = test_agent(
    env_astar_rel,
    agent_astar_rel,
    n_episodes=N_EPISODES,
    max_steps=MAX_STEPS,
    seed=SEED,
)

# Summary report (mean +/- standard deviation over all episodes).
print(
    "\nA* (Relative) Results:\n"
    f"  Avg Score: {scores_astar_rel.mean():.2f} +/- {scores_astar_rel.std():.2f}\n"
    f"  Max Score: {scores_astar_rel.max()}\n"
    f"  Avg Reward: {rewards_astar_rel.mean():.2f} +/- {rewards_astar_rel.std():.2f}\n"
    f"  Avg Length: {lengths_astar_rel.mean():.2f} +/- {lengths_astar_rel.std():.2f}"
)

## Test 3: Random Agent (Absolute Actions)

In [None]:
print("Testing Random Agent with Absolute Actions...")

# Environment and agent are both configured for the absolute action space;
# the random agent receives the shared notebook seed.
env_rand_abs = SnakeEnv(
    grid_size=GRID_SIZE,
    action_space_type='absolute',
    state_representation='feature',
    max_steps=MAX_STEPS,
)
agent_rand_abs = RandomAgent(action_space_type='absolute', seed=SEED)

# Evaluate with the shared notebook settings.
scores_rand_abs, rewards_rand_abs, lengths_rand_abs = test_agent(
    env_rand_abs,
    agent_rand_abs,
    n_episodes=N_EPISODES,
    max_steps=MAX_STEPS,
    seed=SEED,
)

# Summary report (mean +/- standard deviation over all episodes).
print(
    "\nRandom (Absolute) Results:\n"
    f"  Avg Score: {scores_rand_abs.mean():.2f} +/- {scores_rand_abs.std():.2f}\n"
    f"  Max Score: {scores_rand_abs.max()}\n"
    f"  Avg Reward: {rewards_rand_abs.mean():.2f} +/- {rewards_rand_abs.std():.2f}\n"
    f"  Avg Length: {lengths_rand_abs.mean():.2f} +/- {lengths_rand_abs.std():.2f}"
)

## Test 4: Random Agent (Relative Actions)

In [None]:
print("Testing Random Agent with Relative Actions...")

# Environment and agent are both configured for the relative action space;
# the random agent receives the shared notebook seed.
env_rand_rel = SnakeEnv(
    grid_size=GRID_SIZE,
    action_space_type='relative',
    state_representation='feature',
    max_steps=MAX_STEPS,
)
agent_rand_rel = RandomAgent(action_space_type='relative', seed=SEED)

# Evaluate with the shared notebook settings.
scores_rand_rel, rewards_rand_rel, lengths_rand_rel = test_agent(
    env_rand_rel,
    agent_rand_rel,
    n_episodes=N_EPISODES,
    max_steps=MAX_STEPS,
    seed=SEED,
)

# Summary report (mean +/- standard deviation over all episodes).
print(
    "\nRandom (Relative) Results:\n"
    f"  Avg Score: {scores_rand_rel.mean():.2f} +/- {scores_rand_rel.std():.2f}\n"
    f"  Max Score: {scores_rand_rel.max()}\n"
    f"  Avg Reward: {rewards_rand_rel.mean():.2f} +/- {rewards_rand_rel.std():.2f}\n"
    f"  Avg Length: {lengths_rand_rel.mean():.2f} +/- {lengths_rand_rel.std():.2f}"
)

## Comparison Plots

In [None]:
# Bar charts comparing the four baseline runs on score, total reward and
# episode length; error bars show the standard deviation across episodes.
from pathlib import Path

fig, axes = plt.subplots(1, 3, figsize=(15, 4))

# Bar order: every per-metric list below follows this agent order.
agents = ['A* (Abs)', 'A* (Rel)', 'Rand (Abs)', 'Rand (Rel)']

# Score comparison
score_runs = (scores_astar_abs, scores_astar_rel, scores_rand_abs, scores_rand_rel)
score_means = [run.mean() for run in score_runs]
score_stds = [run.std() for run in score_runs]

# Reward comparison
reward_runs = (rewards_astar_abs, rewards_astar_rel, rewards_rand_abs, rewards_rand_rel)
reward_means = [run.mean() for run in reward_runs]
reward_stds = [run.std() for run in reward_runs]

# Episode length comparison
length_runs = (lengths_astar_abs, lengths_astar_rel, lengths_rand_abs, lengths_rand_rel)
length_means = [run.mean() for run in length_runs]
length_stds = [run.std() for run in length_runs]

# One panel per metric: (means, stds, y-axis label, title).
panels = [
    (score_means, score_stds, 'Average Score', 'Score Comparison'),
    (reward_means, reward_stds, 'Average Total Reward', 'Reward Comparison'),
    (length_means, length_stds, 'Average Episode Length', 'Episode Length Comparison'),
]
for ax, (means, stds, ylabel, title) in zip(axes, panels):
    ax.bar(agents, means, yerr=stds, capsize=5)
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.tick_params(axis='x', rotation=45)

plt.tight_layout()

# savefig does not create missing directories, so make sure the target
# folder exists before writing the figure.
figure_path = Path('../results/figures/baseline_comparison.png')
figure_path.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(figure_path, dpi=150, bbox_inches='tight')
plt.show()

print("\nBaseline comparison plot saved to results/figures/baseline_comparison.png")

## Summary

### Expected Results:

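1. **A\* Agent (Both Action Spaces)**:
   - Should achieve high scores (10-30 depending on grid complexity)
   - Deterministic shortest-path planning toward the food
   - Strong reference target for RL agents (a heuristic baseline rather than a strict upper bound, since always taking the shortest path can still lead the snake into a dead end)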

2. **Random Agent (Both Action Spaces)**:
   - Very low scores (0-2 typically)
   - Short episode lengths
   - Performance lower bound

3. **Action Space Comparison**:
   - Absolute and relative should perform similarly for A*
   - Random may vary slightly between action spaces

### Next Steps:

- RL agents should aim to exceed random agent performance quickly
- Goal is to approach or match A* performance
- These baselines will be used for comparison in all training notebooks