# Train RL Evacuation Agent — ARGUS Phase 2

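Trains a PPO agent to choreograph flood evacuations in the custom Gymnasium
environment, compares it against the heuristic baseline, and generates sample
evacuation plans for the `kullu_01` and `majuli_01` villages.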

**Owner:** Dhana · **Service:** `evacuation_rl`

In [None]:
import sys
sys.path.insert(0, '..')

import numpy as np
from services.evacuation_rl.graph import EvacuationGraph
from services.evacuation_rl.environment import EvacuationEnv
from services.evacuation_rl.agent import EvacuationAgent, HeuristicPolicy

# Build the evacuation graph with no constructor arguments (its defaults).
graph = EvacuationGraph()

# Report how many zones and routes the graph holds.
zone_count = len(graph.zones)
print(f'Zones: {zone_count}')
route_count = len(graph.routes)
print(f'Routes: {route_count}')

# Totals the graph reports for the kullu_01 village.
kullu_population = graph.total_population("kullu_01")
print(f'Total population (kullu_01): {kullu_population}')
kullu_safe_capacity = graph.total_safe_capacity("kullu_01")
print(f'Safe capacity (kullu_01): {kullu_safe_capacity}')

## Test Environment

In [None]:
# Environment settings for this smoke test: kullu_01, 30-step cap,
# initial risk of 0.5.
env_settings = dict(village_id='kullu_01', max_steps=30, initial_risk=0.5)
env = EvacuationEnv(graph, **env_settings)

# Inspect the dimensions and action list the environment exposes.
state_dim = env.state_dim
print(f'State dim: {state_dim}')
action_count = env.n_actions
print(f'Action space: {action_count} actions')
action_list = env.actions
print(f'Actions: {action_list}')

# Reset with a fixed seed (42) and look at the first observation.
reset_result = env.reset(seed=42)
obs, info = reset_result
print(f'\nInitial obs shape: {obs.shape}')
state_summary = env.get_state_summary()
print(f'Initial state: {state_summary}')

## Evaluate Heuristic Baseline

In [None]:
# Agent bound to the shared graph and the kullu_01 village.
agent = EvacuationAgent(graph, village_id='kullu_01')

# Score the agent over 20 episodes. No training has been run on it at this
# point, so this is expected to measure the heuristic policy.
# NOTE(review): confirm against EvacuationAgent.evaluate.
baseline = agent.evaluate(n_episodes=20)

# Dump every metric returned by the evaluation, one per line.
print('Heuristic Baseline Performance:')
for metric_name, metric_value in baseline.items():
    print(f'  {metric_name}: {metric_value}')

## Generate Sample Evacuation Plan

In [None]:
# Ask the agent for one plan at risk 0.7 with the WARNING trigger level.
plan = agent.generate_plan(risk_score=0.7, trigger_level='WARNING')

# Plan header: identity, trigger, and the summary fields carried by the plan.
print(f'Plan: {plan.plan_id}')
print(f'  Village: {plan.village_id}')
print(f'  Trigger: {plan.trigger_level} (risk={plan.risk_score})')
print(f'  Total population: {plan.total_population}')
print(f'  Zones at risk: {plan.zones_at_risk}')
print(f'  Safe zones: {plan.safe_zones_available}')
print(f'  Clear time: ~{plan.estimated_clear_time_min}min')
print(f'  RL reward: {plan.rl_reward}')

# One line per action: priority, zone → route, then the movement details.
print(f'\n  Actions ({len(plan.actions)}):')
for step in plan.actions:
    routing = f'    P{step.priority}: {step.zone_id} → {step.recommended_route} '
    movement = f'({step.population_to_move} people, ~{step.estimated_travel_min}min, '
    timing = f'deadline={step.deadline_minutes}min, conf={step.confidence:.2f})'
    print(routing + movement + timing)

## Train PPO Agent (if Stable-Baselines3 available)

In [None]:
# Probe for Stable-Baselines3; the import is only used as an availability
# check, and the flag below gates the training section.
try:
    from stable_baselines3 import PPO
except ImportError:
    SB3_OK = False
    print('Stable-Baselines3 not installed — skipping PPO training')
    print('Install with: pip install stable-baselines3')
else:
    SB3_OK = True

if SB3_OK:
    # Train for 20000 timesteps and show whatever train() returns.
    print('Training PPO agent...')
    result = agent.train(total_timesteps=20000)
    print(f'Training result: {result}')

    # Re-run the same 20-episode evaluation, now after training.
    trained = agent.evaluate(n_episodes=20)
    print('\nTrained PPO Performance:')
    for metric_name, metric_value in trained.items():
        print(f'  {metric_name}: {metric_value}')

    # Before → after comparison against `baseline` from the heuristic cell.
    print('\nImprovement over heuristic:')
    compared_metrics = (
        ('Reward', 'mean_reward'),
        ('Evac ratio', 'mean_evacuation_ratio'),
    )
    for label, metric_key in compared_metrics:
        print(f'  {label}: {baseline[metric_key]:.3f} → {trained[metric_key]:.3f}')

## Risk Sweep: Plans at Different Risk Levels

In [None]:
# Risk scores to sweep, lowest to highest.
risk_levels = [0.3, 0.5, 0.7, 0.85, 0.95]

# Table header: right-aligned column titles joined by two spaces, then a rule.
header_cells = (
    f'{"Risk":>6s}',
    f'{"Actions":>8s}',
    f'{"Clear(min)":>10s}',
    f'{"Reward":>8s}',
)
print('  '.join(header_cells))
print('-' * 40)

# One row per risk score: generate a plan (no trigger level passed) and
# print its action count, estimated clear time and reward.
for risk_level in risk_levels:
    sweep_plan = agent.generate_plan(risk_score=risk_level)
    row_cells = (
        f'{risk_level:6.2f}',
        f'{len(sweep_plan.actions):8d}',
        f'{sweep_plan.estimated_clear_time_min:10.0f}',
        f'{sweep_plan.rl_reward:8.3f}',
    )
    print('  '.join(row_cells))

## Majuli Island Scenario

In [None]:
# Separate agent on the same graph, bound to the majuli_01 village.
majuli_agent = EvacuationAgent(graph, village_id='majuli_01')

# Majuli is a river island, so its evacuation poses unique challenges;
# plan for risk 0.8 at the EMERGENCY trigger level.
majuli_plan = majuli_agent.generate_plan(risk_score=0.8, trigger_level='EMERGENCY')

print('Majuli Evacuation Plan:')
print(f'  Population: {majuli_plan.total_population}')
print(f'  Actions: {len(majuli_plan.actions)}')
print(f'  Clear time: ~{majuli_plan.estimated_clear_time_min}min')

# Annotate each action with the road type of its recommended route.
# NOTE(review): `_route_map` is an underscore-prefixed (non-public) attribute
# of the graph; prefer a public lookup if EvacuationGraph offers one.
for step in majuli_plan.actions:
    matched_route = graph._route_map.get(step.recommended_route)
    if matched_route:
        road_type = matched_route.road_type
    else:
        # No (truthy) route found for this id: fall back to a placeholder.
        road_type = 'unknown'
    print(f'    P{step.priority}: {step.zone_id} → {step.recommended_route} ({road_type}) · {step.population_to_move} people')