# 03 — RL Agent Analysis

Visualise PPO agent behaviour: augmentation ratio over time, validation
accuracy, reward trajectory, and policy/value losses.

In [None]:
# Notebook setup: switch into the project checkout, make it importable, and
# define where results are read from and written to.
# NOTE(review): the /content/... paths look like a Google Colab runtime with
# Drive mounted — confirm before running elsewhere.
import sys, os
# Change the working directory to the project root, then put that directory
# ('.') first on the import path so modules located there take precedence.
os.chdir('/content/amers')
sys.path.insert(0, '.')

from pathlib import Path
import numpy as np
import torch
import matplotlib.pyplot as plt

# Base folder for this project's files; OUT is its 'outputs' subfolder.
DRIVE_BASE = Path('/content/drive/MyDrive/AMERS')
OUT = DRIVE_BASE / 'outputs'

In [None]:
# Load the saved training results onto the CPU and pull out the RL metrics.
# A missing 'rl' entry falls back to an empty dict, which skips the plots.
results = torch.load(OUT / 'training_results.pt', map_location='cpu')
rl = results.get('rl', {})

if rl:
    # 2x2 dashboard: augmentation ratio, validation accuracy, reward, losses.
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))

    axes[0, 0].plot(rl['aug_ratio'])
    axes[0, 0].set_title('Augmentation Ratio Over Steps')
    axes[0, 0].set_ylabel('Ratio')

    axes[0, 1].plot(rl['val_acc'])
    axes[0, 1].set_title('Validation Accuracy')
    axes[0, 1].set_ylabel('Accuracy')

    axes[1, 0].plot(rl['reward'])
    axes[1, 0].set_title('Reward Per Step')
    axes[1, 0].set_ylabel('Reward')
    axes[1, 0].set_xlabel('Step')

    # Both PPO losses share one panel so they can be compared directly.
    axes[1, 1].plot(rl['policy_loss'], label='Policy')
    axes[1, 1].plot(rl['value_loss'], label='Value')
    axes[1, 1].set_title('PPO Losses')
    axes[1, 1].legend()
    axes[1, 1].set_xlabel('Step')

    plt.tight_layout()
    # Save before show() so the figure is written to disk alongside the
    # results file regardless of how the notebook renders it.
    plt.savefig(str(OUT / 'rl_analysis.png'), dpi=150)
    plt.show()

    # Summary statistics. The series may be empty even when `rl` itself is
    # not; max()/np.argmax raise ValueError on an empty sequence and np.mean
    # yields nan with a RuntimeWarning, so guard each one explicitly.
    # len() is used instead of truthiness because the series may be arrays.
    val_acc = rl['val_acc']
    if len(val_acc) > 0:
        # Compute the index once so the reported value and step always
        # refer to the same element; steps are reported 1-based.
        best_idx = int(np.argmax(val_acc))
        print(f'Best val acc: {val_acc[best_idx]:.4f} at step {best_idx + 1}')
    else:
        print('Best val acc: n/a (no validation steps recorded)')

    aug_ratio = rl['aug_ratio']
    if len(aug_ratio) > 0:
        print(f'Mean aug ratio: {np.mean(aug_ratio):.3f}')
    else:
        print('Mean aug ratio: n/a (no steps recorded)')
else:
    print('No RL results found. Run training first.')