# 📊 Training Analysis & Metrics

Comprehensive analysis of training performance and metrics.

**Topics:**
- Training curves and convergence
- Reward analysis
- Loss tracking
- Curriculum learning effects
- Performance metrics

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from hybrid_gcs.training import OptimizedTrainer
from hybrid_gcs.utils import DataBuffer, normalize_data

# Confirmation that the imports above resolved; the check-mark emoji was
# previously stored as mis-decoded bytes and printed as garbage.
print("✅ Modules loaded!")

## Generate Training Data

In [None]:
# Simulate training data.
#
# Builds three synthetic per-episode series (plain Python lists of floats):
#   episode_rewards - rises as 100 * (1 - exp(-episode / 200)), noise std 10
#   episode_lengths - falls as 100 * exp(-episode / 300) + 20, noise std 5,
#                     floored at 10
#   episode_losses  - falls as exp(-episode / 200), noise std 0.05,
#                     floored at 0
episodes = 1000

# A dedicated, seeded generator makes every re-run of this cell (and every
# statistic derived from it further down) reproducible, without touching
# NumPy's global random state.
rng = np.random.default_rng(42)
episode_index = np.arange(episodes)

# Reward increases with training (with noise).
reward_trend = 100 * (1 - np.exp(-episode_index / 200))
episode_rewards = (reward_trend + rng.standard_normal(episodes) * 10).tolist()

# Length decreases (more efficient); never below 10.
length_trend = 100 * np.exp(-episode_index / 300) + 20
episode_lengths = np.maximum(
    length_trend + rng.standard_normal(episodes) * 5, 10
).tolist()

# Loss decreases; never below 0 (so exact zeros can occur late in training).
loss_trend = 1.0 * np.exp(-episode_index / 200)
episode_losses = np.maximum(
    loss_trend + rng.standard_normal(episodes) * 0.05, 0
).tolist()

print(f"✅ Generated {episodes} episodes of training data")

## Training Curves

In [None]:
def _moving_average(values, window):
    """Return the mean of *values* over a sliding window of *window* samples.

    Uses ``mode='valid'``, so the result has ``len(values) - window + 1``
    entries and its first entry belongs to index ``window - 1``.
    """
    return np.convolve(values, np.ones(window) / window, mode='valid')


def _min_max_scale(values):
    """Rescale *values* linearly onto [0, 1]; a constant series maps to zeros."""
    data = np.asarray(values, dtype=float)
    low = data.min()
    span = data.max() - low
    if span == 0:
        # Avoid 0/0 -> NaN when every sample is identical.
        return np.zeros_like(data)
    return (data - low) / span


def _plot_with_trend(ax, values, trend, window, *, raw_color, trend_color,
                     ylabel, title):
    """Draw a raw per-episode series and its moving average on *ax*."""
    ax.plot(values, alpha=0.7, linewidth=1, color=raw_color)
    # The 'valid' moving average starts at index window - 1; derive the x
    # positions from the data itself so they always match the trend length.
    ax.plot(np.arange(window - 1, len(values)), trend,
            color=trend_color, linewidth=2, label=f'MA-{window}')
    ax.set_xlabel('Episode')
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.legend()
    ax.grid(True, alpha=0.3)


fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# Moving averages (window shared by all three panels)
window = 50
ma_rewards = _moving_average(episode_rewards, window)
ma_lengths = _moving_average(episode_lengths, window)
ma_losses = _moving_average(episode_losses, window)

# Episode rewards (raw_color=None keeps matplotlib's default line colour)
_plot_with_trend(axes[0, 0], episode_rewards, ma_rewards, window,
                 raw_color=None, trend_color='r',
                 ylabel='Reward', title='Episode Rewards')

# Episode lengths
_plot_with_trend(axes[0, 1], episode_lengths, ma_lengths, window,
                 raw_color='green', trend_color='darkgreen',
                 ylabel='Episode Length',
                 title='Episode Lengths (Lower is Better)')

# Losses
_plot_with_trend(axes[1, 0], episode_losses, ma_losses, window,
                 raw_color='red', trend_color='darkred',
                 ylabel='Loss', title='Training Loss')
# The generated losses are clipped at 0, and zeros cannot be drawn on a pure
# log axis. 'symlog' is linear within +/- linthresh and logarithmic beyond
# it, so every sample stays visible.
axes[1, 0].set_yscale('symlog', linthresh=1e-2)

# Combined metrics: lengths and losses are inverted so that "higher is
# better" holds for all three curves.
normalized_rewards = _min_max_scale(episode_rewards)
normalized_lengths = 1 - _min_max_scale(episode_lengths)
normalized_losses = 1 - _min_max_scale(episode_losses)

axes[1, 1].plot(normalized_rewards, label='Reward', linewidth=2, alpha=0.7)
axes[1, 1].plot(normalized_lengths, label='Efficiency', linewidth=2, alpha=0.7)
axes[1, 1].plot(normalized_losses, label='Training', linewidth=2, alpha=0.7)
axes[1, 1].set_xlabel('Episode')
axes[1, 1].set_ylabel('Normalized Score')
axes[1, 1].set_title('Normalized Performance Metrics')
axes[1, 1].legend()
axes[1, 1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## Statistical Analysis

In [None]:
# Split training into quarters; Q4 also takes any remainder episodes when the
# episode count is not divisible by 4.
quarter_size = len(episode_rewards) // 4

quarters = {
    'Q1': episode_rewards[:quarter_size],
    'Q2': episode_rewards[quarter_size:2*quarter_size],
    'Q3': episode_rewards[2*quarter_size:3*quarter_size],
    'Q4': episode_rewards[3*quarter_size:],
}

print("Training Progress Statistics:")
print("=" * 60)

for quarter, rewards in quarters.items():
    print(f"\n{quarter}:")
    print(f"  Mean Reward: {np.mean(rewards):.2f}")
    print(f"  Std Dev: {np.std(rewards):.2f}")
    print(f"  Min: {np.min(rewards):.2f}")
    print(f"  Max: {np.max(rewards):.2f}")
    print(f"  Median: {np.median(rewards):.2f}")

# Improvement: relative change of the mean reward from the first to the last
# quarter, in percent.
q1_mean = np.mean(quarters['Q1'])
q4_mean = np.mean(quarters['Q4'])
print(f"\n{'='*60}")
if q1_mean == 0:
    # A relative change is undefined against a zero baseline.
    improvement = float('nan')
    print("Overall Improvement (Q1 → Q4): n/a (Q1 mean reward is 0)")
else:
    # Divide by the magnitude of the baseline so that a negative Q1 mean does
    # not flip the sign of the reported improvement.
    improvement = (q4_mean - q1_mean) / abs(q1_mean) * 100
    print(f"Overall Improvement (Q1 → Q4): {improvement:.1f}%")

## Convergence Analysis

In [None]:
# Convergence check: smooth the reward series, then measure how much the
# smoothed curve still changes from one episode to the next.
window = 100
smoothing_kernel = np.ones(window) / window
ma_rewards = np.convolve(episode_rewards, smoothing_kernel, mode='valid')

# Velocity = first difference of the moving average (rate of change)
velocity = np.diff(ma_rewards)

fig, axes = plt.subplots(1, 2, figsize=(14, 5))
curve_ax, velocity_ax = axes

# Left panel: moving average with a dashed marker at its highest value
curve_ax.plot(range(window-1, episodes), ma_rewards, linewidth=2)
peak_reward = np.max(ma_rewards)
curve_ax.axhline(y=peak_reward, color='r', linestyle='--', alpha=0.5, label='Peak')
curve_ax.set_xlabel('Episode')
curve_ax.set_ylabel('Reward (Moving Avg)')
curve_ax.set_title(f'Learning Curve (MA-{window})')
curve_ax.legend()
curve_ax.grid(True, alpha=0.3)

# Right panel: velocity around a zero reference line. np.diff drops one
# sample, so the x positions start at `window` rather than `window - 1`.
velocity_ax.plot(range(window, episodes), velocity, linewidth=1, alpha=0.7)
velocity_ax.axhline(y=0, color='k', linestyle='-', alpha=0.3)
velocity_ax.set_xlabel('Episode')
velocity_ax.set_ylabel('Reward Change')
velocity_ax.set_title('Convergence Velocity')
velocity_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Average velocity over the last 50 smoothed steps; training is reported as
# converged when its magnitude is below 0.01 reward per episode.
final_velocity = np.mean(velocity[-50:])
if abs(final_velocity) < 0.01:
    convergence_status = 'Converged'
else:
    convergence_status = 'Still improving'

print(f"Final velocity (avg last 50): {final_velocity:.4f}")
print(f"Convergence status: {convergence_status}")

## Summary

Key insights from training analysis:
- 📈 Learning curves show consistent improvement
- ⚡ Episode efficiency improves over time
- 🎯 Loss decreases as expected
- 🔄 Convergence is progressing well

Next: Explore visualization and deployment strategies!