In [None]:
"""
Visualization script for training logs
Visualizes data from episode_data JSON and step_rewards numpy files
"""

import json
import os
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path


def load_training_logs(logs_dir="Training/Logs"):
    """
    Load the episode-data JSON file and the step-rewards array from a log directory.

    When several ``episode_data_*.json`` files exist, the one whose filename
    sorts last lexicographically is used. That is the most recent file only
    if the filenames embed a sortable suffix (e.g. a zero-padded timestamp).

    Args:
        logs_dir: Directory containing the log files

    Returns:
        tuple: (episode_data dict, step_rewards array)

    Raises:
        FileNotFoundError: If no ``episode_data_*.json`` file or no
            ``step_rewards.npy`` file is present in ``logs_dir``.
    """
    logs_path = Path(logs_dir)

    # Find episode data JSON file(s)
    json_files = list(logs_path.glob("episode_data_*.json"))
    if not json_files:
        raise FileNotFoundError(f"No episode_data JSON file found in {logs_dir}")

    # Lexicographically last filename; max() avoids sorting the whole list.
    episode_data_file = max(json_files)

    # JSON is UTF-8 by specification; do not depend on the platform's
    # locale encoding (which is not UTF-8 on every system).
    with open(episode_data_file, 'r', encoding='utf-8') as f:
        episode_data = json.load(f)

    # Load step rewards
    step_rewards_file = logs_path / "step_rewards.npy"
    if not step_rewards_file.exists():
        raise FileNotFoundError(f"step_rewards.npy not found in {logs_dir}")

    step_rewards = np.load(step_rewards_file)

    print(f"Loaded episode data from: {episode_data_file.name}")
    print(f"Loaded step rewards from: {step_rewards_file.name}")
    print(f"Total timesteps: {episode_data.get('total_timesteps', len(step_rewards))}")
    print(f"Step rewards shape: {step_rewards.shape}")

    return episode_data, step_rewards


def _has_data(values):
    """Return True for a non-empty sequence; safe for lists, NumPy arrays and None."""
    # `values and len(values) > 0` raises on multi-element NumPy arrays
    # (ambiguous truth value), so test the length explicitly.
    return values is not None and len(values) > 0


def _moving_average(values, window):
    """Return the unweighted moving average of ``values`` over ``window`` samples.

    Uses ``mode='valid'``, so the result has ``len(values) - window + 1`` points.
    """
    return np.convolve(values, np.ones(window) / window, mode='valid')


def _summary_stats(values):
    """Return mean/std/min/max/median of ``values``; every entry is NaN when empty."""
    arr = np.asarray(values)
    if arr.size == 0:
        # np.min/np.max raise ValueError on empty input; report NaN instead.
        return dict.fromkeys(('mean', 'std', 'min', 'max', 'median'), float('nan'))
    return {
        'mean': np.mean(arr),
        'std': np.std(arr),
        'min': np.min(arr),
        'max': np.max(arr),
        'median': np.median(arr),
    }


def _plot_episode_series(ax, values, marker, color, label, ylabel, title):
    """Plot one per-episode series on ``ax``, with a moving average when > 10 episodes."""
    episodes = np.arange(len(values))
    ax.plot(episodes, values, marker=marker, markersize=4, alpha=0.7,
            color=color, label=label)

    if len(values) > 10:
        # len > 10 guarantees a window of at least 2 samples.
        window = min(10, len(values) // 5)
        ax.plot(episodes[window - 1:], _moving_average(values, window),
                linewidth=2, color='red',
                label=f'Moving Average (window={window})')

    ax.set_xlabel('Episode', fontsize=12)
    ax.set_ylabel(ylabel, fontsize=12)
    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.grid(True, alpha=0.3)
    ax.legend()


def _print_stats(heading, stats, fmt='.4f', extreme_fmt=None, unit=''):
    """Print one statistics section; ``extreme_fmt`` overrides ``fmt`` for min/max."""
    if extreme_fmt is None:
        extreme_fmt = fmt
    print(f"\n{heading}:")
    print(f"  Mean: {stats['mean']:{fmt}}{unit}")
    print(f"  Std: {stats['std']:{fmt}}{unit}")
    print(f"  Min: {stats['min']:{extreme_fmt}}{unit}")
    print(f"  Max: {stats['max']:{extreme_fmt}}{unit}")
    print(f"  Median: {stats['median']:{fmt}}{unit}")


def visualize_training_logs(episode_data, step_rewards, save_figures=True, output_dir="Training/Logs"):
    """
    Create a 2x2 figure summarising the training logs and print summary statistics.

    Panels: step rewards over time (with moving average), step-reward
    histogram, per-episode rewards, and per-episode lengths. When no episode
    lengths are available the fourth panel shows a text summary instead.
    Statistics of an empty ``step_rewards`` are reported as NaN rather than
    raising.

    Args:
        episode_data: Dictionary containing episode-level data. The keys read
            are 'episode_rewards', 'episode_lengths', 'total_timesteps' and
            'episode_count'; all are optional.
        step_rewards: NumPy array of step rewards
        save_figures: Whether to save the figure to disk
        output_dir: Directory to save the figure in (created if missing)
    """
    episode_rewards = episode_data.get('episode_rewards', [])
    episode_lengths = episode_data.get('episode_lengths', [])
    total_timesteps = episode_data.get('total_timesteps', len(step_rewards))
    episode_count = episode_data.get('episode_count', 0)

    # Computed once and reused by the histogram, the summary panel and the
    # console report.
    step_stats = _summary_stats(step_rewards)

    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    ax1, ax2, ax3, ax4 = axes.ravel()

    # 1. Step rewards over time
    steps = np.arange(len(step_rewards))
    ax1.plot(steps, step_rewards, alpha=0.6, linewidth=0.5, color='blue', label='Step Reward')

    # Moving average over ~10% of the data, capped at 100 samples
    window_size = min(100, len(step_rewards) // 10)
    if window_size > 1:
        ax1.plot(steps[window_size - 1:], _moving_average(step_rewards, window_size),
                 linewidth=2, color='red',
                 label=f'Moving Average (window={window_size})')

    ax1.set_xlabel('Step', fontsize=12)
    ax1.set_ylabel('Reward', fontsize=12)
    ax1.set_title('Step Rewards Over Time', fontsize=14, fontweight='bold')
    ax1.grid(True, alpha=0.3)
    ax1.legend()

    # 2. Step rewards histogram
    ax2.hist(step_rewards, bins=50, alpha=0.7, color='green', edgecolor='black')
    ax2.axvline(step_stats['mean'], color='red', linestyle='--', linewidth=2,
                label=f"Mean: {step_stats['mean']:.3f}")
    ax2.axvline(step_stats['median'], color='orange', linestyle='--', linewidth=2,
                label=f"Median: {step_stats['median']:.3f}")
    ax2.set_xlabel('Reward', fontsize=12)
    ax2.set_ylabel('Frequency', fontsize=12)
    ax2.set_title('Step Rewards Distribution', fontsize=14, fontweight='bold')
    ax2.grid(True, alpha=0.3)
    ax2.legend()

    # 3. Episode rewards (if available)
    if _has_data(episode_rewards):
        _plot_episode_series(ax3, episode_rewards, marker='o', color='purple',
                             label='Episode Reward', ylabel='Episode Reward',
                             title='Episode Rewards')
    else:
        ax3.text(0.5, 0.5, 'No episode rewards data available',
                 ha='center', va='center', fontsize=12, transform=ax3.transAxes)
        ax3.set_title('Episode Rewards', fontsize=14, fontweight='bold')

    # 4. Episode lengths (if available) or statistics summary
    if _has_data(episode_lengths):
        _plot_episode_series(ax4, episode_lengths, marker='s', color='teal',
                             label='Episode Length', ylabel='Episode Length (steps)',
                             title='Episode Lengths')
    else:
        # Show statistics summary instead
        stats_text = f"""
        Training Statistics Summary
        
        Total Timesteps: {total_timesteps}
        Episode Count: {episode_count}
        
        Step Rewards:
          Mean: {step_stats['mean']:.4f}
          Std: {step_stats['std']:.4f}
          Min: {step_stats['min']:.4f}
          Max: {step_stats['max']:.4f}
          Median: {step_stats['median']:.4f}
        """
        ax4.text(0.1, 0.5, stats_text, ha='left', va='center', fontsize=11,
                 family='monospace', transform=ax4.transAxes)
        ax4.set_title('Statistics Summary', fontsize=14, fontweight='bold')
        ax4.axis('off')

    # One layout pass is enough; nothing changes between here and saving.
    fig.tight_layout()

    if save_figures:
        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)
        figure_path = output_path / "training_logs_visualization.png"
        fig.savefig(figure_path, dpi=150, bbox_inches='tight')
        print(f"\nFigure saved to: {figure_path}")

    plt.show()

    # Print summary statistics
    print("\n" + "="*70)
    print("TRAINING LOGS SUMMARY")
    print("="*70)
    print(f"Total Timesteps: {total_timesteps}")
    print(f"Episode Count: {episode_count}")
    _print_stats("Step Rewards Statistics", step_stats)

    if _has_data(episode_rewards):
        _print_stats("Episode Rewards Statistics", _summary_stats(episode_rewards))

    if _has_data(episode_lengths):
        _print_stats("Episode Lengths Statistics", _summary_stats(episode_lengths),
                     fmt='.2f', extreme_fmt='.0f', unit=' steps')

    print("="*70)


def main(argv=None):
    """
    Command-line entry point: load the training logs and visualize them.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``.

    Returns:
        int: 0 on success, 1 if a required log file was not found.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Visualize training logs")
    parser.add_argument(
        "--logs_dir",
        type=str,
        default="Training/Logs",
        help="Directory containing the log files"
    )
    parser.add_argument(
        "--save",
        action="store_true",
        help="Save figures to disk"
    )
    parser.add_argument(
        "--output_dir",
        type=str,
        default="Training/Logs",
        help="Directory to save figures (if --save is used)"
    )

    # parse_args() aborts with SystemExit(2) on any argument it does not
    # know. Host processes such as a Jupyter kernel put their own arguments
    # (e.g. the connection-file flag) into sys.argv, so tolerate unknown
    # arguments and report them instead of exiting.
    args, unknown = parser.parse_known_args(argv)
    if unknown:
        print(f"Ignoring unrecognized arguments: {' '.join(unknown)}")

    # Load and visualize
    try:
        episode_data, step_rewards = load_training_logs(args.logs_dir)
        visualize_training_logs(episode_data, step_rewards,
                                save_figures=args.save,
                                output_dir=args.output_dir)
    except FileNotFoundError as e:
        print(f"Error: {e}")
        return 1

    return 0


if __name__ == "__main__":
    # The builtin `exit` is an interactive helper installed by the `site`
    # module (and replaced by IPython with its own object), so it is not a
    # dependable way to set the process exit status. Raise SystemExit directly.
    raise SystemExit(main())



usage: ipykernel_launcher.py [-h] [--logs_dir LOGS_DIR] [--save]
                             [--output_dir OUTPUT_DIR]
ipykernel_launcher.py: error: unrecognized arguments: --f=/Users/wenqifan/Library/Jupyter/runtime/kernel-v3ae54d121b99de2414e4e3d2866686ff28e046daa.json


SystemExit: 2