In [3]:
import os
import csv
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import gym
import gym_donkeycar
from IPython.display import display, clear_output

# Make sure the output folders for training logs and saved models exist
for _output_dir in ("logs", "models"):
    os.makedirs(_output_dir, exist_ok=True)

In [5]:
class CustomCallback:
    """
    Lightweight training-statistics recorder that does not import from stable_baselines3.

    Call ``update_info`` once per finished episode. Every ``log_interval``
    episodes the callback averages the most recent ``buffer_size`` episodes,
    appends a row to a CSV file, optionally prints a summary line and
    optionally redraws a live progress plot in the notebook.

    Args:
        verbose: Print a summary line on every logged row when > 0.
        live_plot: Redraw the progress figure on every logged row
            (once at least two rows have been recorded).
        log_dir: Directory the CSV file is written to; created if missing.
        log_interval: Number of episodes between two logged rows.
        buffer_size: Number of most recent episodes used for the running means.
    """
    def __init__(self, verbose=1, live_plot=True, log_dir="logs",
                 log_interval=5, buffer_size=100):
        self.verbose = verbose
        self.live_plot = live_plot
        self.log_interval = max(1, int(log_interval))
        self.buffer_size = max(1, int(buffer_size))
        self.training_start = time.time()
        self.timestamp = int(self.training_start)

        # The log directory may not exist yet (e.g. fresh kernel, setup cell skipped)
        os.makedirs(log_dir, exist_ok=True)
        self.csv_path = f"{log_dir}/ppo_stats_{self.timestamp}.csv"

        # Stats tracking
        self.n_calls = 0          # number of episodes reported so far
        self.num_timesteps = 0    # timestep value passed with the latest episode
        self.timesteps_list = []
        self.rewards_list = []
        self.lengths_list = []
        self.ep_info_buffer = []  # most recent episodes as {"r": reward, "l": length}

        # Create CSV file with headers
        with open(self.csv_path, 'w', newline='') as f:
            writer = csv.writer(f)
            writer.writerow([
                'timesteps', 'episodes', 'time_elapsed',
                'mean_reward', 'mean_episode_length', 'fps'
            ])

    def update_info(self, timesteps, reward, ep_length):
        """
        Record one finished episode and, every ``log_interval`` episodes, log a stats row.

        Args:
            timesteps: Total number of environment steps taken so far.
            reward: Total reward collected in the finished episode.
            ep_length: Number of steps in the finished episode.
        """
        self.n_calls += 1
        self.num_timesteps = timesteps

        # Store episode info and keep only the most recent `buffer_size` episodes
        self.ep_info_buffer.append({"r": reward, "l": ep_length})
        if len(self.ep_info_buffer) > self.buffer_size:
            del self.ep_info_buffer[:-self.buffer_size]

        # Only record stats periodically
        if self.n_calls % self.log_interval != 0:
            return

        # Running means over the buffered episodes
        time_elapsed = time.time() - self.training_start
        mean_reward = float(np.mean([ep_info["r"] for ep_info in self.ep_info_buffer]))
        mean_length = float(np.mean([ep_info["l"] for ep_info in self.ep_info_buffer]))
        fps = self.num_timesteps / time_elapsed if time_elapsed > 0 else 0

        # Save to CSV. The 'episodes' column is the running episode count
        # (n_calls), not the buffer length, which stops growing at buffer_size.
        with open(self.csv_path, 'a', newline='') as f:
            writer = csv.writer(f)
            writer.writerow([
                self.num_timesteps,
                self.n_calls,
                time_elapsed,
                mean_reward,
                mean_length,
                fps
            ])

        # Store for live plotting
        self.timesteps_list.append(self.num_timesteps)
        self.rewards_list.append(mean_reward)
        self.lengths_list.append(mean_length)

        # Print stats
        if self.verbose > 0:
            print(f"Steps: {self.num_timesteps} | "
                  f"Mean reward: {mean_reward:.2f} | "
                  f"Mean episode length: {mean_length:.2f}")

        # Update live plot if enabled (a line needs at least two points)
        if self.live_plot and len(self.timesteps_list) > 1:
            self._plot_live_progress()

    def _plot_live_progress(self):
        """Redraw the live training progress plot (reward and episode length) in the notebook."""
        clear_output(wait=True)

        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8))

        # Plot rewards
        ax1.plot(self.timesteps_list, self.rewards_list, 'b-', label='Mean Reward')
        ax1.set_xlabel('Timesteps')
        ax1.set_ylabel('Mean Reward')
        ax1.set_title('Training Progress')
        ax1.grid(True)

        # Plot episode lengths
        ax2.plot(self.timesteps_list, self.lengths_list, 'r-', label='Mean Episode Length')
        ax2.set_xlabel('Timesteps')
        ax2.set_ylabel('Mean Episode Length')
        ax2.grid(True)

        plt.tight_layout()
        plt.show()
        # A new figure is created on every refresh; close it so figures do not pile up
        plt.close(fig)

In [6]:
import gym
import gym_donkeycar
from stable_baselines3 import PPO

# Create a callback instance
callback = CustomCallback(verbose=1, live_plot=True)


class EpisodeStatsWrapper(gym.Wrapper):
    """
    Environment wrapper that reports per-episode reward and length to a stats callback.

    The wrapper accumulates reward and step count between resets and calls
    ``stats_callback.update_info(total_steps, episode_reward, episode_length)``
    whenever the wrapped environment signals ``done``. This lets the statistics
    be collected while ``model.learn()`` drives the environment.
    """
    def __init__(self, env, stats_callback):
        super().__init__(env)
        self.stats_callback = stats_callback
        self.total_steps = 0        # steps taken across all episodes
        self.episode_reward = 0.0
        self.episode_length = 0

    def reset(self, **kwargs):
        # A new episode starts: clear the per-episode accumulators
        self.episode_reward = 0.0
        self.episode_length = 0
        return self.env.reset(**kwargs)

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        self.total_steps += 1
        self.episode_reward += float(reward)
        self.episode_length += 1
        if done:
            self.stats_callback.update_info(
                self.total_steps, self.episode_reward, self.episode_length
            )
        return obs, reward, done, info


# Create the environment (wrapped for stats tracking) and the model
env = EpisodeStatsWrapper(gym.make("donkey-mountain-track-v0"), callback)
model = PPO("CnnPolicy", env, n_steps=256, verbose=1)

# Train with stats tracking.
# model.learn() is what actually updates the policy; calling model.predict()
# in a manual loop only rolls out the untrained policy.
total_timesteps = 10000
try:
    model.learn(total_timesteps=total_timesteps)

    # Save the trained model
    model.save(f"models/ppo_donkey_{callback.timestamp}.zip")
finally:
    # Always release the environment, even if training fails
    env.close()

RuntimeError: duplicate registrations for aten.linspace.Tensor_Tensor

In [None]:
def plot_training_stats(csv_path=None):
    """
    Create visualizations from the training statistics CSV file.

    Draws up to six panels (mean reward, mean episode length, FPS, reward vs.
    episode length, smoothed reward, reward change per logged row), adds a
    text summary, saves the figure next to the CSV as ``<name>_plots.png``
    and shows it.

    Args:
        csv_path: Path to a statistics CSV with the columns ``timesteps``,
            ``time_elapsed``, ``mean_reward``, ``mean_episode_length`` and
            ``fps``. When None, the ``ppo_stats_*.csv`` file in ``logs`` whose
            name sorts last is used.

    Returns:
        None. If no log file is found or the CSV cannot be loaded, a message
        is printed and the function returns without plotting.
    """
    # Select the most recent CSV file if none provided
    if csv_path is None:
        log_dir = "logs"
        log_files = []
        # A missing log directory simply means there is nothing to plot
        if os.path.isdir(log_dir):
            log_files = [
                f for f in os.listdir(log_dir)
                if f.startswith("ppo_stats_") and f.endswith(".csv")
            ]
        if not log_files:
            print("No training log files found!")
            return
        # File names embed the creation timestamp, so the largest name is the newest
        csv_path = os.path.join(log_dir, max(log_files))

    # Load the data
    try:
        data = pd.read_csv(csv_path)
        print(f"Loaded training data with {len(data)} entries from {csv_path}")
    except Exception as e:
        print(f"Error loading CSV file: {e}")
        return

    timesteps = data['timesteps']
    mean_reward = data['mean_reward']
    mean_length = data['mean_episode_length']

    # Set up the style
    sns.set(style="darkgrid")
    fig = plt.figure(figsize=(15, 12))

    def _line_panel(position, y, fmt, title, ylabel):
        """Draw one 'metric vs timesteps' line panel at the given grid position."""
        ax = fig.add_subplot(3, 2, position)
        ax.plot(timesteps, y, fmt)
        ax.set_title(title)
        ax.set_xlabel('Timesteps')
        ax.set_ylabel(ylabel)
        ax.grid(True)
        return ax

    # Plots 1-3: mean reward, mean episode length and FPS over time
    _line_panel(1, mean_reward, 'b-', 'Mean Reward vs Timesteps', 'Mean Reward')
    _line_panel(2, mean_length, 'r-', 'Mean Episode Length vs Timesteps', 'Mean Episode Length')
    _line_panel(3, data['fps'], 'g-', 'Training Speed (FPS) vs Timesteps', 'Frames Per Second')

    # Plot 4: Mean reward vs episode length (scatter)
    ax = fig.add_subplot(3, 2, 4)
    ax.scatter(mean_length, mean_reward, alpha=0.7)
    ax.set_title('Mean Reward vs Episode Length')
    ax.set_xlabel('Mean Episode Length')
    ax.set_ylabel('Mean Reward')
    ax.grid(True)

    if len(data) > 2:
        # Plot 5: Smoothed reward.
        # A centered rolling mean with min_periods=1 averages only the samples
        # that exist at the edges, instead of zero-padding (which would drag the
        # first and last smoothed points toward 0).
        smoothed = mean_reward.rolling(window=3, center=True, min_periods=1).mean()
        ax = fig.add_subplot(3, 2, 5)
        ax.plot(timesteps, mean_reward, 'b-', alpha=0.4, label='Raw')
        ax.plot(timesteps, smoothed, 'r-', linewidth=2, label='Smoothed')
        ax.set_title('Smoothed Mean Reward')
        ax.set_xlabel('Timesteps')
        ax.set_ylabel('Mean Reward')
        ax.legend()
        ax.grid(True)

        # Plot 6: Reward improvement rate (difference between consecutive rows)
        step_values = timesteps.to_numpy()
        reward_changes = np.diff(mean_reward.to_numpy())
        # Bar width follows the typical spacing between logged rows, not the
        # absolute timestep value, so neighbouring bars do not overlap.
        gaps = np.diff(step_values)
        gaps = gaps[gaps > 0]
        bar_width = 0.8 * float(np.median(gaps)) if gaps.size else 0.8
        ax = fig.add_subplot(3, 2, 6)
        ax.bar(step_values[1:], reward_changes, alpha=0.7, width=bar_width)
        ax.axhline(y=0, color='r', linestyle='-', alpha=0.3)
        ax.set_title('Reward Improvement Rate')
        ax.set_xlabel('Timesteps')
        ax.set_ylabel('Reward Change')
        ax.grid(True)

    # Add some overall info as text
    if len(data) > 0:
        total_time = data['time_elapsed'].iloc[-1] / 60  # minutes
        final_reward = mean_reward.iloc[-1]
        max_reward = mean_reward.max()

        info_text = (
            f"Training Summary:\n"
            f"Total time: {total_time:.1f} minutes\n"
            f"Final reward: {final_reward:.1f}\n"
            f"Max reward: {max_reward:.1f}\n"
            f"Total timesteps: {data['timesteps'].iloc[-1]}"
        )

        fig.text(0.5, 0.01, info_text, ha='center', fontsize=12,
                 bbox={'facecolor': 'lightgray', 'alpha': 0.5, 'pad': 5})

    # Save and show the figure
    fig.tight_layout(rect=[0, 0.05, 1, 1])  # Adjust for the text at the bottom
    # Swap only the file extension; str.replace could touch '.csv' elsewhere in
    # the path or leave the path unchanged if the extension differs.
    output_path = os.path.splitext(csv_path)[0] + '_plots.png'
    fig.savefig(output_path)
    print(f"Visualizations saved to {output_path}")
    plt.show()
    plt.close(fig)

In [None]:
# Visualize the statistics recorded by this session's callback
plot_training_stats(csv_path=callback.csv_path)