In [97]:
import gymnasium as gym
import flappy_bird_gymnasium
import torch
from collections import deque
import numpy as np
from PIL import Image
import os
from datetime import datetime
import cv2

# Import the existing classes
from FlappyBird import DQN, DQNAgent, preprocess_image, FrameStack


def simulate_episodes(num_episodes=10, model_path='models/dqn_best.pth', render_delay=1):
    """
    Run a trained DQN agent in FlappyBird-v0, displaying every rendered frame.

    The agent picks one action per decision step and repeats it for
    ``agent.frame_skip`` environment steps; each of those intermediate frames
    is shown in an OpenCV window so playback looks smooth. Pressing ``q`` in
    the window (or Ctrl+C) stops the simulation early; the episode in progress
    at that moment is not counted.

    Args:
        num_episodes: Number of episodes to simulate
        model_path: Path to the trained model checkpoint. The checkpoint must
            contain the keys 'model_state_dict', 'episode' and 'reward'.
        render_delay: Delay between frames in milliseconds, passed to
            ``cv2.waitKey`` (1ms default for smooth rendering)

    Returns:
        List with the total reward of every completed episode (empty if the
        run was interrupted before the first episode finished).

    Raises:
        FileNotFoundError: If ``model_path`` does not exist. Raised before the
            environment or any window is created.
    """
    # Fail fast: check the checkpoint before allocating the environment so
    # nothing is left open when the path is wrong.
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"No model found at {model_path}")

    env = gym.make("FlappyBird-v0", render_mode="rgb_array", use_lidar=False)
    episode_rewards = []

    try:
        agent = DQNAgent(env)

        # Load the trained model. map_location="cpu" allows a checkpoint saved
        # on a GPU to be read on a CPU-only machine; load_state_dict then
        # copies the values into the network's existing parameters.
        checkpoint = torch.load(model_path, map_location="cpu")
        agent.policy_net.load_state_dict(checkpoint['model_state_dict'])
        print(f"Loaded model from {model_path}")
        print(f"Model was saved at episode {checkpoint['episode']} with reward {checkpoint['reward']}")

        # Set to evaluation mode
        agent.policy_net.eval()

        # Create window for rendering
        cv2.namedWindow('Flappy Bird AI', cv2.WINDOW_NORMAL)
        cv2.resizeWindow('Flappy Bird AI', 400, 600)

        for episode in range(num_episodes):
            # The observation returned by reset() is unused: the agent's state
            # is built from rendered frames instead.
            env.reset()
            raw_frame = env.render()
            frame = preprocess_image(raw_frame)

            # Initialize the frame stack by repeating the first frame
            agent.frame_stack = FrameStack(size=4)
            for _ in range(4):
                agent.frame_stack.push(frame)
            state = agent.frame_stack.get_state()

            episode_reward = 0
            done = False

            while not done:
                # Select action without exploration (training=False)
                action = agent.select_action(state, training=False)

                # Frame skipping with rendering of ALL frames
                skip_reward = 0
                for _ in range(agent.frame_skip):
                    _, reward, terminated, truncated, _ = env.step(action)
                    # An episode ends on termination OR truncation; ignoring
                    # truncation would keep stepping a finished environment.
                    done = terminated or truncated
                    raw_frame = env.render()

                    # Display every frame during frame skipping. The score
                    # overlay shows the reward accumulated up to the start of
                    # the current skip window.
                    display_frame = cv2.cvtColor(raw_frame, cv2.COLOR_RGB2BGR)
                    cv2.putText(display_frame, f'Episode: {episode + 1}/{num_episodes}',
                                (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)
                    cv2.putText(display_frame, f'Score: {episode_reward}',
                                (10, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)
                    cv2.imshow('Flappy Bird AI', display_frame)

                    # Handle keyboard input (q to quit)
                    if cv2.waitKey(render_delay) & 0xFF == ord('q'):
                        raise KeyboardInterrupt

                    skip_reward += reward
                    if done:
                        break

                # Only the last frame of the skip window feeds the agent.
                agent.frame_stack.push(preprocess_image(raw_frame))
                state = agent.frame_stack.get_state()
                episode_reward += skip_reward

            episode_rewards.append(episode_reward)
            print(f"Episode {episode + 1} - Score: {episode_reward}")

    except KeyboardInterrupt:
        print("\nSimulation interrupted by user")

    finally:
        # Clean up only. No `return` here: returning from `finally` would
        # silently discard any unexpected exception raised above.
        cv2.destroyAllWindows()
        env.close()

    _print_simulation_summary(episode_rewards)
    return episode_rewards


def _print_simulation_summary(episode_rewards):
    """Print mean ± std, best and worst score of the completed episodes."""
    print("\nSimulation Summary:")
    if not episode_rewards:
        # Nothing finished (early interrupt or num_episodes == 0); the
        # statistics below are undefined for an empty list.
        print("No episodes completed")
        return

    best_score = max(episode_rewards)
    # index() returns the first occurrence, i.e. the earliest best episode.
    best_episode = episode_rewards.index(best_score)
    avg_reward = np.mean(episode_rewards)
    std_reward = np.std(episode_rewards)
    print(f"Average Score: {avg_reward:.2f} ± {std_reward:.2f}")
    print(f"Best Score: {best_score} (Episode {best_episode + 1})")
    print(f"Worst Score: {min(episode_rewards)}")

if __name__ == "__main__":
    # Play five episodes with the episode-8700 checkpoint; the 1 ms delay
    # between frames keeps rendering smooth.
    simulate_episodes(
        num_episodes=5,
        model_path='models/dqn_episode_8700.pth',
        render_delay=1,
    )

  checkpoint = torch.load(model_path)


Loaded model from models/dqn_episode_8700.pth
Model was saved at episode 8700 with reward 31.900000000000162
Episode 1 - Score: 136.8999999999972
Episode 2 - Score: 18.300000000000015
Episode 3 - Score: 34.500000000000185
Episode 4 - Score: 67.10000000000032
Episode 5 - Score: 96.89999999999897

Simulation Summary:
Average Score: 70.74 ± 42.75
Best Score: 136.8999999999972 (Episode 1)
Worst Score: 18.300000000000015
