In [1]:
# Mount Google Drive inside the Colab VM so files stored there become reachable
# under /content/gdrive.
from google.colab import drive
drive.mount('/content/gdrive')
# Make the project folder the working directory; later cells import the
# project-local packages (agents.*, utils.*) and write to the relative
# "demo/" path, so they depend on this directory change.
%cd /content/gdrive/My Drive/EVA_MiLab_Hackathon

# Enable the autoreload extension in mode 2 so edited modules are re-imported
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

Mounted at /content/gdrive
/content/gdrive/My Drive/EVA_MiLab_Hackathon


In [2]:
!pip install swig
!pip install "gymnasium[box2d]==1.0.0"

Collecting swig
  Downloading swig-4.3.0-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl.metadata (3.5 kB)
Downloading swig-4.3.0-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl (1.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m24.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: swig
Successfully installed swig-4.3.0
Collecting box2d-py==2.3.5 (from gymnasium[box2d]==1.0.0)
  Downloading box2d-py-2.3.5.tar.gz (374 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m374.4/374.4 kB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: box2d-py
  Building wheel for box2d-py (setup.py) ... [?25l[?25hdone
  Created wheel for box2d-py: filename=box2d_py-2.3.5-cp311-cp311-linux_x86_64.whl size=2379442 sha256=317979f6e8128925dc64b86f3ab36bf9b6a2d17d66437032c82a08913a4f48d5
  Stored in directory: /root/.cache/pip/w

In [12]:
import torch
import gymnasium as gym
import numpy as np
import cv2
import os
from agents.cnn_dqn import CNN_DQN_Agent
from utils.env_wrapper import Env
from google.colab.patches import cv2_imshow
from google.colab import files

# Run the trained agent greedily and record the rollout as a grayscale video.
def evaluate_agent(checkpoint_file, episodes=1):
    """Evaluate a trained CNN-DQN agent on CarRacing-v3 and record an MP4.

    The agent acts without exploration for ``episodes`` episodes. After every
    environment step the newest frame of the stacked observation is appended
    to ``demo/<checkpoint name>_car_demo.mp4``. When all episodes are done the
    video is offered for download through ``google.colab.files``.

    Args:
        checkpoint_file: Checkpoint name handed to ``agent.load_checkpoint``
            (presumably resolved relative to the agent's checkpoint
            directory - confirm in ``CNN_DQN_Agent``).
        episodes: Number of evaluation episodes written into the same video.

    Raises:
        RuntimeError: If OpenCV cannot open the output video for writing.
    """
    # splitext drops the real extension instead of blindly cutting 4 characters.
    filename = os.path.splitext(checkpoint_file)[0]
    output_video = f"demo/{filename}_car_demo.mp4"
    # cv2.VideoWriter neither creates missing directories nor raises when it
    # cannot open the target, so make sure the directory exists up front.
    os.makedirs(os.path.dirname(output_video), exist_ok=True)

    # Environment hyperparameters
    env_hyperparameters = {
        "random_seed": 3,
        "img_stack": 4,  # Number of frames per state
        "action_repeat": 8
    }

    # Agent hyperparameters (same as training settings; batch size is
    # irrelevant here because no optimisation step is performed in this loop)
    hyperparameters = {
        "batch_size": 1,
        "gamma": 0.99,
        "epsilon_start": 0.9,
        "epsilon_end": 0.05,
        "tau": 0.005,
        "epsilon_decay_steps": 5000,
        "learning_rate": 1e-4,
        "replay_buffer_size": 10000,
    }

    env = Env("CarRacing-v3", **env_hyperparameters)
    video_writer = None
    try:
        agent = CNN_DQN_Agent(
            input_shape=env.env.observation_space.shape,
            DISCRETE_ACTIONS=env.DISCRETE_ACTIONS,
            run_name="evaluation",
            img_stack=env_hyperparameters["img_stack"],
            **hyperparameters
        )
        agent.load_checkpoint(checkpoint_file)

        frame_size = (96, 96)  # Environment frame size (width, height)
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        video_writer = cv2.VideoWriter(output_video, fourcc, 30, frame_size)
        if not video_writer.isOpened():
            raise RuntimeError(f"Could not open video writer for {output_video}")

        for episode in range(episodes):
            state, info = env.reset()
            done = False
            total_reward = 0

            while not done:
                # Greedy action selection (exploration disabled)
                action = agent.select_action(state, explore=False)

                state, reward, terminated, truncated, info = env.step(
                    agent.get_action_from_action_index(action.item()).cpu().numpy()
                )
                state = state.unsqueeze(0)

                done = terminated or truncated
                total_reward += reward

                # Newest frame of the stacked observation. detach()/cpu() keep
                # this working when the tensor lives on a GPU or tracks grads.
                frame = state.squeeze().detach().cpu().numpy()[-1]

                # Rescale to 8-bit. Assumes pixel values are in [0, 1]
                # (TODO confirm against Env's preprocessing); clipping stops
                # out-of-range values from wrapping around in the uint8 cast.
                frame = np.clip(frame * 255, 0, 255).astype(np.uint8)

                # The writer was opened in colour mode and expects 3-channel BGR.
                if frame.ndim == 2:
                    frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR)
                else:
                    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

                video_writer.write(frame)

            print(f"Episode {episode + 1}: Total Reward: {total_reward}")
    finally:
        # Always finalise the file and free the environment, even on errors.
        if video_writer is not None:
            video_writer.release()
        env.env.close()

    print(f"Video saved as {output_video}")
    files.download(output_video)

# Record one evaluation episode for the selected checkpoint.
evaluate_agent(
    checkpoint_file="CNN_DQN_20250212080216_episode_1000.pth",
    episodes=1,
)


Checkpoint loaded from checkpoints/cnn_dqn/CNN_DQN_20250212080610_episode_500.pth on cpu
Episode 1: Total Reward: -17.918855218855057
Episode 2: Total Reward: -18.033333333333278
Episode 3: Total Reward: -17.976433121019056
Video saved as demo/CNN_DQN_20250212080610_episode_500_car_demo.mp4


# Colored (false-color) video recording

In [16]:
import torch
import gymnasium as gym
import numpy as np
import cv2
import os
from agents.cnn_dqn import CNN_DQN_Agent
from utils.env_wrapper import Env
from google.colab import files


# Run the trained agent greedily and record the rollout as a false-colour video.
def evaluate_agent(checkpoint_file, episodes=1):
    """Evaluate a trained CNN-DQN agent on CarRacing-v3 and record a coloured MP4.

    The agent acts without exploration for ``episodes`` episodes. After every
    environment step the newest frame of the stacked observation is mapped
    through OpenCV's DEEPGREEN colormap and appended to
    ``demo/<checkpoint name>_car_demo_colored.mp4``. When all episodes are
    done the video is offered for download through ``google.colab.files``.

    Args:
        checkpoint_file: Checkpoint name handed to ``agent.load_checkpoint``
            (presumably resolved relative to the agent's checkpoint
            directory - confirm in ``CNN_DQN_Agent``).
        episodes: Number of evaluation episodes written into the same video.

    Raises:
        RuntimeError: If OpenCV cannot open the output video for writing.
    """
    # splitext drops the real extension instead of blindly cutting 4 characters.
    filename = os.path.splitext(checkpoint_file)[0]
    output_video = f"demo/{filename}_car_demo_colored.mp4"
    # cv2.VideoWriter neither creates missing directories nor raises when it
    # cannot open the target, so make sure the directory exists up front.
    os.makedirs(os.path.dirname(output_video), exist_ok=True)

    # Environment hyperparameters
    env_hyperparameters = {
        "random_seed": 3,
        "img_stack": 4,  # Number of frames per state
        "action_repeat": 8
    }

    # Agent hyperparameters (same as training settings)
    hyperparameters = {
        "batch_size": 128,
        "gamma": 0.99,
        "epsilon_start": 0.9,
        "epsilon_end": 0.05,
        "tau": 0.005,
        "epsilon_decay_steps": 5000,
        "learning_rate": 1e-4,
        "replay_buffer_size": 10000,
    }

    # NOTE(review): render_mode is forwarded to Env, but the video below is
    # built from the observation stack, not from env.render() output.
    env = Env("CarRacing-v3", **env_hyperparameters, render_mode="rgb_array")
    video_writer = None
    try:
        agent = CNN_DQN_Agent(
            input_shape=env.env.observation_space.shape,
            DISCRETE_ACTIONS=env.DISCRETE_ACTIONS,
            run_name="evaluation",
            img_stack=env_hyperparameters["img_stack"],
            **hyperparameters
        )
        agent.load_checkpoint(checkpoint_file)

        frame_size = (96, 96)  # Environment frame size (width, height)
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        video_writer = cv2.VideoWriter(output_video, fourcc, 30, frame_size)
        if not video_writer.isOpened():
            raise RuntimeError(f"Could not open video writer for {output_video}")

        for episode in range(episodes):
            state, info = env.reset()
            done = False
            total_reward = 0

            while not done:
                # Greedy action selection (exploration disabled)
                action = agent.select_action(state, explore=False)

                state, reward, terminated, truncated, info = env.step(
                    agent.get_action_from_action_index(action.item()).cpu().numpy()
                )
                state = state.unsqueeze(0)

                done = terminated or truncated
                total_reward += reward

                # Newest frame of the stacked observation. detach()/cpu() keep
                # this working when the tensor lives on a GPU or tracks grads.
                frame = state.squeeze().detach().cpu().numpy()[-1]

                # Rescale to 8-bit. Assumes pixel values are in [0, 1]
                # (TODO confirm against Env's preprocessing); clipping stops
                # out-of-range values from wrapping around in the uint8 cast.
                frame = np.clip(frame * 255, 0, 255).astype(np.uint8)

                if frame.ndim == 2:
                    # applyColorMap already yields BGR, which is what
                    # VideoWriter expects. Running it through RGB2BGR (as the
                    # previous version did) swapped red and blue.
                    frame = cv2.applyColorMap(frame, cv2.COLORMAP_DEEPGREEN)
                else:
                    # A 3-channel frame is taken to be RGB and converted.
                    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

                video_writer.write(frame)

            print(f"Episode {episode + 1}: Total Reward: {total_reward}")
    finally:
        # Always finalise the file and free the environment, even on errors.
        if video_writer is not None:
            video_writer.release()
        env.env.close()

    print(f"Colored video saved as {output_video}")
    files.download(output_video)  # Download the colored video

# Record one evaluation episode for the selected checkpoint as a colored video.
evaluate_agent(
    checkpoint_file="CNN_DQN_20250212080216_episode_1000.pth",
    episodes=1,
)


Checkpoint loaded from checkpoints/cnn_dqn/CNN_DQN_20250212080610_episode_500.pth on cpu
Episode 1: Total Reward: -17.908108108108053
Colored video saved as demo/CNN_DQN_20250212080610_episode_500_car_demo_colored.mp4


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Save the video only when the episode reward reaches the threshold

In [None]:
import torch
import gymnasium as gym
import numpy as np
import cv2
import os
from agents.cnn_dqn import CNN_DQN_Agent
from utils.env_wrapper import Env

# Run the trained agent and keep a false-colour video only for an episode whose
# total reward reaches min_reward_threshold.
def evaluate_agent(checkpoint_file, min_reward_threshold=50, max_attempts=None):
    """Re-run evaluation episodes until one reaches the reward threshold.

    Every episode is recorded to a temporary file. The first episode whose
    total reward is at least ``min_reward_threshold`` is kept as
    ``demo/<checkpoint name>_car_demo_colored.mp4``; recordings of all other
    episodes are deleted.

    Args:
        checkpoint_file: Checkpoint name handed to ``agent.load_checkpoint``
            (presumably resolved relative to the agent's checkpoint
            directory - confirm in ``CNN_DQN_Agent``).
        min_reward_threshold: Minimum total episode reward required for the
            video to be kept.
        max_attempts: Optional upper bound on the number of episodes to try.
            ``None`` (the default) retries indefinitely, which never returns
            if the greedy policy cannot reach the threshold.

    Raises:
        RuntimeError: If OpenCV cannot open the temporary video for writing.
    """
    # splitext drops the real extension instead of blindly cutting 4 characters.
    filename = os.path.splitext(checkpoint_file)[0]
    output_video = f"demo/{filename}_car_demo_colored.mp4"
    temp_video = f"demo/{filename}_temp.mp4"
    # cv2.VideoWriter neither creates missing directories nor raises when it
    # cannot open the target, so make sure the directory exists up front.
    os.makedirs(os.path.dirname(output_video), exist_ok=True)

    # Environment hyperparameters
    env_hyperparameters = {
        "random_seed": 3,
        "img_stack": 4,  # Number of frames per state
        "action_repeat": 8
    }

    # Agent hyperparameters (same as training settings)
    hyperparameters = {
        "batch_size": 128,
        "gamma": 0.99,
        "epsilon_start": 0.9,
        "epsilon_end": 0.05,
        "tau": 0.005,
        "epsilon_decay_steps": 5000,
        "learning_rate": 1e-4,
        "replay_buffer_size": 10000,
    }

    # NOTE(review): render_mode is forwarded to Env, but the video below is
    # built from the observation stack, not from env.render() output.
    env = Env("CarRacing-v3", **env_hyperparameters, render_mode="rgb_array")
    try:
        agent = CNN_DQN_Agent(
            input_shape=env.env.observation_space.shape,
            DISCRETE_ACTIONS=env.DISCRETE_ACTIONS,
            run_name="evaluation",
            img_stack=env_hyperparameters["img_stack"],
            **hyperparameters
        )
        agent.load_checkpoint(checkpoint_file)

        frame_size = (96, 96)  # Environment frame size (width, height)
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')

        episode_count = 0
        while max_attempts is None or episode_count < max_attempts:
            episode_count += 1
            print(f"Starting Episode {episode_count}...")

            # Record into a temporary file; it is promoted only on success.
            video_writer = cv2.VideoWriter(temp_video, fourcc, 30, frame_size)
            if not video_writer.isOpened():
                raise RuntimeError(f"Could not open video writer for {temp_video}")

            try:
                state, info = env.reset()
                done = False
                total_reward = 0

                while not done:
                    # Greedy action selection (exploration disabled)
                    action = agent.select_action(state, explore=False)

                    state, reward, terminated, truncated, info = env.step(
                        agent.get_action_from_action_index(action.item()).cpu().numpy()
                    )
                    state = state.unsqueeze(0)

                    done = terminated or truncated
                    total_reward += reward

                    # Newest frame of the stacked observation. detach()/cpu()
                    # keep this working for GPU or grad-tracking tensors.
                    frame = state.squeeze().detach().cpu().numpy()[-1]

                    # Rescale to 8-bit. Assumes pixel values are in [0, 1]
                    # (TODO confirm against Env's preprocessing); clipping
                    # stops out-of-range values from wrapping around.
                    frame = np.clip(frame * 255, 0, 255).astype(np.uint8)

                    if frame.ndim == 2:
                        # applyColorMap already yields BGR, which is what
                        # VideoWriter expects. Running it through RGB2BGR (as
                        # the previous version did) swapped red and blue.
                        frame = cv2.applyColorMap(frame, cv2.COLORMAP_JET)
                    else:
                        # A 3-channel frame is taken to be RGB and converted.
                        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

                    video_writer.write(frame)
            finally:
                # Finalise the temp file even if the episode raised.
                video_writer.release()

            print(f"Episode {episode_count}: Total Reward = {total_reward}")

            if total_reward >= min_reward_threshold:
                # os.replace overwrites an existing video on every platform.
                os.replace(temp_video, output_video)
                print(f"✔ Success! Video saved as {output_video}")
                break

            print(f"❌ Episode did not reach reward threshold. Retrying...\n")
            os.remove(temp_video)  # Delete the unsuccessful video
        else:
            # Only reachable when max_attempts is set and was exhausted.
            print(
                f"No episode reached reward {min_reward_threshold} "
                f"within {max_attempts} attempts; no video saved."
            )
    finally:
        # Remove a temp recording left behind by an interrupted episode and
        # always free the environment.
        if os.path.exists(temp_video):
            os.remove(temp_video)
        env.env.close()

# Run evaluation with the reward threshold set to 80.
evaluate_agent(
    checkpoint_file="CNN_DQN_20250212080216_episode_1000.pth",
    min_reward_threshold=80,
)
