In [1]:
# Mount Google Drive so the project folder is reachable from the Colab runtime.
from google.colab import drive
drive.mount('/content/gdrive')
# Switch the working directory to the project folder on the mounted drive, so
# that relative paths and the local `agents` package resolve from there.
%cd /content/gdrive/My Drive/EVA_MiLab_Hackathon

# Automatically reload imported modules before running code, so edits to the
# project's .py files are picked up without restarting the runtime.
%load_ext autoreload
%autoreload 2

Mounted at /content/gdrive
/content/gdrive/My Drive/EVA_MiLab_Hackathon


In [2]:
# swig is installed before gymnasium[box2d]: the box2d-py dependency is fetched
# as a source tarball (see the output below) — presumably swig is required to
# build it; confirm before reordering or merging these commands.
!pip install swig
# gymnasium is pinned to 1.0.0 with the box2d extra.
!pip install "gymnasium[box2d]==1.0.0"

Collecting swig
  Downloading swig-4.3.0-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl.metadata (3.5 kB)
Downloading swig-4.3.0-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl (1.9 MB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.9 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.3/1.9 MB[0m [31m9.4 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.9/1.9 MB[0m [31m32.6 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m24.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: swig
Successfully installed swig-4.3.0
Collecting box2d-py==2.3.5 (from gymnasium[box2d]==1.0.0)
  Downloading box2d-py-2.3.5.tar.gz (374 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m374.4/374.4 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[?25h  

In [6]:
import gymnasium as gym
import torch
import numpy as np
import time
import cv2
import os
import base64
from IPython.display import HTML
from agents.cnn_ppo import CNN_PPO_Agent
from agents.cnn_dqn import CNN_DQN_Agent

from google.colab import files

def play_video(video_path):
    """Build an inline HTML5 video player for a recorded MP4 file.

    The whole file is read into memory and embedded in the markup as a
    base64 ``data:`` URL, so the resulting player does not depend on the
    file remaining on disk.

    Args:
        video_path: Path to the MP4 file to embed.

    Returns:
        An ``IPython.display.HTML`` object wrapping a 600x400 ``<video>`` tag.

    Raises:
        OSError: If ``video_path`` cannot be opened or read.
    """
    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(video_path, "rb") as video_file:
        mp4 = video_file.read()
    data_url = "data:video/mp4;base64," + base64.b64encode(mp4).decode()
    return HTML(f'<video width="600" height="400" controls><source src="{data_url}" type="video/mp4"></video>')

def _obs_to_tensor(observation, device):
    """Convert an HWC observation into a float32 tensor of shape (1, C, H, W) on `device`."""
    tensor = torch.tensor(observation, dtype=torch.float32, device=device)
    return tensor.permute(2, 0, 1).unsqueeze(0)


def demo_agent_colab(checkpoint_file, agent_type="ppo", num_episodes=1, max_steps=1000):
    """Run a trained PPO or DQN agent on CarRacing-v3 and record the gameplay.

    Every rendered frame of every episode is resized to 600x400 and appended
    to a single MP4 file under ``demo/``. When all episodes are done, the
    video is offered for download through Colab and returned as an inline
    player.

    Args:
        checkpoint_file: Checkpoint name passed to the agent's
            ``load_checkpoint``. Its base name without the extension is also
            used to name the output video.
        agent_type: ``"ppo"`` or ``"dqn"`` (case-insensitive).
        num_episodes: Number of episodes to record into the video.
        max_steps: Upper bound on the number of steps per episode.

    Returns:
        The HTML video player produced by ``play_video``, or ``None`` when
        ``agent_type`` is not recognised.

    Raises:
        RuntimeError: If the video file cannot be opened for writing.
    """
    # Validate the agent type before any resource is created, so an invalid
    # value cannot leave an environment open.
    kind = agent_type.lower()
    if kind not in ("ppo", "dqn"):
        print("Invalid agent type! Choose 'ppo' or 'dqn'.")
        return None

    # Strip the extension properly (it is not necessarily four characters) and
    # drop any directory part so the video always lands directly in demo/.
    stem = os.path.splitext(os.path.basename(checkpoint_file))[0]
    os.makedirs("demo", exist_ok=True)  # the writer cannot create missing folders
    video_path = f"demo/{stem}_car_demo.mp4"

    # Initialize the environment in "rgb_array" mode for video recording
    env = gym.make("CarRacing-v3", render_mode="rgb_array", lap_complete_percent=0.95, domain_randomize=False, continuous=True)
    out = None

    try:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Load the correct agent
        if kind == "ppo":
            print("Loading PPO Agent...")
            agent = CNN_PPO_Agent(input_shape=(3, 96, 96), run_name="Demo")
            agent.load_checkpoint(checkpoint_file)
        else:
            print("Loading DQN Agent...")
            hyperparameters = {  # Dummy values for DQN
                "batch_size": 64, "gamma": 0.95, "epsilon_start": 1.0,
                "epsilon_end": 0.1, "tau": 0.005, "epsilon_decay_steps": 200,
                "learning_rate": 0.001, "replay_buffer_size": 64, "steps_per_target_net_update": 512
            }
            agent = CNN_DQN_Agent(input_shape=(3, 96, 96), action_space=env.action_space, run_name="Demo", **hyperparameters)
            agent.load_checkpoint(checkpoint_file)

        # Video Recording Setup
        print("Recording the episode...")
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")  # Codec for MP4
        out = cv2.VideoWriter(video_path, fourcc, 30, (600, 400))  # FPS = 30, Res = 600x400
        if not out.isOpened():
            # Fail loudly here rather than later on a missing or empty file.
            raise RuntimeError(f"Could not open video writer for {video_path}")

        # Pure inference: no gradients are needed while replaying the policy.
        with torch.no_grad():
            for episode in range(num_episodes):
                observation, _ = env.reset()
                state = _obs_to_tensor(observation, device)

                total_reward = 0
                done = False
                t = 0

                print(f"Starting Episode {episode}...")

                while not done:
                    # Capture frame
                    frame = env.render()
                    frame = cv2.resize(frame, (600, 400))  # Resize for video
                    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)  # Convert to OpenCV format
                    out.write(frame)  # Save frame to video

                    if kind == "ppo":
                        action, _, _ = agent.select_action(state)
                    else:
                        action = agent.select_action(state, explore=False)

                    action = np.array(action, dtype=np.float32)
                    next_state, reward, terminated, truncated, _ = env.step(action)
                    # The episode is over on either signal; ignoring
                    # `truncated` would step an environment that has ended.
                    done = terminated or truncated
                    total_reward += reward

                    # Convert next state to tensor
                    state = _obs_to_tensor(next_state, device)

                    t += 1

                    if t >= max_steps:  # Prevent infinite loops
                        print(f"Episode {episode} reached max steps ({max_steps}), terminating.")
                        done = True

                print(f"Episode {episode} finished with Total Reward: {total_reward:.2f}")
    finally:
        # Release resources even if loading or stepping raised.
        env.close()
        if out is not None:
            out.release()  # Finalize and save the video

    print(f"Video saved as {video_path}. Playing now...")
    files.download(video_path)  # Download the colored video

    return play_video(video_path)


In [7]:
# Record a single episode (the default) with the PPO checkpoint; the call is the
# last expression in the cell, so its return value becomes the cell's result.
demo_agent_colab("CNN_PPO_20250211213413_episode_3250.pth", agent_type="ppo")

Loading PPO Agent...
After conv1 + pool1: torch.Size([1, 16, 23, 23])
After conv2 + pool2: torch.Size([1, 32, 10, 10])
After conv3 + conv4 + pool3: torch.Size([1, 128, 5, 5])
After conv5: torch.Size([1, 128, 4, 4])
After conv6: torch.Size([1, 256, 3, 3])
Flattened output shape: torch.Size([1, 2304])
Conv out size 2304
Checkpoint loaded from checkpoints/cnn_ppo/CNN_PPO_20250211213413_episode_3250.pth on cpu
Recording the episode...
Starting Episode 0...
Episode 0 reached max steps (1000), terminating.
Episode 0 finished with Total Reward: -71.34
Video saved as demo/CNN_PPO_20250211213413_episode_3250_car_demo.mp4. Playing now...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>