In [1]:
# Mount Google Drive so the project folder stored there is reachable from Colab.
from google.colab import drive
drive.mount('/content/gdrive')
# Work from the project folder: later cells use relative paths ("checkpoints/", "demo/")
# and import the local `agents` package.
%cd /content/gdrive/My Drive/EVA_MiLab_Hackathon

# Enable autoreload (mode 2) so edits to imported modules are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

Mounted at /content/gdrive
/content/gdrive/My Drive/EVA_MiLab_Hackathon


In [2]:
# swig is installed before gymnasium[box2d]; box2d-py is built from source here
# (see the "Building wheel for box2d-py" lines in the output) — presumably that build needs swig.
# NOTE(review): swig is unpinned (4.3.0 was resolved in this run); gymnasium is pinned to 1.0.0.
!pip install swig
!pip install "gymnasium[box2d]==1.0.0"

Collecting swig
  Downloading swig-4.3.0-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl.metadata (3.5 kB)
Downloading swig-4.3.0-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl (1.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m17.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: swig
Successfully installed swig-4.3.0
Collecting box2d-py==2.3.5 (from gymnasium[box2d]==1.0.0)
  Downloading box2d-py-2.3.5.tar.gz (374 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m374.4/374.4 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: box2d-py
  Building wheel for box2d-py (setup.py) ... [?25l[?25hdone
  Created wheel for box2d-py: filename=box2d_py-2.3.5-cp311-cp311-linux_x86_64.whl size=2379450 sha256=aa8be6d7a0721ca441c1d574d7479b7c4e6389d16e7849f1a020beca0d23e127
  Stored in directory: /root/.cache/pip/w

In [3]:
def load_checkpoint(self, checkpoint_file):
    """Load network weights from ``checkpoints/<checkpoint_file>`` into ``self.net``.

    Args:
        checkpoint_file: File name of the checkpoint, relative to the
            ``checkpoints/`` directory under the current working directory.

    Returns:
        bool: ``True`` if the weights were loaded, ``False`` if the file does
        not exist. The original version always returned ``None``, so callers
        had no way to notice that the agent was left with untrained weights.
    """
    checkpoint_path = f"checkpoints/{checkpoint_file}"

    if not os.path.exists(checkpoint_path):
        print(f"Checkpoint file not found: {checkpoint_path}")
        return False

    # NOTE(review): torch.load unpickles the file; only load checkpoints from a
    # trusted source (consider weights_only=True if the installed torch supports it).
    state_dict = torch.load(checkpoint_path, map_location=self.device)
    self.net.load_state_dict(state_dict)
    print(f"Checkpoint loaded from {checkpoint_path}")
    return True


In [13]:
import gymnasium as gym
import torch
import numpy as np
import time
from agents.cnn_ppo import CNN_PPO_Agent
from agents.cnn_dqn import CNN_DQN_Agent

def demo_agent(checkpoint_file, agent_type="ppo", num_episodes=3, render_delay=0.03, max_steps=1000):
    """Load a trained PPO or DQN agent and play CarRacing-v3 with live rendering.

    Args:
        checkpoint_file: Checkpoint name passed to the agent's ``load_checkpoint``.
        agent_type: ``"ppo"`` or ``"dqn"`` (case-insensitive).
        num_episodes: Number of episodes to play.
        render_delay: Seconds to sleep before every step so the run is watchable.
        max_steps: Per-episode step cap (defaults to the previously hard-coded 1000).

    Returns:
        None. Progress and each episode's total reward are printed.
    """
    # Validate first so an invalid type never creates (and leaks) an environment.
    kind = agent_type.lower()
    if kind not in ("ppo", "dqn"):
        print("Invalid agent type! Choose 'ppo' or 'dqn'.")
        return

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def to_model_input(frame):
        # HWC observation -> float32 tensor of shape (1, C, H, W) on `device`.
        tensor = torch.tensor(frame, dtype=torch.float32, device=device)
        return tensor.permute(2, 0, 1).unsqueeze(0)

    # 'human' mode renders live; the environment redraws on reset()/step() itself,
    # so no explicit env.render() call is needed in the loop.
    env = gym.make("CarRacing-v3", render_mode="human", lap_complete_percent=0.95, domain_randomize=False, continuous=True)

    try:
        # Load the correct agent type
        if kind == "ppo":
            print("Loading PPO Agent...")
            agent = CNN_PPO_Agent(input_shape=(3, 96, 96), run_name="Demo")
        else:
            print("Loading DQN Agent...")
            hyperparameters = {  # Dummy values for DQN (only needed to construct the agent)
                "batch_size": 64, "gamma": 0.95, "epsilon_start": 1.0,
                "epsilon_end": 0.1, "tau": 0.005, "epsilon_decay_steps": 200,
                "learning_rate": 0.001, "replay_buffer_size": 64, "steps_per_target_net_update": 512
            }
            agent = CNN_DQN_Agent(input_shape=(3, 96, 96), action_space=env.action_space, run_name="Demo", **hyperparameters)
        agent.load_checkpoint(checkpoint_file)

        # Run the demo
        for episode in range(num_episodes):
            observation, _info = env.reset()
            state = to_model_input(observation)

            total_reward = 0
            done = False
            t = 0

            print(f"Starting Episode {episode}...")

            while not done:
                time.sleep(render_delay)  # Slow down for visibility

                if kind == "ppo":
                    action, _, _ = agent.select_action(state)
                else:
                    action = agent.select_action(state, explore=False)

                action = np.array(action, dtype=np.float32)
                next_state, reward, terminated, truncated, _ = env.step(action)
                total_reward += reward
                # An episode ends on termination OR truncation; the original
                # discarded the truncation flag.
                done = terminated or truncated

                state = to_model_input(next_state)

                t += 1

                if t >= max_steps:  # Prevent infinite loops
                    print(f"Episode {episode} reached max steps ({max_steps}), terminating.")
                    done = True

            print(f"Episode {episode} finished with Total Reward: {total_reward:.2f}")
    finally:
        # Always release the window/environment, even if loading or stepping fails.
        env.close()


In [None]:
# Live-rendered demo of a DQN checkpoint (demo_agent uses render_mode="human").
# NOTE(review): presumably needs a local display; in Colab use demo_agent_colab instead — confirm.
demo_agent("CNN_DQN_20250210203205_episode_1950.pth", agent_type="dqn")

In [None]:
# Live-rendered demo of a PPO checkpoint (demo_agent uses render_mode="human").
# NOTE(review): presumably needs a local display; in Colab use demo_agent_colab instead — confirm.
demo_agent("CNN_PPO_20250211100336_episode_0.pth", agent_type="ppo")

In [13]:
import gymnasium as gym
import torch
import numpy as np
import time
import cv2
import os
import base64
from IPython.display import HTML
from agents.cnn_ppo import CNN_PPO_Agent
from agents.cnn_dqn import CNN_DQN_Agent

def play_video(video_path):
    """Return an inline HTML5 player for a recorded MP4 so it can be shown in Colab.

    The whole file is read into memory and embedded as a base64 ``data:`` URL
    inside a ``<video>`` element.

    Args:
        video_path: Path to the MP4 file to embed.

    Returns:
        An ``IPython.display.HTML`` object; display it (or leave it as the last
        expression of a cell) to show the player.

    Raises:
        OSError: If ``video_path`` cannot be opened.
    """
    # Use a context manager so the file handle is closed deterministically
    # (the original `open(...).read()` left it to the garbage collector).
    with open(video_path, "rb") as video_file:
        encoded = base64.b64encode(video_file.read()).decode()
    data_url = "data:video/mp4;base64," + encoded
    return HTML(f'<video width="600" height="400" controls><source src="{data_url}" type="video/mp4"></video>')

def demo_agent_colab(checkpoint_file, agent_type="ppo", num_episodes=1, max_steps=1000):
    """Run a trained PPO or DQN agent, record the gameplay to MP4, and return a player.

    Frames come from the environment in ``rgb_array`` mode, are resized to
    600x400, and written at 30 FPS to ``demo/<checkpoint stem>_car_demo.mp4``.

    Args:
        checkpoint_file: Checkpoint name passed to the agent's ``load_checkpoint``;
            its stem (name without extension) also names the video file.
        agent_type: ``"ppo"`` or ``"dqn"`` (case-insensitive).
        num_episodes: Number of episodes recorded into the single video.
        max_steps: Per-episode step cap (defaults to the previously hard-coded 1000).

    Returns:
        The HTML video player produced by ``play_video``, or ``None`` when
        ``agent_type`` is invalid.

    Raises:
        RuntimeError: If the video writer cannot be opened for the output path.
    """
    # Validate first so an invalid type never creates (and leaks) an environment.
    kind = agent_type.lower()
    if kind not in ("ppo", "dqn"):
        print("Invalid agent type! Choose 'ppo' or 'dqn'.")
        return

    # splitext handles any extension length (the original sliced off exactly 4 chars).
    filename = os.path.splitext(checkpoint_file)[0]
    video_path = f"demo/{filename}_car_demo.mp4"
    # The writer does not create missing directories; without this it fails to open.
    os.makedirs(os.path.dirname(video_path), exist_ok=True)

    fps = 30
    frame_size = (600, 400)  # (width, height) of the recorded video

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def to_model_input(frame):
        # HWC observation -> float32 tensor of shape (1, C, H, W) on `device`.
        tensor = torch.tensor(frame, dtype=torch.float32, device=device)
        return tensor.permute(2, 0, 1).unsqueeze(0)

    # Initialize the environment in "rgb_array" mode for video recording
    env = gym.make("CarRacing-v3", render_mode="rgb_array", lap_complete_percent=0.95, domain_randomize=False, continuous=True)
    writer = None

    try:
        # Load the correct agent
        if kind == "ppo":
            print("Loading PPO Agent...")
            agent = CNN_PPO_Agent(input_shape=(3, 96, 96), run_name="Demo")
        else:
            print("Loading DQN Agent...")
            hyperparameters = {  # Dummy values for DQN (only needed to construct the agent)
                "batch_size": 64, "gamma": 0.95, "epsilon_start": 1.0,
                "epsilon_end": 0.1, "tau": 0.005, "epsilon_decay_steps": 200,
                "learning_rate": 0.001, "replay_buffer_size": 64, "steps_per_target_net_update": 512
            }
            agent = CNN_DQN_Agent(input_shape=(3, 96, 96), action_space=env.action_space, run_name="Demo", **hyperparameters)
        agent.load_checkpoint(checkpoint_file)

        # Video Recording Setup
        print("Recording the episode...")
        # NOTE(review): mp4v-encoded files may not play in every browser's <video>
        # element; re-encode to H.264 if the player shows nothing — confirm.
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")  # Codec for MP4
        writer = cv2.VideoWriter(video_path, fourcc, fps, frame_size)
        if not writer.isOpened():
            raise RuntimeError(f"Could not open video writer for {video_path}")

        for episode in range(num_episodes):
            observation, _info = env.reset()
            state = to_model_input(observation)

            total_reward = 0
            done = False
            t = 0

            print(f"Starting Episode {episode}...")

            while not done:
                # Capture the current frame before acting on it
                frame = env.render()
                frame = cv2.resize(frame, frame_size)  # Resize for video
                frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)  # OpenCV expects BGR
                writer.write(frame)

                if kind == "ppo":
                    action, _, _ = agent.select_action(state)
                else:
                    action = agent.select_action(state, explore=False)

                action = np.array(action, dtype=np.float32)
                next_state, reward, terminated, truncated, _ = env.step(action)
                total_reward += reward
                # An episode ends on termination OR truncation; the original
                # discarded the truncation flag.
                done = terminated or truncated

                state = to_model_input(next_state)

                t += 1

                if t >= max_steps:  # Prevent infinite loops
                    print(f"Episode {episode} reached max steps ({max_steps}), terminating.")
                    done = True

            print(f"Episode {episode} finished with Total Reward: {total_reward:.2f}")
    finally:
        # Always release the environment and finalize the video file, even on errors.
        env.close()
        if writer is not None:
            writer.release()

    print(f"Video saved as {video_path}. Playing now...")
    return play_video(video_path)


In [14]:
# Record a PPO checkpoint to demo/<checkpoint stem>_car_demo.mp4; the returned HTML player
# is the cell's last expression, so it is displayed inline.
demo_agent_colab("CNN_PPO_20250211100336_episode_50.pth", agent_type="ppo")

Loading PPO Agent...
Recording the episode...
Starting Episode 0...
Episode 0 reached max steps (1000), terminating.
Episode 0 finished with Total Reward: -59.71
Video saved as demo/CNN_PPO_20250211100336_episode_50_car_demo.mp4. Playing now...


In [None]:
# Record a DQN checkpoint to demo/<checkpoint stem>_car_demo.mp4; the returned HTML player
# is the cell's last expression, so it is displayed inline.
demo_agent_colab("CNN_DQN_20250211123740_episode_100.pth", agent_type="dqn")