In [3]:
# ============================================================
# INSTALLATION CELL - RUN THIS FIRST
# ============================================================

!pip install -q gymnasium[atari,accept-rom-license]
!pip install -q ale-py
!pip install -q stable-baselines3[extra]
!apt-get install -y ffmpeg > /dev/null 2>&1
!AutoROM --accept-license

print("‚úÖ Installation complete!")
print("‚ö†Ô∏è  Now click: Runtime ‚Üí Restart session")

[0mAutoROM will download the Atari 2600 ROMs.
They will be installed to:
	/usr/local/lib/python3.12/dist-packages/AutoROM/roms

Existing ROMs will be overwritten.
‚úÖ Installation complete!
‚ö†Ô∏è  Now click: Runtime ‚Üí Restart session


In [1]:
# ============================================================
# INSTALLATION CELL - RUN THIS FIRST
# ============================================================

!pip install -q gymnasium[atari,accept-rom-license]
!pip install -q ale-py
!pip install -q stable-baselines3[extra]
!apt-get install -y ffmpeg > /dev/null 2>&1
!AutoROM --accept-license

print("‚úÖ Installation complete!")
print("‚ö†Ô∏è  Now click: Runtime ‚Üí Restart session")

[0mAutoROM will download the Atari 2600 ROMs.
They will be installed to:
	/usr/local/lib/python3.12/dist-packages/AutoROM/roms

Existing ROMs will be overwritten.
‚úÖ Installation complete!
‚ö†Ô∏è  Now click: Runtime ‚Üí Restart session


In [2]:
# ============================================================
# TRAINING CODE - RUN AFTER RESTART (FIXED)
# ============================================================

import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecVideoRecorder
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.atari_wrappers import AtariWrapper
import ale_py

# ----------------------------
# CREATE ENVIRONMENT WITH PROPER WRAPPERS
# ----------------------------
def make_env():
    env = gym.make(
        "ALE/Freeway-v5",
        render_mode="rgb_array"
    )
    # AtariWrapper handles frame stacking, grayscale, and proper shape
    env = AtariWrapper(env)
    return env

# Test environment
print("üîç Testing environment...")
test_env = make_env()
print(f"‚úÖ Environment works!")
print(f"Observation shape: {test_env.observation_space.shape}")
test_env.close()

# Create vectorized environment
env = DummyVecEnv([make_env])

print("\nüìä Environment Info:")
print("Observation space:", env.observation_space)
print("Action space:", env.action_space)

# ----------------------------
# CREATE PPO MODEL
# ----------------------------
print("\nü§ñ Creating PPO model...")
model = PPO(
    "CnnPolicy",
    env,
    learning_rate=2.5e-4,
    n_steps=2048,
    batch_size=64,
    gamma=0.99,
    verbose=1
)

# ----------------------------
# TRAIN
# ----------------------------
print("\nüöÄ Training started... (this takes ~30-60 minutes)")
model.learn(total_timesteps=500_000)
model.save("ppo_freeway_agent")
print("\n‚úÖ Training complete!")

# ----------------------------
# EVALUATE
# ----------------------------
print("\nüìä Evaluating...")
mean_reward, std_reward = evaluate_policy(
    model, env, n_eval_episodes=10, deterministic=True
)
print(f"üéØ Mean Reward: {mean_reward:.2f} ¬± {std_reward:.2f}")

# ----------------------------
# RECORD VIDEO
# ----------------------------
print("\nüé• Recording video...")
video_env = DummyVecEnv([make_env])
video_env = VecVideoRecorder(
    video_env,
    "./videos/",
    record_video_trigger=lambda step: step == 0,
    video_length=2000,
    name_prefix="ppo-freeway"
)

obs = video_env.reset()
for _ in range(2000):
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, done, info = video_env.step(action)

video_env.close()
print("‚úÖ Video saved in ./videos/")

# ----------------------------
# DISPLAY VIDEO
# ----------------------------
import os
from IPython.display import HTML
from base64 import b64encode

video_files = [f for f in os.listdir("./videos/") if f.endswith(".mp4")]
if video_files:
    video_path = os.path.join("./videos/", video_files[0])
    with open(video_path, "rb") as f:
        video_data = f.read()
    data_url = "data:video/mp4;base64," + b64encode(video_data).decode()
    display(HTML(f'<video width="400" controls><source src="{data_url}" type="video/mp4"></video>'))
    print("üé¨ Video displayed above!")
else:
    print("‚ö†Ô∏è No video found")

üîç Testing environment...
‚úÖ Environment works!
Observation shape: (84, 84, 1)

üìä Environment Info:
Observation space: Box(0, 255, (84, 84, 1), uint8)
Action space: Discrete(3)

ü§ñ Creating PPO model...
Using cuda device
Wrapping the env in a VecTransposeImage.

üöÄ Training started... (this takes ~30-60 minutes)


  return datetime.utcnow().replace(tzinfo=utc)


-----------------------------
| time/              |      |
|    fps             | 144  |
|    iterations      | 1    |
|    time_elapsed    | 14   |
|    total_timesteps | 2048 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 139         |
|    iterations           | 2           |
|    time_elapsed         | 29          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.010133972 |
|    clip_fraction        | 0.0672      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.09       |
|    explained_variance   | -0.0373     |
|    learning_rate        | 0.00025     |
|    loss                 | -1.23e-05   |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.00278    |
|    value_loss           | 0.0313      |
-----------------------------------------
----------------------------------



üéØ Mean Reward: 19.10 ¬± 6.20

üé• Recording video...
Saving video to /content/videos/ppo-freeway-step-0-to-step-2000.mp4


  IMAGEMAGICK_BINARY = r"C:\Program Files\ImageMagick-6.8.8-Q16\magick.exe"


Moviepy - Building video /content/videos/ppo-freeway-step-0-to-step-2000.mp4.
Moviepy - Writing video /content/videos/ppo-freeway-step-0-to-step-2000.mp4



  return datetime.utcnow().replace(tzinfo=utc)


Moviepy - Done !
Moviepy - video ready /content/videos/ppo-freeway-step-0-to-step-2000.mp4
‚úÖ Video saved in ./videos/


üé¨ Video displayed above!
