In [3]:
!pip install gymnasium[atari] stable-baselines3[extra] ale-py

Collecting ale-py
  Downloading ale_py-0.10.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.2 kB)
Collecting gymnasium[atari]
  Downloading gymnasium-1.1.1-py3-none-any.whl.metadata (9.4 kB)
Collecting stable-baselines3[extra]
  Downloading stable_baselines3-2.6.0-py3-none-any.whl.metadata (4.8 kB)
Collecting farama-notifications>=0.0.1 (from gymnasium[atari])
  Downloading Farama_Notifications-0.0.4-py3-none-any.whl.metadata (558 bytes)
Collecting torch<3.0,>=2.3 (from stable-baselines3[extra])
  Downloading torch-2.6.0-cp310-cp310-manylinux1_x86_64.whl.metadata (28 kB)
Collecting tensorboard>=2.9.1 (from stable-baselines3[extra])
  Downloading tensorboard-2.19.0-py3-none-any.whl.metadata (1.8 kB)
Collecting absl-py>=0.4 (from tensorboard>=2.9.1->stable-baselines3[extra])
  Downloading absl_py-2.2.2-py3-none-any.whl.metadata (2.6 kB)
Collecting grpcio>=1.48.2 (from tensorboard>=2.9.1->stable-baselines3[extra])
  Downloading grpcio-1.71.0-cp310-cp310-manylinux_

In [4]:
!pip install ale_py



In [5]:
!pip install wandb
import wandb
wandb.login()



True

In [8]:
!pip install MoviePy

Collecting MoviePy
  Downloading moviepy-2.1.2-py3-none-any.whl.metadata (6.9 kB)
Collecting imageio_ffmpeg>=0.2.0 (from MoviePy)
  Downloading imageio_ffmpeg-0.6.0-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting proglog<=1.0.0 (from MoviePy)
  Downloading proglog-0.1.11-py3-none-any.whl.metadata (794 bytes)
Collecting python-dotenv>=0.10 (from MoviePy)
  Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB)
Collecting pillow<11.0,>=9.2.0 (from MoviePy)
  Downloading pillow-10.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.2 kB)
Downloading moviepy-2.1.2-py3-none-any.whl (126 kB)
Downloading imageio_ffmpeg-0.6.0-py3-none-manylinux2014_x86_64.whl (29.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m29.5/29.5 MB[0m [31m140.3 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hDownloading pillow-10.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [11]:
import gymnasium as gym
import ale_py
gym.register_envs(ale_py)
from stable_baselines3 import DQN
from stable_baselines3.common.atari_wrappers import AtariWrapper
from stable_baselines3.common.vec_env import DummyVecEnv, VecVideoRecorder
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor
import numpy as np
from gymnasium import RewardWrapper
from wandb.integration.sb3 import WandbCallback

# config for wandb
config = {
    "total_timesteps": 1000000,
    "env_name": "Riverraid-v5",
}
run = wandb.init(
    project="RiverraidDQN",
    config=config,
    sync_tensorboard=True,  # auto-upload sb3's tensorboard metrics
    monitor_gym=True,  # auto-upload the videos of agents playing the game
    save_code=False,  # optional
)

# custom reward wrapper for better results
class RiverraidRewardWrapper(RewardWrapper):
    def __init__(self, env):
        super().__init__(env)
        self.prev_fuel = None
        self.prev_lives = None

    def reset(self, **kwargs):
        obs, info = self.env.reset(**kwargs)
        self.prev_fuel = info.get("fuel", 100)
        self.prev_lives = info.get("lives", 3)
        return obs, info

    def reward(self, reward):
        # Base reward from environment
        shaped_reward = reward

        # Add small time-based survival reward
        shaped_reward += 0.1

        # Penalize if fuel is very low (simulate danger)
        current_fuel = self.env.unwrapped.ale.getRAM()[65]  # RAM address for fuel
        if current_fuel < 20:
            shaped_reward -= 0.5

        # Bonus for fuel collection (fuel increases)
        if self.prev_fuel is not None and current_fuel > self.prev_fuel:
            shaped_reward += 1.0

        self.prev_fuel = current_fuel

        # Penalize for life loss
        current_lives = self.env.unwrapped.ale.lives()
        if self.prev_lives is not None and current_lives < self.prev_lives:
            shaped_reward -= 5.0

        self.prev_lives = current_lives

        # Clip final shaped reward
        return np.clip(shaped_reward, -1, 1)


# Create and wrap the Riverraid environment
def make_env():
    env = gym.make("ALE/Riverraid-v5", render_mode="rgb_array")
    env = Monitor(env)
    env = RiverraidRewardWrapper(env)
    env = AtariWrapper(env)
    return env

env = DummyVecEnv([make_env])
env = VecVideoRecorder(
    env,
    f"videos/{run.id}",
    record_video_trigger=lambda x: x % 50000 == 0,
    video_length=400,
)

# Define and train the DQN model
model = DQN("CnnPolicy", env, verbose=1, buffer_size=100000, learning_starts=10000,
            batch_size=32, gamma=0.99, train_freq=4, target_update_interval=1000, tensorboard_log=f"runs/{run.id}")

# model.learn(total_timesteps=100000)

model.learn(
    total_timesteps=config["total_timesteps"],
    callback=WandbCallback(
        gradient_save_freq=100,
        model_save_path=f"models/{run.id}",
        verbose=2,
    ),
)
run.finish()

# Save the model
model.save("dqn_riverraid")

# Evaluate the trained agent
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10)
print(f"Mean reward: {mean_reward} +/- {std_reward}")



Using cuda device
Wrapping the env in a VecTransposeImage.
Logging to runs/xk82qzec/DQN_1
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 471      |
|    ep_rew_mean      | 960      |
|    exploration_rate | 0.999    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 249      |
|    time_elapsed     | 0        |
|    total_timesteps  | 102      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 456      |
|    ep_rew_mean      | 840      |
|    exploration_rate | 0.998    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 258      |
|    time_elapsed     | 0        |
|    total_timesteps  | 194      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 405      |
|    ep_rew_mean      | 713      |


                                                                          

MoviePy - Done !
MoviePy - video ready /home/ec2-user/SageMaker/videos/xk82qzec/rl-video-step-0-to-step-400.mp4
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 434      |
|    ep_rew_mean      | 806      |
|    exploration_rate | 0.996    |
| time/               |          |
|    episodes         | 20       |
|    fps              | 242      |
|    time_elapsed     | 1        |
|    total_timesteps  | 469      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 443      |
|    ep_rew_mean      | 853      |
|    exploration_rate | 0.995    |
| time/               |          |
|    episodes         | 24       |
|    fps              | 250      |
|    time_elapsed     | 2        |
|    total_timesteps  | 576      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 435      |
|    ep_rew_m

                                                                          

MoviePy - Done !
MoviePy - video ready /home/ec2-user/SageMaker/videos/xk82qzec/rl-video-step-50000-to-step-50400.mp4
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 380      |
|    ep_rew_mean      | 610      |
|    exploration_rate | 0.521    |
| time/               |          |
|    episodes         | 2372     |
|    fps              | 206      |
|    time_elapsed     | 244      |
|    total_timesteps  | 50401    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.434    |
|    n_updates        | 10100    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 380      |
|    ep_rew_mean      | 609      |
|    exploration_rate | 0.52     |
| time/               |          |
|    episodes         | 2376     |
|    fps              | 206      |
|    time_elapsed     | 245      |
|    total_timesteps  | 50482    |
| train

                                                                          

MoviePy - Done !
MoviePy - video ready /home/ec2-user/SageMaker/videos/xk82qzec/rl-video-step-100000-to-step-100400.mp4
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 499      |
|    ep_rew_mean      | 1e+03    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4492     |
|    fps              | 194      |
|    time_elapsed     | 517      |
|    total_timesteps  | 100457   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.85     |
|    n_updates        | 22614    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 499      |
|    ep_rew_mean      | 1e+03    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4496     |
|    fps              | 194      |
|    time_elapsed     | 517      |
|    total_timesteps  | 100588   |
| tra

                                                                          

MoviePy - Done !
MoviePy - video ready /home/ec2-user/SageMaker/videos/xk82qzec/rl-video-step-150000-to-step-150400.mp4
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 588      |
|    ep_rew_mean      | 1.09e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 6100     |
|    fps              | 190      |
|    time_elapsed     | 791      |
|    total_timesteps  | 150407   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.55     |
|    n_updates        | 35101    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 587      |
|    ep_rew_mean      | 1.08e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 6104     |
|    fps              | 190      |
|    time_elapsed     | 791      |
|    total_timesteps  | 150506   |
| tra

                                                                          

MoviePy - Done !
MoviePy - video ready /home/ec2-user/SageMaker/videos/xk82qzec/rl-video-step-200000-to-step-200400.mp4
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 534      |
|    ep_rew_mean      | 969      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 7716     |
|    fps              | 188      |
|    time_elapsed     | 1065     |
|    total_timesteps  | 200499   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 4.5      |
|    n_updates        | 47624    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 535      |
|    ep_rew_mean      | 971      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 7720     |
|    fps              | 188      |
|    time_elapsed     | 1065     |
|    total_timesteps  | 200584   |
| tra

                                                                          

MoviePy - Done !
MoviePy - video ready /home/ec2-user/SageMaker/videos/xk82qzec/rl-video-step-250000-to-step-250400.mp4
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 484      |
|    ep_rew_mean      | 820      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 9540     |
|    fps              | 186      |
|    time_elapsed     | 1341     |
|    total_timesteps  | 250457   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 9.08     |
|    n_updates        | 60114    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 484      |
|    ep_rew_mean      | 815      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 9544     |
|    fps              | 186      |
|    time_elapsed     | 1342     |
|    total_timesteps  | 250583   |
| tra

                                                                          

MoviePy - Done !
MoviePy - video ready /home/ec2-user/SageMaker/videos/xk82qzec/rl-video-step-300000-to-step-300400.mp4
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 448      |
|    ep_rew_mean      | 706      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 11600    |
|    fps              | 185      |
|    time_elapsed     | 1622     |
|    total_timesteps  | 300473   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.599    |
|    n_updates        | 72618    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 448      |
|    ep_rew_mean      | 704      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 11604    |
|    fps              | 185      |
|    time_elapsed     | 1623     |
|    total_timesteps  | 300545   |
| tra

                                                                          

MoviePy - Done !
MoviePy - video ready /home/ec2-user/SageMaker/videos/xk82qzec/rl-video-step-350000-to-step-350400.mp4
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 407      |
|    ep_rew_mean      | 554      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 13888    |
|    fps              | 183      |
|    time_elapsed     | 1905     |
|    total_timesteps  | 350452   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 5.03     |
|    n_updates        | 85112    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 405      |
|    ep_rew_mean      | 554      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 13892    |
|    fps              | 183      |
|    time_elapsed     | 1906     |
|    total_timesteps  | 350507   |
| tra

                                                                          

MoviePy - Done !
MoviePy - video ready /home/ec2-user/SageMaker/videos/xk82qzec/rl-video-step-400000-to-step-400400.mp4
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 413      |
|    ep_rew_mean      | 620      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 16176    |
|    fps              | 183      |
|    time_elapsed     | 2187     |
|    total_timesteps  | 400482   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 9.7      |
|    n_updates        | 97620    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 412      |
|    ep_rew_mean      | 617      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 16180    |
|    fps              | 183      |
|    time_elapsed     | 2187     |
|    total_timesteps  | 400553   |
| tra

                                                                          

MoviePy - Done !
MoviePy - video ready /home/ec2-user/SageMaker/videos/xk82qzec/rl-video-step-450000-to-step-450400.mp4
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 462      |
|    ep_rew_mean      | 702      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 18284    |
|    fps              | 182      |
|    time_elapsed     | 2465     |
|    total_timesteps  | 450482   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 5.49     |
|    n_updates        | 110120   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 464      |
|    ep_rew_mean      | 703      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 18288    |
|    fps              | 182      |
|    time_elapsed     | 2466     |
|    total_timesteps  | 450625   |
| tra

                                                                          

MoviePy - Done !
MoviePy - video ready /home/ec2-user/SageMaker/videos/xk82qzec/rl-video-step-500000-to-step-500400.mp4
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 466      |
|    ep_rew_mean      | 702      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 20264    |
|    fps              | 182      |
|    time_elapsed     | 2744     |
|    total_timesteps  | 500406   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.179    |
|    n_updates        | 122601   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 465      |
|    ep_rew_mean      | 702      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 20268    |
|    fps              | 182      |
|    time_elapsed     | 2745     |
|    total_timesteps  | 500483   |
| tra

                                                                          

MoviePy - Done !
MoviePy - video ready /home/ec2-user/SageMaker/videos/xk82qzec/rl-video-step-550000-to-step-550400.mp4
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 420      |
|    ep_rew_mean      | 628      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 22348    |
|    fps              | 181      |
|    time_elapsed     | 3024     |
|    total_timesteps  | 550409   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 13.1     |
|    n_updates        | 135102   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 419      |
|    ep_rew_mean      | 625      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 22352    |
|    fps              | 181      |
|    time_elapsed     | 3025     |
|    total_timesteps  | 550471   |
| tra

                                                                          

MoviePy - Done !
MoviePy - video ready /home/ec2-user/SageMaker/videos/xk82qzec/rl-video-step-600000-to-step-600400.mp4
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 391      |
|    ep_rew_mean      | 521      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 24648    |
|    fps              | 181      |
|    time_elapsed     | 3307     |
|    total_timesteps  | 600486   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 8.79     |
|    n_updates        | 147621   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 392      |
|    ep_rew_mean      | 521      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 24652    |
|    fps              | 181      |
|    time_elapsed     | 3307     |
|    total_timesteps  | 600558   |
| tra

                                                                          

MoviePy - Done !
MoviePy - video ready /home/ec2-user/SageMaker/videos/xk82qzec/rl-video-step-650000-to-step-650400.mp4
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 501      |
|    ep_rew_mean      | 783      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 26612    |
|    fps              | 181      |
|    time_elapsed     | 3585     |
|    total_timesteps  | 650486   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 4.39     |
|    n_updates        | 160121   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 503      |
|    ep_rew_mean      | 790      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 26616    |
|    fps              | 181      |
|    time_elapsed     | 3586     |
|    total_timesteps  | 650609   |
| tra

                                                                          

MoviePy - Done !
MoviePy - video ready /home/ec2-user/SageMaker/videos/xk82qzec/rl-video-step-700000-to-step-700400.mp4
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 455      |
|    ep_rew_mean      | 716      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 28548    |
|    fps              | 181      |
|    time_elapsed     | 3863     |
|    total_timesteps  | 700478   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 4.91     |
|    n_updates        | 172619   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 453      |
|    ep_rew_mean      | 714      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 28552    |
|    fps              | 181      |
|    time_elapsed     | 3863     |
|    total_timesteps  | 700579   |
| tra

                                                                          

MoviePy - Done !
MoviePy - video ready /home/ec2-user/SageMaker/videos/xk82qzec/rl-video-step-750000-to-step-750400.mp4
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 435      |
|    ep_rew_mean      | 651      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 30572    |
|    fps              | 181      |
|    time_elapsed     | 4136     |
|    total_timesteps  | 750439   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 8.25     |
|    n_updates        | 185109   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 435      |
|    ep_rew_mean      | 653      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 30576    |
|    fps              | 181      |
|    time_elapsed     | 4137     |
|    total_timesteps  | 750516   |
| tra

                                                                          

MoviePy - Done !
MoviePy - video ready /home/ec2-user/SageMaker/videos/xk82qzec/rl-video-step-800000-to-step-800400.mp4
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 440      |
|    ep_rew_mean      | 690      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 32596    |
|    fps              | 181      |
|    time_elapsed     | 4415     |
|    total_timesteps  | 800424   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.513    |
|    n_updates        | 197605   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 441      |
|    ep_rew_mean      | 687      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 32600    |
|    fps              | 181      |
|    time_elapsed     | 4416     |
|    total_timesteps  | 800548   |
| tra

                                                                          

MoviePy - Done !
MoviePy - video ready /home/ec2-user/SageMaker/videos/xk82qzec/rl-video-step-850000-to-step-850400.mp4
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 455      |
|    ep_rew_mean      | 713      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 34664    |
|    fps              | 181      |
|    time_elapsed     | 4695     |
|    total_timesteps  | 850423   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.145    |
|    n_updates        | 210105   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 454      |
|    ep_rew_mean      | 713      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 34668    |
|    fps              | 181      |
|    time_elapsed     | 4696     |
|    total_timesteps  | 850490   |
| tra

                                                                          

MoviePy - Done !
MoviePy - video ready /home/ec2-user/SageMaker/videos/xk82qzec/rl-video-step-900000-to-step-900400.mp4
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 502      |
|    ep_rew_mean      | 831      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 36584    |
|    fps              | 181      |
|    time_elapsed     | 4973     |
|    total_timesteps  | 900489   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 8.08     |
|    n_updates        | 222622   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 502      |
|    ep_rew_mean      | 832      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 36588    |
|    fps              | 181      |
|    time_elapsed     | 4974     |
|    total_timesteps  | 900602   |
| tra

                                                                          

MoviePy - Done !
MoviePy - video ready /home/ec2-user/SageMaker/videos/xk82qzec/rl-video-step-950000-to-step-950400.mp4
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 535      |
|    ep_rew_mean      | 905      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 38296    |
|    fps              | 181      |
|    time_elapsed     | 5246     |
|    total_timesteps  | 950450   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.69     |
|    n_updates        | 235112   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 534      |
|    ep_rew_mean      | 901      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 38300    |
|    fps              | 181      |
|    time_elapsed     | 5246     |
|    total_timesteps  | 950552   |
| tra

0,1
global_step,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇██
rollout/ep_len_mean,▁▂▅▅▆██▇▆▆▃▂▂▂▂▁▂▂▂▂▃▂▂▂▂▄▅▄▃▄▃▃▃▃▃▃▅▆▆▅
rollout/ep_rew_mean,▄▃▃▃▂▃▄▅▆▆▇█▅▃▃▂▁▁▁▂▃▃▂▃▂▂▁▂▃▄▄▄▃▃▃▃▃▃▅▅
rollout/exploration_rate,██▇▆▆▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
time/fps,█▆▅▅▅▄▄▄▃▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/learning_rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/loss,▂▁▁▅▄▁▂▄▃▅▁▁▇▁▇▃▁▅▃▃▃▅█▆▃▃▃▅▄▁▃█▁▁▅▁▄▃▁▂

0,1
global_step,999941.0
rollout/ep_len_mean,472.9
rollout/ep_rew_mean,721.0
rollout/exploration_rate,0.05
time/fps,181.0
train/learning_rate,0.0001
train/loss,8.18511


MoviePy - Building video /home/ec2-user/SageMaker/videos/xk82qzec/rl-video-step-1000000-to-step-1000400.mp4.
MoviePy - Writing video /home/ec2-user/SageMaker/videos/xk82qzec/rl-video-step-1000000-to-step-1000400.mp4



                                                            

MoviePy - Done !
MoviePy - video ready /home/ec2-user/SageMaker/videos/xk82qzec/rl-video-step-1000000-to-step-1000400.mp4




Saving video to /home/ec2-user/SageMaker/videos/xk82qzec/rl-video-step-1000000-to-step-1000400.mp4
MoviePy - Building video /home/ec2-user/SageMaker/videos/xk82qzec/rl-video-step-1000000-to-step-1000400.mp4.
MoviePy - Writing video /home/ec2-user/SageMaker/videos/xk82qzec/rl-video-step-1000000-to-step-1000400.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready /home/ec2-user/SageMaker/videos/xk82qzec/rl-video-step-1000000-to-step-1000400.mp4
Mean reward: 1162.0 +/- 212.82856951076846


In [None]:
import matplotlib.pyplot as plt
from IPython.display import display, clear_output
import time

# Load model
model = DQN.load("dqn_riverraid")

# Create renderable env
env = gym.make("ALE/Riverraid-v5", render_mode="rgb_array")
env = AtariWrapper(env)

obs, info = env.reset()
done = False

for _ in range(1000):
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, done, truncated, info = env.step(action)

    # Render frame
    frame = env.render()
    plt.imshow(frame)
    plt.axis("off")
    clear_output(wait=True)
    display(plt.gcf())
    time.sleep(0.03)

    if done or truncated:
        obs, info = env.reset()

env.close()

In [None]:
# should be changed to record a video, instead.