In [1]:
!apt update
!echo Y | apt-get install libgl1-mesa-dev

Get:1 http://security.debian.org/debian-security bullseye-security InRelease [44.1 kB]
Get:2 http://security.debian.org/debian-security bullseye-security/main amd64 Packages [76.2 kB]
Get:3 http://deb.debian.org/debian bullseye InRelease [116 kB]
Get:4 http://deb.debian.org/debian bullseye-updates InRelease [39.4 kB]
Get:5 http://deb.debian.org/debian bullseye/main amd64 Packages [8180 kB]
Get:6 http://deb.debian.org/debian bullseye-updates/main amd64 Packages [2592 B]
Fetched 8458 kB in 5s (1594 kB/s)
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
1 package can be upgraded. Run 'apt list --upgradable' to see it.
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  libdrm-amdgpu1 libdrm-common libdrm-intel1 libdrm-nouveau2 libdrm-radeon1
  libdrm2 libegl-dev libegl-m

In [None]:
import os
import gym
import numpy as np
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.callbacks import CheckpointCallback, EvalCallback
from stable_baselines3.common.vec_env import VecFrameStack, VecVideoRecorder, DummyVecEnv
from stable_baselines3 import A2C, DQN

# --- Output locations ------------------------------------------------------
# Monitor files and checkpoints are written to log_dir; the final model to save_dir.
log_dir = "./logs/"
os.makedirs(log_dir, exist_ok=True)
save_dir = "./tmp/gym"
os.makedirs(save_dir, exist_ok=True)
env_id = 'BreakoutNoFrameskip-v4'

# --- Training budget -------------------------------------------------------
SEED = 2937996057
# Decide the total number of training timesteps here (kept as an int: it is a step count).
TOTAL_TIMESTEPS = 100_000
# Number of warm-up steps collected before the first gradient update.
# It must be strictly smaller than TOTAL_TIMESTEPS: the original value (100000)
# equalled the whole budget, so no gradient update would run before the model was saved.
# The cap of 100000 keeps the original setting once the budget is raised.
LEARNING_STARTS = min(100_000, TOTAL_TIMESTEPS // 10)
# Save an intermediate checkpoint every CHECKPOINT_FREQ calls of the callback.
CHECKPOINT_FREQ = 100_000

# --- Environment -----------------------------------------------------------
monitor_kwargs = {'allow_early_resets': True}
env = make_atari_env(
    env_id, seed=SEED, monitor_dir=log_dir, monitor_kwargs=monitor_kwargs
)
env.reset()
checkpoint_callback = CheckpointCallback(save_freq=CHECKPOINT_FREQ, save_path=log_dir)

# --- Model -----------------------------------------------------------------
# Only real hyperparameters are passed. The deprecated `create_eval_env` flag and the
# private `_init_setup_model` argument were left at their defaults in the original
# call, so dropping them does not change behaviour.
model = DQN(
    policy='CnnPolicy',
    env=env,
    learning_rate=0.0001,
    buffer_size=100000,
    learning_starts=LEARNING_STARTS,
    batch_size=32,
    tau=1.0,
    gamma=0.99,
    train_freq=4,
    gradient_steps=1,
    optimize_memory_usage=True,
    target_update_interval=1000,
    exploration_fraction=0.1,
    exploration_initial_eps=1.0,
    exploration_final_eps=0.01,
    max_grad_norm=10,
    # An empty string places the run directory (e.g. "DQN_1") in the current
    # working directory, as seen in the "Logging to DQN_1" output.
    tensorboard_log='',
    verbose=1,
    seed=SEED,
    device="auto",
)

# --- Training --------------------------------------------------------------
# log_interval=1 dumps the training table after every episode, which is what the
# original log_interval=-1 produced (see the per-episode tables in the cell output).
# The eval_* arguments were inert (eval_env=None) and are omitted.
model.learn(
    total_timesteps=TOTAL_TIMESTEPS,
    callback=checkpoint_callback,
    log_interval=1,
    tb_log_name="DQN",
    reset_num_timesteps=True,
)

model.save(os.path.join(save_dir, "breakout"))

Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to DQN_1
----------------------------------
| rollout/            |          |
|    exploration rate | 0.998    |
| time/               |          |
|    episodes         | 1        |
|    fps              | 215      |
|    time_elapsed     | 0        |
|    total timesteps  | 23       |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.995    |
| time/               |          |
|    episodes         | 2        |
|    fps              | 252      |
|    time_elapsed     | 0        |
|    total timesteps  | 46       |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.986    |
| time/               |          |
|    episodes         | 3        |
|    fps              | 72       |
|    time_elapsed     | 1        |
|    total timesteps  | 142      |
---------------

In [None]:
# Video Record
# Runs the `model` defined in the training cell for one episode on a fresh
# environment and records it into `video_folder`.
video_folder = './logs/videos'
wrapper_kwargs = {'noop_max': 1}

# NOTE(review): this seed differs from the training seed (2937996057) only by a
# missing last digit — confirm whether that is intentional.
env = make_atari_env(env_id, seed=293799605, wrapper_kwargs=wrapper_kwargs)
env = VecVideoRecorder(
    venv=env,
    video_folder=video_folder,
    record_video_trigger=lambda step: step == 0,  # start recording at the first step only
    video_length=np.inf,  # no frame limit: recording lasts until env.close()
    name_prefix='rl-video',
)

# NOTE(review): the loop stops at the first `done` flag; with the default Atari
# wrappers this may be a lost life rather than a full game over — confirm.
try:
    done = False
    state = env.reset()
    while not done:
        action, _ = model.predict(state)
        state, _, dones, info = env.step(action)
        # The vectorised env returns one flag per sub-environment; a single env is used here.
        done = bool(dones[0])
finally:
    # Always close the recorder, even if the rollout raises, so the video file is finalised.
    env.close()
print("finished")