<a href="https://colab.research.google.com/github/sgoodfriend/rl-algo-impls/blob/main/hf-deep-rl/dqn_SpaceInvadersNoFrameskip_v4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Identifiers shared by the training, evaluation and publishing cells below.
ALGO = "dqn"
ENV = "SpaceInvadersNoFrameskip-v4"
# Forwarded to both the evaluation call and the Hub packaging call.
IS_EVAL_DETERMINISTIC = True
# Resolves to "<algo>-sb3-<env id>".
MODEL_NAME = "-".join([ALGO, "sb3", ENV])

In [None]:
%%capture
!apt install python-opengl
!apt install ffmpeg
!apt install xvfb
!apt install swig
!apt-get install swig cmake freeglut3-dev

In [None]:
%%capture
!pip install stable-baselines3[extra] box2d huggingface_sb3 pyglet==1.5.1 pyvirtualdisplay==3.0

In [None]:
%%capture
from pyvirtualdisplay import Display

virtual_display = Display(visible=0, size=(1400, 900))
virtual_display.start()

In [None]:
# Use git's "store" credential helper so the Hub token entered below is saved for git.
!git config --global credential.helper store
# Interactive prompt for a Hugging Face access token; the publishing cell later
# relies on this login to look up the account name and push the model.
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|
    
    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Token: 
Add token as git credential? (Y/n) 
Token is valid.
Your token has been saved in your configured git credential helpers (store).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [None]:
from stable_baselines3 import DQN
from stable_baselines3.common.atari_wrappers import AtariWrapper
from stable_baselines3.common.env_util import make_vec_env

def make_env(n_envs=1):
    """Build a vectorized ``ENV`` environment with ``AtariWrapper`` applied.

    Args:
        n_envs: Number of environment copies to create (default 1).

    Returns:
        The vectorized environment produced by ``make_vec_env``.

    NOTE(review): only ``AtariWrapper`` is applied here and no frame stacking
    is visible in this notebook -- confirm that single-frame input is intended.
    """
    vec_env = make_vec_env(ENV, wrapper_class=AtariWrapper, n_envs=n_envs)
    return vec_env

# Training runs on eight copies of the environment.
env = make_env(n_envs=8)

model = DQN(
    policy="CnnPolicy",
    env=env,
    verbose=1,
    # --- optimisation ---
    learning_rate=1e-4,
    batch_size=32,
    gamma=0.99,
    train_freq=4,
    gradient_steps=1,
    # --- target network ---
    tau=1.0,
    target_update_interval=30_000,
    # --- replay buffer ---
    buffer_size=400_000,
    learning_starts=200_000,
    replay_buffer_class=None,
    # NOTE(review): timeout handling is presumably disabled because it cannot be
    # combined with optimize_memory_usage=True -- confirm against the SB3 docs.
    replay_buffer_kwargs=dict(handle_timeout_termination=False),
    optimize_memory_usage=True,
    # --- exploration schedule ---
    exploration_initial_eps=1.0,
    exploration_final_eps=0.01,
    exploration_fraction=0.1,
)
# Left as the final expression so the cell output is unchanged.
model.learn(total_timesteps=10_000_000)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00559  |
|    n_updates        | 293223   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 3.58e+03 |
|    ep_rew_mean      | 550      |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 39960    |
|    fps              | 710      |
|    time_elapsed     | 13491    |
|    total_timesteps  | 9584512  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0384   |
|    n_updates        | 293265   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 3.58e+03 |
|    ep_rew_mean      | 548      |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3

<stable_baselines3.dqn.dqn.DQN at 0x7f1adfe4ab50>

In [None]:
from stable_baselines3.common.evaluation import evaluate_policy

# Evaluate on a freshly built single environment, separate from the training one.
eval_env = make_env()
mean_reward, std_reward = evaluate_policy(
    model,
    eval_env,
    deterministic=IS_EVAL_DETERMINISTIC,
    n_eval_episodes=10,
)
# Report the mean and standard deviation of reward over the 10 evaluation episodes.
print(f"{mean_reward} +/- {std_reward}")

698.0 +/- 166.63132958720578


In [None]:
from huggingface_hub.hf_api import HfApi
from huggingface_sb3 import package_to_hub


# The repository is created under the account that is currently logged in.
username = HfApi().whoami()["name"]
repo_id = f"{username}/{MODEL_NAME}"

# A separate environment instance is handed to the packaging call.
publish_env = make_env()

# The repository id doubles as the commit message.
commit_message = repo_id

# Arguments are positional, so their order must not change. Left as the final
# expression so the value returned by package_to_hub is shown as the cell output.
package_to_hub(
    model, MODEL_NAME, ALGO, ENV,
    publish_env, repo_id, commit_message,
    is_deterministic=IS_EVAL_DETERMINISTIC,
)

[38;5;4mℹ This function will save, evaluate, generate a video of your agent,
create a model card and push everything to the hub. It might take up to 1min.
This is a work in progress: if you encounter a bug, please open an issue.[0m
Saving video to /tmp/tmpksnmhmmy/-step-0-to-step-1000.mp4
[38;5;4mℹ Pushing repo sgoodfriend/dqn-sb3-SpaceInvadersNoFrameskip-v4 to the
Hugging Face Hub[0m


pytorch_variables.pth:   0%|          | 0.00/431 [00:00<?, ?B/s]

dqn-sb3-SpaceInvadersNoFrameskip-v4.zip:   0%|          | 0.00/27.1M [00:00<?, ?B/s]

policy.optimizer.pth:   0%|          | 0.00/13.5M [00:00<?, ?B/s]

policy.pth:   0%|          | 0.00/13.5M [00:00<?, ?B/s]

Upload 4 LFS files:   0%|          | 0/4 [00:00<?, ?it/s]

[38;5;4mℹ Your model is pushed to the Hub. You can view your model here:
https://huggingface.co/sgoodfriend/dqn-sb3-SpaceInvadersNoFrameskip-v4/tree/main/[0m


'https://huggingface.co/sgoodfriend/dqn-sb3-SpaceInvadersNoFrameskip-v4/tree/main/'