In [3]:
# Import libraries
import gymnasium as gym
from huggingface_sb3 import  package_to_hub
from huggingface_hub import (
    notebook_login,
)

from stable_baselines3 import DQN
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor

In [4]:
# Train a DQN agent on LunarLander-v2 and save the result to disk.

# Fixed seed so that weight initialisation, exploration and replay sampling
# are repeatable between runs of this cell.
SEED = 42

# Create environment
env = gym.make('LunarLander-v2')

# Create the DQN model
model = DQN(
    policy="MlpPolicy",
    env=env,
    learning_rate=1e-4,
    buffer_size=100_000,
    learning_starts=1_000,           # collect this many steps before the first gradient update
    batch_size=64,
    tau=1.0,                         # 1.0 = hard copy of the online net into the target net
    gamma=0.99,
    train_freq=4,
    gradient_steps=1,
    target_update_interval=1_000,
    exploration_fraction=0.1,        # epsilon is annealed over the first 10% of training
    exploration_initial_eps=1.0,
    exploration_final_eps=0.05,
    max_grad_norm=10,
    tensorboard_log="./dqn_lunarlander_tensorboard/",
    policy_kwargs=dict(net_arch=[256, 256]),
    seed=SEED,                       # seeds the pseudo-random generators used by SB3
    verbose=1,
)


# Train the model (log_interval is counted in episodes for off-policy algorithms)
model.learn(total_timesteps=1_000_000, log_interval=10)

# Save trained model
model_name = "dqn-LunarLander-v2"
model.save(model_name)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to ./dqn_lunarlander_tensorboard/DQN_2
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 92.2     |
|    ep_rew_mean      | -251     |
|    exploration_rate | 0.991    |
| time/               |          |
|    episodes         | 10       |
|    fps              | 14267    |
|    time_elapsed     | 0        |
|    total_timesteps  | 922      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 85.8     |
|    ep_rew_mean      | -206     |
|    exploration_rate | 0.984    |
| time/               |          |
|    episodes         | 20       |
|    fps              | 1490     |
|    time_elapsed     | 1        |
|    total_timesteps  | 1715     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.01     |
|    n_updates     

In [5]:
# Persist the trained agent; SB3 writes it as a .zip archive named after model_name.
model_name = "dqn-LunarLander-v2"
model.save(path=model_name)

In [9]:
# Evaluate and see results
# render_mode="human" opens a window so the evaluation episodes can be watched;
# the Monitor wrapper lets evaluate_policy read the true episode returns.
eval_env = Monitor(gym.make('LunarLander-v2', render_mode="human"))
try:
    mean_reward, std_reward = evaluate_policy(
        model,
        eval_env,
        n_eval_episodes=2,  # kept small because every episode is rendered on screen
        deterministic=True,
    )
finally:
    # Release the render window even if evaluation raises.
    eval_env.close()

# Report both statistics with the same precision.
print(f"mean_reward={mean_reward:.2f}+/-{std_reward:.2f}")

mean_reward=277.54+/-2.342390999999992


In [8]:
# Evaluation environment handed to package_to_hub; "rgb_array" rendering is
# needed so frames can be captured for the replay video.
hub_eval_env = DummyVecEnv(
    [lambda: Monitor(gym.make("LunarLander-v2", render_mode="rgb_array"))]
)

# Save, evaluate, record a video, build a model card and push to the Hub.
# NOTE(review): model_name is the file name of the saved archive, so it must not
# carry the "Belwen/" namespace (that belongs in repo_id only); otherwise the
# .zip appears to be written under a nested "Belwen/" folder inside the repo.
package_to_hub(
    model=model,
    model_name="dqn-LunarLander-v2",
    model_architecture="DQN",
    env_id="LunarLander-v2",
    eval_env=hub_eval_env,
    repo_id="Belwen/dqn-LunarLander-v2",
    commit_message="Upload trained dqn LunarLander",
)

[38;5;4mℹ This function will save, evaluate, generate a video of your agent,
create a model card and push everything to the hub. It might take up to 1min.
This is a work in progress: if you encounter a bug, please open an issue.[0m




Saving video to C:\Users\Sarah\AppData\Local\Temp\tmphw8gx418\-step-0-to-step-1000.mp4
Moviepy - Building video C:\Users\Sarah\AppData\Local\Temp\tmphw8gx418\-step-0-to-step-1000.mp4.
Moviepy - Writing video C:\Users\Sarah\AppData\Local\Temp\tmphw8gx418\-step-0-to-step-1000.mp4



                                                                 

Moviepy - Done !
Moviepy - video ready C:\Users\Sarah\AppData\Local\Temp\tmphw8gx418\-step-0-to-step-1000.mp4
[38;5;4mℹ Pushing repo Belwen/dqn-LunarLander-v2 to the Hugging Face Hub[0m


policy.optimizer.pth:   0%|          | 0.00/558k [00:00<?, ?B/s]

pytorch_variables.pth:   0%|          | 0.00/864 [00:00<?, ?B/s]

Upload 4 LFS files:   0%|          | 0/4 [00:00<?, ?it/s]

policy.pth:   0%|          | 0.00/557k [00:00<?, ?B/s]

dqn-LunarLander-v2.zip:   0%|          | 0.00/1.13M [00:00<?, ?B/s]

[38;5;4mℹ Your model is pushed to the Hub. You can view your model here:
https://huggingface.co/Belwen/dqn-LunarLander-v2/tree/main/[0m


CommitInfo(commit_url='https://huggingface.co/Belwen/dqn-LunarLander-v2/commit/7c94efb3e0d91c1d1902957efbc96ef712b7e759', commit_message='Upload trained dqn LunarLander', commit_description='', oid='7c94efb3e0d91c1d1902957efbc96ef712b7e759', pr_url=None, pr_revision=None, pr_num=None)