In [12]:
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor

In [None]:
# Sanity check: drive LunarLander-v2 with uniformly sampled actions for 100 steps.
env = gym.make("LunarLander-v2")

# Start the first episode from the environment's initial state.
observation, info = env.reset()

for _step in range(100):
    # Draw a random action from the action space and log it.
    action = env.action_space.sample()
    print(f"Action Taken: {action}")

    # Apply the action; step() yields
    # (observation, reward, terminated, truncated, info).
    observation, reward, terminated, truncated, info = env.step(action)

    # Keep stepping while the episode is still running.
    if not terminated and not truncated:
        continue

    # The episode ended (terminated) or was cut short (truncated):
    # begin a new one.
    print("Environment is reset")
    observation, info = env.reset()

env.close()

In [10]:
# Train a PPO agent (MlpPolicy) on 32 parallel LunarLander-v2 environments and save it.
# NOTE(review): the save name advertises "lr3e-2", but no learning_rate is passed to
# PPO below, so PPO's default learning rate applies — confirm which one is intended
# and either pass learning_rate explicitly or rename the file.
model_name = "ppo-lunar-landerv2-v2-3e6-lr3e-2"

env = make_vec_env("LunarLander-v2", n_envs=32)
model = PPO("MlpPolicy", env, verbose=1)

# total_timesteps is documented as an int; 3e6 is a float literal, so spell it as an int.
model.learn(total_timesteps=3_000_000, progress_bar=True)
model.save(model_name)

Output()

Using cuda device


KeyboardInterrupt: 

In [13]:
# Evaluate a saved checkpoint for 10 episodes with deterministic actions.
eval_env = Monitor(gym.make("LunarLander-v2"))

# Load without attaching an env: evaluate_policy is handed eval_env directly, so this
# cell does not rely on an `env` variable left behind by some other cell.
model = PPO.load("./ppo-lunar-landerv2-v1-5e6-lr3e-4.zip", device="cuda")

mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10, deterministic=True)
print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")

mean_reward=263.77 +/- 17.134602775684165


In [None]:
# How to make predictions
# The render mode has to be chosen when the environment is constructed; asking a
# VecEnv built without one to render("human") only emits a warning and draws nothing.
# With render_mode="human" the environment draws itself on every step, so no explicit
# render() call is needed in the loop.
env = make_vec_env("LunarLander-v2", env_kwargs={"render_mode": "human"})
model = PPO.load("./ppo-lunar-landerv2-v1.zip", env, device="cuda")

# A vectorized env's reset() returns only the batched observations (no info dict).
observation = env.reset()

for _ in range(100):
    # predict() returns a (actions, states) pair; only the actions are needed here.
    action, _states = model.predict(observation, deterministic=True)

    # A vectorized env's step() returns (observations, rewards, dones, infos) —
    # not gymnasium's five-value (obs, reward, terminated, truncated, info) tuple.
    observation, rewards, dones, infos = env.step(action)

env.close()

In [14]:
import gymnasium as gym

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.env_util import make_vec_env

from huggingface_sb3 import package_to_hub

# --- What is being published -------------------------------------------------
# Environment the agent was trained on and the algorithm that produced it.
env_id = "LunarLander-v2"
model_architecture = "PPO"

# Name of the trained model and the Hub repository it is pushed to.
# repo_id has the form {organization}/{repo_name}.
model_name = "ppo-lunar-landerv2-v1-5e6-lr3e-4"
repo_id = "Lounarisnia/ppo-lunar-landerv2-v1-5e6-lr3e-4"

# Message attached to the upload commit.
commit_message = "Upload PPO LunarLander-v2 trained agent"

# --- Load the trained agent --------------------------------------------------
env = make_vec_env("LunarLander-v2")
model = PPO.load("./ppo-lunar-landerv2-v1-5e6-lr3e-4.zip", env, device="cuda")


# --- Evaluation environment --------------------------------------------------
def _make_eval_env():
    """Build one env_id environment with render_mode="rgb_array"."""
    return gym.make(env_id, render_mode="rgb_array")


eval_env = DummyVecEnv([_make_eval_env])

# --- Package and push --------------------------------------------------------
# Kept as the cell's final bare expression so its return value is displayed.
package_to_hub(
    model=model,
    model_name=model_name,
    model_architecture=model_architecture,
    env_id=env_id,
    eval_env=eval_env,
    repo_id=repo_id,
    commit_message=commit_message,
)


[38;5;4mℹ This function will save, evaluate, generate a video of your agent,
create a model card and push everything to the hub. It might take up to 1min.
This is a work in progress: if you encounter a bug, please open an issue.[0m
Saving video to C:\Users\Loona\AppData\Local\Temp\tmp0vnfiimc\-step-0-to-step-1000.mp4
Moviepy - Building video C:\Users\Loona\AppData\Local\Temp\tmp0vnfiimc\-step-0-to-step-1000.mp4.
Moviepy - Writing video C:\Users\Loona\AppData\Local\Temp\tmp0vnfiimc\-step-0-to-step-1000.mp4


                                                                 

Moviepy - Done !
Moviepy - video ready C:\Users\Loona\AppData\Local\Temp\tmp0vnfiimc\-step-0-to-step-1000.mp4
[38;5;4mℹ Pushing repo Lounarisnia/ppo-lunar-landerv2-v1-5e6-lr3e-4 to the
Hugging Face Hub[0m


policy.optimizer.pth:   0%|          | 0.00/88.5k [00:00<?, ?B/s]

policy.pth:   0%|          | 0.00/43.8k [00:00<?, ?B/s]

pytorch_variables.pth:   0%|          | 0.00/864 [00:00<?, ?B/s]

Upload 4 LFS files:   0%|          | 0/4 [00:00<?, ?it/s]

ppo-lunar-landerv2-v1-5e6-lr3e-4.zip:   0%|          | 0.00/149k [00:00<?, ?B/s]

[38;5;4mℹ Your model is pushed to the Hub. You can view your model here:
https://huggingface.co/Lounarisnia/ppo-lunar-landerv2-v1-5e6-lr3e-4/tree/main/[0m


CommitInfo(commit_url='https://huggingface.co/Lounarisnia/ppo-lunar-landerv2-v1-5e6-lr3e-4/commit/9b8593306ccd09b48b8dff5941b5e269131aa67d', commit_message='Upload PPO LunarLander-v2 trained agent', commit_description='', oid='9b8593306ccd09b48b8dff5941b5e269131aa67d', pr_url=None, pr_revision=None, pr_num=None)