In [None]:
import gymnasium
import gymnasium as gym
import torch

from huggingface_hub import notebook_login # To log to our Hugging Face account to be able to upload models to the Hub.
from huggingface_sb3 import load_from_hub, package_to_hub

from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor

In [None]:
from PIL import Image 
def visualize(model, env, frames=100):
    """Step ``model`` through ``env`` and show every rendered frame inline.

    Args:
        model: Object exposing ``predict(observation)`` that returns an
            ``(action, state)`` pair.
        env: Environment whose ``reset()`` returns the observation alone and
            whose ``step(action)`` returns a 4-tuple (the call pattern used
            below). NOTE(review): this matches a Stable-Baselines3 VecEnv,
            not a raw Gymnasium env -- confirm at the call site.
        frames: Number of steps to play back.

    Returns:
        None. Each frame replaces the previous one in the cell output.
    """
    observation = env.reset()
    for _ in range(frames):
        # The second element returned by predict() is not needed here.
        action, _ = model.predict(observation)
        # Only the next observation is kept; the other three values are unused.
        observation, _, _, _ = env.step(action)
        frame = Image.fromarray(env.render("rgb_array"))
        # clear=True wipes the previous output so the frames play as an animation.
        display(frame, clear=True)

In [None]:
def print_results(model, env_id="LunarLander-v2", n_eval_episodes=10):
    """Evaluate ``model`` on a fresh monitored environment and print the score.

    Args:
        model: Trained agent, passed straight to ``evaluate_policy``.
        env_id: Id handed to ``gym.make`` to build the evaluation environment.
        n_eval_episodes: Number of evaluation episodes to run.

    Returns:
        None. Prints ``mean_reward= <mean> +/- <std>``.
    """
    eval_env = Monitor(gym.make(env_id))
    try:
        mean_reward, std_reward = evaluate_policy(
            model,
            eval_env,
            n_eval_episodes=n_eval_episodes,
            deterministic=True,
        )
    finally:
        # Release the environment even when evaluation raises, so repeated
        # calls from the notebook do not accumulate open environments.
        eval_env.close()
    # end="\r" leaves the cursor at the start of the line instead of emitting a newline.
    print(f"mean_reward= {mean_reward:.2f} +/- {std_reward}", end="\r")

In [None]:
# Parallel environments: 16 copies of LunarLander wrapped in one vectorized env.
env = make_vec_env("LunarLander-v2", n_envs=16)

# Use Apple's Metal (MPS) backend when this machine provides it; otherwise
# hand device selection to Stable-Baselines3 ("auto") so the notebook also
# runs on machines without MPS instead of failing at model construction.
mps_backend = getattr(torch.backends, "mps", None)
device = "mps" if mps_backend is not None and mps_backend.is_available() else "auto"

# Instantiate the agent
model = PPO(
    "MlpPolicy",
    env,
    n_steps=1024,
    batch_size=64,
    n_epochs=4,
    gamma=0.999,
    gae_lambda=0.98,
    ent_coef=0.01,
    verbose=0,
    device=device,
)

# Train the agent
# `total_timesteps` is an integer count, so use an int literal rather than the float 1e6.
steps = 1_000_000
model.learn(total_timesteps=steps, progress_bar=True)
model_name = 'ppo-LunarLander-v2_vector'
model.save(model_name)

In [None]:
visualize(model,env,200)

In [None]:
print_results(model)