In [None]:
# Install necessary packages
!apt install swig cmake ffmpeg xvfb python3-opengl
!pip install stable-baselines3==2.0.0a5 gymnasium[box2d] huggingface_sb3 pyvirtualdisplay imageio[ffmpeg]

In [None]:
import os
os.kill(os.getpid(), 9)

In [None]:
from pyvirtualdisplay import Display
virtual_display = Display(visible=0, size=(1400, 900))
virtual_display.start()

<pyvirtualdisplay.display.Display at 0x797480c9b460>

In [None]:
import gymnasium as gym
env = gym.make("BipedalWalker-v3", hardcore=True)
env.reset()

(array([ 2.7456926e-03,  1.1390344e-05, -1.4852337e-03, -1.6000073e-02,
         9.2516460e-02,  3.4478784e-03,  8.5975957e-01, -1.3387570e-03,
         1.0000000e+00,  3.2824788e-02,  3.4477431e-03,  8.5352099e-01,
        -2.3459264e-03,  1.0000000e+00,  4.4081339e-01,  4.4581950e-01,
         4.6142212e-01,  4.8954949e-01,  5.3410202e-01,  6.0246021e-01,
         7.0914787e-01,  8.8593054e-01,  1.0000000e+00,  1.0000000e+00],
       dtype=float32),
 {})

In [None]:
print("_____OBSERVATION SPACE_____ \n")
print("Observation Space Shape", env.observation_space.shape)
print("Sample observation", env.observation_space.sample()) # Get a random observation

_____OBSERVATION SPACE_____ 

Observation Space Shape (24,)
Sample observation [ 2.1438754   4.5886707   4.835742    3.4845269   1.0308722  -4.471876
  1.889703   -4.36432     0.29804963  1.7554917  -0.82427794 -2.0413795
 -2.0218992   4.86995    -0.21926726  0.31620005 -0.16165149 -0.44752583
  0.6803081   0.951827   -0.8649444   0.16387157  0.6710377   0.45267555]


In [None]:
print("\n _____ACTION SPACE_____ \n")
print("Action Space Shape", env.action_space.shape)
print("Action Space Sample", env.action_space.sample()) # Take a random action


 _____ACTION SPACE_____ 

Action Space Shape (4,)
Action Space Sample [ 0.04507523  0.6536329  -0.56822634  0.38231775]


In [None]:
from stable_baselines3.common.env_util import make_vec_env
env = make_vec_env('BipedalWalker-v3', n_envs=16)

In [None]:
from stable_baselines3 import PPO
model = PPO(
    policy = 'MlpPolicy',
    env = env,
    n_steps = 2048,
    batch_size = 128,
    n_epochs = 6,
    gamma = 0.999,
    gae_lambda = 0.98,
    ent_coef = 0.01,
    verbose=1)

Using cuda device


In [None]:
from wasabi import Printer
import numpy as np
from stable_baselines3.common.base_class import BaseAlgorithm
from pathlib import Path
import tempfile
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import (
    DummyVecEnv,
    VecEnv,
    VecVideoRecorder,
)

In [None]:
msg = Printer()

In [None]:
def generate_replay(
    model: BaseAlgorithm,
    eval_env: VecEnv,
    video_length: int,
    is_deterministic: bool,
    local_path: Path,
):
    """
    Generate a replay video of the agent
    :param model: trained model
    :param eval_env: environment used to evaluate the agent
    :param video_length: length of the video (in timesteps)
    :param is_deterministic: use deterministic or stochastic actions
    :param local_path: path of the local repository
    """
    # This is another temporary directory for video outputs
    # SB3 created a -step-0-to-... meta files as well as other
    # artifacts which we don't want in the repo.
    with tempfile.TemporaryDirectory() as tmpdirname:
        # Step 1: Create the VecVideoRecorder
        env = VecVideoRecorder(
            eval_env,
            tmpdirname,
            record_video_trigger=lambda x: x == 0,
            video_length=video_length,
            name_prefix="",
        )

        obs = env.reset()
        lstm_states = None
        episode_starts = np.ones((env.num_envs,), dtype=bool)

        try:
            for _ in range(video_length):
                action, lstm_states = model.predict(
                    obs,
                    state=lstm_states,
                    episode_start=episode_starts,
                    deterministic=is_deterministic,
                )
                obs, _, episode_starts, _ = env.step(action)

            # Save the video
            env.close()

            # Convert the video with x264 codec
            inp = env.video_recorder.path
            out = local_path
            os.system(f"ffmpeg -y -i {inp} -vcodec h264 {out}".format(inp, out))
            print(f"Video saved to: {out}")
        except KeyboardInterrupt:
            pass
        except Exception as e:
            msg.fail(str(e))
            # Add a message for video
            msg.fail(
                "We are unable to generate a replay of your agent"
            )

In [None]:
import os

In [None]:
#create a directory to save the videos
video_dir = "/content/videos"
if not os.path.exists(video_dir):
    os.makedirs(video_dir)

In [None]:
env_id = "BipedalWalker-v3"
# Train and generate video at every 100000 steps, adjust the timesteps to your liking
for i in range(0, 2000000, 100000):
    model.learn(total_timesteps=100000)
    # Save the model
    model_name = "ppo-BipedalWalker-v3"
    model.save(model_name)
    video_name = f"replay_{i + 100000}.mp4"
    generate_replay(
        model=model,
        eval_env=DummyVecEnv([lambda: Monitor(gym.make(env_id, hardcore=True, render_mode="rgb_array"))]),
        video_length=1000,
        is_deterministic=True,
        local_path=os.path.join(video_dir, video_name)
    )

model_name = "ppo-BipedalWalker-v3"
model.save(model_name)

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.28e+03 |
|    ep_rew_mean     | -52.2    |
| time/              |          |
|    fps             | 2862     |
|    iterations      | 1        |
|    time_elapsed    | 11       |
|    total_timesteps | 32768    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.37e+03    |
|    ep_rew_mean          | -45.9       |
| time/                   |             |
|    fps                  | 2228        |
|    iterations           | 2           |
|    time_elapsed         | 29          |
|    total_timesteps      | 65536       |
| train/                  |             |
|    approx_kl            | 0.006669936 |
|    clip_fraction        | 0.068       |
|    clip_range           | 0.2         |
|    entropy_loss         | -5.7        |
|    explained_variance   | 0.677       |
|    learning_rate        | 0.



Moviepy - Done !
Moviepy - video ready /tmp/tmpifb0sakd/-step-0-to-step-1000.mp4
Video saved to: /content/videos/replay_100000.mp4
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.31e+03 |
|    ep_rew_mean     | -11.7    |
| time/              |          |
|    fps             | 2865     |
|    iterations      | 1        |
|    time_elapsed    | 11       |
|    total_timesteps | 32768    |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.44e+03     |
|    ep_rew_mean          | 2.71         |
| time/                   |              |
|    fps                  | 2240         |
|    iterations           | 2            |
|    time_elapsed         | 29           |
|    total_timesteps      | 65536        |
| train/                  |              |
|    approx_kl            | 0.0057155737 |
|    clip_fraction        | 0.0497       |
|    clip_ran



Moviepy - Done !
Moviepy - video ready /tmp/tmplgahr9ff/-step-0-to-step-1000.mp4
Video saved to: /content/videos/replay_200000.mp4
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.53e+03 |
|    ep_rew_mean     | 32.2     |
| time/              |          |
|    fps             | 2868     |
|    iterations      | 1        |
|    time_elapsed    | 11       |
|    total_timesteps | 32768    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.56e+03    |
|    ep_rew_mean          | 38.7        |
| time/                   |             |
|    fps                  | 2227        |
|    iterations           | 2           |
|    time_elapsed         | 29          |
|    total_timesteps      | 65536       |
| train/                  |             |
|    approx_kl            | 0.007565735 |
|    clip_fraction        | 0.0842      |
|    clip_range          



Moviepy - Done !
Moviepy - video ready /tmp/tmpecavbqgw/-step-0-to-step-1000.mp4
Video saved to: /content/videos/replay_300000.mp4
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.11e+03 |
|    ep_rew_mean     | 12.9     |
| time/              |          |
|    fps             | 2857     |
|    iterations      | 1        |
|    time_elapsed    | 11       |
|    total_timesteps | 32768    |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.3e+03    |
|    ep_rew_mean          | 35.9       |
| time/                   |            |
|    fps                  | 2212       |
|    iterations           | 2          |
|    time_elapsed         | 29         |
|    total_timesteps      | 65536      |
| train/                  |            |
|    approx_kl            | 0.00606855 |
|    clip_fraction        | 0.0504     |
|    clip_range           | 0.2      



Moviepy - Done !
Moviepy - video ready /tmp/tmpd1eigutz/-step-0-to-step-1000.mp4
Video saved to: /content/videos/replay_400000.mp4
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.27e+03 |
|    ep_rew_mean     | 25.3     |
| time/              |          |
|    fps             | 3045     |
|    iterations      | 1        |
|    time_elapsed    | 10       |
|    total_timesteps | 32768    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.34e+03    |
|    ep_rew_mean          | 38.5        |
| time/                   |             |
|    fps                  | 2233        |
|    iterations           | 2           |
|    time_elapsed         | 29          |
|    total_timesteps      | 65536       |
| train/                  |             |
|    approx_kl            | 0.007870065 |
|    clip_fraction        | 0.0967      |
|    clip_range          



Moviepy - Done !
Moviepy - video ready /tmp/tmpyz1ypq0g/-step-0-to-step-1000.mp4
Video saved to: /content/videos/replay_500000.mp4
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.43e+03 |
|    ep_rew_mean     | 69.6     |
| time/              |          |
|    fps             | 3145     |
|    iterations      | 1        |
|    time_elapsed    | 10       |
|    total_timesteps | 32768    |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.48e+03     |
|    ep_rew_mean          | 65.7         |
| time/                   |              |
|    fps                  | 2228         |
|    iterations           | 2            |
|    time_elapsed         | 29           |
|    total_timesteps      | 65536        |
| train/                  |              |
|    approx_kl            | 0.0069470056 |
|    clip_fraction        | 0.0767       |
|    clip_ran



Moviepy - Done !
Moviepy - video ready /tmp/tmphjdd62gm/-step-0-to-step-1000.mp4
Video saved to: /content/videos/replay_600000.mp4
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.38e+03 |
|    ep_rew_mean     | 64.5     |
| time/              |          |
|    fps             | 3166     |
|    iterations      | 1        |
|    time_elapsed    | 10       |
|    total_timesteps | 32768    |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.41e+03   |
|    ep_rew_mean          | 70.8       |
| time/                   |            |
|    fps                  | 2230       |
|    iterations           | 2          |
|    time_elapsed         | 29         |
|    total_timesteps      | 65536      |
| train/                  |            |
|    approx_kl            | 0.00858254 |
|    clip_fraction        | 0.0942     |
|    clip_range           | 0.2      



Moviepy - Done !
Moviepy - video ready /tmp/tmp4t7a1cfs/-step-0-to-step-1000.mp4
Video saved to: /content/videos/replay_700000.mp4
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.41e+03 |
|    ep_rew_mean     | 82.9     |
| time/              |          |
|    fps             | 3125     |
|    iterations      | 1        |
|    time_elapsed    | 10       |
|    total_timesteps | 32768    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.36e+03    |
|    ep_rew_mean          | 62.1        |
| time/                   |             |
|    fps                  | 2223        |
|    iterations           | 2           |
|    time_elapsed         | 29          |
|    total_timesteps      | 65536       |
| train/                  |             |
|    approx_kl            | 0.008203497 |
|    clip_fraction        | 0.111       |
|    clip_range          



Moviepy - Done !
Moviepy - video ready /tmp/tmpm2jyhsa_/-step-0-to-step-1000.mp4
Video saved to: /content/videos/replay_800000.mp4
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.47e+03 |
|    ep_rew_mean     | 78.2     |
| time/              |          |
|    fps             | 3209     |
|    iterations      | 1        |
|    time_elapsed    | 10       |
|    total_timesteps | 32768    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.51e+03    |
|    ep_rew_mean          | 90.5        |
| time/                   |             |
|    fps                  | 2248        |
|    iterations           | 2           |
|    time_elapsed         | 29          |
|    total_timesteps      | 65536       |
| train/                  |             |
|    approx_kl            | 0.008260492 |
|    clip_fraction        | 0.0882      |
|    clip_range          



Moviepy - Done !
Moviepy - video ready /tmp/tmps6obpi7o/-step-0-to-step-1000.mp4
Video saved to: /content/videos/replay_900000.mp4
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.37e+03 |
|    ep_rew_mean     | 92.3     |
| time/              |          |
|    fps             | 3082     |
|    iterations      | 1        |
|    time_elapsed    | 10       |
|    total_timesteps | 32768    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.29e+03    |
|    ep_rew_mean          | 75          |
| time/                   |             |
|    fps                  | 2241        |
|    iterations           | 2           |
|    time_elapsed         | 29          |
|    total_timesteps      | 65536       |
| train/                  |             |
|    approx_kl            | 0.010989245 |
|    clip_fraction        | 0.104       |
|    clip_range          



Moviepy - Done !
Moviepy - video ready /tmp/tmplhvtl4pu/-step-0-to-step-1000.mp4
Video saved to: /content/videos/replay_1000000.mp4
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.13e+03 |
|    ep_rew_mean     | 49       |
| time/              |          |
|    fps             | 3037     |
|    iterations      | 1        |
|    time_elapsed    | 10       |
|    total_timesteps | 32768    |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.29e+03     |
|    ep_rew_mean          | 74.9         |
| time/                   |              |
|    fps                  | 2198         |
|    iterations           | 2            |
|    time_elapsed         | 29           |
|    total_timesteps      | 65536        |
| train/                  |              |
|    approx_kl            | 0.0076318327 |
|    clip_fraction        | 0.0888       |
|    clip_ra



Moviepy - Done !
Moviepy - video ready /tmp/tmp49s5u35s/-step-0-to-step-1000.mp4
Video saved to: /content/videos/replay_1100000.mp4
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.48e+03 |
|    ep_rew_mean     | 110      |
| time/              |          |
|    fps             | 2986     |
|    iterations      | 1        |
|    time_elapsed    | 10       |
|    total_timesteps | 32768    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.3e+03     |
|    ep_rew_mean          | 80.8        |
| time/                   |             |
|    fps                  | 2181        |
|    iterations           | 2           |
|    time_elapsed         | 30          |
|    total_timesteps      | 65536       |
| train/                  |             |
|    approx_kl            | 0.008218529 |
|    clip_fraction        | 0.108       |
|    clip_range         



Moviepy - Done !
Moviepy - video ready /tmp/tmp5563b9z6/-step-0-to-step-1000.mp4
Video saved to: /content/videos/replay_1200000.mp4
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.43e+03 |
|    ep_rew_mean     | 110      |
| time/              |          |
|    fps             | 2938     |
|    iterations      | 1        |
|    time_elapsed    | 11       |
|    total_timesteps | 32768    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.35e+03    |
|    ep_rew_mean          | 91.7        |
| time/                   |             |
|    fps                  | 2199        |
|    iterations           | 2           |
|    time_elapsed         | 29          |
|    total_timesteps      | 65536       |
| train/                  |             |
|    approx_kl            | 0.009493979 |
|    clip_fraction        | 0.0915      |
|    clip_range         



Moviepy - Done !
Moviepy - video ready /tmp/tmpy0tbx2s6/-step-0-to-step-1000.mp4
Video saved to: /content/videos/replay_1300000.mp4
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.5e+03  |
|    ep_rew_mean     | 120      |
| time/              |          |
|    fps             | 2920     |
|    iterations      | 1        |
|    time_elapsed    | 11       |
|    total_timesteps | 32768    |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.55e+03     |
|    ep_rew_mean          | 131          |
| time/                   |              |
|    fps                  | 2151         |
|    iterations           | 2            |
|    time_elapsed         | 30           |
|    total_timesteps      | 65536        |
| train/                  |              |
|    approx_kl            | 0.0090184845 |
|    clip_fraction        | 0.109        |
|    clip_ra



Moviepy - Done !
Moviepy - video ready /tmp/tmpgumxjn4j/-step-0-to-step-1000.mp4
Video saved to: /content/videos/replay_1400000.mp4
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.52e+03 |
|    ep_rew_mean     | 130      |
| time/              |          |
|    fps             | 3223     |
|    iterations      | 1        |
|    time_elapsed    | 10       |
|    total_timesteps | 32768    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.48e+03    |
|    ep_rew_mean          | 122         |
| time/                   |             |
|    fps                  | 2232        |
|    iterations           | 2           |
|    time_elapsed         | 29          |
|    total_timesteps      | 65536       |
| train/                  |             |
|    approx_kl            | 0.008581879 |
|    clip_fraction        | 0.0942      |
|    clip_range         



Moviepy - Done !
Moviepy - video ready /tmp/tmp7v5ejjf6/-step-0-to-step-1000.mp4
Video saved to: /content/videos/replay_1500000.mp4
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.22e+03 |
|    ep_rew_mean     | 89.6     |
| time/              |          |
|    fps             | 3246     |
|    iterations      | 1        |
|    time_elapsed    | 10       |
|    total_timesteps | 32768    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.31e+03    |
|    ep_rew_mean          | 107         |
| time/                   |             |
|    fps                  | 2268        |
|    iterations           | 2           |
|    time_elapsed         | 28          |
|    total_timesteps      | 65536       |
| train/                  |             |
|    approx_kl            | 0.007769268 |
|    clip_fraction        | 0.0602      |
|    clip_range         



Moviepy - Done !
Moviepy - video ready /tmp/tmpxpdkf0w0/-step-0-to-step-1000.mp4
Video saved to: /content/videos/replay_1600000.mp4
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.33e+03 |
|    ep_rew_mean     | 117      |
| time/              |          |
|    fps             | 2942     |
|    iterations      | 1        |
|    time_elapsed    | 11       |
|    total_timesteps | 32768    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.36e+03    |
|    ep_rew_mean          | 127         |
| time/                   |             |
|    fps                  | 2197        |
|    iterations           | 2           |
|    time_elapsed         | 29          |
|    total_timesteps      | 65536       |
| train/                  |             |
|    approx_kl            | 0.006957966 |
|    clip_fraction        | 0.0507      |
|    clip_range         



Moviepy - Done !
Moviepy - video ready /tmp/tmp8obkii8y/-step-0-to-step-1000.mp4
Video saved to: /content/videos/replay_1700000.mp4
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.3e+03  |
|    ep_rew_mean     | 113      |
| time/              |          |
|    fps             | 2905     |
|    iterations      | 1        |
|    time_elapsed    | 11       |
|    total_timesteps | 32768    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.37e+03    |
|    ep_rew_mean          | 124         |
| time/                   |             |
|    fps                  | 2185        |
|    iterations           | 2           |
|    time_elapsed         | 29          |
|    total_timesteps      | 65536       |
| train/                  |             |
|    approx_kl            | 0.007960538 |
|    clip_fraction        | 0.0797      |
|    clip_range         



Moviepy - Done !
Moviepy - video ready /tmp/tmp0jxksptr/-step-0-to-step-1000.mp4
Video saved to: /content/videos/replay_1800000.mp4
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.37e+03 |
|    ep_rew_mean     | 138      |
| time/              |          |
|    fps             | 2879     |
|    iterations      | 1        |
|    time_elapsed    | 11       |
|    total_timesteps | 32768    |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.35e+03   |
|    ep_rew_mean          | 136        |
| time/                   |            |
|    fps                  | 2230       |
|    iterations           | 2          |
|    time_elapsed         | 29         |
|    total_timesteps      | 65536      |
| train/                  |            |
|    approx_kl            | 0.00915749 |
|    clip_fraction        | 0.0835     |
|    clip_range           | 0.2     



Moviepy - Done !
Moviepy - video ready /tmp/tmpszirdkql/-step-0-to-step-1000.mp4
Video saved to: /content/videos/replay_1900000.mp4
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.27e+03 |
|    ep_rew_mean     | 122      |
| time/              |          |
|    fps             | 2900     |
|    iterations      | 1        |
|    time_elapsed    | 11       |
|    total_timesteps | 32768    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.31e+03    |
|    ep_rew_mean          | 133         |
| time/                   |             |
|    fps                  | 2235        |
|    iterations           | 2           |
|    time_elapsed         | 29          |
|    total_timesteps      | 65536       |
| train/                  |             |
|    approx_kl            | 0.009003179 |
|    clip_fraction        | 0.0916      |
|    clip_range         



Moviepy - Done !
Moviepy - video ready /tmp/tmpb39ab5ij/-step-0-to-step-1000.mp4
Video saved to: /content/videos/replay_2000000.mp4


**Add All the Videos into One**

In [None]:
with open(os.path.join(video_dir, "filelist.txt"), "w") as f:
    for i in range(0, 2000000, 100000):
        video_name = f"replay_{i + 100000}.mp4"
        f.write(f"file '{os.path.join(video_dir, video_name)}'\n")
# Concatenate all the videos into one
os.system(f"ffmpeg -f concat -safe 0 -i {os.path.join(video_dir, 'filelist.txt')} -c copy {os.path.join(video_dir, 'replay_all.mp4')}")


0

# 5. Visualize Final Video



In [None]:
from IPython.display import HTML
from base64 import b64encode
mp4 = open('videos/replay_all.mp4','rb').read()
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
HTML("""
<video  width=600 controls>
  <source src="%s" type="video/mp4">
  </video>

""" % data_url)

# 6. Evaluate The Model

In [21]:
from stable_baselines3.common.evaluation import evaluate_policy

In [22]:
eval_env = Monitor(gym.make("BipedalWalker-v3"))
mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10, deterministic=True)
print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")

mean_reward=-22.96 +/- 51.24835979069776


# 7.Hugging Face

In [23]:
from huggingface_sb3 import load_from_hub, package_to_hub
from huggingface_hub import notebook_login

In [26]:
notebook_login()
!git config --global credential.helper store

  and should_run_async(code)


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [28]:
env_id = "BipedalWalker-v3"
model_name = "ppo-BipedalWalker-v3"
model_architecture = "PPO"

repo_id = "Ahmerraza12/ppo-BipedalWalker" # Change with your repo id

## Define the commit message
commit_message = "Upload PPO BipedalWalker-v3 trained agent"

# Create the evaluation env and set the render_mode="rgb_array"
eval_env = DummyVecEnv([lambda: gym.make(env_id, hardcore=True, render_mode="rgb_array")])

package_to_hub(model=model, # trained model
               model_name=model_name, # The name of our trained model
               model_architecture=model_architecture, # The model architecture we used: in our case PPO
               env_id=env_id, # Name of the environment
               eval_env=eval_env,
               repo_id=repo_id,
               commit_message=commit_message)


[38;5;4mℹ This function will save, evaluate, generate a video of your agent,
create a model card and push everything to the hub. It might take up to 1min.
This is a work in progress: if you encounter a bug, please open an issue.[0m




Saving video to /tmp/tmp2jil9c5w/-step-0-to-step-1000.mp4
Moviepy - Building video /tmp/tmp2jil9c5w/-step-0-to-step-1000.mp4.
Moviepy - Writing video /tmp/tmp2jil9c5w/-step-0-to-step-1000.mp4





Moviepy - Done !
Moviepy - video ready /tmp/tmp2jil9c5w/-step-0-to-step-1000.mp4
[38;5;4mℹ Pushing repo Ahmerraza12/ppo-BipedalWalker to the Hugging Face
Hub[0m


policy.optimizer.pth:   0%|          | 0.00/105k [00:00<?, ?B/s]

policy.pth:   0%|          | 0.00/52.3k [00:00<?, ?B/s]

Upload 4 LFS files:   0%|          | 0/4 [00:00<?, ?it/s]

pytorch_variables.pth:   0%|          | 0.00/864 [00:00<?, ?B/s]

ppo-BipedalWalker-v3.zip:   0%|          | 0.00/176k [00:00<?, ?B/s]

[38;5;4mℹ Your model is pushed to the Hub. You can view your model here:
https://huggingface.co/Ahmerraza12/ppo-BipedalWalker/tree/main/[0m


CommitInfo(commit_url='https://huggingface.co/Ahmerraza12/ppo-BipedalWalker/commit/b20639138bba9317ffa330aeb2814502d63fb671', commit_message='Upload PPO BipedalWalker-v3 trained agent', commit_description='', oid='b20639138bba9317ffa330aeb2814502d63fb671', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Ahmerraza12/ppo-BipedalWalker', endpoint='https://huggingface.co', repo_type='model', repo_id='Ahmerraza12/ppo-BipedalWalker'), pr_revision=None, pr_num=None)

# 8. Load Models from HuggingFace (Optional)


In [None]:

from huggingface_sb3 import load_from_hub
repo_id = "YoungMeng/ppo-BipedalWalker-test" # The repo_id
filename = "ppo-BipedalWalker-v3.zip" # The model filename.zip

checkpoint = load_from_hub(repo_id, filename)
model = PPO.load(checkpoint, print_system_info=True)

In [None]:
eval_env = Monitor(gym.make("BipedalWalker-v3", hardcore=True))
mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10, deterministic=True)
print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")