In [None]:
!apt install swig cmake

In [None]:
!pip install -r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt

In [None]:
!apt-get update
!apt-get install -y python3-opengl
!apt install ffmpeg
!apt install xvfb
!pip3 install pyvirtualdisplay

In [1]:
# Virtual display: visible=0 keeps it off-screen; (1400, 900) is the screen size requested.
from pyvirtualdisplay import Display

virtual_display = Display(size=(1400, 900), visible=0)
virtual_display.start()

<pyvirtualdisplay.display.Display at 0x7f91242192e0>

In [2]:
import gymnasium as gym

from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv

In [3]:
# Build a single LunarLander environment just to look at its spaces.
env = gym.make("LunarLander-v2")
env.reset()

observation_space, action_space = env.observation_space, env.action_space

print("_____OBSERVATION SPACE_____ \n")
print("Observation Space Shape", observation_space.shape)
print("Sample observation", observation_space.sample())

print("\n _____ACTION SPACE_____ \n")
# The value printed under this label is `action_space.n`.
print("Action Space Shape", action_space.n)
print("Action Space Sample", action_space.sample())

_____OBSERVATION SPACE_____ 

Observation Space Shape (8,)
Sample observation [-0.04776433  0.37539226 -0.66619956 -3.4528992   2.72239     2.4114401
  0.50794303  0.7482365 ]

 _____ACTION SPACE_____ 

Action Space Shape 4
Action Space Sample 1


In [11]:
# Vectorized training environment: 16 copies of LunarLander.
env_id = "LunarLander-v2"
env = make_vec_env(env_id=env_id, n_envs=16)

In [12]:
# PPO agent with an MLP policy, bound to the vectorized environment `env`.
model = PPO(
    "MlpPolicy",
    env,
    n_steps=1024,
    batch_size=128,
    n_epochs=4,
    gamma=0.999,
    gae_lambda=0.98,
    ent_coef=0.01,
    verbose=1,  # print the training log tables
)

Using cuda device


In [13]:
# Train for one million timesteps, then save the model under a fixed name.
model.learn(total_timesteps=int(1e6))
model_name = "1-lunar-lander"
model.save(path=model_name)

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 91       |
|    ep_rew_mean     | -178     |
| time/              |          |
|    fps             | 1608     |
|    iterations      | 1        |
|    time_elapsed    | 10       |
|    total_timesteps | 16384    |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 91.5         |
|    ep_rew_mean          | -172         |
| time/                   |              |
|    fps                  | 1236         |
|    iterations           | 2            |
|    time_elapsed         | 26           |
|    total_timesteps      | 32768        |
| train/                  |              |
|    approx_kl            | 0.0047857948 |
|    clip_fraction        | 0.031        |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.38        |
|    explained_variance   | 0.000596     |
|    learning_r

In [14]:
# Evaluate the trained agent over 10 deterministic episodes on a fresh,
# Monitor-wrapped environment (separate from the vectorized training env).
eval_env = Monitor(gym.make("LunarLander-v2"))
mean_reward, std_reward = evaluate_policy(
    model,
    eval_env,
    n_eval_episodes=10,
    deterministic=True,
)
# Both statistics are rounded to two decimals so the report reads consistently.
print(f"mean_reward={mean_reward:.2f} +/- {std_reward:.2f}")

mean_reward=250.25 +/- 20.26048002277981


In [15]:
import os
import subprocess
import tempfile
from pathlib import Path

import numpy as np
from stable_baselines3.common.vec_env import (
    DummyVecEnv,
    VecVideoRecorder
)


def generate_replay(
    model,
    eval_env,
    video_length: int = 1000,
    is_deterministic: bool = True,
    local_path: Path = './',
    file_name: str = 'replay.mp4'
):
    """
    Generate a replay video of the agent and convert it to H.264 with ffmpeg.

    The raw recording is written to a temporary directory and then re-encoded
    into ``os.path.join(local_path, file_name)``. Errors raised while stepping,
    closing the recorder or converting (including a non-zero ffmpeg exit code)
    are printed instead of re-raised; a KeyboardInterrupt stops the recording
    silently. An exception from the initial ``reset()`` still propagates.

    :param model: trained model; its ``predict`` is called once per timestep
    :param eval_env: environment used to evaluate the agent; it is wrapped in a
        ``VecVideoRecorder`` and that wrapper is closed before returning
    :param video_length: length of the video (in timesteps)
    :param is_deterministic: use deterministic or stochastic actions
    :param local_path: directory the converted video is written to (a plain
        string works as well as a ``Path``)
    :param file_name: name of the converted video file inside ``local_path``
    """
    # This is another temporary directory for video outputs
    # SB3 created a -step-0-to-... meta files as well as other
    # artifacts which we don't want in the repo.
    with tempfile.TemporaryDirectory() as tmpdirname:
        # Step 1: Create the VecVideoRecorder (recording is triggered at step 0 only)
        env = VecVideoRecorder(
            eval_env,
            tmpdirname,
            record_video_trigger=lambda x: x == 0,
            video_length=video_length,
            name_prefix="",
        )

        obs = env.reset()
        lstm_states = None
        episode_starts = np.ones((env.num_envs,), dtype=bool)

        try:
            # Step 2: Roll the policy forward for `video_length` timesteps
            try:
                for _ in range(video_length):
                    action, lstm_states = model.predict(
                        obs,
                        state=lstm_states,
                        episode_start=episode_starts,
                        deterministic=is_deterministic,
                    )
                    obs, _, episode_starts, _ = env.step(action)
            finally:
                # Save the video. Closing in `finally` releases the recorder
                # even when stepping fails or the user interrupts the run.
                env.close()

            # Step 3: Convert the video with x264 codec.
            # An argument list (no shell) keeps paths containing spaces or
            # shell metacharacters intact; check=True surfaces ffmpeg failures.
            inp = env.video_recorder.path
            out = os.path.join(local_path, file_name)
            subprocess.run(
                ["ffmpeg", "-y", "-i", str(inp), "-vcodec", "h264", str(out)],
                check=True,
            )

        except KeyboardInterrupt:
            # Deliberate best-effort: an interrupted recording is simply dropped.
            pass
        except Exception as e:
            print(str(e))

In [16]:
def _make_replay_env():
    """Create one environment that renders frames as RGB arrays."""
    return gym.make(env_id, render_mode="rgb_array")


# Record the trained agent into "<model_name>.mp4".
replay_env = DummyVecEnv([_make_replay_env])
generate_replay(model, replay_env, file_name=f"{model_name}.mp4")

Saving video to /tmp/tmpguegdmm0/-step-0-to-step-1000.mp4
Moviepy - Building video /tmp/tmpguegdmm0/-step-0-to-step-1000.mp4.
Moviepy - Writing video /tmp/tmpguegdmm0/-step-0-to-step-1000.mp4



ffmpeg version 4.2.7-0ubuntu0.1 Copyright (c) 2000-2022 the FFmpeg developers
  built with gcc 9 (Ubuntu 9.4.0-1ubuntu1~20.04.1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-avresample --disable-filter=resample --enable-avisynth --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librsvg --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --e

Moviepy - Done !
Moviepy - video ready /tmp/tmpguegdmm0/-step-0-to-step-1000.mp4


frame= 1001 fps=750 q=-1.0 Lsize=     196kB time=00:00:19.96 bitrate=  80.4kbits/s speed=14.9x    
video:183kB audio:0kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 6.765255%
[libx264 @ 0x5575ed6bfb80] frame I:5     Avg QP:11.84  size:  1886
[libx264 @ 0x5575ed6bfb80] frame P:262   Avg QP:23.42  size:   244
[libx264 @ 0x5575ed6bfb80] frame B:734   Avg QP:26.08  size:   155
[libx264 @ 0x5575ed6bfb80] consecutive B-frames:  1.3%  1.2%  4.8% 92.7%
[libx264 @ 0x5575ed6bfb80] mb I  I16..4: 76.9% 16.8%  6.3%
[libx264 @ 0x5575ed6bfb80] mb P  I16..4:  0.3%  0.5%  0.2%  P16..4:  2.0%  0.5%  0.2%  0.0%  0.0%    skip:96.4%
[libx264 @ 0x5575ed6bfb80] mb B  I16..4:  0.0%  0.0%  0.1%  B16..8:  3.4%  0.3%  0.0%  direct: 0.1%  skip:96.0%  L0:54.5% L1:44.6% BI: 0.8%
[libx264 @ 0x5575ed6bfb80] 8x8 transform intra:28.3% inter:17.0%
[libx264 @ 0x5575ed6bfb80] coded y,uvDC,uvAC intra: 8.3% 13.6% 12.5% inter: 0.2% 0.4% 0.3%
[libx264 @ 0x5575ed6bfb80] i16 v,h,dc,p: 83% 12%  5%  0%
[lib

In [17]:
%%html
<!-- Plays the replay file written by the previous cell -->
<video controls autoplay>
  <source src="./1-lunar-lander.mp4" type="video/mp4">
</video>