# Parking with Hindsight Experience Replay

## Warming up
We start with a few useful installs and imports:

In [1]:
# Install environment and agent
!pip install highway-env
# TODO: we use the bleeding edge version because the current stable version does not support the latest gym>=0.21 versions. Revert back to stable at the next SB3 release.
!pip install git+https://github.com/DLR-RM/stable-baselines3
!pip install sb3-contrib

# Environment
import gym
import highway_env

# Agent
from stable_baselines3 import HerReplayBuffer, SAC
from sb3_contrib import TQC

Collecting highway-env
  Downloading highway_env-1.5-py3-none-any.whl (103 kB)
[K     |████████████████████████████████| 103 kB 6.8 MB/s 
[?25hCollecting pygame
  Downloading pygame-2.1.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (21.8 MB)
[K     |████████████████████████████████| 21.8 MB 1.5 MB/s 
Installing collected packages: pygame, highway-env
Successfully installed highway-env-1.5 pygame-2.1.2
Collecting git+https://github.com/DLR-RM/stable-baselines3
  Cloning https://github.com/DLR-RM/stable-baselines3 to /tmp/pip-req-build-o66iuj8j
  Running command git clone -q https://github.com/DLR-RM/stable-baselines3 /tmp/pip-req-build-o66iuj8j
Collecting gym==0.21
  Downloading gym-0.21.0.tar.gz (1.5 MB)
[K     |████████████████████████████████| 1.5 MB 5.2 MB/s 
Building wheels for collected packages: stable-baselines3, gym
  Building wheel for stable-baselines3 (setup.py) ... [?25l[?25hdone
  Created wheel for stable-baselines3: filename=stable_baselines3-1.4.1a3-p

## Training

In [2]:
# Training environment: the goal-conditioned parking task.
env = gym.make("parking-v0")

# Settings forwarded to the Hindsight Experience Replay buffer.
her_kwargs = {
    "n_sampled_goal": 4,
    "goal_selection_strategy": "future",
    "online_sampling": True,
    "max_episode_length": 100,
}

# You can replace TQC with SAC agent
model = TQC(
    "MultiInputPolicy",
    env,
    learning_rate=1e-3,
    buffer_size=1_000_000,
    batch_size=1024,
    gamma=0.95,
    tau=0.05,
    policy_kwargs={"net_arch": [512, 512, 512]},
    replay_buffer_class=HerReplayBuffer,
    replay_buffer_kwargs=her_kwargs,
    verbose=1,
)

# Train for 50,000 environment steps; the trained model is the cell's output.
model.learn(50_000)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
|    success_rate    | 0.76     |
| time/              |          |
|    episodes        | 320      |
|    fps             | 17       |
|    time_elapsed    | 1301     |
|    total_timesteps | 22869    |
| train/             |          |
|    actor_loss      | 1.84     |
|    critic_loss     | 0.0023   |
|    ent_coef        | 0.00495  |
|    ent_coef_loss   | 0.702    |
|    learning_rate   | 0.001    |
|    n_updates       | 22768    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 43       |
|    ep_rew_mean     | -11.9    |
|    success_rate    | 0.78     |
| time/              |          |
|    episodes        | 324      |
|    fps             | 17       |
|    time_elapsed    | 1306     |
|    total_timesteps | 22960    |
| train/             |          |
|    actor_loss      | 1.89     |
|    critic_loss     | 0.00406  |
|    ent_coef    

<sb3_contrib.tqc.tqc.TQC at 0x7fa514ce71d0>

## Visualize a few episodes

We first define a simple helper function for visualization of episodes:

In [3]:
!pip install gym pyvirtualdisplay
!apt-get install -y xvfb python-opengl ffmpeg

from IPython import display as ipythondisplay
from pyvirtualdisplay import Display
from gym.wrappers import RecordVideo
from pathlib import Path
import base64
from tqdm.notebook import trange

# Start a 1400x900 pyvirtualdisplay screen with visible=0
# (presumably an off-screen Xvfb display for headless rendering — confirm).
display = Display(size=(1400, 900), visible=0)
display.start()

def show_video(video_folder="videos"):
    """Embed every ``.mp4`` file found in ``video_folder`` in the cell output.

    Each file is read, base64-encoded and inlined in an autoplaying, looping
    ``<video>`` tag; all tags are joined with ``<br>`` and rendered as a
    single HTML output.

    Args:
        video_folder: Directory searched (non-recursively) for ``*.mp4``
            files. Defaults to ``"videos"``, the folder the ``RecordVideo``
            wrapper in this notebook records to.
    """
    html = []
    # Sort so the clips appear in a stable, name-based order.
    for mp4 in sorted(Path(video_folder).glob("*.mp4")):
        video_b64 = base64.b64encode(mp4.read_bytes())
        html.append('''<video alt="{}" autoplay 
                      loop controls style="height: 400px;">
                      <source src="data:video/mp4;base64,{}" type="video/mp4" />
                 </video>'''.format(mp4, video_b64.decode('ascii')))
    ipythondisplay.display(ipythondisplay.HTML(data="<br>".join(html)))


Collecting pyvirtualdisplay
  Downloading PyVirtualDisplay-3.0-py3-none-any.whl (15 kB)
Installing collected packages: pyvirtualdisplay
Successfully installed pyvirtualdisplay-3.0
Reading package lists... Done
Building dependency tree       
Reading state information... Done
ffmpeg is already the newest version (7:3.4.8-0ubuntu0.2).
Suggested packages:
  libgle3
The following NEW packages will be installed:
  python-opengl xvfb
0 upgraded, 2 newly installed, 0 to remove and 39 not upgraded.
Need to get 1,280 kB of archives.
After this operation, 7,687 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu bionic/universe amd64 python-opengl all 3.1.0+dfsg-1 [496 kB]
Get:2 http://archive.ubuntu.com/ubuntu bionic-updates/universe amd64 xvfb amd64 2:1.19.6-1ubuntu4.10 [784 kB]
Fetched 1,280 kB in 1s (1,189 kB/s)
Selecting previously unselected package python-opengl.
(Reading database ... 155335 files and directories currently installed.)
Preparing to unpack .../p


Then test the trained policy on a few episodes, recording a video of each:

In [6]:
# Fresh evaluation environment; the trigger returns True for every episode,
# so each one is recorded under ./videos.
env = gym.make("parking-v0")
env = RecordVideo(env, video_folder='./videos', episode_trigger=lambda episode_id: True)
# Hand the recording wrapper to the underlying (unwrapped) environment.
env.unwrapped.set_record_video_wrapper(env)

for episode in trange(3, desc="Test episodes"):
    obs = env.reset()
    done = False
    while not done:
        # Query the trained policy without exploration noise.
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, done, info = env.step(action)
        # Dump the transition: each label is followed by a blank line and its value.
        print("observation\n", obs, "reward\n", reward, "done\n", done, sep="\n")

env.close()
show_video()

  f"Overwriting existing videos at {self.video_folder} folder (try specifying a different `video_folder` for the `RecordVideo` wrapper if this is not desired)"


Test episodes:   0%|          | 0/3 [00:00<?, ?it/s]

observation

{'observation': array([-1.13092201e-04,  6.45032020e-04,  1.01553848e-02,  1.96198766e-01,
        5.16914978e-02,  9.98663101e-01]), 'achieved_goal': array([-1.13092201e-04,  6.45032020e-04,  1.01553848e-02,  1.96198766e-01,
        5.16914978e-02,  9.98663101e-01]), 'desired_goal': array([-2.000000e-02,  1.400000e-01,  0.000000e+00,  0.000000e+00,
        6.123234e-17,  1.000000e+00])}
reward

-0.25050741731781095
done

False
observation

{'observation': array([-8.77711601e-04,  3.15047870e-03,  6.72090058e-03,  3.92903284e-01,
        1.71032358e-02,  9.99853729e-01]), 'achieved_goal': array([-8.77711601e-04,  3.15047870e-03,  6.72090058e-03,  3.92903284e-01,
        1.71032358e-02,  9.99853729e-01]), 'desired_goal': array([-2.000000e-02,  1.400000e-01,  0.000000e+00,  0.000000e+00,
        6.123234e-17,  1.000000e+00])}
reward

-0.24601246904268856
done

False
observation

{'observation': array([-0.00262271,  0.00738995, -0.03049525,  0.58886151, -0.0517175 ,
        0