# Parking with Hindsight Experience Replay

## Warming up
We start with a few useful installs and imports:

In [0]:
# Install environment and visualization dependencies.
# Each command redirects stdout and stderr to /dev/null, so a failed install
# produces no visible output in this cell.
# highway-env is installed directly from its GitHub repository.
!pip install git+https://github.com/eleurent/highway-env#egg=highway-env  > /dev/null 2>&1
!pip install gym pyvirtualdisplay > /dev/null 2>&1
# System packages; presumably needed for off-screen rendering (xvfb, python-opengl)
# and video encoding (ffmpeg) — TODO confirm.
!apt-get install -y xvfb python-opengl ffmpeg > /dev/null 2>&1

# Environment
import gym
import highway_env

# Agent
# NOTE(review): `baselines` is not among the packages installed by the commands
# above; presumably it is expected to be preinstalled in the runtime — confirm,
# or add an explicit install step.
import baselines.run as run

## Training

In [None]:
# Train a HER agent on the parking environment through the `baselines.run`
# entry point and keep the returned object as `model` for the test cell below.
#
# The options are passed as an ordered list of command-line style strings.
# (A set literal was used previously; sets are unordered, so the argument order
# handed to the parser could differ from run to run.)
# NOTE(review): assumes run.main accepts an argv-like sequence of "--key=value"
# strings — confirm against the installed baselines version.
model = run.main([
    "--env=parking-ActionRepeat-v0",
    "--alg=her",
    "--num_timesteps=1e4",
    "--network=default",
    "--num_env=0",
    "--save_path=models/latest",
])


## Visualize a few episodes

We first define a simple helper function for visualization of episodes:

In [0]:
# Visualization
from IPython import display as ipythondisplay
from pyvirtualdisplay import Display
from gym.wrappers import Monitor
from pathlib import Path
import base64
from tqdm import tnrange

# Create and start a pyvirtualdisplay Display with visible=0 and a 1400x900 size.
# NOTE(review): presumably this provides an off-screen display so the
# environment can be rendered for video recording — confirm.
# NOTE(review): the name `display` may shadow IPython's built-in `display`
# helper in the notebook namespace; rich output below goes through
# `ipythondisplay.display` instead.
display = Display(visible=0, size=(1400, 900))
display.start()

def show_video(directory="video"):
    """Display every ``.mp4`` file found in ``directory`` inline in the notebook.

    Each file is read in full, base64-encoded, and embedded in an HTML
    ``<video>`` tag through a ``data:`` URI. The tags are joined with ``<br>``
    and rendered as a single HTML output. If no ``.mp4`` file is found, an
    empty HTML output is displayed.

    Args:
        directory: Folder searched (non-recursively) for ``*.mp4`` files.
            Defaults to ``"video"``, the folder this function always used
            before the parameter was added.
    """
    html = []
    # Path.glob yields entries in arbitrary order; sort by path so the videos
    # are always shown in the same, stable order.
    for mp4 in sorted(Path(directory).glob("*.mp4")):
        video_b64 = base64.b64encode(mp4.read_bytes())
        html.append('''<video alt="{}" autoplay 
                      loop controls style="height: 400px;">
                      <source src="data:video/mp4;base64,{}" type="video/mp4" />
                 </video>'''.format(mp4, video_b64.decode('ascii')))
    ipythondisplay.display(ipythondisplay.HTML(data="<br>".join(html)))



We then run the trained policy for a few test episodes, recording each one, and display the resulting videos:

In [0]:
# Roll out the trained policy for three episodes on the parking environment.
# The Monitor wrapper writes its output to ./video; `video_callable` returns
# True for every episode index, so every episode is recorded.
env = gym.make("parking-ActionRepeat-v0")
env = Monitor(env, './video', force=True, video_callable=lambda episode: True)
try:
    for episode in tnrange(3, desc="Test episodes"):
        obs, done = env.reset(), False
        while not done:
            # NOTE(review): assumes model.step(obs) returns a single tensor-like
            # action exposing .numpy() — confirm against the installed
            # baselines version.
            action = model.step(obs)
            obs, reward, done, info = env.step(action.numpy())
finally:
    # Close the environment even if the rollout raises or is interrupted, so
    # the Monitor wrapper is not left open with an unfinished recording.
    env.close()
show_video()