# Challenge 3 : Bipedal Walker

## Imports :

In [1]:
import os

# Reinforcement Learning
import gym

# Distributed Learning
import ray
from ray.rllib.agents.ppo import PPOTrainer

# Display
from gym.wrappers.monitoring.video_recorder import VideoRecorder
from IPython.display import Video

# Optional
import warnings
warnings.filterwarnings('ignore')

## Context :

In [2]:
env = gym.make("BipedalWalker-v3", hardcore=False)

In [3]:
env.action_space

Box(-1.0, 1.0, (4,), float32)

In [4]:
env.reset()

array([ 2.7458975e-03,  1.0137333e-05, -1.3218448e-03, -1.6000068e-02,
        9.2476480e-02,  3.0685817e-03,  8.5980362e-01, -1.0371268e-03,
        1.0000000e+00,  3.2795932e-02,  3.0684697e-03,  8.5354102e-01,
       -2.0879239e-03,  1.0000000e+00,  4.4081339e-01,  4.4581950e-01,
        4.6142212e-01,  4.8954949e-01,  5.3410202e-01,  6.0246021e-01,
        7.0914787e-01,  8.8593054e-01,  1.0000000e+00,  1.0000000e+00],
      dtype=float32)

## Random Action :

In [5]:
def check_video_folder_sanity(path, video_name):
    video_path = path + video_name
    os.makedirs(path, exist_ok=True)
    if os.path.exists(video_path + ".mp4"):
        os.remove(video_path + ".mp4")
    if os.path.exists(video_path + ".meta.json"):
        os.remove(video_path + ".meta.json")
    return video_path

In [6]:
video_name = "before_training"
path = "videos/bipedal_walker/"
random_seed = 42

video_path = check_video_folder_sanity(path, video_name)
        
env = gym.make("BipedalWalker-v3", hardcore=False)
env.action_space.seed(random_seed)
video = VideoRecorder(env, video_path + ".mp4", enabled=video_name is not None)

env.reset()
for i in range(200):
    env.render()
    video.capture_frame()
    observation, reward, done, info, _ = env.step(env.action_space.sample())
    #print("step", i, observation, reward, done, info)
video.close()
env.close()

In [7]:
Video(video_path + ".mp4")

ValueError: To embed videos, you must pass embed=True (this may make your notebook files huge)
Consider passing Video(url='...')

## Train an agent :

In [None]:
os.cpu_count()

In [None]:
config = {
    "env": "BipedalWalker-v3",
    # “tf” to use tensorflow, "torch" to use pytorch
    "framework": "tf",
    "model": {
        "fcnet_hiddens": [32],
        "fcnet_activation": "linear",
    },
}
stop = {"episode_reward_mean": 300}
ray.shutdown()
ray.init(num_cpus=4, include_dashboard=False,
         ignore_reinit_error=True, log_to_driver=False)
# Start Training 
analysis = ray.tune.run("PPO", config=config,
                        stop=stop, checkpoint_at_end=True)

In [None]:
# restore a trainer from the last checkpoint
trial = analysis.get_best_logdir("episode_reward_mean", "max")
checkpoint = analysis.get_best_checkpoint(
  trial,
  "training_iteration",
  "max",
)
trainer = PPOTrainer(config=config)
trainer.restore(checkpoint)

## Evaluate the Agent :

In [None]:
video_name = "after_training"
video_path = check_video_folder_sanity(path, video_name)

after_video = VideoRecorder(env, video_path + ".mp4", enabled=video_name is not None)
observation = env.reset()
done = False
while not done:
    env.render()
    after_video.capture_frame()
    action = trainer.compute_single_action(observation)
    observation, reward, done, info = env.step(action)
after_video.close()
env.close()

In [None]:
Video(video_path + ".mp4")