# Behavioural Planning for Autonomous Highway Driving

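This notebook runs agents from [rl-agents](https://github.com/eleurent/rl-agents) in a multi-agent `highway-env` scenario. It was originally written to plan a trajectory using the _Optimistic Planning for Deterministic systems_ ([OPD](https://hal.inria.fr/hal-00830182)) algorithm; in the cell below that planner configuration is kept as a commented-out alternative, and the active configuration uses a DQN agent instead.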

##  Setup

We first import useful modules for the environment, agent, and visualization.

In [None]:
# Environment
# Install highway-env from its GitHub repository, then import gym and the package.
# NOTE(review): `highway_env` is not referenced by name in this cell; presumably it
# is imported for its side effect of registering the highway environments with gym
# -- confirm.
!pip install git+https://github.com/eleurent/highway-env#egg=highway-env
import gym
import highway_env

# Agent
# Install rl-agents from GitHub; `agent_factory` builds an agent from a config dict.
!pip install git+https://github.com/eleurent/rl-agents#egg=rl-agents
from rl_agents.agents.common.factory import agent_factory

# Visualisation
import sys
from tqdm.notebook import trange
# Clone the highway-env repository so that its scripts/ directory is on disk.
# Re-running this cell makes `git clone` report that the destination already exists.
!git clone https://github.com/eleurent/highway-env.git
# NOTE(review): presumably these packages provide headless rendering and video
# encoding for the recording helpers below -- confirm.
!pip install gym pyvirtualdisplay
!apt-get install -y xvfb python-opengl ffmpeg
# Put the cloned scripts/ directory first on the import path so that `utils`
# resolves to ./highway-env/scripts/utils.py.
sys.path.insert(0, './highway-env/scripts/')
from utils import record_videos, show_videos, capture_intermediate_frames

import pprint

fatal: destination path 'highway-env' already exists and is not an empty directory.
Reading package lists... Done
Building dependency tree       
Reading state information... Done
python-opengl is already the newest version (3.1.0+dfsg-1).
ffmpeg is already the newest version (7:3.4.8-0ubuntu0.2).
xvfb is already the newest version (2:1.19.6-1ubuntu4.7).
0 upgraded, 0 newly installed, 0 to remove and 12 not upgraded.


## Run an episode

In [None]:
# Make environment: create the highway scenario and override its configuration.
env = gym.make("highway-v0")
env.configure(
    {
        # Number of vehicles controlled by the agent(s).
        "controlled_vehicles": 2,
        # One action per controlled vehicle.
        "action": {
            "type": "MultiAgentAction",
            "action_config": {
                "type": "DiscreteMetaAction",  # alternative: "ContinuousAction"
            },
        },
        # One observation per controlled vehicle.
        "observation": {
            "type": "MultiAgentObservation",
            "observation_config": {"type": "Kinematics"},
        },
        # Behaviour class of the non-controlled vehicles. Options noted by the author:
        #   highway_env.vehicle.behavior.IDMVehicle / LinearVehicle /
        #   AggressiveVehicle / DefensiveVehicle, or
        #   highway_env.vehicle.controller.MDPVehicle
        # (by default the ego vehicles are MDP).
        "other_vehicles_type": "highway_env.vehicle.behavior.IDMVehicle",
        "screen_height": 150,
        "screen_width": 300,
        "vehicles_count": 10,
        "lanes_count": 5,
        # "absolute": True,  # works well only with continuous action spaces;
        #                    # otherwise it throws errors in graphics.py
        # None picks a random lane (lanes are numbered from top to bottom).
        "initial_lane_id": 2,
        "duration": 50,
        # "collision_penalty": -1,  # tuning this too much creates a conservative policy
    }
)

# Wrap the environment for video recording, start the first episode, and hook up
# intermediate-frame capture before dumping the effective configuration.
env = record_videos(env)
obs = env.reset()
done = False
capture_intermediate_frames(env)
pprint.pprint(env.config)

# Make agent
#
# Alternative configuration, kept for reference (deterministic tree-search planner):
# agent_config = {
#     "__class__": "<class 'rl_agents.agents.tree_search.deterministic.DeterministicPlannerAgent'>",
#     "env_preprocessors": [{"method": "simplify"}],
#     "budget": 50,
#     "gamma": 0.7,
# }
#
# Author's note: instead of the planner, a stable-baselines model could be plugged
# in as the "model" and trained. The active configuration below selects a DQN agent.
agent_config = {
    "__class__": "<class 'rl_agents.agents.deep_q_network.pytorch.DQNAgent'>",
    "model": {
        # Other model types mentioned by the author: CNN, EgoAttentionModel, ...
        "type": "MultiLayerPerceptron",
        "layers": [256, 256],
    },
    # "double": False,
    # Discount factor (0.8 was a previously tried value).
    "gamma": 0.85,
    # If n_steps is greater than one, the batch will be composed of lists of
    # successive transitions.
    "n_steps": 1,
    # Number of transitions sampled per batch.
    "batch_size": 32,
    # Replay-memory size (author's note: the default is 10000, the file had 15000).
    "memory_capacity": 15000,
    "target_update": 50,
    "exploration": {
        "method": "EpsilonGreedy",
        "tau": 6000,
        "temperature": 1.0,
        "final_temperature": 0.05,
    },
    "loss_function": "l2",
}

# Build the agent described by the configuration for this environment.
agent = agent_factory(env, agent_config)

# Run episode
# Step the environment once per iteration for `duration` iterations (read from the
# environment configuration), printing the chosen action and rendering every step.
for step in trange(env.unwrapped.config["duration"], desc="Running..."):
    action = agent.act(obs)
    print(action)
    obs, reward, done, info = env.step(action)
    env.render()
    if done:
        # Start a new episode and keep going. The observation returned by reset()
        # must replace `obs`; otherwise the next agent.act() call would be fed the
        # final observation of the episode that just ended.
        obs, done = env.reset(), False

# Release the environment, then display the recorded videos.
env.close()
show_videos()

{'action': {'action_config': {'type': 'DiscreteMetaAction'},
            'type': 'MultiAgentAction'},
 'centering_position': [0.3, 0.5],
 'collision_reward': -1,
 'controlled_vehicles': 2,
 'duration': 50,
 'ego_spacing': 2,
 'initial_lane_id': 2,
 'lanes_count': 5,
 'manual_control': False,
 'observation': {'observation_config': {'type': 'Kinematics'},
                 'type': 'MultiAgentObservation'},
 'offroad_terminal': False,
 'offscreen_rendering': False,
 'other_vehicles_type': 'highway_env.vehicle.behavior.IDMVehicle',
 'policy_frequency': 1,
 'real_time_rendering': False,
 'render_agent': True,
 'reward_speed_range': [20, 30],
 'scaling': 5.5,
 'screen_height': 150,
 'screen_width': 300,
 'show_trajectories': False,
 'simulation_frequency': 15,
 'vehicles_count': 10,
 'vehicles_density': 1}


Preferred device cuda:best unavailable, switching to default cpu


HBox(children=(FloatProgress(value=0.0, description='Running...', max=50.0, style=ProgressStyle(description_wi…

(2, 1)
(2, 1)
(3, 1)
(4, 0)
(1, 4)
(3, 2)
(0, 1)
(2, 0)
(3, 4)
(4, 2)
(2, 3)
(3, 3)
(2, 1)
(4, 0)
(1, 3)
(4, 3)
(4, 1)
(4, 3)
(2, 1)
(0, 1)
(0, 0)
(2, 0)
(1, 1)
(1, 0)
(1, 0)
(3, 2)
(2, 0)
(0, 3)
(0, 4)
(2, 0)
(2, 3)
(2, 1)
(3, 0)
(0, 1)
(3, 1)
(3, 1)
(2, 2)
(2, 4)
(0, 3)
(3, 3)
(0, 3)
(1, 4)
(3, 0)
(2, 1)
(4, 2)
(1, 2)
(0, 2)
(0, 4)
(0, 2)
(2, 4)

