##  Reinforcement Learning project notebook

#### We will import all the libraries that are needed for the project. 

In [1]:
# Import Dependencies
#!pip install stable_baselines    
#!pip install matplotlib

In [2]:
from stable_baselines import HER, SAC, PPO2, DQN
import highway_env
import numpy as np
import gym
import matplotlib.pyplot as plt
import pprint
from statistics import mean

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



  "stable-baselines is in maintenance mode, please use [Stable-Baselines3 (SB3)](https://github.com/DLR-RM/stable-baselines3) for an up-to-date version. You can find a [migration guide](https://stable-baselines3.readthedocs.io/en/master/guide/migration.html) in SB3 documentation."


## Roundabout setup

In [3]:
# Setting up the environment for roundabout
env = gym.make('roundabout-v0')

#### Using the env.config to look at the configurations available and the default values for this configuration.



In [4]:
pprint.pprint(env.config)

{'action': {'target_speeds': [0, 8, 16], 'type': 'DiscreteMetaAction'},
 'centering_position': [0.5, 0.6],
 'collision_reward': -1,
 'duration': 11,
 'high_speed_reward': 0.2,
 'incoming_vehicle_destination': None,
 'lane_change_reward': -0.05,
 'manual_control': False,
 'observation': {'absolute': True,
                 'features_range': {'vx': [-15, 15],
                                    'vy': [-15, 15],
                                    'x': [-100, 100],
                                    'y': [-100, 100]},
                 'type': 'Kinematics'},
 'offscreen_rendering': False,
 'other_vehicles_type': 'highway_env.vehicle.behavior.IDMVehicle',
 'policy_frequency': 1,
 'real_time_rendering': False,
 'render_agent': True,
 'right_lane_reward': 0,
 'scaling': 5.5,
 'screen_height': 600,
 'screen_width': 600,
 'show_trajectories': False,
 'simulation_frequency': 15}


#### I am going to change some of the default values of the rewards. I believe that with the default values in the configuration led to rewards that were too good to be true

#### By changing the default values I add some noise  and more difficulty and test the agent. I have assigned a very high negative reward when the car collides and a good enough reward if the car drives on high speed as compared to the default values in config

In [5]:
env.config["collision_reward"] = -8
env.config['high_speed_reward'] = 1
env.reset()


array([[ 1.0000000e+00,  2.0000000e-02,  4.4999999e-01,  0.0000000e+00,
        -5.3333336e-01],
       [ 1.0000000e+00, -1.7955920e-02,  1.9919233e-01,  1.0000000e+00,
         9.9252149e-02],
       [ 1.0000000e+00, -1.8256707e-01,  1.5578596e-01,  6.7975676e-01,
         7.9661351e-01],
       [ 1.0000000e+00,  1.0000000e+00, -2.0000000e-02, -1.0000000e+00,
         2.2204460e-16],
       [ 1.0000000e+00, -1.6724715e-01, -1.0967404e-01, -4.9941036e-01,
         7.6157457e-01]], dtype=float32)

## GREEDY WITH ROUNDABOUT

### Performance of agents taking random actions

Reward function: We will look at the reward function to evaluate the performance of the agent taking greedy actions. I take the reward from each loop and then find the average of all the rewards.

Number of crashes: The second method to evaluate the performance of the agent taking the greedy function is to count the number of crashes.

In [6]:
# Writing a function for calculating the average of the rewards.

def Average(l): 
    avg = mean(l) 
    return avg


crashcount = 0
for i in range(100):
    done = False
    env.reset()
    total_reward = []
    while not done:
        env.render()
        #render the next actions, rewards and states.
        action = env.action_space.sample()
        next_state, reward, done, info = env.step(action)
        if info['crashed']==True : crashcount +=1
        total_reward.append(reward)
        print(info)
        print(done)
        
        
rewards_average = Average(total_reward) 
  
print("The average of rewards: ",rewards_average)
print("Number of Crashes: ", crashcount)
env.close()

{'speed': 1.3671058770091211, 'crashed': False, 'action': 4}
False
{'speed': 0.23362230986910987, 'crashed': False, 'action': 4}
False
{'speed': 0.03992330410281328, 'crashed': False, 'action': 4}
False
{'speed': 0.006822422958572302, 'crashed': False, 'action': 1}
False
{'speed': 0.0011658718152632692, 'crashed': False, 'action': 0}
False
{'speed': 0.00019923377631071346, 'crashed': False, 'action': 1}
False
{'speed': 3.404670831163714e-05, 'crashed': False, 'action': 0}
False
{'speed': 5.818181878206805e-06, 'crashed': False, 'action': 2}
False
{'speed': 9.942588299005616e-07, 'crashed': False, 'action': 2}
False
{'speed': 1.699071362031588e-07, 'crashed': False, 'action': 4}
False
{'speed': 2.9035130556140955e-08, 'crashed': False, 'action': 0}
True
{'speed': 14.632894122990876, 'crashed': False, 'action': 3}
False
{'speed': 10.263391338495092, 'crashed': True, 'action': 1}
True
{'speed': 8.0, 'crashed': False, 'action': 2}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{

{'speed': 8.0, 'crashed': False, 'action': 2}
False
{'speed': 1.3671058770091211, 'crashed': False, 'action': 4}
False
{'speed': 6.866516432859989, 'crashed': False, 'action': 3}
False
{'speed': 7.806300994233703, 'crashed': False, 'action': 0}
False
{'speed': 7.966899118855758, 'crashed': False, 'action': 2}
False
{'speed': 7.994343448856691, 'crashed': False, 'action': 1}
False
{'speed': 7.999033361961048, 'crashed': False, 'action': 0}
False
{'speed': 14.632728935922879, 'crashed': False, 'action': 3}
False
{'speed': 9.133455338613578, 'crashed': False, 'action': 4}
False
{'speed': 8.220261646785515, 'crashed': True, 'action': 3}
True
{'speed': 14.632894122990876, 'crashed': False, 'action': 3}
False
{'speed': 11.857383708634414, 'crashed': True, 'action': 1}
True
{'speed': 8.0, 'crashed': False, 'action': 2}
False
{'speed': 8.0, 'crashed': False, 'action': 0}
False
{'speed': 14.632894122990876, 'crashed': False, 'action': 3}
False
{'speed': 15.76637769013089, 'crashed': False, 'act

{'speed': 6.866516432859989, 'crashed': False, 'action': 3}
False
{'speed': 7.806300994233703, 'crashed': False, 'action': 2}
False
{'speed': 7.966899118855758, 'crashed': False, 'action': 2}
False
{'speed': 14.62723757184757, 'crashed': False, 'action': 3}
False
{'speed': 9.132516929101062, 'crashed': False, 'action': 4}
False
{'speed': 8.1935338186983, 'crashed': False, 'action': 0}
False
{'speed': 8.033072652617808, 'crashed': False, 'action': 2}
False
{'speed': 8.00565172722026, 'crashed': False, 'action': 2}
False
{'speed': 1.36807169069638, 'crashed': False, 'action': 4}
False
{'speed': 0.2337873560651032, 'crashed': False, 'action': 2}
True
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 2}
False
{'speed': 8.0, 'crashed': False, 'action': 2}
False
{'speed': 1.3671058770091211, 'crashed': False, 'action': 4}
False
{'speed': 6.866516432859989, 'crashed': False, 'action': 3}
False
{'s

{'speed': 15.960076695897188, 'crashed': False, 'action': 1}
False
{'speed': 9.360283454050553, 'crashed': False, 'action': 4}
False
{'speed': 8.23245643805385, 'crashed': False, 'action': 0}
False
{'speed': 1.4068299473356247, 'crashed': False, 'action': 4}
False
{'speed': 0.24041068611937064, 'crashed': False, 'action': 4}
False
{'speed': 0.041083357736198346, 'crashed': False, 'action': 2}
False
{'speed': 0.007020662476053114, 'crashed': False, 'action': 2}
False
{'speed': 0.0011997486164387026, 'crashed': False, 'action': 2}
True
{'speed': 14.632894122990876, 'crashed': False, 'action': 3}
False
{'speed': 15.76637769013089, 'crashed': False, 'action': 0}
False
{'speed': 15.960076695897188, 'crashed': False, 'action': 3}
False
{'speed': 15.993177577041429, 'crashed': False, 'action': 1}
False
{'speed': 15.998834128184738, 'crashed': False, 'action': 3}
False
{'speed': 15.999800766223688, 'crashed': False, 'action': 1}
False
{'speed': 15.999965953291689, 'crashed': False, 'action': 0

{'speed': 15.727620257843341, 'crashed': False, 'action': 0}
False
{'speed': 8.853254019576916, 'crashed': True, 'action': 4}
True
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 3.7453682952449086, 'crashed': True, 'action': 1}
True
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 2}
False
{'speed': 1.3671058770091211, 'crashed': False, 'action': 4}
False
{'speed': 0.23362230986910987, 'crashed': False, 'action': 1}
False
{'speed': 0.03992330410281328, 'crashed': False, 'action': 0}
False
{'speed': 0.006822422958572302, 'crashed': False, 'action': 2}
False
{'speed': 0.0011658718152632692, 'crashed': False, 'action': 2}
False
{'speed': 6.63309335676719, 'crashed': False, 'action': 3}
False
{'speed': 14.39930585983008, 'crashed': False, 'action': 3}
False
{'speed': 15.726460204209955, 'crashed': False, 'action': 0}
False
{'speed': 15.953255267197445, 'crashed': False, 'action'

{'speed': 9.366906643232811, 'crashed': False, 'action': 4}
False
{'speed': 8.233588263160799, 'crashed': False, 'action': 1}
False
{'speed': 8.039917485920935, 'crashed': False, 'action': 1}
False
{'speed': 8.006821428699745, 'crashed': False, 'action': 1}
True
{'speed': 14.632894122990876, 'crashed': False, 'action': 3}
False
{'speed': 15.76637769013089, 'crashed': False, 'action': 1}
False
{'speed': 15.960076695897188, 'crashed': False, 'action': 2}
False
{'speed': 15.993177577041429, 'crashed': False, 'action': 3}
False
{'speed': 15.998834128184738, 'crashed': False, 'action': 0}
False
{'speed': 15.999800766223688, 'crashed': False, 'action': 0}
False
{'speed': 15.999965953291689, 'crashed': False, 'action': 1}
False
{'speed': 15.999994181818122, 'crashed': False, 'action': 0}
False
{'speed': 15.99999900574117, 'crashed': False, 'action': 0}
False
{'speed': 15.999999830092865, 'crashed': False, 'action': 3}
False
{'speed': 15.99999997096487, 'crashed': False, 'action': 3}
True
{'sp

{'speed': 7.773200113089462, 'crashed': False, 'action': 1}
False
{'speed': 1.3283484447215714, 'crashed': False, 'action': 4}
False
{'speed': 0.22699912068684838, 'crashed': False, 'action': 2}
False
{'speed': 0.03879147899586166, 'crashed': False, 'action': 1}
False
{'speed': 6.639523130355019, 'crashed': False, 'action': 3}
False
{'speed': 14.40040463248755, 'crashed': False, 'action': 3}
False
{'speed': 15.726647971529646, 'crashed': False, 'action': 3}
False
{'speed': 9.320393231432348, 'crashed': False, 'action': 4}
True
{'speed': 14.632894122990876, 'crashed': False, 'action': 3}
False
{'speed': 15.76637769013089, 'crashed': False, 'action': 0}
False
{'speed': 15.960076695897188, 'crashed': False, 'action': 0}
False
{'speed': 15.993177577041429, 'crashed': False, 'action': 1}
False
{'speed': 15.998834128184738, 'crashed': False, 'action': 3}
False
{'speed': 15.999800766223688, 'crashed': False, 'action': 0}
False
{'speed': 15.999965953291689, 'crashed': False, 'action': 1}
False

The environment always through random environments at the agent. So, the performance metric used that is the reward function keeps changing after the code snippet is completed

After running the code a few times, I aw that the reward is anywhere from 0.50 to 0.96, which is a pretty good reward in good cases.

The number of crashed out of 100, were only about 30-40.

### Agent taking greedy action is not always the best idea. We need an intelligent agent who learns and applies for getting maximum rewards. 

## PPO WITH ROUNDABOUT

More information on the algorithm can be found in the report. 

### Creating a ppo model


I set the timesteps at 10000.

In [7]:
# creating the ppo policy with the roundabout environment
ppo_roundabout = PPO2('MlpPolicy', env, verbose=1)
# Instead of choosing greedily, next step helps the agent learn to take better decisions inturn action

ppo_roundabout.learn(total_timesteps=10000)

Wrapping the env in a DummyVecEnv.




Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Please use `layer.__call__` method instead.





Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where



--------------------------------------
| approxkl           | 0.00015213432 |
| clipfrac           | 0.0           |
| explained_variance | -0.0637       |
| fps                | 20            |
| n_updates          | 1             |
| policy_entropy     | 1.6092974     |
| policy_loss        | -0.004361937  |
| serial_timesteps   | 128           |
| time_elapsed       | 0             |
| total_timesteps    | 128           |
| value_loss         | 11.6282015    |
--------------------------------------
--------------------------------------
| approxkl           | 8.6744505e-05 |
| clipfrac           | 0.0           |
| explained_variance | -0.0634       |
| fps                | 21            |
| n_updates          | 2 

--------------------------------------
| approxkl           | 0.00042618622 |
| clipfrac           | 0.0           |
| explained_variance | 0.113         |
| fps                | 20            |
| n_updates          | 11            |
| policy_entropy     | 1.5901613     |
| policy_loss        | -0.0058364635 |
| serial_timesteps   | 1408          |
| time_elapsed       | 59.8          |
| total_timesteps    | 1408          |
| value_loss         | 3.65749       |
--------------------------------------
--------------------------------------
| approxkl           | 0.00019157317 |
| clipfrac           | 0.0           |
| explained_variance | 0.241         |
| fps                | 10            |
| n_updates          | 12            |
| policy_entropy     | 1.584565      |
| policy_loss        | 5.090155e-05  |
| serial_timesteps   | 1536          |
| time_elapsed       | 66            |
| total_timesteps    | 1536          |
| value_loss         | 3.6033657     |
-------------------------

--------------------------------------
| approxkl           | 0.00029147716 |
| clipfrac           | 0.0           |
| explained_variance | 0.748         |
| fps                | 21            |
| n_updates          | 28            |
| policy_entropy     | 1.5216125     |
| policy_loss        | -0.0019713524 |
| serial_timesteps   | 3584          |
| time_elapsed       | 172           |
| total_timesteps    | 3584          |
| value_loss         | 0.91908836    |
--------------------------------------
--------------------------------------
| approxkl           | 0.00048713412 |
| clipfrac           | 0.0           |
| explained_variance | 0.796         |
| fps                | 21            |
| n_updates          | 29            |
| policy_entropy     | 1.511496      |
| policy_loss        | -0.00511436   |
| serial_timesteps   | 3712          |
| time_elapsed       | 178           |
| total_timesteps    | 3712          |
| value_loss         | 0.8093783     |
-------------------------

--------------------------------------
| approxkl           | 0.0005872547  |
| clipfrac           | 0.005859375   |
| explained_variance | 0.916         |
| fps                | 21            |
| n_updates          | 45            |
| policy_entropy     | 1.4356691     |
| policy_loss        | -0.0030564745 |
| serial_timesteps   | 5760          |
| time_elapsed       | 276           |
| total_timesteps    | 5760          |
| value_loss         | 0.29124314    |
--------------------------------------
--------------------------------------
| approxkl           | 0.000573296   |
| clipfrac           | 0.001953125   |
| explained_variance | 0.909         |
| fps                | 21            |
| n_updates          | 46            |
| policy_entropy     | 1.4701973     |
| policy_loss        | -0.0059052785 |
| serial_timesteps   | 5888          |
| time_elapsed       | 283           |
| total_timesteps    | 5888          |
| value_loss         | 0.3226201     |
-------------------------

--------------------------------------
| approxkl           | 0.00024072255 |
| clipfrac           | 0.0           |
| explained_variance | 0.908         |
| fps                | 20            |
| n_updates          | 62            |
| policy_entropy     | 1.4508443     |
| policy_loss        | -0.0037288994 |
| serial_timesteps   | 7936          |
| time_elapsed       | 383           |
| total_timesteps    | 7936          |
| value_loss         | 0.32314852    |
--------------------------------------
-------------------------------------
| approxkl           | 0.0007457528 |
| clipfrac           | 0.0          |
| explained_variance | 0.735        |
| fps                | 19           |
| n_updates          | 63           |
| policy_entropy     | 1.4753739    |
| policy_loss        | -0.004592479 |
| serial_timesteps   | 8064         |
| time_elapsed       | 389          |
| total_timesteps    | 8064         |
| value_loss         | 0.93445647   |
-------------------------------------

<stable_baselines.ppo2.ppo2.PPO2 at 0x20ba1c8f948>

## Performance of PPO on roundabout env.

I used the average of rewards and the number of crashes for evaluation of the algorithm on the environment.

In [8]:
#Visualising the performance of the model and noting the performance for comparison


crashcount = 0
for i in range(100):
    done = False
    obs = env.reset()
    total_reward = []
    while not done:
        env.render()
        #render the next actions, rewards and states.
        action, _states = ppo_roundabout.predict(obs)
        next_state, reward, done, info = env.step(action)
        if info['crashed']==True : crashcount +=1
        total_reward.append(reward)
        print(info)
        print(done)
      
    
rewards_average = Average(total_reward) 
  
print("The average of rewards: ",rewards_average)   
print("Number of Crashes: ", crashcount)
env.close()

{'speed': 1.3671058770091211, 'crashed': False, 'action': 4}
False
{'speed': 0.23362230986910987, 'crashed': False, 'action': 4}
False
{'speed': 0.03992330410281328, 'crashed': False, 'action': 4}
False
{'speed': 0.006822422958572302, 'crashed': False, 'action': 4}
False
{'speed': 0.0011658718152632692, 'crashed': False, 'action': 4}
False
{'speed': 0.00019923377631071346, 'crashed': False, 'action': 4}
False
{'speed': 3.404670831163714e-05, 'crashed': False, 'action': 4}
False
{'speed': 5.818181878206805e-06, 'crashed': False, 'action': 0}
False
{'speed': 9.942588299005616e-07, 'crashed': False, 'action': 4}
False
{'speed': 1.699071362031588e-07, 'crashed': False, 'action': 2}
False
{'speed': 2.9035130556140955e-08, 'crashed': False, 'action': 4}
True
{'speed': 1.3671058770091211, 'crashed': False, 'action': 4}
False
{'speed': 0.23362230986910987, 'crashed': False, 'action': 4}
False
{'speed': 0.03992330410281328, 'crashed': False, 'action': 4}
False
{'speed': 0.006822422958572302, 'c

{'speed': 1.699071362031588e-07, 'crashed': False, 'action': 4}
True
{'speed': 8.0, 'crashed': False, 'action': 0}
False
{'speed': 1.3671058770091211, 'crashed': False, 'action': 4}
False
{'speed': 0.23362230986910987, 'crashed': False, 'action': 4}
False
{'speed': 0.03992330410281328, 'crashed': False, 'action': 4}
False
{'speed': 0.006822422958572302, 'crashed': False, 'action': 4}
False
{'speed': 0.0011658718152632692, 'crashed': False, 'action': 4}
False
{'speed': 0.00019923377631071346, 'crashed': False, 'action': 4}
False
{'speed': 3.404670831163714e-05, 'crashed': False, 'action': 0}
False
{'speed': 5.818181878206805e-06, 'crashed': False, 'action': 4}
False
{'speed': 9.942588299005616e-07, 'crashed': False, 'action': 1}
False
{'speed': 1.699071362031588e-07, 'crashed': False, 'action': 2}
True
{'speed': 1.3671058770091211, 'crashed': False, 'action': 4}
False
{'speed': 0.23362230986910987, 'crashed': False, 'action': 4}
False
{'speed': 0.03992330410281328, 'crashed': False, 'ac

{'speed': 9.942588299005616e-07, 'crashed': False, 'action': 4}
False
{'speed': 1.699071362031588e-07, 'crashed': False, 'action': 4}
True
{'speed': 1.3671058770091211, 'crashed': False, 'action': 4}
False
{'speed': 0.23362230986910987, 'crashed': False, 'action': 4}
False
{'speed': 0.03992330410281328, 'crashed': False, 'action': 2}
False
{'speed': 0.006822422958572302, 'crashed': False, 'action': 4}
False
{'speed': 0.0011658718152632692, 'crashed': False, 'action': 4}
False
{'speed': 0.00019923377631071346, 'crashed': False, 'action': 4}
False
{'speed': 3.404670831163714e-05, 'crashed': False, 'action': 4}
False
{'speed': 5.818181878206805e-06, 'crashed': False, 'action': 2}
False
{'speed': 9.942588299005616e-07, 'crashed': False, 'action': 0}
False
{'speed': 1.699071362031588e-07, 'crashed': False, 'action': 4}
False
{'speed': 2.9035130556140955e-08, 'crashed': False, 'action': 0}
True
{'speed': 1.3671058770091211, 'crashed': False, 'action': 4}
False
{'speed': 0.23362230986910987, 

{'speed': 9.942588299005616e-07, 'crashed': False, 'action': 4}
False
{'speed': 1.699071362031588e-07, 'crashed': False, 'action': 4}
False
{'speed': 2.9035130556140955e-08, 'crashed': False, 'action': 4}
True
{'speed': 1.3671058770091211, 'crashed': False, 'action': 4}
False
{'speed': 0.23362230986910987, 'crashed': False, 'action': 4}
False
{'speed': 0.03992330410281328, 'crashed': False, 'action': 4}
False
{'speed': 0.006822422958572302, 'crashed': False, 'action': 4}
False
{'speed': 0.0011658718152632692, 'crashed': False, 'action': 4}
False
{'speed': 0.00019923377631071346, 'crashed': False, 'action': 4}
False
{'speed': 3.404670831163714e-05, 'crashed': False, 'action': 0}
False
{'speed': 5.818181878206805e-06, 'crashed': False, 'action': 4}
False
{'speed': 9.942588299005616e-07, 'crashed': False, 'action': 1}
False
{'speed': 6.632894292898015, 'crashed': False, 'action': 3}
False
{'speed': 7.76637771916602, 'crashed': False, 'action': 2}
True
{'speed': 1.3671058770091211, 'crashe

{'speed': 0.00019923377631071346, 'crashed': False, 'action': 4}
False
{'speed': 3.404670831163714e-05, 'crashed': False, 'action': 0}
False
{'speed': 5.818181878206805e-06, 'crashed': False, 'action': 4}
False
{'speed': 9.942588299005616e-07, 'crashed': False, 'action': 4}
True
{'speed': 1.3671058770091211, 'crashed': False, 'action': 4}
False
{'speed': 0.23362230986910987, 'crashed': False, 'action': 4}
False
{'speed': 0.03992330410281328, 'crashed': False, 'action': 4}
False
{'speed': 0.006822422958572302, 'crashed': False, 'action': 4}
False
{'speed': 0.0011658718152632692, 'crashed': False, 'action': 0}
False
{'speed': 0.00019923377631071346, 'crashed': False, 'action': 4}
False
{'speed': 3.404670831163714e-05, 'crashed': False, 'action': 0}
False
{'speed': 5.818181878206805e-06, 'crashed': False, 'action': 4}
False
{'speed': 9.942588299005616e-07, 'crashed': False, 'action': 4}
False
{'speed': 1.699071362031588e-07, 'crashed': False, 'action': 4}
False
{'speed': 2.903513055614095

{'speed': 0.00019923377631071346, 'crashed': False, 'action': 4}
False
{'speed': 3.404670831163714e-05, 'crashed': False, 'action': 1}
False
{'speed': 5.818181878206805e-06, 'crashed': False, 'action': 4}
False
{'speed': 9.942588299005616e-07, 'crashed': False, 'action': 4}
False
{'speed': 1.699071362031588e-07, 'crashed': False, 'action': 2}
True
{'speed': 8.0, 'crashed': False, 'action': 2}
False
{'speed': 1.3671058770091211, 'crashed': False, 'action': 4}
False
{'speed': 0.23362230986910987, 'crashed': False, 'action': 0}
False
{'speed': 0.03992330410281328, 'crashed': False, 'action': 4}
False
{'speed': 0.006822422958572302, 'crashed': False, 'action': 4}
False
{'speed': 0.0011658718152632692, 'crashed': False, 'action': 4}
False
{'speed': 0.00019923377631071346, 'crashed': False, 'action': 4}
False
{'speed': 3.404670831163714e-05, 'crashed': False, 'action': 0}
False
{'speed': 5.818181878206805e-06, 'crashed': False, 'action': 4}
False
{'speed': 9.942588299005616e-07, 'crashed': F

{'speed': 0.00019923377631071346, 'crashed': False, 'action': 4}
False
{'speed': 3.404670831163714e-05, 'crashed': False, 'action': 4}
False
{'speed': 5.818181878206805e-06, 'crashed': False, 'action': 1}
False
{'speed': 9.942588299005616e-07, 'crashed': False, 'action': 2}
False
{'speed': 1.699071362031588e-07, 'crashed': False, 'action': 4}
False
{'speed': 2.9035130556140955e-08, 'crashed': False, 'action': 4}
True
{'speed': 1.3671058770091211, 'crashed': False, 'action': 4}
False
{'speed': 6.866516432859989, 'crashed': False, 'action': 3}
False
{'speed': 14.43919511722458, 'crashed': False, 'action': 3}
False
{'speed': 9.10038268599577, 'crashed': False, 'action': 4}
False
{'speed': 8.188042454622988, 'crashed': False, 'action': 1}
False
{'speed': 1.3992401201144098, 'crashed': False, 'action': 4}
False
{'speed': 0.23911367394441976, 'crashed': False, 'action': 4}
False
{'speed': 0.04086171361533239, 'crashed': False, 'action': 1}
False
{'speed': 0.006982786103523066, 'crashed': Fal

{'speed': 0.0011658718152632692, 'crashed': False, 'action': 4}
False
{'speed': 0.00019923377631071346, 'crashed': False, 'action': 0}
False
{'speed': 3.404670831163714e-05, 'crashed': False, 'action': 0}
False
{'speed': 5.818181878206805e-06, 'crashed': False, 'action': 4}
False
{'speed': 9.942588299005616e-07, 'crashed': False, 'action': 4}
False
{'speed': 1.699071362031588e-07, 'crashed': False, 'action': 4}
False
{'speed': 2.9035130556140955e-08, 'crashed': False, 'action': 2}
True
{'speed': 8.0, 'crashed': False, 'action': 2}
False
{'speed': 1.3671058770091211, 'crashed': False, 'action': 4}
False
{'speed': 0.23362230986910987, 'crashed': False, 'action': 0}
False
{'speed': 0.03992330410281328, 'crashed': False, 'action': 2}
False
{'speed': 0.006822422958572302, 'crashed': False, 'action': 4}
False
{'speed': 0.0011658718152632692, 'crashed': False, 'action': 4}
False
{'speed': 0.00019923377631071346, 'crashed': False, 'action': 4}
False
{'speed': 3.404670831163714e-05, 'crashed': 

{'speed': 0.23362230986910987, 'crashed': False, 'action': 4}
False
{'speed': 0.03992330410281328, 'crashed': False, 'action': 2}
False
{'speed': 0.006822422958572302, 'crashed': False, 'action': 4}
False
{'speed': 0.0011658718152632692, 'crashed': False, 'action': 4}
False
{'speed': 0.00019923377631071346, 'crashed': False, 'action': 4}
False
{'speed': 3.404670831163714e-05, 'crashed': False, 'action': 2}
False
{'speed': 5.818181878206805e-06, 'crashed': False, 'action': 4}
False
{'speed': 9.942588299005616e-07, 'crashed': False, 'action': 4}
True
{'speed': 8.0, 'crashed': False, 'action': 0}
False
{'speed': 8.0, 'crashed': False, 'action': 0}
False
{'speed': 1.3671058770091211, 'crashed': False, 'action': 4}
False
{'speed': 0.23362230986910987, 'crashed': False, 'action': 4}
False
{'speed': 0.03992330410281328, 'crashed': False, 'action': 2}
False
{'speed': 0.006822422958572302, 'crashed': False, 'action': 2}
False
{'speed': 0.0011658718152632692, 'crashed': False, 'action': 4}
False

{'speed': 0.03992330410281328, 'crashed': False, 'action': 1}
False
{'speed': 0.006822422958572302, 'crashed': False, 'action': 0}
False
{'speed': 0.0011658718152632692, 'crashed': False, 'action': 4}
False
{'speed': 0.00019923377631071346, 'crashed': False, 'action': 4}
False
{'speed': 3.404670831163714e-05, 'crashed': False, 'action': 4}
False
{'speed': 5.818181878206805e-06, 'crashed': False, 'action': 4}
False
{'speed': 9.942588299005616e-07, 'crashed': False, 'action': 4}
False
{'speed': 1.699071362031588e-07, 'crashed': False, 'action': 4}
False
{'speed': 2.9035130556140955e-08, 'crashed': False, 'action': 4}
True
{'speed': 1.3671058770091211, 'crashed': False, 'action': 4}
False
{'speed': 0.23362230986910987, 'crashed': False, 'action': 4}
False
{'speed': 0.03992330410281328, 'crashed': False, 'action': 4}
False
{'speed': 0.006822422958572302, 'crashed': False, 'action': 4}
False
{'speed': 0.0011658718152632692, 'crashed': False, 'action': 4}
False
{'speed': 0.000199233776310713

The environment always through random environments at the agent. So, the performance metric used that is the reward function keeps changing after the code snippet is completed

After running the code a few times, I aw that the reward is anywhere from 0.89 to 0.99, which is a pretty good reward. The number of crashed out of 100, were only about 0-25.

The performance of PPO is better than agent taking random actions

# DQN WITH ROUNDABOUT

## Creating the DQN model


For more information on the algorithm look at the report attached with the code.

Number of timesteps: 10000

In [9]:
# creating the dqn policy with the roundabout environment
dqn_roundabout = DQN('MlpPolicy', env, verbose=1)
# we take actions based on DQN policy
dqn_roundabout.learn(total_timesteps=10000)









--------------------------------------
| % time spent exploring  | 7        |
| episodes                | 100      |
| mean 100 episode reward | 8.7      |
| steps                   | 948      |
--------------------------------------
--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 200      |
| mean 100 episode reward | 8.2      |
| steps                   | 1852     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 300      |
| mean 100 episode reward | 8.5      |
| steps                   | 2792     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 400      |
| mean 100 episode reward | 9.9      |
| steps                   | 3860     |
--------------------------------------
--------------------------------------
| % time spent expl

<stable_baselines.deepq.dqn.DQN at 0x20ba215f6c8>

## Performance of DQN on roundabout env

In [10]:


#Visualising the performance of the model and noting the performance for comparison
crashcount = 0
for i in range(100):
    done = False
    obs = env.reset()
    total_reward = []
    while not done:
        env.render()
        action, _states = dqn_roundabout.predict(obs)
        next_state, reward, done, info = env.step(action)
        if info['crashed']==True : crashcount +=1
        total_reward.append(reward)
        print(info)
        print(done)

        
rewards_average = Average(total_reward) 
  
print("The average of rewards: ",rewards_average) 
print("Number of Crashes: ", crashcount)       
env.close()

{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
True
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 4.012894602048116, 'crashed': True, 'action': 1}
True
{'speed': 1.3671058770091211, 'crashed': False, 'action': 4}
False
{'speed': 0.23362230986910987, 'crashed': False, 'action': 4}
False
{'speed': 0.03992330410281328, 'crashed': False, 'action': 4}
False
{'speed': 0.006822422958572302, 'crashed': False, 'action'

{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
True
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0

{'speed': 0.00019923377631071346, 'crashed': False, 'action': 4}
False
{'speed': 3.404670831163714e-05, 'crashed': False, 'action': 4}
False
{'speed': 5.818181878206805e-06, 'crashed': False, 'action': 4}
False
{'speed': 9.942588299005616e-07, 'crashed': False, 'action': 4}
False
{'speed': 1.699071362031588e-07, 'crashed': False, 'action': 4}
False
{'speed': 2.9035130556140955e-08, 'crashed': False, 'action': 4}
True
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
True
{'speed'

{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
True
{'speed': 1.3671058770091211, 'crashed': False, 'action': 4}
False
{'speed': 0.23362230986910987, 'crashed': False, 'action': 4}
False
{'speed': 0.03992330410281328, 'crashed': False, 'action': 4}
False
{'speed': 0.006822422958572302, 'crashed': False, 'action': 4}
False
{'speed': 0.0011658718152632692, 'crashed': False, 'action': 4}
False
{'speed': 0.00019923377631071346, 'crashed': False, 'action': 4}
False
{'speed': 3.404670831163714e-05, 'crashed': False, 'action': 4}
False
{'speed': 5.818181878206805e-06, 'crashed': False, 'action': 4}
False
{'speed': 9.942588299005616e-07, 'crashed': False, 'action': 4}
False
{'speed': 1.699071362031588e-07, 'crashed': False, 'action': 4}
False
{'speed': 2.9035130556140955e-08, 'crashed': False, 'action': 4}
True
{'speed': 8.0, 'crashed': Fals

{'speed': 9.942588299005616e-07, 'crashed': False, 'action': 4}
False
{'speed': 1.699071362031588e-07, 'crashed': False, 'action': 4}
False
{'speed': 2.9035130556140955e-08, 'crashed': False, 'action': 4}
True
{'speed': 1.3671058770091211, 'crashed': False, 'action': 4}
False
{'speed': 0.23362230986910987, 'crashed': False, 'action': 4}
False
{'speed': 0.03992330410281328, 'crashed': False, 'action': 4}
False
{'speed': 0.006822422958572302, 'crashed': False, 'action': 4}
False
{'speed': 0.0011658718152632692, 'crashed': False, 'action': 4}
False
{'speed': 0.00019923377631071346, 'crashed': False, 'action': 4}
False
{'speed': 3.404670831163714e-05, 'crashed': False, 'action': 4}
False
{'speed': 5.818181878206805e-06, 'crashed': False, 'action': 4}
False
{'speed': 9.942588299005616e-07, 'crashed': False, 'action': 4}
False
{'speed': 1.699071362031588e-07, 'crashed': False, 'action': 4}
False
{'speed': 2.9035130556140955e-08, 'crashed': False, 'action': 4}
True
{'speed': 1.367105877009121

{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
True
{'speed': 1.3671058770091211, 'crashed': False, 'action': 4}
False
{'speed': 0.23362230986910987, 'crashed': False, 'action': 4}
False
{'speed': 0.03992330410281328, 'crashed': False, 'action': 4}
False
{'speed': 0.006822422958572302, 'crashed': False, 'action': 4}
False
{'speed': 0.0011658718152632692, 'crashed': False, 'action': 4}
False
{'speed': 0.00019923377631071346, 'crashed': False, 'action': 4}
False
{'speed': 3.404670831163714e-05, 'crashed': False, 'action': 4}
False
{'speed': 5.818181878206805e-06, 'crashed': False, 'action': 4}
False
{'speed': 9.942588299005616e-07, 'crashed': False, 'action': 4}
False
{'speed': 1.699071362031588e-07, 'crashed': False, 'action': 4}
False
{'speed': 2.9035130556140955e-08, 'crashed': False, 'action': 4}
True
{'speed': 1.3671058770091211, 

{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
True
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
True
{'speed': 1.3671058770091211, 'crashed': False, 'action': 4}
False
{'speed': 0.23362230986910987, 'crashed': False, 'action': 4}
False
{'speed': 0.03992330410281328, 'crashed': False, 'action': 4}
False
{'speed': 0.006822422958572302, 'crashed': False, 'action': 4}
False
{'

{'speed': 1.699071362031588e-07, 'crashed': False, 'action': 4}
False
{'speed': 2.9035130556140955e-08, 'crashed': False, 'action': 4}
True
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
True
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': False, 'action': 1}
False
{'speed': 8.0, 'crashed': Fal

The environment always through random environments at the agent. So, the performance metric used that is the reward function keeps changing after the code snippet is completed

After running the code a few times, I aw that the reward is anywhere from 0.81 to 0.99

The number of crashed out of 100, were only about 0-25.

### For three different environment, I ran this code 5 times, so after running the loop 100 times in the same environment (chosen randomly), I also run the code in 100 loops, in 5 randomly selected environment to test thoroughly.



### The detailed results of Five runs can be found in the report in the report.

## Parking setup

#### Setting up the environment for parking

In [11]:

env = gym.make('parking-v0')


#### Using the env.config to look at the configurations available and the default values for this configuration.



In [12]:
pprint.pprint(env.config)

{'action': {'type': 'ContinuousAction'},
 'centering_position': [0.5, 0.5],
 'collision_reward': -5,
 'controlled_vehicles': 1,
 'duration': 100,
 'manual_control': False,
 'observation': {'features': ['x', 'y', 'vx', 'vy', 'cos_h', 'sin_h'],
                 'normalize': False,
                 'scales': [100, 100, 5, 5, 1, 1],
                 'type': 'KinematicsGoal'},
 'offscreen_rendering': False,
 'other_vehicles_type': 'highway_env.vehicle.behavior.IDMVehicle',
 'policy_frequency': 5,
 'real_time_rendering': False,
 'render_agent': True,
 'reward_weights': [1, 0.3, 0, 0, 0.02, 0.02],
 'scaling': 7,
 'screen_height': 300,
 'screen_width': 600,
 'show_trajectories': False,
 'simulation_frequency': 15,
 'steering_range': 0.7853981633974483,
 'success_goal_reward': 0.12}


#### Changing the rewards in the parking environment was more important in parking more than any other environment. The magnitude of results changed heavily when the rewards values changed and I had to curate it exactly as needed. Even if the rewards for sccessful parking was high the accuracy (average_rewards) was low. 

#### In this case, "Testing" the environment with different values became very crucial.

In [13]:
env.config['controlled_vehicles'] = 2
env.config["success_goal_reward"] = 20
env.reset()


{'observation': array([ 0.        ,  0.        , -0.        , -0.        , -0.29982512,
        -0.95399418]),
 'achieved_goal': array([ 0.        ,  0.        , -0.        , -0.        , -0.29982512,
        -0.95399418]),
 'desired_goal': array([2.200000e-01, 1.400000e-01, 0.000000e+00, 0.000000e+00,
        6.123234e-17, 1.000000e+00])}

## GREEDY WITH PARKING

### Performance of agents taking random actions

There are no other cars than the agent in this environment, so we will not look at the number of crashes. We will only look at the reward function. 

Reward function: We will look at the reward function to evaluate the performance of the agent taking greedy actions. I take the reward from each loop and then find the average of all the rewards.



In [14]:

# Writing a function for calculating the average of the rewards.

def Average(l): 
    avg = mean(l) 
    return avg


for i in range(100):
    done = False
    env.reset()
    
    total_reward = []
    while not done:
        env.render()
        action = env.action_space.sample()
        next_state, reward, done, info = env.step(action)
        total_reward.append(reward)
        print(info)
        print(done)
        
rewards_average = Average(total_reward)      

print("The average of rewards: ",rewards_average)
   
env.close()

{'speed': -0.9997962117195129, 'crashed': False, 'action': array([-0.9997962 ,  0.88134676], dtype=float32), 'is_success': True}
True
{'speed': -0.045205067843198776, 'crashed': False, 'action': array([-0.04520507,  0.485113  ], dtype=float32), 'is_success': True}
True
{'speed': -0.3784888684749603, 'crashed': False, 'action': array([-0.37848887,  0.9909345 ], dtype=float32), 'is_success': True}
True
{'speed': 0.387527197599411, 'crashed': False, 'action': array([0.3875272 , 0.38862443], dtype=float32), 'is_success': True}
True
{'speed': -0.13067419826984406, 'crashed': False, 'action': array([-0.1306742 , -0.08394579], dtype=float32), 'is_success': True}
True
{'speed': -0.6677389740943909, 'crashed': False, 'action': array([-0.667739  ,  0.43124896], dtype=float32), 'is_success': True}
True
{'speed': 0.5656623840332031, 'crashed': False, 'action': array([0.5656624, 0.9572386], dtype=float32), 'is_success': True}
True
{'speed': -0.4537866711616516, 'crashed': False, 'action': array([-0

{'speed': 0.2905200719833374, 'crashed': False, 'action': array([0.29052007, 0.7559798 ], dtype=float32), 'is_success': True}
True
{'speed': -0.8905428647994995, 'crashed': False, 'action': array([-0.89054286, -0.13423018], dtype=float32), 'is_success': True}
True
{'speed': 0.3985496163368225, 'crashed': False, 'action': array([0.39854962, 0.69164515], dtype=float32), 'is_success': True}
True
{'speed': -0.23653414845466614, 'crashed': False, 'action': array([-0.23653415,  0.9503188 ], dtype=float32), 'is_success': True}
True
{'speed': 0.6692821979522705, 'crashed': False, 'action': array([0.6692822 , 0.46639025], dtype=float32), 'is_success': True}
True
{'speed': -0.9933217763900757, 'crashed': False, 'action': array([-0.9933218 , -0.43055972], dtype=float32), 'is_success': True}
True
{'speed': -0.5180860757827759, 'crashed': False, 'action': array([-0.5180861 ,  0.35060734], dtype=float32), 'is_success': True}
True
{'speed': 0.2310871034860611, 'crashed': False, 'action': array([ 0.23

The reward for the agent taking the random actions in the parking environment is -0.371 to -0.59 in the very best case.

#### Let us look at an intelligent agent that which takes action based on policy and not randomly.

## HER WITH PARKING

More information on the algorithm can be found in the report.

### Creating an HER model

In [15]:

her_parking = HER('MlpPolicy', env, SAC, n_sampled_goal=4,
            goal_selection_strategy='future',
            verbose=1, buffer_size=int(1e6),
            learning_rate=1e-3,
            gamma=0.9, batch_size=256,
            policy_kwargs=dict(layers=[256, 256, 256]))


Instructions for updating:
Use keras.layers.Dense instead.



  "Box bound precision lowered by casting to {}".format(self.dtype)





In [16]:
her_parking = HER('MlpPolicy', env, SAC, n_sampled_goal= 4, goal_selection_strategy='future', verbose=2)

The next step was a very difficult step for me. One of the disadvantages of the HER policy is that it takes a long time to train. I had to experiment with a few values of timestep. 

In [17]:
her_parking.learn(10000)

-----------------------------------------
| current_lr              | 0.0003      |
| ent_coef                | 1.0         |
| ent_coef_loss           | 0.0         |
| entropy                 | 2.4352427   |
| episodes                | 100         |
| fps                     | 71          |
| mean 100 episode reward | -0.5        |
| n_updates               | 1           |
| policy_loss             | -1.3269765  |
| qf1_loss                | 0.11634705  |
| qf2_loss                | 0.061007086 |
| success rate            | 1           |
| time_elapsed            | 1           |
| total timesteps         | 100         |
| value_loss              | 0.74661946  |
-----------------------------------------
------------------------------------------
| current_lr              | 0.0003       |
| ent_coef                | 0.9703197    |
| ent_coef_loss           | -0.10103407  |
| entropy                 | 2.5697665    |
| episodes                | 200          |
| fps                     | 

------------------------------------------
| current_lr              | 0.0003       |
| ent_coef                | 0.69727606   |
| ent_coef_loss           | -1.2192323   |
| entropy                 | 2.5800061    |
| episodes                | 1300         |
| fps                     | 66           |
| mean 100 episode reward | -0.5         |
| n_updates               | 1201         |
| policy_loss             | -0.48666865  |
| qf1_loss                | 0.0013412186 |
| qf2_loss                | 0.0004102926 |
| success rate            | 1            |
| time_elapsed            | 19           |
| total timesteps         | 1300         |
| value_loss              | 0.010157927  |
------------------------------------------
-------------------------------------------
| current_lr              | 0.0003        |
| ent_coef                | 0.6766711     |
| ent_coef_loss           | -1.3135191    |
| entropy                 | 2.5851114     |
| episodes                | 1400          |
| fps

-------------------------------------------
| current_lr              | 0.0003        |
| ent_coef                | 0.50122076    |
| ent_coef_loss           | -2.3320532    |
| entropy                 | 2.5831013     |
| episodes                | 2400          |
| fps                     | 66            |
| mean 100 episode reward | -0.5          |
| n_updates               | 2301          |
| policy_loss             | -0.2196055    |
| qf1_loss                | 0.00018120094 |
| qf2_loss                | 7.517934e-05  |
| success rate            | 1             |
| time_elapsed            | 36            |
| total timesteps         | 2400          |
| value_loss              | 0.003507447   |
-------------------------------------------
-------------------------------------------
| current_lr              | 0.0003        |
| ent_coef                | 0.48639917    |
| ent_coef_loss           | -2.4437861    |
| entropy                 | 2.5561767     |
| episodes                | 2500

-------------------------------------------
| current_lr              | 0.0003        |
| ent_coef                | 0.36031651    |
| ent_coef_loss           | -3.4608097    |
| entropy                 | 2.5540438     |
| episodes                | 3500          |
| fps                     | 66            |
| mean 100 episode reward | -0.4          |
| n_updates               | 3401          |
| policy_loss             | -0.03856542   |
| qf1_loss                | 8.854778e-05  |
| qf2_loss                | 5.7480862e-05 |
| success rate            | 1             |
| time_elapsed            | 52            |
| total timesteps         | 3500          |
| value_loss              | 0.0049341363  |
-------------------------------------------
------------------------------------------
| current_lr              | 0.0003       |
| ent_coef                | 0.34967414   |
| ent_coef_loss           | -3.5207076   |
| entropy                 | 2.5460339    |
| episodes                | 3600     

------------------------------------------
| current_lr              | 0.0003       |
| ent_coef                | 0.25136948   |
| ent_coef_loss           | -4.630338    |
| entropy                 | 2.5770783    |
| episodes                | 4700         |
| fps                     | 65           |
| mean 100 episode reward | -0.5         |
| n_updates               | 4601         |
| policy_loss             | 0.12030859   |
| qf1_loss                | 5.887291e-05 |
| qf2_loss                | 5.688355e-05 |
| success rate            | 1            |
| time_elapsed            | 72           |
| total timesteps         | 4700         |
| value_loss              | 0.0009082528 |
------------------------------------------
-------------------------------------------
| current_lr              | 0.0003        |
| ent_coef                | 0.24394235    |
| ent_coef_loss           | -4.745345     |
| entropy                 | 2.5569968     |
| episodes                | 4800          |
| fps

-------------------------------------------
| current_lr              | 0.0003        |
| ent_coef                | 0.18072096    |
| ent_coef_loss           | -5.7173715    |
| entropy                 | 2.5699272     |
| episodes                | 5800          |
| fps                     | 64            |
| mean 100 episode reward | -0.5          |
| n_updates               | 5701          |
| policy_loss             | 0.23124672    |
| qf1_loss                | 4.6407135e-05 |
| qf2_loss                | 5.1544128e-05 |
| success rate            | 1             |
| time_elapsed            | 89            |
| total timesteps         | 5800          |
| value_loss              | 0.0007707999  |
-------------------------------------------
-------------------------------------------
| current_lr              | 0.0003        |
| ent_coef                | 0.17538378    |
| ent_coef_loss           | -5.846649     |
| entropy                 | 2.569053      |
| episodes                | 5900

-------------------------------------------
| current_lr              | 0.0003        |
| ent_coef                | 0.12993614    |
| ent_coef_loss           | -6.800297     |
| entropy                 | 2.5442104     |
| episodes                | 6900          |
| fps                     | 64            |
| mean 100 episode reward | -0.5          |
| n_updates               | 6801          |
| policy_loss             | 0.29425144    |
| qf1_loss                | 2.4572371e-05 |
| qf2_loss                | 3.4551565e-05 |
| success rate            | 1             |
| time_elapsed            | 107           |
| total timesteps         | 6900          |
| value_loss              | 0.0001817959  |
-------------------------------------------
-------------------------------------------
| current_lr              | 0.0003        |
| ent_coef                | 0.12609556    |
| ent_coef_loss           | -7.0112004    |
| entropy                 | 2.5741615     |
| episodes                | 7000

-------------------------------------------
| current_lr              | 0.0003        |
| ent_coef                | 0.093405366   |
| ent_coef_loss           | -7.9591093    |
| entropy                 | 2.5685668     |
| episodes                | 8000          |
| fps                     | 64            |
| mean 100 episode reward | -0.5          |
| n_updates               | 7901          |
| policy_loss             | 0.3451834     |
| qf1_loss                | 1.8380131e-05 |
| qf2_loss                | 1.4158253e-05 |
| success rate            | 1             |
| time_elapsed            | 124           |
| total timesteps         | 8000          |
| value_loss              | 0.00012505862 |
-------------------------------------------
-------------------------------------------
| current_lr              | 0.0003        |
| ent_coef                | 0.09064591    |
| ent_coef_loss           | -8.164745     |
| entropy                 | 2.564538      |
| episodes                | 8100

-------------------------------------------
| current_lr              | 0.0003        |
| ent_coef                | 0.06715447    |
| ent_coef_loss           | -9.075875     |
| entropy                 | 2.5946102     |
| episodes                | 9100          |
| fps                     | 64            |
| mean 100 episode reward | -0.5          |
| n_updates               | 9001          |
| policy_loss             | 0.3878112     |
| qf1_loss                | 1.3626741e-05 |
| qf2_loss                | 1.623981e-05  |
| success rate            | 1             |
| time_elapsed            | 141           |
| total timesteps         | 9100          |
| value_loss              | 9.0797796e-05 |
-------------------------------------------
--------------------------------------------
| current_lr              | 0.0003         |
| ent_coef                | 0.06517076     |
| ent_coef_loss           | -9.251135      |
| entropy                 | 2.5615337      |
| episodes                |

<stable_baselines.sac.sac.SAC at 0x20ba64c7788>

## Performance of HER model on the parking env

Now, we look at the evaluation of the HER policy on the parking environment

In [18]:

for i in range(100):
    done = False
    obs = env.reset()
    while not done:
        env.render()
        action, _states = her_parking.predict(obs)
        next_state, reward, done, info = env.step(action)
 
        total_reward.append(reward)
        print(info)
        print(done)
        
        
rewards_average = Average(total_reward) 
  
print("The average of rewards: ",rewards_average)     
env.close()

{'speed': -0.006456494331359863, 'crashed': False, 'action': array([-0.00645649,  0.00726366], dtype=float32), 'is_success': True}
True
{'speed': -0.031987905502319336, 'crashed': False, 'action': array([-0.03198791,  0.00257814], dtype=float32), 'is_success': True}
True
{'speed': -0.03243362903594971, 'crashed': False, 'action': array([-0.03243363, -0.01940978], dtype=float32), 'is_success': True}
True
{'speed': -0.0354120135307312, 'crashed': False, 'action': array([-0.03541201, -0.00528121], dtype=float32), 'is_success': True}
True
{'speed': 0.019814491271972656, 'crashed': False, 'action': array([0.01981449, 0.05716217], dtype=float32), 'is_success': True}
True
{'speed': 0.019608497619628906, 'crashed': False, 'action': array([0.0196085 , 0.02118444], dtype=float32), 'is_success': True}
True
{'speed': 0.014252543449401855, 'crashed': False, 'action': array([0.01425254, 0.0302217 ], dtype=float32), 'is_success': True}
True
{'speed': -0.0004140138626098633, 'crashed': False, 'action'

{'speed': -0.044463396072387695, 'crashed': False, 'action': array([-0.0444634 , -0.01600796], dtype=float32), 'is_success': True}
True
{'speed': -0.03427690267562866, 'crashed': False, 'action': array([-0.0342769 , -0.01539314], dtype=float32), 'is_success': True}
True
{'speed': 0.010435819625854492, 'crashed': False, 'action': array([0.01043582, 0.00847089], dtype=float32), 'is_success': True}
True
{'speed': -0.014842212200164795, 'crashed': False, 'action': array([-0.01484221,  0.01302838], dtype=float32), 'is_success': True}
True
{'speed': -0.027298808097839355, 'crashed': False, 'action': array([-0.02729881, -0.02324617], dtype=float32), 'is_success': True}
True
{'speed': -0.015053927898406982, 'crashed': False, 'action': array([-0.01505393,  0.00048459], dtype=float32), 'is_success': True}
True
{'speed': -0.00446009635925293, 'crashed': False, 'action': array([-0.0044601 , -0.01834464], dtype=float32), 'is_success': True}
True
{'speed': -0.013929903507232666, 'crashed': False, 'a

The reward for the agent taking the random actions in the parking environment is -0.48 to -0.43.

In conclusion, the agents that I deployed on the parking environment performed very poorly. The value of the average over all the rewards was in the negative. However, the results of the HER policy is better than the agent taking random actions.


When the timesteps for HER was in 100s, it performed worse than the agent who took random action. It was mandatory to pick an agent with timesteps with a minimum of 1000, for the agent to achieve a performance which was better than the agent taking random actions.

# Merge setup

In [19]:

# Setting up the environment for roundabout
env = gym.make('merge-v0')


#### Using the env.config to look at the configurations available and the default values for this configuration.



In [20]:
pprint.pprint(env.config)

{'action': {'type': 'DiscreteMetaAction'},
 'centering_position': [0.3, 0.5],
 'collision_reward': -1,
 'high_speed_reward': 0.2,
 'lane_change_reward': -0.05,
 'manual_control': False,
 'merging_speed_reward': -0.5,
 'observation': {'type': 'Kinematics'},
 'offscreen_rendering': False,
 'other_vehicles_type': 'highway_env.vehicle.behavior.IDMVehicle',
 'policy_frequency': 1,
 'real_time_rendering': False,
 'render_agent': True,
 'right_lane_reward': 0.1,
 'scaling': 5.5,
 'screen_height': 150,
 'screen_width': 600,
 'show_trajectories': False,
 'simulation_frequency': 15}


In [21]:
env.config["collision_reward"] = -5
env.config['high_speed_reward'] = 1
env.reset()


array([[ 1.    ,  0.15  ,  0.5   ,  0.375 ,  0.    ],
       [ 1.    ,  0.2   ,  0.    ,  0.0125,  0.    ],
       [ 1.    ,  0.3   , -0.5   , -0.0125,  0.    ],
       [ 1.    ,  0.4   ,  1.    , -0.125 ,  0.    ],
       [ 0.    ,  0.    ,  0.    ,  0.    ,  0.    ]], dtype=float32)

## GREEDY WITH Merge

Let us check the performance of an agent who takes random actions in the merge environment. In this environment we are checking if the cars can merge and unmerge without any human interruption.

### Performance of agents taking random actions

Reward function: We will look at the reward function to evaluate the performance of the agent taking greedy actions. I take the reward from each loop and then find the average of all the rewards.

Number of crashes: The second method to evaluate the performance of the agent taking the greedy function is to count the number of crashes.

In [22]:

def Average(l): 
    avg = mean(l) 
    return avg


crashcount = 0
for i in range(100):
    done = False
    env.reset()
    total_reward = []
    while not done:
        env.render()
        action = env.action_space.sample()
        next_state, reward, done, info = env.step(action)
        if info['crashed']==True : crashcount +=1
        total_reward.append(reward)
        print(info)
        print(done)
        
rewards_average = Average(total_reward) 
  
print("The average of rewards: ",rewards_average)
print("Number of Crashes: ", crashcount)
env.close()

{'speed': 30.0, 'crashed': False, 'action': 2}
False
{'speed': 30.0, 'crashed': False, 'action': 3}
False
{'speed': 25.8544411731307, 'crashed': False, 'action': 4}
False
{'speed': 21.000455116798893, 'crashed': False, 'action': 4}
False
{'speed': 20.170966008732453, 'crashed': False, 'action': 4}
False
{'speed': 20.029216079413366, 'crashed': False, 'action': 4}
False
{'speed': 10.04036906164168, 'crashed': True, 'action': 0}
True
{'speed': 30.0, 'crashed': False, 'action': 3}
False
{'speed': 30.0, 'crashed': False, 'action': 1}
False
{'speed': 25.8544411731307, 'crashed': False, 'action': 4}
False
{'speed': 21.000455116798893, 'crashed': False, 'action': 4}
False
{'speed': 20.170966008732453, 'crashed': False, 'action': 4}
False
{'speed': 24.174774906282668, 'crashed': False, 'action': 3}
False
{'speed': 20.713419913696157, 'crashed': False, 'action': 4}
False
{'speed': 20.121915069598668, 'crashed': False, 'action': 0}
False
{'speed': 20.020833851018036, 'crashed': False, 'action': 

{'speed': 30.0, 'crashed': False, 'action': 2}
False
{'speed': 30.0, 'crashed': False, 'action': 3}
False
{'speed': 30.0, 'crashed': False, 'action': 2}
False
{'speed': 30.0, 'crashed': False, 'action': 2}
False
{'speed': 30.0, 'crashed': False, 'action': 1}
False
{'speed': 30.0, 'crashed': False, 'action': 3}
True
{'speed': 25.8544411731307, 'crashed': False, 'action': 4}
False
{'speed': 29.29157277053749, 'crashed': False, 'action': 3}
False
{'speed': 29.878938121396065, 'crashed': False, 'action': 2}
False
{'speed': 29.97931194928485, 'crashed': False, 'action': 1}
False
{'speed': 25.850905828666136, 'crashed': False, 'action': 4}
False
{'speed': 25.145409794893848, 'crashed': False, 'action': 1}
False
{'speed': 20.87928999627746, 'crashed': False, 'action': 4}
False
{'speed': 20.15026031518828, 'crashed': False, 'action': 4}
False
{'speed': 10.074062805251126, 'crashed': True, 'action': 0}
True
{'speed': 30.0, 'crashed': False, 'action': 0}
False
{'speed': 25.8544411731307, 'crashe

{'speed': 30.0, 'crashed': False, 'action': 2}
False
{'speed': 30.0, 'crashed': False, 'action': 3}
False
{'speed': 30.0, 'crashed': False, 'action': 0}
True
{'speed': 30.0, 'crashed': False, 'action': 3}
False
{'speed': 25.8544411731307, 'crashed': False, 'action': 4}
False
{'speed': 21.000455116798893, 'crashed': False, 'action': 4}
False
{'speed': 20.170966008732453, 'crashed': False, 'action': 1}
False
{'speed': 20.029216079413366, 'crashed': False, 'action': 4}
False
{'speed': 20.00499268423365, 'crashed': False, 'action': 4}
False
{'speed': 20.000853190994736, 'crashed': False, 'action': 1}
False
{'speed': 20.00014580030289, 'crashed': False, 'action': 0}
False
{'speed': 20.00002491555637, 'crashed': False, 'action': 2}
False
{'speed': 20.000004257775444, 'crashed': False, 'action': 2}
False
{'speed': 20.000000727603734, 'crashed': False, 'action': 2}
False
{'speed': 20.000000124338918, 'crashed': False, 'action': 2}
False
{'speed': 20.000000021248056, 'crashed': False, 'action':

{'speed': 20.85516984301524, 'crashed': False, 'action': 4}
False
{'speed': 10.270280665163314, 'crashed': True, 'action': 0}
True
{'speed': 30.0, 'crashed': False, 'action': 0}
False
{'speed': 30.0, 'crashed': False, 'action': 3}
False
{'speed': 30.0, 'crashed': False, 'action': 1}
False
{'speed': 30.0, 'crashed': False, 'action': 3}
False
{'speed': 30.0, 'crashed': False, 'action': 2}
False
{'speed': 25.8544411731307, 'crashed': False, 'action': 4}
False
{'speed': 25.146013943668194, 'crashed': False, 'action': 0}
False
{'speed': 29.17051089193356, 'crashed': False, 'action': 3}
False
{'speed': 29.85825007068091, 'crashed': False, 'action': 1}
False
{'speed': 25.830217777950978, 'crashed': False, 'action': 4}
False
{'speed': 25.141874450429277, 'crashed': False, 'action': 2}
False
{'speed': 20.878685847503117, 'crashed': False, 'action': 4}
False
{'speed': 20.150157073270783, 'crashed': False, 'action': 0}
True
{'speed': 30.0, 'crashed': False, 'action': 1}
False
{'speed': 30.0, 'cra

{'speed': 25.708531092791777, 'crashed': False, 'action': 4}
True
{'speed': 30.0, 'crashed': False, 'action': 2}
False
{'speed': 30.0, 'crashed': False, 'action': 1}
False
{'speed': 25.8544411731307, 'crashed': False, 'action': 4}
False
{'speed': 25.146013943668194, 'crashed': False, 'action': 1}
False
{'speed': 25.024952065064255, 'crashed': False, 'action': 2}
False
{'speed': 20.858705187479806, 'crashed': False, 'action': 4}
False
{'speed': 20.146742613552732, 'crashed': False, 'action': 4}
False
{'speed': 20.02507658617445, 'crashed': False, 'action': 2}
False
{'speed': 24.1498441204111, 'crashed': False, 'action': 3}
False
{'speed': 29.000277189449317, 'crashed': False, 'action': 3}
False
{'speed': 29.829159133789506, 'crashed': False, 'action': 0}
False
{'speed': 29.97080530597129, 'crashed': False, 'action': 0}
False
{'speed': 29.995010970276986, 'crashed': False, 'action': 0}
False
{'speed': 21.246279786352957, 'crashed': True, 'action': 2}
True
{'speed': 30.0, 'crashed': False

{'speed': 20.854438161279898, 'crashed': False, 'action': 4}
True
{'speed': 25.8544411731307, 'crashed': False, 'action': 4}
False
{'speed': 25.146013943668194, 'crashed': False, 'action': 2}
False
{'speed': 25.024952065064255, 'crashed': False, 'action': 2}
False
{'speed': 29.14982284121841, 'crashed': False, 'action': 3}
False
{'speed': 29.854714726216347, 'crashed': False, 'action': 2}
False
{'speed': 25.829613629176635, 'crashed': False, 'action': 4}
False
{'speed': 25.14177120851178, 'crashed': False, 'action': 2}
False
{'speed': 20.878668204674096, 'crashed': False, 'action': 4}
False
{'speed': 24.295712885188177, 'crashed': False, 'action': 3}
False
{'speed': 20.734086791410576, 'crashed': False, 'action': 4}
False
{'speed': 20.125446795846518, 'crashed': False, 'action': 4}
False
{'speed': 20.021437381481714, 'crashed': False, 'action': 4}
False
{'speed': 20.003663396276416, 'crashed': False, 'action': 4}
False
{'speed': 24.14618485819171, 'crashed': False, 'action': 3}
False
{

{'speed': 24.14626623623804, 'crashed': False, 'action': 3}
True
{'speed': 25.8544411731307, 'crashed': False, 'action': 4}
False
{'speed': 25.146013943668194, 'crashed': False, 'action': 2}
False
{'speed': 29.17051089193356, 'crashed': False, 'action': 3}
False
{'speed': 29.85825007068091, 'crashed': False, 'action': 2}
False
{'speed': 29.97577660482028, 'crashed': False, 'action': 2}
False
{'speed': 29.995860506761083, 'crashed': False, 'action': 1}
False
{'speed': 29.999292609308153, 'crashed': False, 'action': 3}
False
{'speed': 29.99987911525348, 'crashed': False, 'action': 2}
False
{'speed': 29.999979342219078, 'crashed': False, 'action': 2}
False
{'speed': 25.854437642958988, 'crashed': False, 'action': 4}
False
{'speed': 25.14601334040338, 'crashed': False, 'action': 0}
False
{'speed': 20.8793931351041, 'crashed': False, 'action': 4}
False
{'speed': 20.150277940400287, 'crashed': False, 'action': 1}
True
{'speed': 30.0, 'crashed': False, 'action': 3}
False
{'speed': 30.0, 'cras

{'speed': 20.146742613552732, 'crashed': False, 'action': 0}
False
{'speed': 20.02507658617445, 'crashed': False, 'action': 4}
False
{'speed': 13.226327175073452, 'crashed': True, 'action': 2}
True
{'speed': 25.8544411731307, 'crashed': False, 'action': 4}
False
{'speed': 29.29157277053749, 'crashed': False, 'action': 3}
False
{'speed': 25.733379294526763, 'crashed': False, 'action': 4}
False
{'speed': 29.270884719822345, 'crashed': False, 'action': 3}
False
{'speed': 25.7298439500622, 'crashed': False, 'action': 4}
False
{'speed': 25.124721744178697, 'crashed': False, 'action': 0}
False
{'speed': 25.021313478682192, 'crashed': False, 'action': 2}
False
{'speed': 29.14920104961504, 'crashed': False, 'action': 3}
False
{'speed': 25.70904964247764, 'crashed': False, 'action': 4}
False
{'speed': 25.121168241665302, 'crashed': False, 'action': 1}
False
{'speed': 25.02070622691094, 'crashed': False, 'action': 1}
False
{'speed': 20.857979623693282, 'crashed': False, 'action': 4}
False
{'spee

{'speed': 29.854714726216347, 'crashed': False, 'action': 2}
False
{'speed': 24.362370125541286, 'crashed': True, 'action': 3}
True
{'speed': 30.0, 'crashed': False, 'action': 0}
False
{'speed': 30.0, 'crashed': False, 'action': 1}
False
{'speed': 30.0, 'crashed': False, 'action': 3}
False
{'speed': 30.0, 'crashed': False, 'action': 0}
False
{'speed': 30.0, 'crashed': False, 'action': 2}
False
{'speed': 30.0, 'crashed': False, 'action': 1}
False
{'speed': 30.0, 'crashed': False, 'action': 2}
False
{'speed': 30.0, 'crashed': False, 'action': 0}
False
{'speed': 30.0, 'crashed': False, 'action': 3}
False
{'speed': 30.0, 'crashed': False, 'action': 2}
False
{'speed': 30.0, 'crashed': False, 'action': 2}
False
{'speed': 30.0, 'crashed': False, 'action': 1}
True
{'speed': 30.0, 'crashed': False, 'action': 3}
False
{'speed': 30.0, 'crashed': False, 'action': 0}
False
{'speed': 25.8544411731307, 'crashed': False, 'action': 4}
False
{'speed': 29.29157277053749, 'crashed': False, 'action': 3}
Fa

{'speed': 30.0, 'crashed': False, 'action': 3}
False
{'speed': 30.0, 'crashed': False, 'action': 1}
False
{'speed': 30.0, 'crashed': False, 'action': 0}
False
{'speed': 30.0, 'crashed': False, 'action': 3}
False
{'speed': 30.0, 'crashed': False, 'action': 0}
False
{'speed': 22.76503703703704, 'crashed': True, 'action': 2}
True
{'speed': 30.0, 'crashed': False, 'action': 3}
False
{'speed': 30.0, 'crashed': False, 'action': 2}
False
{'speed': 30.0, 'crashed': False, 'action': 3}
False
{'speed': 30.0, 'crashed': False, 'action': 3}
False
{'speed': 25.8544411731307, 'crashed': False, 'action': 4}
False
{'speed': 25.146013943668194, 'crashed': False, 'action': 1}
False
{'speed': 20.87939323819496, 'crashed': False, 'action': 4}
False
{'speed': 20.150277958017302, 'crashed': False, 'action': 2}
False
{'speed': 10.788664075647732, 'crashed': True, 'action': 0}
True
{'speed': 30.0, 'crashed': False, 'action': 2}
False
{'speed': 30.0, 'crashed': False, 'action': 2}
False
{'speed': 30.0, 'crashe

The agent taking random actions in the merge environment performs really good in some environment and really poorly in others. The environment is thrown randomly. Overall the performance is between 0.76 to 0.96 which is a very high range. Overall this is not a reliable agent and we should consider agents that are policy-based. The crashes ranges from 14-38

It can be seen that even with a higher avg-rewards value the number of crashed was very high. 

## DQN WITH Merge

For more information on the algorithm look at the report attached with this notebook

### Creating a DQN model for the merge agent


I set the timesteps at 1000.

In [23]:
dqn_highway = DQN('MlpPolicy', env, verbose=1)

In [24]:
dqn_highway.learn(10000)

--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 100      |
| mean 100 episode reward | 11.5     |
| steps                   | 1210     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 200      |
| mean 100 episode reward | 11.7     |
| steps                   | 2421     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 300      |
| mean 100 episode reward | 11.6     |
| steps                   | 3621     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 400      |
| mean 100 episode reward | 11.8     |
| steps                   | 4865     |
--------------------------------------
--------------------------------------
| % time spent exploring 

<stable_baselines.deepq.dqn.DQN at 0x20bad39fb48>

## Performance of DQN on merge env

In [25]:
crashcount = 0
for i in range(100):
    done = False
    obs = env.reset()
    total_reward = []
    while not done:
        env.render()
        action, _states = dqn_highway.predict(obs)
        next_state, reward, done, info = env.step(action)
        if info['crashed']==True : crashcount +=1
        total_reward.append(reward)
        print(info)
        print(done)
        
rewards_average = Average(total_reward) 
  
print("The average of rewards: ",rewards_average)      
print("Number of Crashes: ", crashcount)
env.close()

{'speed': 25.8544411731307, 'crashed': False, 'action': 4}
False
{'speed': 21.000455116798893, 'crashed': False, 'action': 4}
False
{'speed': 20.170966008732453, 'crashed': False, 'action': 4}
False
{'speed': 20.029216079413366, 'crashed': False, 'action': 4}
False
{'speed': 20.00499268423365, 'crashed': False, 'action': 4}
False
{'speed': 20.000853190994736, 'crashed': False, 'action': 4}
False
{'speed': 20.00014580030289, 'crashed': False, 'action': 4}
False
{'speed': 20.00002491555637, 'crashed': False, 'action': 4}
False
{'speed': 20.000004257775444, 'crashed': False, 'action': 4}
False
{'speed': 20.000000727603734, 'crashed': False, 'action': 4}
False
{'speed': 20.000000124338918, 'crashed': False, 'action': 4}
False
{'speed': 20.000000021248056, 'crashed': False, 'action': 4}
False
{'speed': 20.000000003631044, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000620503, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000106038, 'crashed': False, 'action': 4}
False


{'speed': 20.00002491555637, 'crashed': False, 'action': 4}
False
{'speed': 20.000004257775444, 'crashed': False, 'action': 4}
False
{'speed': 20.000000727603734, 'crashed': False, 'action': 4}
False
{'speed': 20.000000124338918, 'crashed': False, 'action': 4}
False
{'speed': 20.000000021248056, 'crashed': False, 'action': 4}
False
{'speed': 20.000000003631044, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000620503, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000106038, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000018122, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000003094, 'crashed': False, 'action': 4}
True
{'speed': 25.8544411731307, 'crashed': False, 'action': 4}
False
{'speed': 21.000455116798893, 'crashed': False, 'action': 4}
False
{'speed': 20.170966008732453, 'crashed': False, 'action': 4}
False
{'speed': 20.029216079413366, 'crashed': False, 'action': 4}
False
{'speed': 20.00499268423365, 'crashed': False, 'action': 4}
False


{'speed': 20.000000003631044, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000620503, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000106038, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000018122, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000003094, 'crashed': False, 'action': 4}
True
{'speed': 25.8544411731307, 'crashed': False, 'action': 4}
False
{'speed': 21.000455116798893, 'crashed': False, 'action': 4}
False
{'speed': 20.170966008732453, 'crashed': False, 'action': 4}
False
{'speed': 20.029216079413366, 'crashed': False, 'action': 4}
False
{'speed': 20.00499268423365, 'crashed': False, 'action': 4}
False
{'speed': 20.000853190994736, 'crashed': False, 'action': 4}
False
{'speed': 20.00014580030289, 'crashed': False, 'action': 4}
False
{'speed': 20.00002491555637, 'crashed': False, 'action': 4}
False
{'speed': 20.000004257775444, 'crashed': False, 'action': 4}
False
{'speed': 20.000000727603734, 'crashed': False, 'action': 4}
False
{

{'speed': 25.8544411731307, 'crashed': False, 'action': 4}
False
{'speed': 21.000455116798893, 'crashed': False, 'action': 4}
False
{'speed': 20.170966008732453, 'crashed': False, 'action': 4}
False
{'speed': 20.029216079413366, 'crashed': False, 'action': 4}
False
{'speed': 20.00499268423365, 'crashed': False, 'action': 4}
False
{'speed': 20.000853190994736, 'crashed': False, 'action': 4}
False
{'speed': 20.00014580030289, 'crashed': False, 'action': 4}
False
{'speed': 20.00002491555637, 'crashed': False, 'action': 4}
False
{'speed': 20.000004257775444, 'crashed': False, 'action': 4}
False
{'speed': 20.000000727603734, 'crashed': False, 'action': 4}
False
{'speed': 20.000000124338918, 'crashed': False, 'action': 4}
False
{'speed': 20.000000021248056, 'crashed': False, 'action': 4}
False
{'speed': 20.000000003631044, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000620503, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000106038, 'crashed': False, 'action': 4}
False


{'speed': 20.00002491555637, 'crashed': False, 'action': 4}
False
{'speed': 20.000004257775444, 'crashed': False, 'action': 4}
False
{'speed': 20.000000727603734, 'crashed': False, 'action': 4}
False
{'speed': 20.000000124338918, 'crashed': False, 'action': 4}
False
{'speed': 20.000000021248056, 'crashed': False, 'action': 4}
False
{'speed': 20.000000003631044, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000620503, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000106038, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000018122, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000003094, 'crashed': False, 'action': 4}
True
{'speed': 25.8544411731307, 'crashed': False, 'action': 4}
False
{'speed': 21.000455116798893, 'crashed': False, 'action': 4}
False
{'speed': 20.170966008732453, 'crashed': False, 'action': 4}
False
{'speed': 20.029216079413366, 'crashed': False, 'action': 4}
False
{'speed': 20.00499268423365, 'crashed': False, 'action': 4}
False


{'speed': 20.000000000106038, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000018122, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000003094, 'crashed': False, 'action': 4}
True
{'speed': 25.8544411731307, 'crashed': False, 'action': 4}
False
{'speed': 21.000455116798893, 'crashed': False, 'action': 4}
False
{'speed': 20.170966008732453, 'crashed': False, 'action': 4}
False
{'speed': 20.029216079413366, 'crashed': False, 'action': 4}
False
{'speed': 20.00499268423365, 'crashed': False, 'action': 4}
False
{'speed': 20.000853190994736, 'crashed': False, 'action': 4}
False
{'speed': 20.00014580030289, 'crashed': False, 'action': 4}
False
{'speed': 20.00002491555637, 'crashed': False, 'action': 4}
False
{'speed': 20.000004257775444, 'crashed': False, 'action': 4}
False
{'speed': 20.000000727603734, 'crashed': False, 'action': 4}
False
{'speed': 20.000000124338918, 'crashed': False, 'action': 4}
False
{'speed': 20.000000021248056, 'crashed': False, 'action': 4}
False
{

{'speed': 20.00499268423365, 'crashed': False, 'action': 4}
False
{'speed': 20.000853190994736, 'crashed': False, 'action': 4}
False
{'speed': 20.00014580030289, 'crashed': False, 'action': 4}
False
{'speed': 20.00002491555637, 'crashed': False, 'action': 4}
False
{'speed': 20.000004257775444, 'crashed': False, 'action': 4}
False
{'speed': 20.000000727603734, 'crashed': False, 'action': 4}
False
{'speed': 20.000000124338918, 'crashed': False, 'action': 4}
False
{'speed': 20.000000021248056, 'crashed': False, 'action': 4}
False
{'speed': 20.000000003631044, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000620503, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000106038, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000018122, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000003094, 'crashed': False, 'action': 4}
True
{'speed': 25.8544411731307, 'crashed': False, 'action': 4}
False
{'speed': 21.000455116798893, 'crashed': False, 'action': 4}
False
{

{'speed': 20.000004257775444, 'crashed': False, 'action': 4}
False
{'speed': 20.000000727603734, 'crashed': False, 'action': 4}
False
{'speed': 20.000000124338918, 'crashed': False, 'action': 4}
False
{'speed': 20.000000021248056, 'crashed': False, 'action': 4}
False
{'speed': 20.000000003631044, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000620503, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000106038, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000018122, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000003094, 'crashed': False, 'action': 4}
True
{'speed': 25.8544411731307, 'crashed': False, 'action': 4}
False
{'speed': 21.000455116798893, 'crashed': False, 'action': 4}
False
{'speed': 20.170966008732453, 'crashed': False, 'action': 4}
False
{'speed': 20.029216079413366, 'crashed': False, 'action': 4}
False
{'speed': 20.00499268423365, 'crashed': False, 'action': 4}
False
{'speed': 20.000853190994736, 'crashed': False, 'action': 4}
False

{'speed': 20.000000000018122, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000003094, 'crashed': False, 'action': 4}
True
{'speed': 25.8544411731307, 'crashed': False, 'action': 4}
False
{'speed': 21.000455116798893, 'crashed': False, 'action': 4}
False
{'speed': 20.170966008732453, 'crashed': False, 'action': 4}
False
{'speed': 20.029216079413366, 'crashed': False, 'action': 4}
False
{'speed': 20.00499268423365, 'crashed': False, 'action': 4}
False
{'speed': 20.000853190994736, 'crashed': False, 'action': 4}
False
{'speed': 20.00014580030289, 'crashed': False, 'action': 4}
False
{'speed': 20.00002491555637, 'crashed': False, 'action': 4}
False
{'speed': 20.000004257775444, 'crashed': False, 'action': 4}
False
{'speed': 20.000000727603734, 'crashed': False, 'action': 4}
False
{'speed': 20.000000124338918, 'crashed': False, 'action': 4}
False
{'speed': 20.000000021248056, 'crashed': False, 'action': 4}
False
{'speed': 20.000000003631044, 'crashed': False, 'action': 4}
False
{

{'speed': 20.170966008732453, 'crashed': False, 'action': 4}
False
{'speed': 20.029216079413366, 'crashed': False, 'action': 4}
False
{'speed': 20.00499268423365, 'crashed': False, 'action': 4}
False
{'speed': 20.000853190994736, 'crashed': False, 'action': 4}
False
{'speed': 20.00014580030289, 'crashed': False, 'action': 4}
False
{'speed': 20.00002491555637, 'crashed': False, 'action': 4}
False
{'speed': 20.000004257775444, 'crashed': False, 'action': 4}
False
{'speed': 20.000000727603734, 'crashed': False, 'action': 4}
False
{'speed': 20.000000124338918, 'crashed': False, 'action': 4}
False
{'speed': 20.000000021248056, 'crashed': False, 'action': 4}
False
{'speed': 20.000000003631044, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000620503, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000106038, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000018122, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000003094, 'crashed': False, 'action': 4}
True

{'speed': 20.00014580030289, 'crashed': False, 'action': 4}
False
{'speed': 20.00002491555637, 'crashed': False, 'action': 4}
False
{'speed': 20.000004257775444, 'crashed': False, 'action': 4}
False
{'speed': 20.000000727603734, 'crashed': False, 'action': 4}
False
{'speed': 20.000000124338918, 'crashed': False, 'action': 4}
False
{'speed': 20.000000021248056, 'crashed': False, 'action': 4}
False
{'speed': 20.000000003631044, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000620503, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000106038, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000018122, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000003094, 'crashed': False, 'action': 4}
True
{'speed': 25.8544411731307, 'crashed': False, 'action': 4}
False
{'speed': 21.000455116798893, 'crashed': False, 'action': 4}
False
{'speed': 20.170966008732453, 'crashed': False, 'action': 4}
False
{'speed': 20.029216079413366, 'crashed': False, 'action': 4}
False


{'speed': 20.000000003631044, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000620503, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000106038, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000018122, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000003094, 'crashed': False, 'action': 4}
True
{'speed': 25.8544411731307, 'crashed': False, 'action': 4}
False
{'speed': 21.000455116798893, 'crashed': False, 'action': 4}
False
{'speed': 20.170966008732453, 'crashed': False, 'action': 4}
False
{'speed': 20.029216079413366, 'crashed': False, 'action': 4}
False
{'speed': 20.00499268423365, 'crashed': False, 'action': 4}
False
{'speed': 20.000853190994736, 'crashed': False, 'action': 4}
False
{'speed': 20.00014580030289, 'crashed': False, 'action': 4}
False
{'speed': 20.00002491555637, 'crashed': False, 'action': 4}
False
{'speed': 20.000004257775444, 'crashed': False, 'action': 4}
False
{'speed': 20.000000727603734, 'crashed': False, 'action': 4}
False
{

{'speed': 20.000000000003094, 'crashed': False, 'action': 4}
True
{'speed': 25.8544411731307, 'crashed': False, 'action': 4}
False
{'speed': 21.000455116798893, 'crashed': False, 'action': 4}
False
{'speed': 20.170966008732453, 'crashed': False, 'action': 4}
False
{'speed': 20.029216079413366, 'crashed': False, 'action': 4}
False
{'speed': 20.00499268423365, 'crashed': False, 'action': 4}
False
{'speed': 20.000853190994736, 'crashed': False, 'action': 4}
False
{'speed': 20.00014580030289, 'crashed': False, 'action': 4}
False
{'speed': 20.00002491555637, 'crashed': False, 'action': 4}
False
{'speed': 20.000004257775444, 'crashed': False, 'action': 4}
False
{'speed': 20.000000727603734, 'crashed': False, 'action': 4}
False
{'speed': 20.000000124338918, 'crashed': False, 'action': 4}
False
{'speed': 20.000000021248056, 'crashed': False, 'action': 4}
False
{'speed': 20.000000003631044, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000620503, 'crashed': False, 'action': 4}
False
{

{'speed': 20.00002491555637, 'crashed': False, 'action': 4}
False
{'speed': 20.000004257775444, 'crashed': False, 'action': 4}
False
{'speed': 20.000000727603734, 'crashed': False, 'action': 4}
False
{'speed': 20.000000124338918, 'crashed': False, 'action': 4}
False
{'speed': 20.000000021248056, 'crashed': False, 'action': 4}
False
{'speed': 20.000000003631044, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000620503, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000106038, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000018122, 'crashed': False, 'action': 4}
False
{'speed': 20.000000000003094, 'crashed': False, 'action': 4}
True
{'speed': 25.8544411731307, 'crashed': False, 'action': 4}
False
{'speed': 21.000455116798893, 'crashed': False, 'action': 4}
False
{'speed': 20.170966008732453, 'crashed': False, 'action': 4}
False
{'speed': 20.029216079413366, 'crashed': False, 'action': 4}
False
{'speed': 20.00499268423365, 'crashed': False, 'action': 4}
False


Performance of DQN is definitely better than the agent taking greedy actions, the rwards range from 0.91 to 0.98 

Moreover, when the average of the rewards have a value of 0.98-0.99 there are no crashes. The number of crashes go from 0-7

## PPO WITH Merge

More information on the algorithm can be found in the report. 

### Creating a ppo model


I set the timesteps at 1000.

In [26]:
ppo_highway = PPO2('MlpPolicy', env, verbose=1)

Wrapping the env in a DummyVecEnv.


In [27]:
ppo_highway.learn(10000)

--------------------------------------
| approxkl           | 0.00016339326 |
| clipfrac           | 0.0           |
| explained_variance | -0.0627       |
| fps                | 39            |
| n_updates          | 1             |
| policy_entropy     | 1.6092753     |
| policy_loss        | -0.002627396  |
| serial_timesteps   | 128           |
| time_elapsed       | 0             |
| total_timesteps    | 128           |
| value_loss         | 15.065617     |
--------------------------------------
---------------------------------------
| approxkl           | 7.802517e-05   |
| clipfrac           | 0.0            |
| explained_variance | -0.0463        |
| fps                | 40             |
| n_updates          | 2              |
| policy_entropy     | 1.6084849      |
| policy_loss        | -0.00033789547 |
| serial_timesteps   | 256            |
| time_elapsed       | 3.28           |
| total_timesteps    | 256            |
| value_loss         | 12.765794      |
-------------

--------------------------------------
| approxkl           | 3.5348745e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.183         |
| fps                | 38            |
| n_updates          | 17            |
| policy_entropy     | 1.6054527     |
| policy_loss        | -0.0003295316 |
| serial_timesteps   | 2176          |
| time_elapsed       | 51.5          |
| total_timesteps    | 2176          |
| value_loss         | 4.3648877     |
--------------------------------------
---------------------------------------
| approxkl           | 1.635074e-05   |
| clipfrac           | 0.0            |
| explained_variance | 0.248          |
| fps                | 39             |
| n_updates          | 18             |
| policy_entropy     | 1.6055706      |
| policy_loss        | -0.00031231483 |
| serial_timesteps   | 2304           |
| time_elapsed       | 54.8           |
| total_timesteps    | 2304           |
| value_loss         | 3.8504505      |
-------------

--------------------------------------
| approxkl           | 0.00080801884 |
| clipfrac           | 0.0           |
| explained_variance | 0.764         |
| fps                | 40            |
| n_updates          | 34            |
| policy_entropy     | 1.5395051     |
| policy_loss        | -0.0044628037 |
| serial_timesteps   | 4352          |
| time_elapsed       | 106           |
| total_timesteps    | 4352          |
| value_loss         | 1.3398424     |
--------------------------------------
-------------------------------------
| approxkl           | 0.0009382582 |
| clipfrac           | 0.0          |
| explained_variance | 0.695        |
| fps                | 40           |
| n_updates          | 35           |
| policy_entropy     | 1.5226321    |
| policy_loss        | -0.004279631 |
| serial_timesteps   | 4480         |
| time_elapsed       | 110          |
| total_timesteps    | 4480         |
| value_loss         | 1.6046668    |
-------------------------------------

--------------------------------------
| approxkl           | 0.00029937414 |
| clipfrac           | 0.0           |
| explained_variance | 0.911         |
| fps                | 39            |
| n_updates          | 51            |
| policy_entropy     | 1.4329295     |
| policy_loss        | 0.0012356911  |
| serial_timesteps   | 6528          |
| time_elapsed       | 161           |
| total_timesteps    | 6528          |
| value_loss         | 0.507741      |
--------------------------------------
--------------------------------------
| approxkl           | 0.00046542377 |
| clipfrac           | 0.0           |
| explained_variance | 0.86          |
| fps                | 39            |
| n_updates          | 52            |
| policy_entropy     | 1.4101921     |
| policy_loss        | -0.004195827  |
| serial_timesteps   | 6656          |
| time_elapsed       | 164           |
| total_timesteps    | 6656          |
| value_loss         | 0.768744      |
-------------------------

--------------------------------------
| approxkl           | 0.0008506901  |
| clipfrac           | 0.0           |
| explained_variance | 0.576         |
| fps                | 39            |
| n_updates          | 68            |
| policy_entropy     | 1.2981253     |
| policy_loss        | -0.0069168555 |
| serial_timesteps   | 8704          |
| time_elapsed       | 217           |
| total_timesteps    | 8704          |
| value_loss         | 2.557672      |
--------------------------------------
------------------------------------
| approxkl           | 0.001765356 |
| clipfrac           | 0.0         |
| explained_variance | 0.663       |
| fps                | 40          |
| n_updates          | 69          |
| policy_entropy     | 1.3188369   |
| policy_loss        | -0.00482329 |
| serial_timesteps   | 8832        |
| time_elapsed       | 220         |
| total_timesteps    | 8832        |
| value_loss         | 1.998293    |
------------------------------------
------------

<stable_baselines.ppo2.ppo2.PPO2 at 0x20ba6430408>

## Performance of PPO on the merge environment

Let us evaluate the performance of the PPO on the merge environment

In [28]:


crashcount = 0
for i in range(10):
    done = False
    obs = env.reset()
    total_reward = []
    while not done:
        env.render()
        action, _states = ppo_highway.predict(obs)
        next_state, reward, done, info = env.step(action)
        if info['crashed']==True : crashcount +=1
        total_reward.append(reward)
        print(info)
        print(done)


rewards_average = Average(total_reward) 
  
print("The average of rewards: ",rewards_average)
print("Number of Crashes: ", crashcount)
env.close()

{'speed': 30.0, 'crashed': False, 'action': 0}
False
{'speed': 30.0, 'crashed': False, 'action': 3}
False
{'speed': 25.8544411731307, 'crashed': False, 'action': 4}
False
{'speed': 25.146013943668194, 'crashed': False, 'action': 0}
False
{'speed': 25.024952065064255, 'crashed': False, 'action': 1}
False
{'speed': 25.004264014349108, 'crashed': False, 'action': 0}
False
{'speed': 25.00072866988454, 'crashed': False, 'action': 0}
False
{'speed': 20.854565694240893, 'crashed': False, 'action': 4}
False
{'speed': 20.14603522286089, 'crashed': False, 'action': 4}
False
{'speed': 20.024955701427935, 'crashed': False, 'action': 0}
False
{'speed': 20.00426463576088, 'crashed': False, 'action': 0}
False
{'speed': 11.517674763018597, 'crashed': True, 'action': 2}
True
{'speed': 30.0, 'crashed': False, 'action': 0}
False
{'speed': 30.0, 'crashed': False, 'action': 0}
False
{'speed': 30.0, 'crashed': False, 'action': 0}
False
{'speed': 25.8544411731307, 'crashed': False, 'action': 4}
False
{'speed

The performance of the PPO algorithm is worse than the performance of the DQN algorithm and better than the performance of the agent chosing actions randomly. The average of the rewards range from 0.85-0.96 and the numbe rof crashes are 0-8