# Experiments with Highway Driving Agents

This repository tests out and evaluates some agents from https://github.com/eleurent/rl-agents

In [29]:
import os

# When a local checkout of rl-agents sits next to this notebook, work from its
# scripts directory (presumably so the relative "configs/..." paths used in
# later cells resolve — confirm against your checkout layout).
if os.path.exists("rl-agents"):
    os.chdir("rl-agents/scripts")
try:
    from rl_agents.agents.common.factory import load_agent, load_environment
    from rl_agents.trainer.evaluation import Evaluation
except ImportError as exc:
    # Catch only import failures (a bare `except:` would also swallow
    # KeyboardInterrupt and unrelated errors) and chain the original exception
    # so the real cause stays visible in the traceback.
    raise ModuleNotFoundError("rl-agents module not installed.\nPlease install the module from https://github.com/eleurent/rl-agents") from exc
from pprint import pprint
import json
import time
from copy import deepcopy

In [30]:
# Environment: load it from its JSON config and print the raw config for reference.
env_config_path = "configs/HighwayEnv/env_easy.json"
# Use a context manager so the file handle is closed deterministically instead
# of being left open until garbage collection.
with open(env_config_path) as env_config_fh:
    config_file = json.load(env_config_fh)
env = load_environment(env_config_path)
print("env_config:")
pprint(config_file)

# Agent: load it for the environment above and print its raw config.
agent_config_path = "configs/HighwayEnv/agents/DQNAgent/dueling_ddqn.json"
with open(agent_config_path) as agent_config_fh:
    agent_file = json.load(agent_config_fh)
agent = load_agent(agent_config_path, env)
print("agent_config:")
pprint(agent_file)

  logger.warn(
[INFO] Choosing GPU device: 0, memory used: 2431 


env_config:
{'centering_position': [0.3, 0.5],
 'duration': 20,
 'id': 'highway-v0',
 'import_module': 'highway_env',
 'lanes_count': 2,
 'other_vehicles_type': 'highway_env.vehicle.behavior.LinearVehicle',
 'vehicles_count': 1}
agent_config:
{'base_config': 'configs/HighwayEnv/agents/DQNAgent/ddqn.json',
 'model': {'advantage': {'layers': []},
           'base_module': {'layers': [256, 256]},
           'type': 'DuelingNetwork',
           'value': {'layers': []}}}


In [4]:
# Reset the environment once and show both values it returns:
# the info dict first, then the initial observation.
state, meta = env.reset()
for label, value in (("meta:", meta), ("state:", state)):
    print(label)
    pprint(value)

meta:
{'action': 1,
 'crashed': False,
 'rewards': {'collision_reward': 0.0,
             'high_speed_reward': 0.5,
             'on_road_reward': 1.0,
             'right_lane_reward': 1.0},
 'speed': 25}
state:
array([[ 1.        ,  1.        ,  0.5       ,  0.3125    ,  0.        ],
       [ 1.        ,  0.12608375, -0.5       , -0.02771554,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ]],
      dtype=float32)


In [6]:
def visualize_episodes(agent, env, episode_num=10, max_iter=100, frame_delay=0.05):
    """Roll out and render episodes of ``agent`` acting in ``env``.

    Every transition is also passed to ``agent.record``, so the agent object
    handed in is not treated as read-only by this function.

    Args:
        agent: Object exposing ``act(state)`` and
            ``record(state, action, reward, next_state, done, info)``.
        env: Environment whose ``reset()`` returns ``(state, info)`` and whose
            ``step(action)`` returns a 5-tuple. The tuple is assumed to follow
            the Gymnasium order ``(obs, reward, terminated, truncated, info)``,
            which matches ``reset()`` returning a pair — confirm against the
            installed environment version.
        episode_num: Number of episodes to play.
        max_iter: Upper bound on the number of steps per episode.
        frame_delay: Seconds to sleep before each step to slow down rendering;
            ``0`` or ``None`` disables the pause.
    """
    try:
        for _ in range(episode_num):
            state, _reset_info = env.reset()
            for _ in range(max_iter):
                if frame_delay:
                    time.sleep(frame_delay)
                action = agent.act(state)
                # Unpack in Gymnasium order: previously the 4th element
                # (truncated flag) was passed on as `info` and the real info
                # dict was discarded.
                next_state, reward, terminated, truncated, info = env.step(action)
                agent.record(state, action, reward, next_state, terminated, info)
                env.render()
                state = next_state
                # An episode ends on termination *or* truncation (time limit).
                if terminated or truncated:
                    break
    finally:
        # Release the environment even if a step or render call raises.
        env.close()

Let's watch 10 episodes to see how an untrained model performs.

In [6]:
# Visualise a deep copy rather than `agent` itself: visualize_episodes calls
# agent.record() on every step, and `agent` is the object trained in a later
# cell (presumably the copy keeps it unaffected — confirm deepcopy semantics
# for this agent class).
agent_untrained = deepcopy(agent)
visualize_episodes(agent_untrained, env)

  return self.value_net(torch.tensor(states, dtype=torch.float).to(self.device)).data.cpu().numpy()


In [6]:
# Train `agent` on `env` with all displays switched off.
# The run directory is named with a sortable timestamp free of spaces and
# colons; time.ctime() produces e.g. "Wed Apr  5 12:12:25 2023", which is
# awkward in shells and not a valid directory name on Windows.
evaluation = Evaluation(env,
                        agent,
                        run_directory=f"runs/{time.strftime('%Y%m%d-%H%M%S')}",
                        num_episodes=1000,
                        sim_seed=42,
                        recover=None,
                        display_env=False,
                        display_agent=False,
                        display_rewards=False)
evaluation.train()

[INFO] Episode 0 score: 3.8 
[INFO] Saved DQNAgent model to out/HighwayEnv/DQNAgent/runs/Wed Apr  5 12:12:25 2023/checkpoint-0.tar 
[INFO] Episode 1 score: 3.7 
[INFO] Saved DQNAgent model to out/HighwayEnv/DQNAgent/runs/Wed Apr  5 12:12:25 2023/checkpoint-1.tar 
[INFO] Episode 2 score: 11.6 
[INFO] Episode 3 score: 6.5 
[INFO] Episode 4 score: 8.0 
[INFO] Episode 5 score: 3.0 
[INFO] Episode 6 score: 18.8 
[INFO] Episode 7 score: 4.5 
[INFO] Episode 8 score: 15.5 
[INFO] Saved DQNAgent model to out/HighwayEnv/DQNAgent/runs/Wed Apr  5 12:12:25 2023/checkpoint-8.tar 
[INFO] Episode 9 score: 16.1 
[INFO] Episode 10 score: 3.0 
[INFO] Episode 11 score: 17.7 
[INFO] Episode 12 score: 12.6 
[INFO] Episode 13 score: 10.6 
[INFO] Episode 14 score: 3.7 
[INFO] Episode 15 score: 4.5 
[INFO] Episode 16 score: 14.3 
[INFO] Episode 17 score: 15.8 
[INFO] Episode 18 score: 11.0 
[INFO] Episode 19 score: 17.9 
[INFO] Episode 20 score: 2.9 
[INFO] Episode 21 score: 14.4 
[INFO] Episode 22 score: 16.4

In [7]:
# Keep a snapshot of the agent as it is after the training cell above
# (`agent` is rebound to a fresh agent in a later cell), then render it.
agent_env_easy = deepcopy(agent)
visualize_episodes(agent_env_easy, env)

In [31]:
# Environment: load it from its JSON config and print the raw config for reference.
env_config_path = "configs/HighwayEnv/env_medium.json"
# Use a context manager so the file handle is closed deterministically instead
# of being left open until garbage collection.
with open(env_config_path) as env_config_fh:
    config_file = json.load(env_config_fh)
env = load_environment(env_config_path)
print("env_config:")
pprint(config_file)

# Agent: a fresh agent is loaded for the new environment, rebinding `agent`.
agent_config_path = "configs/HighwayEnv/agents/DQNAgent/dueling_ddqn.json"
with open(agent_config_path) as agent_config_fh:
    agent_file = json.load(agent_config_fh)
agent = load_agent(agent_config_path, env)
print("agent_config:")
pprint(agent_file)

  logger.warn(
[INFO] Choosing GPU device: 0, memory used: 2432 


env_config:
{'duration': 40,
 'id': 'highway-v0',
 'import_module': 'highway_env',
 'lanes_count': 3,
 'policy_frequency': 2,
 'vehicles_count': 15}
agent_config:
{'base_config': 'configs/HighwayEnv/agents/DQNAgent/ddqn.json',
 'model': {'advantage': {'layers': []},
           'base_module': {'layers': [256, 256]},
           'type': 'DuelingNetwork',
           'value': {'layers': []}}}


In [13]:
# NOTE(review): `agent` is passed directly here (no deepcopy), so the
# agent.record() calls made inside visualize_episodes act on the same object
# that the next cell trains.
visualize_episodes(agent, env)

In [14]:
# Train `agent` on `env` with all displays switched off.
# The run directory is named with a sortable timestamp free of spaces and
# colons; time.ctime() produces e.g. "Wed Apr  5 12:42:06 2023", which is
# awkward in shells and not a valid directory name on Windows.
evaluation = Evaluation(env,
                        agent,
                        run_directory=f"runs/{time.strftime('%Y%m%d-%H%M%S')}",
                        num_episodes=1000,
                        sim_seed=42,
                        recover=None,
                        display_env=False,
                        display_agent=False,
                        display_rewards=False)
evaluation.train()

[INFO] Episode 0 score: 3.9 
[INFO] Saved DQNAgent model to out/HighwayEnv/DQNAgent/runs/Wed Apr  5 12:42:06 2023/checkpoint-0.tar 
[INFO] Episode 1 score: 22.7 
[INFO] Saved DQNAgent model to out/HighwayEnv/DQNAgent/runs/Wed Apr  5 12:42:06 2023/checkpoint-1.tar 
[INFO] Episode 2 score: 11.1 
[INFO] Episode 3 score: 14.3 
[INFO] Episode 4 score: 19.2 
[INFO] Episode 5 score: 12.4 
[INFO] Episode 6 score: 9.4 
[INFO] Episode 7 score: 7.8 
[INFO] Episode 8 score: 11.5 
[INFO] Saved DQNAgent model to out/HighwayEnv/DQNAgent/runs/Wed Apr  5 12:42:06 2023/checkpoint-8.tar 
[INFO] Episode 9 score: 16.5 
[INFO] Episode 10 score: 25.2 
[INFO] Episode 11 score: 10.5 
[INFO] Episode 12 score: 5.8 
[INFO] Episode 13 score: 29.7 
[INFO] Episode 14 score: 10.5 
[INFO] Episode 15 score: 18.8 
[INFO] Episode 16 score: 29.0 
[INFO] Episode 17 score: 14.0 
[INFO] Episode 18 score: 14.3 
[INFO] Episode 19 score: 13.7 
[INFO] Episode 20 score: 14.6 
[INFO] Episode 21 score: 52.1 
[INFO] Episode 22 score

In [15]:
# Keep a snapshot of the agent as it is after the training cell above
# (`agent` is rebound to a different agent in a later cell), then render it.
agent_env_medium = deepcopy(agent)
visualize_episodes(agent_env_medium, env)

In [35]:
# Load the MCTS agent for the current `env` and print its raw config.
agent_config_path = "configs/HighwayEnv/agents/MCTSAgent/iterations100.json"
# Use a context manager so the file handle is closed deterministically instead
# of being left open until garbage collection.
with open(agent_config_path) as agent_config_fh:
    agent_file = json.load(agent_config_fh)
print("agent_config:")
pprint(agent_file)
agent = load_agent(agent_config_path, env)

agent_config:
{'base_config': 'configs/HighwayEnv/agents/MCTSAgent/baseline.json',
 'budget': 600}


In [36]:
# Run `agent` on `env` through Evaluation.train() with all displays switched off.
# The run directory is named with a sortable timestamp free of spaces and
# colons; time.ctime() output contains both, which is awkward in shells and
# not a valid directory name on Windows.
evaluation = Evaluation(env,
                        agent,
                        run_directory=f"runs/{time.strftime('%Y%m%d-%H%M%S')}",
                        num_episodes=100,
                        sim_seed=42,
                        recover=None,
                        display_env=False,
                        display_agent=False,
                        display_rewards=False)
evaluation.train()

[INFO] Episode 0 score: 60.7 


KeyboardInterrupt: 

In [11]:
# Snapshot the current agent and render it.
# NOTE(review): the saved output of this cell is an AttributeError
# ('NoneType' object has no attribute 'get_image'); the cause is not visible
# from this notebook — re-check after a fresh Restart & Run All.
agent_mcts = deepcopy(agent)
visualize_episodes(agent_mcts, env)

AttributeError: 'NoneType' object has no attribute 'get_image'

In [8]:
# Load the value-iteration agent for the current `env` and print its raw config.
agent_config_path = "configs/HighwayEnv/agents/ValueIterationAgent/baseline.json"
# Use a context manager so the file handle is closed deterministically instead
# of being left open until garbage collection.
with open(agent_config_path) as agent_config_fh:
    agent_file = json.load(agent_config_fh)
print("agent_config:")
pprint(agent_file)
agent = load_agent(agent_config_path, env)

agent_config:
{'__class__': '<class '
              "'rl_agents.agents.dynamic_programming.value_iteration.ValueIterationAgent'>",
 'iterations': 10}


In [9]:
# Run `agent` on `env` through Evaluation.train() with all displays switched off.
# The run directory is named with a sortable timestamp free of spaces and
# colons; time.ctime() output contains both, which is awkward in shells and
# not a valid directory name on Windows.
evaluation = Evaluation(env,
                        agent,
                        run_directory=f"runs/{time.strftime('%Y%m%d-%H%M%S')}",
                        num_episodes=500,
                        sim_seed=42,
                        recover=None,
                        display_env=False,
                        display_agent=False,
                        display_rewards=False)
evaluation.train()

[INFO] Episode 0 score: 78.4 
[INFO] Episode 1 score: 78.7 
[INFO] Episode 2 score: 9.7 
[INFO] Episode 3 score: 7.8 
[INFO] Episode 4 score: 19.8 
[INFO] Episode 5 score: 78.2 
[INFO] Episode 6 score: 53.0 
[INFO] Episode 7 score: 26.4 
[INFO] Episode 8 score: 78.4 
[INFO] Episode 9 score: 23.8 
[INFO] Episode 10 score: 41.6 
[INFO] Episode 11 score: 78.2 
[INFO] Episode 12 score: 14.9 
[INFO] Episode 13 score: 30.1 
[INFO] Episode 14 score: 27.3 
[INFO] Episode 15 score: 78.4 
[INFO] Episode 16 score: 61.1 
[INFO] Episode 17 score: 78.4 
[INFO] Episode 18 score: 78.0 
[INFO] Episode 19 score: 4.0 
[INFO] Episode 20 score: 4.0 
[INFO] Episode 21 score: 18.8 
[INFO] Episode 22 score: 78.7 
[INFO] Episode 23 score: 8.6 
[INFO] Episode 24 score: 4.0 
[INFO] Episode 25 score: 42.8 
[INFO] Episode 26 score: 48.6 
[INFO] Episode 27 score: 78.3 
[INFO] Episode 28 score: 78.7 
[INFO] Episode 29 score: 78.6 
[INFO] Episode 30 score: 17.3 
[INFO] Episode 31 score: 78.8 
[INFO] Episode 32 score:

In [10]:
# Snapshot the agent as it is after the Evaluation cell above, then render it.
agent_value_iteration = deepcopy(agent)
visualize_episodes(agent_value_iteration, env)