In [None]:
# Installing libraries
!pip install gym
!pip install highway_env
!pip install stable_baselines3
!pip install stable-baselines==2.7.0

In [None]:
import highway_env
import gym
import numpy as np
import random

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

from collections import deque


from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import VecVideoRecorder, DummyVecEnv
import warnings
warnings.filterwarnings('ignore')

import matplotlib.pyplot as plt

In [None]:
# Configuration parameters for the whole setup
# NOTE(review): the DQN cell passes its own gamma=0.8 rather than `gamma`, and
# the evaluation loop uses its own episode count of 100 rather than `episodes`
# — confirm which values are intended.
seed = 0  # Random seed value
gamma = 0.99  # Discount factor for past rewards
epsilon = 1.0  # Epsilon greedy parameter
episodes = 100 # M — presumably the number of episodes; TODO confirm
steps = 10000  # presumably a step budget; TODO confirm where it is consumed

In [None]:
def train_env():
    """Create an ``intersection-v0`` environment that uses grayscale image observations.

    The environment is created with ``gym.make``, configured with the
    ``GrayscaleObservation`` settings below, and reset once before being
    handed back.

    Returns:
        The configured environment, after one ``reset()`` call.
    """
    grayscale_observation = {
        "type": "GrayscaleObservation",
        "observation_shape": (128, 64),
        "stack_size": 4,
        "weights": [0.2989, 0.5870, 0.1140],  # weights for RGB conversion
        "scaling": 1.75,
    }
    intersection_env = gym.make('intersection-v0')
    intersection_env.configure({"observation": grayscale_observation})
    intersection_env.reset()
    return intersection_env

In [None]:
def test_env():
    """Create an evaluation environment on top of the training configuration.

    Starts from ``train_env()`` and overrides ``policy_frequency`` and
    ``duration``, then resets the environment once more.

    Returns:
        The reconfigured environment, after one further ``reset()`` call.
    """
    evaluation_overrides = {"policy_frequency": 15, "duration": 20 * 15}
    evaluation_env = train_env()
    evaluation_env.configure(evaluation_overrides)
    evaluation_env.reset()
    return evaluation_env

# Code for training the agent

In [None]:
# Environment with grayscale image observations that the DQN agent is trained on.
env = train_env()

In [None]:
# Build a DQN agent with a CNN policy on the image-observation environment.
model = DQN(
    'CnnPolicy',
    env,
    learning_rate=5e-4,
    buffer_size=15000,
    learning_starts=200,
    batch_size=32,
    gamma=0.8,
    train_freq=1,
    gradient_steps=1,
    target_update_interval=50,
    exploration_fraction=0.7,
    verbose=1,
    # Pass the notebook-wide seed from the configuration cell so the agent's
    # random number generators are seeded instead of left unseeded.
    seed=seed,
    tensorboard_log="intersection_cnn/",
)

# Train for 1e5 environment steps, then write the agent to disk so the
# loading cell can restore it.
model.learn(total_timesteps=int(1e5))
model.save("DQN_cnn_stable_baselines")

# Code for loading the agent

In [None]:
# Fresh environment for running the saved agent.
# NOTE(review): test_env() is defined above but this cell uses train_env() —
# confirm which one is intended for evaluation.
env = train_env()

In [None]:
# Restore the agent from the "DQN_cnn_stable_baselines" file written by model.save() in the training cell.
model = DQN.load("DQN_cnn_stable_baselines")

In [None]:
# Run the loaded agent with deterministic actions for a fixed number of
# episodes, rendering every step and reporting per-episode and average reward.
n_eval_episodes = 100
episode = 0
episode_reward = 0
episode_rewards = []
state = env.reset()
while episode < n_eval_episodes:
    # Only the first element of predict()'s result (the action) is used.
    action = model.predict(state, deterministic=True)[0]
    next_state, reward, done, info = env.step(action)
    env.render()
    episode_reward += reward
    state = next_state
    if done:
        episode += 1
        episode_rewards.append(episode_reward)
        print(("episode", episode, "reward", episode_reward))
        episode_reward = 0
        # The next episode must start from the observation returned by
        # reset(); otherwise the first action of the new episode would be
        # predicted from the previous episode's terminal observation.
        state = env.reset()
avg_reward = sum(episode_rewards) / len(episode_rewards)
print(("average_reward", avg_reward))