Model

In [21]:
from keras.models import Sequential
from keras.layers import Dense, Flatten, Convolution2D


def build_model(height, width, actions, window_length=3):
    """Build the convolutional Q-network used by the DQN agent.

    The network stacks three "same"-padded ReLU convolutions, flattens the
    result and feeds it through two ReLU dense layers and a final linear
    layer with one unit per action.

    Args:
        height: First dimension of a single observation.
        width: Second dimension of a single observation.
        actions: Number of units in the linear output layer.
        window_length: Number of stacked observations per network input
            (the leading axis of the input shape). Defaults to 3, the
            value that used to be hard-coded here. It should be kept equal
            to the ``window_length`` given to the agent's
            ``SequentialMemory``.

    Returns:
        The uncompiled ``Sequential`` model.
    """
    # NOTE(review): unless Keras is configured for channels_first, this shape
    # is read as (rows, cols, channels), i.e. `width` becomes the channel
    # axis and the observation window a spatial axis -- confirm intended.
    input_shape = (window_length, height, width)

    model = Sequential()
    model.add(
        Convolution2D(
            64,
            (4, 4),
            strides=(2, 2),
            activation="relu",
            input_shape=input_shape,
            padding="same",
        )
    )
    model.add(
        Convolution2D(32, (2, 2), strides=(1, 1), activation="relu", padding="same")
    )
    model.add(Convolution2D(32, (2, 2), activation="relu", padding="same"))
    model.add(Flatten())
    model.add(Dense(512, activation="relu"))
    model.add(Dense(256, activation="relu"))
    # Linear head: one unbounded value per action.
    model.add(Dense(actions, activation="linear"))
    return model

Konfiguracja

In [22]:
# Environment settings: screen size plus the options of the "Kinematics"
# observation (feature list, per-feature ranges, ordering).
config = dict(
    screen_width=640,
    screen_height=480,
    observation=dict(
        type="Kinematics",
        observation_shape=(10, 10),
        vehicles_count=10,
        features=["presence", "x", "y", "vx", "vy"],
        # [min, max] pair for each ranged feature.
        features_range=dict(
            x=[-100, 100],
            y=[-100, 100],
            vx=[-20, 20],
            vy=[-20, 20],
        ),
        absolute=False,
        order="sorted",
    ),
)

Agent

In [23]:
from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy

NB_STEPS = 1_000_000  # Number of steps used to train the model


def build_agent(model, actions):
    """Assemble the dueling DQN agent around ``model``.

    Args:
        model: Network handed to ``DQNAgent`` as its ``model``.
        actions: Value passed to ``DQNAgent`` as ``nb_actions``.

    Returns:
        The constructed (not yet compiled) ``DQNAgent``.
    """
    # Settings for the annealed "eps" attribute of the epsilon-greedy policy;
    # the schedule length is the full training budget.
    annealing = dict(
        attr="eps",
        value_max=1.0,
        value_min=0.1,
        value_test=0.2,
        nb_steps=NB_STEPS,
    )
    exploration = LinearAnnealedPolicy(EpsGreedyQPolicy(), **annealing)

    replay_buffer = SequentialMemory(limit=1000, window_length=3)

    return DQNAgent(
        model=model,
        memory=replay_buffer,
        policy=exploration,
        enable_dueling_network=True,
        dueling_type="avg",
        nb_actions=actions,
        # Warm-up length is 1% of the training budget.
        nb_steps_warmup=NB_STEPS / 100,
    )

Gra

In [24]:
import gym
import highway_env
from agent import build_agent, NB_STEPS
from conf import config
from model import build_model
from keras.optimizers import Adam
import matplotlib.pyplot as plt
import os
import random

HOW_MANY_EPISODES = 10  # Number of episodes played when testing the agent
# Reference note kept from the original (not used by the code):
# ACTIONS_ALL = {0: "LANE_LEFT", 1: "IDLE", 2: "LANE_RIGHT", 3: "FASTER", 4: "SLOWER"}


def plot_results(data):
    """Plot the "episode_reward" series against the "nb_steps" series.

    Args:
        data: Object whose ``history`` mapping holds "nb_steps" and
            "episode_reward" entries of equal length.
    """
    history = data.history
    steps = history["nb_steps"]
    rewards = history["episode_reward"]

    plt.plot(steps, rewards)
    plt.ylabel("Reward")
    plt.xlabel("Iteration")
    plt.show()


# Driver: create the environment, build network + agent, then either replay
# previously saved weights or train from scratch and save the result.
WEIGHTS_DIR = "saved_weights/"
WEIGHTS_PATH = "saved_weights/1k-fast.h5f"

env = gym.make("highway-fast-v0")
env.configure(config)
env.reset()  # reset so the configuration above is applied before reading the spaces
height, width = env.observation_space.shape

# Size the network from the full discrete action space.
# env.get_available_actions() only lists the actions that are legal in the
# *current* state, so its length can be smaller than the action space and the
# agent would then never be able to pick the highest action indices.
# NOTE(review): weights saved from a network built with the old, smaller
# output layer will not load into this one -- retrain in that case.
nb_actions = int(env.action_space.n)

model = build_model(height, width, nb_actions)
dqn = build_agent(model, nb_actions)

load = input("Do You want to load a already trained model?(y/n): ").lower()
use_saved_weights = load == "y"

# A missing directory is treated the same as an empty one instead of letting
# os.listdir raise.
if use_saved_weights and not (os.path.isdir(WEIGHTS_DIR) and os.listdir(WEIGHTS_DIR)):
    print("Directory with models is empty. Exiting.")
    raise SystemExit

# The agent has to be compiled on BOTH paths: keras-rl refuses to test() an
# uncompiled agent, and load_weights() syncs the target network that
# compile() creates.
dqn.compile(Adam(learning_rate=1e-4))

if use_saved_weights:
    dqn.load_weights(WEIGHTS_PATH)
    scores = dqn.test(env, nb_episodes=HOW_MANY_EPISODES, visualize=True)
    plot_results(scores)
else:
    training = dqn.fit(env, nb_steps=NB_STEPS, visualize=False, verbose=2)
    # Persist the weights right away so that interrupting the visualised
    # test run or the blocking plot below cannot throw away the training.
    os.makedirs(WEIGHTS_DIR, exist_ok=True)
    dqn.save_weights(WEIGHTS_PATH)
    scores = dqn.test(env, nb_episodes=HOW_MANY_EPISODES, visualize=True)
    plot_results(training)

  super().__init__(name, **kwargs)


Training for 1000000 steps ...


  updates=self.state_updates,
  logger.deprecation(


      4/1000000: episode: 1, duration: 0.661s, episode steps:   4, steps per second:   6, episode reward:  2.566, mean reward:  0.642 [ 0.000,  0.867], mean action: 0.750 [0.000, 2.000],  loss: --, mean_q: --, mean_eps: --
     23/1000000: episode: 2, duration: 1.123s, episode steps:  19, steps per second:  17, episode reward: 16.643, mean reward:  0.876 [ 0.033,  0.967], mean action: 1.263 [0.000, 3.000],  loss: --, mean_q: --, mean_eps: --
     28/1000000: episode: 3, duration: 0.316s, episode steps:   5, steps per second:  16, episode reward:  3.465, mean reward:  0.693 [ 0.067,  0.867], mean action: 1.600 [0.000, 3.000],  loss: --, mean_q: --, mean_eps: --
     35/1000000: episode: 4, duration: 0.442s, episode steps:   7, steps per second:  16, episode reward:  5.032, mean reward:  0.719 [ 0.133,  0.833], mean action: 0.714 [0.000, 2.000],  loss: --, mean_q: --, mean_eps: --
     39/1000000: episode: 5, duration: 0.254s, episode steps:   4, steps per second:  16, episode reward:  2

  logger.warn(


Episode 1: reward: 2.499, steps: 4
Episode 2: reward: 4.365, steps: 6
Episode 3: reward: 7.833, steps: 10
Episode 4: reward: 6.233, steps: 8
Episode 5: reward: 2.633, steps: 4


KeyboardInterrupt: 