In [1]:
from gym import Env
from gym.spaces import Discrete, Box
import numpy as np
import random

from Backend.Junction.main import Simulation

In [2]:
class JunctionEnv(Env):
    """Gym environment in which an agent sets the two traffic lights of a junction simulation.

    Actions (``Discrete(3)``):
        0 -- light 0 red,   light 1 green
        1 -- light 0 green, light 1 red
        2 -- both lights red

    Observations are whatever ``sim.get_state()`` returns and are declared as a
    single float in ``[0, 10]``.
    NOTE(review): confirm that range against what the simulation actually emits.

    Args:
        sim: the simulation to drive; must provide ``update``, ``reset``,
            ``get_lights``, ``get_state``, ``get_mean_wait_time`` and a
            ``collision`` attribute.
        max_steps: number of ``step`` calls after which an episode is
            reported as done.
    """

    # (light 0 state, light 1 state) selected by each discrete action.
    _LIGHT_STATES = {
        0: ("red", "green"),
        1: ("green", "red"),
        2: ("red", "red"),
    }

    def __init__(self, sim: Simulation, max_steps: int = 10001):
        self.sim = sim
        self.max_steps = max_steps
        self.action_space = Discrete(len(self._LIGHT_STATES))
        # Float32 bounds with an explicit dtype avoid gym's
        # "Box bound precision lowered by casting" warning that integer
        # bound arrays trigger.
        self.observation_space = Box(
            low=np.array([0], dtype=np.float32),
            high=np.array([10], dtype=np.float32),
            dtype=np.float32,
        )
        self.state = 0
        self.iteration = 0

    def step(self, action):
        """Advance the simulation one tick, apply ``action``, and score the result.

        The simulation is updated *before* the lights are changed, so the
        chosen light configuration takes effect on the next call's update.

        Args:
            action: index into the action table (0, 1 or 2). Any other value
                leaves the lights unchanged.

        Returns:
            ``(state, reward, done, info)`` where ``reward`` is
            ``1 - mean_wait_time ** 2``, reduced by 1 when
            ``sim.collision`` is not ``None``, and ``info`` is an empty dict.
        """
        self.sim.update(self.iteration)
        self.iteration += 1
        done = self.iteration >= self.max_steps

        lights = self.sim.get_lights()
        targets = self._LIGHT_STATES.get(int(action))
        if targets is not None:
            lights[0].set_state(targets[0])
            lights[1].set_state(targets[1])

        self.state = self.sim.get_state()

        reward = 1 - self.sim.get_mean_wait_time() ** 2
        if self.sim.collision is not None:
            reward -= 1

        info = {}

        return self.state, reward, done, info

    def render(self, mode="human"):
        """No-op; ``mode`` is accepted only for compatibility with gym callers."""
        pass

    def reset(self):
        """Reset the simulation and step counter and return the initial observation.

        The observation comes from ``sim.get_state()`` so that the first
        observation of an episode has the same meaning as those returned by
        ``step`` (previously a hard-coded placeholder value was returned).
        """
        self.sim.reset()
        self.iteration = 0
        self.state = self.sim.get_state()
        return self.state
        

In [3]:
# Headless environment (no visualisation) used for the random baseline and training.
env = JunctionEnv(
    sim=Simulation("cross_road.junc", visualise=False),
)

  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


In [4]:
# Baseline: play episodes with uniformly random actions and report each
# episode's total reward.
# The episode count lives in its own constant so the loop variable no longer
# overwrites it.
NUM_EPISODES = 1
for episode in range(1, NUM_EPISODES + 1):
    state = env.reset()
    done = False
    score = 0

    while not done:
        action = env.action_space.sample()
        n_state, reward, done, info = env.step(action)
        score += reward
    print(f"Episode: {episode} Score: {score}")

Mean wait time: 0.00min
08:00:00:100
Mean wait time: 1.34min
08:16:40:100
Episode: 1 Score: -8445.110591396457


In [5]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers.legacy import Adam

In [6]:
# Network dimensions: observation shape for the input layer and the number of
# discrete actions for the output layer.
states, actions = env.observation_space.shape, env.action_space.n

In [7]:
def build_model(states, actions):
    """Create the Q-network: two 24-unit ReLU hidden layers and a linear output layer.

    Args:
        states: shape tuple passed as ``input_shape`` to the first layer.
        actions: number of units in the output layer (one per action).

    Returns:
        An uncompiled Keras ``Sequential`` model.
    """
    layers = [
        Dense(24, activation='relu', input_shape=states),
        Dense(24, activation='relu'),
        Dense(actions, activation='linear'),
    ]
    return Sequential(layers)

In [14]:
# Drop any model left over from an earlier run of this notebook. Unlike a bare
# `del model`, this does not raise NameError on a fresh kernel where no model
# has been built yet, so Restart & Run All keeps working.
globals().pop("model", None)

In [15]:
# Fresh, untrained Q-network sized to the environment.
model = build_model(states=states, actions=actions)

In [10]:
# Print the layer-by-layer architecture and parameter counts of the Q-network.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 24)                48        
                                                                 
 dense_1 (Dense)             (None, 24)                600       
                                                                 
 dense_2 (Dense)             (None, 3)                 75        
                                                                 
Total params: 723
Trainable params: 723
Non-trainable params: 0
_________________________________________________________________


In [11]:
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

In [12]:
def build_agent(model, actions):
    """Wrap ``model`` in a keras-rl ``DQNAgent``.

    The agent uses a Boltzmann Q-policy, a 50,000-entry sequential replay
    memory with a window length of 1, 10 warm-up steps, and a target-model
    update setting of 1e-2.

    Args:
        model: Keras model producing one Q-value per action.
        actions: number of discrete actions (passed as ``nb_actions``).

    Returns:
        The configured, not-yet-compiled ``DQNAgent``.
    """
    exploration_policy = BoltzmannQPolicy()
    replay_memory = SequentialMemory(limit=50000, window_length=1)
    return DQNAgent(
        model=model,
        memory=replay_memory,
        policy=exploration_policy,
        nb_actions=actions,
        nb_steps_warmup=10,
        target_model_update=1e-2,
    )

In [16]:
# Build the agent, compile it with Adam (tracking mean absolute error), and
# train it for 50,000 environment steps. The fit call stays as the cell's last
# expression so its return value is displayed.
dqn = build_agent(model=model, actions=actions)
dqn.compile(optimizer=Adam(learning_rate=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


2023-02-07 10:51:24.318352: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-02-07 10:51:24.318385: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2023-02-07 10:51:24.323176: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:357] MLIR V1 optimization pass is not enabled
2023-02-07 10:51:24.324490: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2023-02-07 10:51:24.324961: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-02-07 10:51:24.335140: W tensorflow/c/c_api.cc:291] Operation '{name:'dens

Training for 50000 steps ...
Interval 1 (0 steps performed)
Mean wait time: 0.00min
08:00:00:100
    1/10000 [..............................] - ETA: 8:19 - reward: 1.0000

  updates=self.state_updates,
2023-02-07 10:51:25.221045: W tensorflow/c/c_api.cc:291] Operation '{name:'dense_5/BiasAdd' id:145 op device:{requested: '', assigned: ''} def:{{{node dense_5/BiasAdd}} = BiasAdd[T=DT_FLOAT, _has_manual_control_dependencies=true, data_format="NHWC"](dense_5/MatMul, dense_5/BiasAdd/ReadVariableOp)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2023-02-07 10:51:25.226039: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-02-07 10:51:25.231261: W tensorflow/c/c_api.cc:291] Operation '{name:'count_2/Assign' id:377 op device:{requested: '', assigned: ''} def:{{{node count_2/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](count_2, count_2/Initializer/zeros)}}' wa

   18/10000 [..............................] - ETA: 8:17 - reward: 1.0000

2023-02-07 10:51:25.943499: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-02-07 10:51:25.957765: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-02-07 10:51:25.972037: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-02-07 10:51:25.986348: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Interval 2 (10000 steps performed)
Mean wait time: 1.18min
08:16:40:100
    1/10000 [..............................] - ETA: 3:20 - reward: 0.0000e+00Mean wait time: 0.00min
08:00:00:100
1 episodes - episode_reward: -5238.947 [-5238.947, -5238.947] - loss: 218048.891 - mae: 13357.958 - mean_q: 20073.463

Interval 3 (20000 steps performed)
    1/10000 [..............................] - ETA: 3:47 - reward: -1.0171Mean wait time: 1.01min
08:16:40:100
Mean wait time: 0.00min
08:00:00:100
1 episodes - episode_reward: -5650.219 [-5650.219, -5650.219] - loss: 1200354.375 - mae: 13496.206 - mean_q: 20279.240

Interval 4 (30000 steps performed)
    1/10000 [..............................] - ETA: 3:45 - reward: -0.6423Mean wait time: 0.80min
08:16:40:100
Mean wait time: 0.00min
08:00:00:100
1 episodes - episode_reward: -605.974 [-605.974, -605.974] - loss: 250490.578 - mae: 4866.854 - mean_q: 7317.952

Interval 5 (40000 steps performed)
    1/10000 [..............................] - ETA: 3:56 - r

<keras.callbacks.History at 0x15d282070>

In [None]:
# Ask the agent for its action given the observation value 1; as the cell's
# last expression the result is displayed.
# NOTE(review): this looks like a leftover debugging probe with a made-up
# observation -- confirm whether it should stay in the notebook.
dqn.forward(1)

In [None]:
# Evaluate the trained agent for two episodes and print the mean episode reward.
scores = dqn.test(env, nb_episodes=2, visualize=False)
episode_rewards = scores.history['episode_reward']
print(np.mean(episode_rewards))

In [None]:
# Rebuild the environment with visualisation enabled, handing the trained
# agent to the simulation.
env = JunctionEnv(
    sim=Simulation("cross_road.junc", visualise=True, dqn_agent=dqn),
)