In [1]:
import gym
from gym import Env
from gym.spaces import Discrete, Box, Dict
import numpy as np
import random

from Backend.Junction.main import Simulation

In [2]:
class JunctionEnv(Env):
    """Gym environment in which an agent switches the lights of a junction simulation.

    Actions (``Discrete(3)``) are applied to the first two lights returned by
    ``sim.get_lights()``:

    * ``0`` -- light 0 red,   light 1 green
    * ``1`` -- light 0 green, light 1 red
    * ``2`` -- both lights red

    Observations are whatever ``sim.get_state()`` returns and the reward is
    read from ``sim.reward``. An episode ends once ``max_steps`` calls to
    :meth:`step` have been made since the last :meth:`reset`.
    """

    # (state for lights[0], state for lights[1]) indexed by action number.
    _LIGHT_PHASES = (
        ("red", "green"),
        ("green", "red"),
        ("red", "red"),
    )

    def __init__(self, sim: Simulation, max_steps: int = 1000):
        """Wrap ``sim`` as a gym environment.

        Args:
            sim: Simulation instance that is advanced and controlled by this
                environment.
            max_steps: Number of steps after which ``step`` reports
                ``done=True``. Defaults to 1000, the previously hard-coded
                episode length.
        """
        self.sim = sim
        self.max_steps = max_steps
        self.action_space = Discrete(len(self._LIGHT_PHASES))
        self.observation_space_size = 6
        # NOTE(review): the declared shape is (1, 6) while reset() hands back a
        # flat vector of length 6. The notebook passes this shape directly to
        # build_model as the network input shape, so it is left unchanged.
        self.observation_space = Box(low=0, high=10, shape=(1, self.observation_space_size))
        self.state = np.zeros(self.observation_space_size)
        self.iteration = 0

    def step(self, action):
        """Advance the simulation by one update, then apply ``action`` to the lights.

        Args:
            action: Index into the three light phases described on the class.
                Any other value leaves the lights untouched.

        Returns:
            Tuple ``(state, reward, done, info)`` where ``state`` comes from
            ``sim.get_state()``, ``reward`` from ``sim.reward``, ``done`` is
            True once ``max_steps`` steps have been taken, and ``info`` is an
            empty dict.
        """
        self.sim.update(self.iteration)
        self.iteration += 1
        done = self.iteration >= self.max_steps

        # NOTE(review): the lights are switched *after* the simulation update,
        # so presumably the reward returned below does not yet reflect this
        # action -- confirm this ordering is intended.
        lights = self.sim.get_lights()
        for index, (first, second) in enumerate(self._LIGHT_PHASES):
            if action == index:
                lights[0].set_state(first)
                lights[1].set_state(second)
                break

        self.state = self.sim.get_state()
        info = {}

        return self.state, self.sim.reward, done, info

    def render(self, mode="human"):
        """No-op renderer.

        Accepts the ``mode`` keyword of the classic gym ``render`` signature so
        that callers passing ``render(mode='human')`` do not raise.
        """
        pass

    def reset(self):
        """Reset the simulation and the step counter.

        Returns:
            A zero vector of length ``observation_space_size``.
            NOTE(review): this is not read from ``sim.get_state()`` -- confirm
            an all-zero initial observation is intended.
        """
        self.sim.reset()
        self.iteration = 0
        self.state = np.zeros(self.observation_space_size)
        return self.state
        

In [3]:
# Training environment: a JunctionEnv wrapping a Simulation constructed from
# "cross_road.junc" with visualisation switched off.
env = JunctionEnv(Simulation("cross_road.junc", visualise=False))

None


In [4]:
# Sanity check: run the environment with uniformly sampled actions and report
# the summed reward of each episode.
# The episode count has its own name so it is not overwritten by the loop variable.
n_episodes = 1
for episode in range(1, n_episodes + 1):
    state = env.reset()
    done = False
    score = 0

    while not done:
        action = env.action_space.sample()
        n_state, reward, done, info = env.step(action)
        score += reward
    print(f"Episode: {episode} Score: {score}")

Episode: 1 Score: 997.43560128296


In [5]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers.legacy import Adam

In [6]:
# Observation shape and number of discrete actions, read from the environment's
# spaces; both are passed to build_model / build_agent in later cells.
states = env.observation_space.shape
actions = env.action_space.n
print(states)

(1, 6)


In [7]:
def build_model(states, actions):
    """Create the Q-network: Dense(12, relu) -> Flatten -> Dense(actions, linear).

    Args:
        states: Input shape handed to the first layer as ``input_shape``.
        actions: Number of units in the linear output layer.

    Returns:
        The uncompiled ``Sequential`` model.
    """
    layers = [
        Dense(12, activation='relu', input_shape=states),
        Flatten(),
        Dense(actions, activation='linear'),
    ]
    return Sequential(layers)

In [14]:
# Drop any model left over from a previous run before rebuilding it below.
# Guarded so the cell also works on a fresh kernel, where `model` does not
# exist yet and a bare `del` would raise NameError.
try:
    del model
except NameError:
    pass

In [15]:
# Build the Q-network for the environment's observation shape and action count.
model = build_model(states, actions)

In [10]:
# Show the layers, their output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 1, 12)             84        
                                                                 
 flatten (Flatten)           (None, 12)                0         
                                                                 
 dense_1 (Dense)             (None, 3)                 39        
                                                                 
Total params: 123
Trainable params: 123
Non-trainable params: 0
_________________________________________________________________


In [11]:
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

In [12]:
def build_agent(model, actions):
    """Wrap ``model`` in a keras-rl ``DQNAgent``.

    The agent uses a Boltzmann Q policy and a sequential replay memory
    (limit 100000, window length 1), warms up for 10 steps and is created
    with ``target_model_update=1e-2``.

    Args:
        model: Q-network to be used by the agent.
        actions: Number of discrete actions (passed as ``nb_actions``).

    Returns:
        The uncompiled ``DQNAgent``.
    """
    return DQNAgent(
        model=model,
        policy=BoltzmannQPolicy(),
        memory=SequentialMemory(limit=100000, window_length=1),
        nb_actions=actions,
        nb_steps_warmup=10,
        target_model_update=1e-2,
    )

In [16]:
# Assemble the agent, compile it with Adam (learning rate 1e-3) tracking MAE,
# and train against `env` for 5000 steps without visualisation.
dqn = build_agent(model, actions)
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=5000, visualize=False, verbose=1)

2023-02-16 19:07:12.430495: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-02-16 19:07:12.430527: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2023-02-16 19:07:12.449144: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:357] MLIR V1 optimization pass is not enabled
2023-02-16 19:07:12.462210: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2023-02-16 19:07:12.484403: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-02-16 19:07:12.561183: W tensorflow/c/c_api.cc:291] Operation '{name:'dens

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089
Training for 5000 steps ...
Interval 1 (0 steps performed)


  updates=self.state_updates,
2023-02-16 19:07:13.935393: W tensorflow/c/c_api.cc:291] Operation '{name:'dense_3/BiasAdd' id:141 op device:{requested: '', assigned: ''} def:{{{node dense_3/BiasAdd}} = BiasAdd[T=DT_FLOAT, _has_manual_control_dependencies=true, data_format="NHWC"](dense_3/MatMul, dense_3/BiasAdd/ReadVariableOp)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2023-02-16 19:07:13.962279: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-02-16 19:07:13.978821: W tensorflow/c/c_api.cc:291] Operation '{name:'total/Assign' id:320 op device:{requested: '', assigned: ''} def:{{{node total/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](total, total/Initializer/zeros)}}' was change

    7/10000 [..............................] - ETA: 1:30 - reward: 1.0000 

2023-02-16 19:07:14.230012: W tensorflow/c/c_api.cc:291] Operation '{name:'dense_3_1/BiasAdd' id:211 op device:{requested: '', assigned: ''} def:{{{node dense_3_1/BiasAdd}} = BiasAdd[T=DT_FLOAT, _has_manual_control_dependencies=true, data_format="NHWC"](dense_3_1/MatMul, dense_3_1/BiasAdd/ReadVariableOp)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2023-02-16 19:07:14.236167: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-02-16 19:07:14.723604: W tensorflow/c/c_api.cc:291] Operation '{name:'loss_3/AddN' id:451 op device:{requested: '', assigned: ''} def:{{{node loss_3/AddN}} = AddN[N=2, T=DT_FLOAT, _has_manual_control_dependencies=true](loss_3/mul, loss_3/mul_1)}}' was changed by setting attribute after it was run by a session. This muta

   13/10000 [..............................] - ETA: 18:37 - reward: 1.0000

2023-02-16 19:07:15.280713: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-02-16 19:07:15.321444: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-02-16 19:07:15.348404: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-02-16 19:07:15.380171: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




<keras.callbacks.History at 0x16337e9d0>

In [None]:
# Save the agent's weights to 'dqn_weights.h5f'; overwrite=True is passed so an
# existing file does not block the save.
dqn.save_weights('dqn_weights.h5f', overwrite=True)

In [None]:
# Rebuild the environment, network and agent from scratch and load the weights
# saved to 'dqn_weights.h5f'. The names `env`, `model` and `dqn` are simply
# rebound here, so no explicit `del` of the previous objects is performed.

env = JunctionEnv(Simulation("cross_road.junc", visualise=False))
states = env.observation_space.shape
actions = env.action_space.n

model = build_model(states, actions)
dqn = build_agent(model, actions)
# The agent is compiled before its weights are loaded.
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
dqn.load_weights('dqn_weights.h5f')

In [None]:
# Run a visualised simulation with the agent passed in as `dqn_agent`,
# calling update for iterations 0..50000.
# NOTE(review): presumably Simulation queries the agent to switch the lights
# itself when `dqn_agent` is given -- confirm in Backend.Junction.main.
sim = Simulation("cross_road.junc", visualise=True, dqn_agent=dqn)
for i in range(50001):
    sim.update(i)

In [None]:
# Spot check: feed the agent a length-6 all-ones observation.
# NOTE(review): presumably the returned value is the selected action index -- confirm against keras-rl.
dqn.forward(np.array([1, 1, 1, 1, 1, 1]))