In [79]:
# Load Vissim
# Documents here
# C:\Users\Public\Documents\PTV Vision\PTV Vissim 11\Examples Training
# Signal Control files here
# C:\Program Files\PTV Vision\PTV Vissim 11\API\SignalControl_DLLs

# for loading client
import win32com.client as com
import os
# standard libraries 
import numpy as np
import math
import time
from IPython import display
# For Q-function
from collections import defaultdict
# For saving
import datetime
import dill
import pickle

'''
This loads up a Vissim instance 
    -- Required: inpx file , layx file
    -- warning: quite flakey loading but once loaded okay
'''
def Load_Vissim(End_of_simulation = 10000,\
                Quick_Mode=1,\
                Path_to_network = 'C:\\Users\\Public\\Documents\\PTV Vision\\PTV Vissim 11\\Examples Training\\COM\\Basic Commands\\',\
                inpx_Filename = 'COM Basic Commands.inpx',\
                layx_Filename = 'COM Basic Commands.layx'\
               ):
    """Start a Vissim COM instance, load a network plus layout and configure it for training.

    Parameters
    ----------
    End_of_simulation : value written to the simulation's 'SimPeriod' attribute.
    Quick_Mode : value written to the network window's "QuickMode" attribute.
    Path_to_network : folder that contains both the .inpx and the .layx file.
    inpx_Filename : network file name, joined onto Path_to_network.
    layx_Filename : layout file name, joined onto Path_to_network.

    Returns
    -------
    The dispatched Vissim COM object.
    """
    # Connect to (or start) Vissim through the COM cache
    Vissim = com.gencache.EnsureDispatch("Vissim.Vissim")

    # Load the network; False = do not read the file additionally to an open network
    network_file = os.path.join(Path_to_network, inpx_Filename)
    flag_read_additionally = False
    Vissim.LoadNet(network_file, flag_read_additionally)

    # Load the layout that belongs to the network
    layout_file = os.path.join(Path_to_network, layx_Filename)
    Vissim.LoadLayout(layout_file)

    # Configure non-GUI for training.
    Vissim.Simulation.SetAttValue('UseMaxSimSpeed', True)
    # The original only *read* 'UseAllCores' (AttValue) and threw the value away,
    # so the setting was never applied; write it explicitly instead.
    Vissim.Simulation.SetAttValue('UseAllCores', True)
    Vissim.Graphics.CurrentNetworkWindow.SetAttValue("QuickMode",Quick_Mode)

    # Set a long simulation time
    Vissim.Simulation.SetAttValue('SimPeriod', End_of_simulation)
    return Vissim

# '''
# Set up Vissim and the parameters for optimizing
# (Worth commenting out after set up)
# if ERROR like ''has no attribute 'CLSIDToClassMap' ''
# DELETE folders:
# C:\Users\nwalton\AppData\Local\Temp\gen_py
# C:\Users\nwalton\AppData\Local\Temp\VISSIM
# Then Restart PC...
# '''

# We attempt 5 times to load vissim

# Attempts = 5 
# for _ in range(Attempts):
#     try:
#         Vissim \
#         = \
#         Load_Vissim(
#         Path_to_network = 'C:\\Users\\nwalton\\OneDrive - The Alan Turing Institute\\Documents\\MLforFlowOptimisation\\Vissim\\Single_Cross_Straight',\
#         inpx_Filename = 'Single_Cross_Straight.inpx',\
#         layx_Filename = 'Single_Cross_Straight.layx'\
#         )
#         print("Success")
#         break
#     except:
#         print("Fail")

In [80]:
# Get simulation parameters
'''
Set up the parameters of the optimization
'''

'''
makes a dictionary for all signals and their positions
'''
def Get_Signal_Positions(Signal_Groups):
    """Map each signal head's 'Lane' attribute to its 'Pos' attribute.

    Every signal head of every signal group is visited; if two heads report
    the same lane, the one visited last wins.
    """
    return {
        head.AttValue('Lane'): head.AttValue('Pos')
        for group in Signal_Groups
        for head in group.SigHeads
    }

# Lanes for detection
# Lanes for detection
# (these lane identifiers are compared against each vehicle's 'Lane' attribute
#  and fix the order of the entries in the state tuples)
Lane_List = ['3-1','1-1','7-1','5-1']

# which lane signals can be green at the same time
# (one 0/1 flag per signal group: Do_Action_* index these tuples by signal-group position)
actions = [(1,0,1,0),\
            (0,1,0,1)]
# Define the Q-function
# Q_fn = Q_function(actions)

# round the state space
rounding = 1.
sim_steps = 100 # number of simulation steps before update
# set the load to be light
# (every vehicle input with key 1..number_of_inputs gets the same 'Volume(1)')
number_of_inputs = len(Vissim.Net.VehicleInputs)
new_volume = 400
for key in range(1,number_of_inputs+1):
    Vissim.Net.VehicleInputs.ItemByKey(key).SetAttValue('Volume(1)', new_volume)
    
# get the list of signal controllers
# (only the first controller returned by GetAll() is used)
Signal_Controller = Vissim.Net.SignalControllers.GetAll()[0]
Signal_Groups = Signal_Controller.SGs.GetAll()
Signal_Positions = Get_Signal_Positions(Signal_Groups)

# These are states and rewards which are global variables 
# Assigned None for now

Q_Size = None # Queue sizes at junctions
delays = dict() # Total delay and change in delay for each vehicle

In [90]:
# Get state and reward info
'''
This code gets the average delay from 1000 steps under MaxWeight
'''
def Get_Q_Size(Lane_List=None, rounding=None):
    """Return the rounded number of queued vehicles on each lane of Lane_List.

    A vehicle counts as queued when its 'InQueue' attribute equals 1.

    Parameters
    ----------
    Lane_List : lanes to report, in output order; defaults to the global Lane_List.
    rounding : bucket size; each count is reported as ceil(count / rounding).
        Defaults to the global rounding.

    Returns
    -------
    tuple with one integer per lane of Lane_List.
    """
    # Loads globals if variables not specified
    if Lane_List is None :
        Lane_List = globals()['Lane_List']
    if rounding is None :
        rounding = globals()['rounding']

    # start every monitored lane with an empty queue
    Q_sizes = dict.fromkeys(Lane_List, 0)

    # count the queued vehicles lane by lane; vehicles on other lanes are ignored
    for Veh in Vissim.Net.Vehicles.GetAll():
        lane = Veh.AttValue('Lane')
        if lane in Q_sizes and Veh.AttValue('InQueue') == 1 :
            Q_sizes[lane] += 1

    return tuple(math.ceil(Q_sizes[lane] / rounding) for lane in Lane_List)

'''
state is now the closest vehicle to the junction
reward is now the total delay
'''


def Get_First_Vehicle(Lane_List=None, rounding=None):
    """Return, per lane, the rounded distance from the closest vehicle to the signal.

    The distance is (signal position - vehicle position), rounded up to a
    multiple of `rounding`. Vehicles on lanes without an entry in the global
    Signal_Positions, and vehicles whose rounded distance is negative, are
    ignored. Lanes of Lane_List without any remaining vehicle report np.nan.
    """
    # Fall back to the notebook-level settings when nothing is passed in
    if Lane_List is None :
        Lane_List = globals()['Lane_List']
    if rounding is None :
        rounding = globals()['rounding']

    nearest = {}
    for vehicle in Vissim.Net.Vehicles.GetAll():
        position = vehicle.AttValue('Pos')
        lane = vehicle.AttValue('Lane')

        if lane not in Signal_Positions:
            continue

        gap = rounding * math.ceil((Signal_Positions[lane] - position) / rounding)
        if not gap >= 0:
            continue

        # keep the smallest gap seen so far on this lane
        if lane not in nearest or gap < nearest[lane]:
            nearest[lane] = gap

    return tuple(nearest[lane] if lane in nearest else np.nan for lane in Lane_List)


'''
Gets the delays of all vehicles in the network:
    -- dictionary keys are vehicle numbers
    -- 1st entry is delay
    -- 2nd entry is change in delay
'''
# Module-level placeholder; the simulation loops further down assign the current state to this name.
state = None

def Delay_Dictionary(Current_Dict=None):
    """Snapshot every vehicle's delay and its change since the previous snapshot.

    Parameters
    ----------
    Current_Dict : previous snapshot ({vehicle number: [delay, change]}).
        Defaults to the global `delays`; an empty snapshot is used when that
        global does not exist (or is None).

    Returns
    -------
    dict {vehicle number: [delay, delay - previous delay]}; vehicles that are
    not in the previous snapshot get a change of 0.
    """
    if Current_Dict is None:
        # globals()['delays'] raises KeyError (not NameError) when the name is
        # missing, so the old `except NameError` fallback never triggered.
        Current_Dict = globals().get('delays')
        if Current_Dict is None:
            Current_Dict = dict()

    Delay_Dict = dict()
    # all vehicles in the network at the actual simulation second
    for Veh in Vissim.Net.Vehicles.GetAll():
        veh_number = Veh.AttValue('No')
        delay = Veh.AttValue('DelayTm')

        if veh_number in Current_Dict:
            old_delay = Current_Dict[veh_number][0]
            Delay_Dict[veh_number] = [delay, delay - old_delay]
        else :
            Delay_Dict[veh_number] = [delay, 0.]
    return Delay_Dict

'''
state is now the closest vehicle to the junction
reward is now the total delay
'''

def Get_Delay(delays=None):
    """Reward based on delay: minus the summed change in delay over all vehicles.

    `delays` maps vehicle numbers to [delay, change in delay]; only the second
    entry is used. When omitted, the global `delays` dictionary is read.
    """
    if delays is None:
        delays = globals()['delays']

    accumulated_change = 0
    for entry in delays.values():
        accumulated_change += entry[1]
    return -accumulated_change

def Get_Total_Queue(Q_Size=None):
    """Reward based on queues: minus the sum of the per-lane queue sizes.

    When no queue sizes are passed in they are measured with Get_Q_Size().
    """
    queue_sizes = Get_Q_Size() if Q_Size is None else Q_Size
    return -sum(queue_sizes)

In [82]:
# Actions

# Simple RED and GREEN Actions
def Do_Action_Easy(action=None,Signal_Groups=None):
    """Switch every signal group straight to GREEN or RED.

    action[i] == 1 sets signal group i to "GREEN", anything else to "RED".
    Both arguments fall back to the globals of the same name.
    """
    action = globals()['action'] if action is None else action
    Signal_Groups = globals()['Signal_Groups'] if Signal_Groups is None else Signal_Groups

    for idx, group in enumerate(Signal_Groups):
        group.SetAttValue("SigState", "GREEN" if action[idx] == 1 else "RED")

# GREEN/AMBER/RED/REDAMBER Actions
def Do_Action_RGA(action=None,Signal_Groups=None):
    """Apply an action through a GREEN -> AMBER -> RED -> REDAMBER -> GREEN transition.

    Sequence (the simulation is run between the stages):
      1. GREEN groups whose action flag is 0 go AMBER, then 4 s are simulated
      2. every AMBER group goes RED, then 1 s is simulated
      3. RED groups whose action flag is 1 go REDAMBER, then 1 s is simulated
      4. every REDAMBER group goes GREEN
    Each simulated stretch is cut short at the simulation's 'SimPeriod'.
    Both arguments fall back to the globals of the same name.
    """
    if action is None:
        action = globals()['action']
    if Signal_Groups is None:
        Signal_Groups = globals()['Signal_Groups']

    # End of simulation, read once up front
    sim_period = Vissim.Simulation.AttValue('SimPeriod')
    amber_seconds = 4.
    red_seconds = 1.
    redamber_seconds = 1.

    def switch(from_state, to_state, wanted_flag=None):
        # Move groups currently in from_state to to_state; when wanted_flag is
        # given, only groups whose action entry equals it are moved.
        for idx, group in enumerate(Signal_Groups):
            if group.AttValue("SigState") != from_state:
                continue
            if wanted_flag is None or action[idx] == wanted_flag:
                group.SetAttValue("SigState", to_state)

    def advance(seconds):
        # Run the simulation up to `seconds` from now, but never past its end.
        break_at = min(Vissim.Simulation.AttValue('SimSec') + seconds, sim_period)
        Vissim.Simulation.SetAttValue('SimBreakAt', break_at)
        Vissim.Simulation.RunContinuous()

    switch("GREEN", "AMBER", wanted_flag=0)
    advance(amber_seconds)

    switch("AMBER", "RED")
    advance(red_seconds)

    switch("RED", "REDAMBER", wanted_flag=1)
    advance(redamber_seconds)

    switch("REDAMBER", "GREEN")

In [83]:
# Controllers / Learners
'''
MaxWeight
'''
def MaxWeight(state,actions):
    """Return the action with the largest weight np.dot(action, state).

    Ties go to the action that appears last in `actions`.

    Raises
    ------
    ValueError : when `actions` is empty or no weight is comparable (all NaN).
    """
    # Start below every real weight: with the old starting value of 0 an input
    # whose weights were all negative left the result unassigned.
    opt_val = float('-inf')
    opt_act = None
    for action in actions :
        val = np.dot(action,state)
        if val >= opt_val :
            opt_val = val
            opt_act = action
    if opt_act is None:
        raise ValueError("MaxWeight needs at least one action with a comparable weight")
    return opt_act

'''
Easy Q_learner Q_Function
'''
class Q_function():
    """Tabular Q-function with visit counts.

    Q[state][action] holds the current value estimate and N[state][action]
    the number of recorded visits. States and actions are used as dictionary
    keys and therefore have to be hashable.
    """
    def __init__(self, actions = None):
        # Resolve the default when the object is created (not when the class is
        # defined) so the class cell does not depend on `actions` already existing.
        if actions is None:
            actions = globals()['actions']
        # Q function
        self.Q = defaultdict(lambda: defaultdict(float))
        # number of visits
        self.N = defaultdict(lambda: defaultdict(float))
        self.actions = actions

    def Check(self,state,actions=None):
        """Make sure `state` has a Q entry (initialised to 0) for every action."""
        if actions is None :
            actions = self.actions

        if state not in self.Q:
            for action in actions:
                self.Q[state][action] = 0

    def Max(self,state):
        """Largest Q value stored for `state`."""
        return np.max(list(self.Q[state].values()))

    def Action(self,state,epsilon=0):
        """Epsilon-greedy choice: a random action with probability epsilon, else the best known one."""
        # Uses this object's own action set rather than the global `actions`
        if np.random.rand() < epsilon :
            idx = np.random.randint(len(self.actions))
            action = self.actions[idx]
        else :
            self.Check(state)
            action = max(self.Q[state], key=self.Q[state].get)
        return action

    def Learn(self,state,action,reward,next_state,learning_rate=0.1,discount_factor=0.5):
        """Apply one Q-learning update and return the Q table.

        The visit of (state, action) is counted here through N_update, so
        callers do not need to count it again.
        """
        # Check if state,action and next_state are in Q
        self.Check(state)
        self.Check(next_state)
        self.N_update(state,action)

        dQ = reward \
            + discount_factor * self.Max(next_state) \
            - self.Q[state][action]
        self.Q[state][action] = self.Q[state][action] + learning_rate * dQ

        return self.Q

    def N_update(self,state,action,actions=None):
        """Count one visit of (state, action) and return the new count."""
        if actions is None :
            actions = self.actions

        if state not in self.N:
            # A separate loop name is essential: reusing `action` here made the
            # increment below hit the last action instead of the visited one.
            for known_action in actions:
                self.N[state][known_action] = 0
        self.N[state][action] = self.N[state][action] + 1
        return self.N[state][action]

    def Print(self):
        """Print state, action, visit count and Q value for every stored pair."""
        for state in self.Q.keys():
            for action in self.Q[state].keys():
                print(state,action,self.N[state][action],self.Q[state][action])

In [104]:
# MaxWeight implementation
# Defined above: MaxWeight(state=Q_Size,actions=actions) 

# Pick the state / reward / action implementations used by this run
Get_State = Get_Q_Size
Get_Reward = Get_Delay
Do_Action = Do_Action_RGA
sim_steps = 1
sim_length = 100

Vissim.Graphics.CurrentNetworkWindow.SetAttValue("QuickMode",False)
Vissim.Simulation.SetAttValue('UseMaxSimSpeed', True)
# NOTE(review): AttValue only reads 'UseAllCores' and the result is discarded;
# presumably a SetAttValue was intended -- confirm.
Vissim.Simulation.AttValue('UseAllCores')
delays = dict()
rewards = []
Queues = []

for _ in range(sim_length):
    # Advance a fresh simulation (SimSec == 0) before reading any state
    if Vissim.Simulation.AttValue('SimSec') == 0.0 :
        for _ in range(sim_steps):
            Vissim.Simulation.RunSingleStep()
    Q_Size = Get_Q_Size()
    # Delay snapshot is taken before the action is applied, so the reward read
    # below is the delay change up to this point, not the effect of this action.
    delays = Delay_Dictionary()
    state = Get_State()
    action = MaxWeight(Q_Size,actions)
    # Do_Action() without arguments picks up the globals `action` and `Signal_Groups`
    Do_Action()
    for _ in range(sim_steps):              # Take a few simulation steps
        Vissim.Simulation.RunSingleStep()
    reward = Get_Reward()              # Get the reward
    rewards.append(reward)
    Queues.append(Get_Total_Queue())
    # Running means of the reward and of minus the total queue length
    print(np.mean(rewards),np.mean(Queues))
    display.clear_output(wait=True)

-31.464167868058816 -2.7


In [None]:
# Easy Q learner 
# -- 
'''
Set up the parameters of the optimization
'''


# State: rounded queue size per lane; reward: minus the total queue size
Get_State = Get_Q_Size
Get_Reward = Get_Total_Queue
Do_Action = Do_Action_RGA

Vissim.Graphics.CurrentNetworkWindow.SetAttValue("QuickMode",True)
#comment below gives lower resolution to simulation
#Vissim.Simulation.Stop()
#Vissim.Simulation.SetAttValue("SimRes",1)

Q_fn = Q_function(actions)

if __name__ == "__main__": 
    sars = []
    start_time = time.time()
    for iters in range(10000):
        # Take a few steps if at the start of simulation 
        # (This corrects a bug)
        if Vissim.Simulation.AttValue('SimSec') == 0.0 :
            for _ in range(sim_steps):
                Vissim.Simulation.RunSingleStep()

        state = Get_State(Lane_List, rounding)  # Get the current state
        action = Q_fn.Action(state,0.1)         # Get the current action
        Do_Action(action,Signal_Groups)         # Implement the action
        for _ in range(sim_steps):              # Take a few simulation steps
            Vissim.Simulation.RunSingleStep()
        reward = Get_Reward(state)              # Reward from the queue sizes in `state`
        next_state = Get_State(Lane_List, rounding)     # Get next state
        # Learn() already counts the visit through N_update; the extra
        # Q_fn.N_update call that used to follow doubled every visit count.
        Q_fn.Learn(state,action,reward,next_state)      # Apply Q-Learning
        sars.append([state,action,reward,next_state])    # Save data
        print(iters, int(time.time()-start_time))
        display.clear_output(wait=True)
        
        

In [None]:
# Pickle Q_Learner & SARS
'''
This saves the Q-function (and the time of training)
'''
# now = str(datetime.datetime.now())
# now = now.replace(" ","")
# now = now.replace(":","-")
# pickle.dump(sars, open( "SARS"+now+".p", "wb" ))
# dill.dump(Q_fn, open( "Q"+now+".p", "wb" ))

In [None]:
# Easy Q learner - sars learner 
# Replay saved [state, action, reward, next_state] records into a fresh Q-function.
# NOTE: pickle.load can execute arbitrary code; only open SARS files you created yourself.
with open("SARS.p", "rb") as sars_file:     # the file is closed again after loading
    sars = pickle.load(sars_file)
Q_fn_load = Q_function()

for state, action, reward, next_state in sars:
    # Learn() already counts the visit through N_update; calling N_update
    # again afterwards doubled every visit count.
    Q_fn_load.Learn(state,action,reward,next_state) 



In [None]:
# First Vehicle Q-learner
# Good for light load

# State: rounded distance of the closest vehicle per lane; reward: minus the change in total delay
Get_State = Get_First_Vehicle
Get_Reward = Get_Delay
Do_Action = Do_Action_RGA
sim_steps = 20 # needs higher sim steps (I think).
rounding = 5.
Q_fn = Q_function(actions)

Vissim.Graphics.CurrentNetworkWindow.SetAttValue("QuickMode",True)
#comment below gives lower resolution to simulation
#Vissim.Simulation.Stop()
#Vissim.Simulation.SetAttValue("SimRes",1)


if __name__ == "__main__": 
    sars = []
    rewards = []
    start_time = time.time()
    delays = dict()
    for iters in range(10000):
        # Take a few steps if at the start of simulation 
        # (This corrects a bug)
        if Vissim.Simulation.AttValue('SimSec') == 0.0 :
            for _ in range(sim_steps):
                Vissim.Simulation.RunSingleStep()

        state = Get_State(Lane_List, rounding)  # Get the current state
        action = Q_fn.Action(state,0.1)         # Get the current action
        Do_Action(action,Signal_Groups)         # Implement the action
        for _ in range(sim_steps):              # Take a few simulation steps
            Vissim.Simulation.RunSingleStep()
        # Refresh the per-vehicle delays AFTER the action has played out, so the
        # change in delay measures this action (the snapshot used to be taken
        # before the action).
        delays = Delay_Dictionary(delays)
        # Get_Delay expects the delay dictionary; passing the state tuple (as
        # before) fails because a tuple has no .items().
        reward = Get_Reward(delays)             # Get the reward
        rewards.append(reward)
        next_state = Get_State(Lane_List, rounding)     # Get next state
        # Learn() already counts the visit through N_update, so it is not called again here.
        Q_fn.Learn(state,action,reward,next_state)      # Apply Q-Learning
        sars.append([state,action,reward,next_state])    # Save data
        print(iters, int(time.time()-start_time), np.mean(rewards))
        display.clear_output(wait=True)
        
# dill.dump(Q_fn, open( "Q_First_Vehicle_Learner.p", "wb" ))

In [None]:
# Delay based Q-learner  

# First Vehicle Q-learner
# Good for light load

# State: rounded queue size per lane; reward: minus the change in total delay.
# NOTE(review): Do_Action, Q_fn, sim_steps and rounding are not set in this cell
# and keep whatever an earlier cell assigned -- confirm that this is intended.
Get_State = Get_Q_Size
Get_Reward = Get_Delay

Vissim.Graphics.CurrentNetworkWindow.SetAttValue("QuickMode",True)
#comment below gives lower resolution to simulation
#Vissim.Simulation.Stop()
#Vissim.Simulation.SetAttValue("SimRes",1)


if __name__ == "__main__": 
    sars = []
    rewards = []
    start_time = time.time()
    delays = dict()
    for iters in range(10000):
        # Take a few steps if at the start of simulation 
        # (This corrects a bug)
        if Vissim.Simulation.AttValue('SimSec') == 0.0 :
            for _ in range(sim_steps):
                Vissim.Simulation.RunSingleStep()

        state = Get_State(Lane_List, rounding)  # Get the current state
        action = Q_fn.Action(state,0.1)         # Get the current action
        Do_Action(action,Signal_Groups)         # Implement the action
        for _ in range(sim_steps):              # Take a few simulation steps
            Vissim.Simulation.RunSingleStep()
        # Refresh the per-vehicle delays AFTER the action has played out, so the
        # change in delay measures this action (the snapshot used to be taken
        # before the action).
        delays = Delay_Dictionary(delays)
        # Get_Delay expects the delay dictionary; passing the state tuple (as
        # before) fails because a tuple has no .items().
        reward = Get_Reward(delays)             # Get the reward
        rewards.append(reward)
        next_state = Get_State(Lane_List, rounding)     # Get next state
        # Learn() already counts the visit through N_update, so it is not called again here.
        Q_fn.Learn(state,action,reward,next_state)      # Apply Q-Learning
        sars.append([state,action,reward,next_state])    # Save data
        print(iters, int(time.time()-start_time), np.mean(rewards))
        display.clear_output(wait=True)

In [None]:
# Show current Q_function

# Switch QuickMode off so the run is rendered while the learned policy acts
Vissim.Graphics.CurrentNetworkWindow.SetAttValue("QuickMode",False)
Vissim.Simulation.SetAttValue('UseMaxSimSpeed', True)
# NOTE(review): AttValue only reads 'UseAllCores' and the result is discarded;
# presumably a SetAttValue was intended -- confirm.
Vissim.Simulation.AttValue('UseAllCores')

for _ in range(50):
    # Advance a fresh simulation (SimSec == 0) before reading any state
    if Vissim.Simulation.AttValue('SimSec') == 0.0 :
        for _ in range(sim_steps):
            Vissim.Simulation.RunSingleStep()

    # Uses whichever Get_State / Do_Action / Q_fn the last executed learner cell left behind
    state = Get_State(Lane_List,rounding)
    # epsilon = 0.0: always take the greedy action, no exploration
    action = Q_fn.Action(state,0.0)
    Do_Action(action,Signal_Groups)
    for _ in range(10):              # Take a few simulation steps
        Vissim.Simulation.RunSingleStep()

In [25]:
# Load Alvaro Parameters
from keras.models import load_model
# Previously trained network, loaded from the current working directory
model = load_model('Single_Cross_Straight_Episodes400_ProgramDQN_Agent0.h5')

## Network Model Parameters
Random_Seed = 42
model_name  = 'Single_Cross_Straight'
#vissim_working_directory = 'C:\\Users\\acabrejasegea\\OneDrive - The Alan Turing Institute\\Desktop\\ATI\\0_TMF\\MLforFlowOptimisation\\Vissim\\'
vissim_working_directory = 'C:\\Users\\nwalton\\OneDrive - The Alan Turing Institute\\Documents\\MLforFlowOptimisation\\Vissim\\'
program = 'DQN' # DQN, DuelingDQN, DDQN, DuelingDDQN
reward_type = 'Queues'
state_type  = 'Queues'
PER_activated = True
## Use of additional files?
flag_read_additionally  = False
## Load trained model?
Demo_Mode = False
load_trained = False
Quickmode = True
SaveResultsAgent = True
# Random demand
Random_Demand = False

## Data handling flags
# Flag for restarting the COM Server
reset_flag = True
#cache_flag = False
# If a fresh start is needed, all previous results from simulations are deleted
Start_Fresh = True
# Debug action
debug_action = False

## RL Hyperparamenters
# Number of simulations, save every "n" episodes and copy weights with frequency "f"
episodes = 100
partial_save_at = 100
copy_weights_frequency = 5
reset_frequency = 101

# Timesteps per simulation (1 timestep = 0.1 sec), length for random population is a multiple of episode
simulation_length = 3600*1 + 1
memory_population_length = simulation_length*5

## State-Action Parameters
state_size = 4
action_size = 5

# Hyperparameters
batch_size = 64
memory_size = 1000
alpha   = 0.0001
#alpha   = 0.001

gamma   = 0.95

# Exploration Schedule
# epsilon_decay is a negative increment that DQNAgent adds to epsilon after every
# replay call; with the factor 1.2 epsilon_end is reached after about
# (episodes-1)/1.2 replay calls.
epsilon_start = 1
epsilon_end   = 0.001
epsilon_decay = 1.2*(epsilon_end - epsilon_start)/(episodes-1)
#epsilon_decay = np.power(epsilon_end/epsilon_start, 1./(episodes+1)) # Geometric decay

# Demand Schedule
demands = [100, 200, 400, 600, 800, 1000]
# Session ID
Session_ID = 'Episodes'+str(episodes)+'_Program'+program

Using TensorFlow backend.


In [108]:
# ALvaro DQN agent
class DQNAgent:
    """Deep Q-learning agent that selects the signal program of one signal controller.

    The agent keeps an online network (`model`) and a target network
    (`target_model`), a replay memory (prioritised via the PER module or a
    plain deque) and an additive epsilon schedule. Optional variants are
    Double DQN (`DoubleDQN`) and the dueling architecture (`Dueling`).

    `state_type` and `Vissim` are accepted by the constructor but not stored;
    the state definition and the Vissim handle are passed to get_state().
    """
    def __init__(self, state_size, action_size, ID, state_type, npa, memory_size, gamma, epsilon_start, epsilon_end, epsilon_decay, alpha, copy_weights_frequency, Vissim, PER_activated, DoubleDQN, Dueling):
        # Agent Junction ID and Controller ID
        self.signal_id = ID
        self.signal_controller = npa.signal_controllers[self.signal_id]

        # Number of states, action space and memory
        self.state_size = state_size
        self.action_size = action_size

        # Agent Hyperparameters
        self.gamma = gamma                    # discount rate
        self.epsilon = epsilon_start          # starting exploration rate
        self.epsilon_min = epsilon_end        # final exploration rate
        self.epsilon_decay = epsilon_decay    # additive change of the exploration rate per replay call
        self.learning_rate = alpha            # learning rate

        # Agent Architecture
        self.DoubleDQN = DoubleDQN            # Double Deep Q Network Flag
        self.Dueling = Dueling                # Dueling Q Networks Flag
        self.PER_activated = PER_activated    # Prioritized Experience Replay Flag

        # Model and target networks
        self.copy_weights_frequency = copy_weights_frequency    # Frequency to copy weights to target network
        self.model = self._build_model()
        self.target_model = self._build_model()
        self.target_model.set_weights(self.model.get_weights())

        # Architecture Debug Messages
        if self.DoubleDQN:
            if self.Dueling:
                print("Deploying instance of Dueling Double Deep Q Learning Agent(s)")
            else:
                print("Deploying instance of Double Deep Q Learning Agent(s)")
        else:
            if self.Dueling:
                print("Deploying instance of Dueling Deep Q Learning Agent(s)")
            else:
                print("Deploying instance of Standard Deep Q Learning Agent(s)")

        # Initial Setup of S, A, R, S_
        # (all-zero integer states sized by state_size instead of a fixed 4 entries)
        self.state = np.zeros((1, state_size), dtype=int)
        self.newstate = np.zeros((1, state_size), dtype=int)
        self.action = 0
        self.reward = 0

        # Metrics Storage Initialization
        self.episode_reward = []
        self.loss = []

        if self.PER_activated:
            # If PER_activated spawn BinaryTree and Memory object to store priorities and experiences
            self.memory = PER.Memory(memory_size)
        else:
            # Else use the deque structure to only store experiences which will be sampled uniformly.
            # deque is imported here because the notebook never imports it at the top.
            from collections import deque
            self.memory = deque(maxlen=memory_size)

    # Update the Junction IDs for the agent
    def update_IDS(self, ID, npa):
        """Point the agent at another signal controller of `npa`."""
        self.signal_id = ID
        self.signal_controller = npa.signal_controllers[self.signal_id]

    # Agent Neural Network definition
    def _build_model(self):
        """Build and compile the Q network (dueling or plain, depending on self.Dueling)."""
        if self.Dueling:
            # Architecture for the Neural Net in the Dueling Deep Q-Learning Model:
            # a shared hidden layer feeds an advantage stream and a value stream.
            input_layer = Input(shape = (self.state_size,))
            dense1 = Dense(24, input_dim=self.state_size, activation='relu')(input_layer)
            fc1 = Dense(48)(dense1)
            dueling_actions = Dense(self.action_size)(fc1)
            fc2 = Dense(48)(dense1)
            dueling_values = Dense(1)(fc2)

            def dueling_operator(duel_input):
                # Q = V + (A - mean(A)), the mean being taken over the actions
                duel_v = duel_input[0]
                duel_a = duel_input[1]
                return (duel_v + (duel_a - K.mean(duel_a, axis = 1, keepdims = True)))

            policy = Lambda(dueling_operator, name = 'policy')([dueling_values, dueling_actions])
            model = Model(inputs=[input_layer], outputs=[policy])
            model.compile(loss='mse', optimizer=Adam(lr=self.learning_rate))
            return(model)
        else:
            # Architecture for the Neural Net in Deep-Q learning Model (also Double version)
            model = Sequential()
            model.add(Dense(24, input_dim=self.state_size, activation='relu'))
            model.add(Dense(48, activation='relu'))
            model.add(Dense(self.action_size, activation='linear'))
            model.compile(loss='mse', optimizer=Adam(lr=self.learning_rate))
            return model

    # Obtain the state based on different state definitions
    def get_state(self, state_type, state_size, Vissim):
        """Read the state from Vissim as an array of shape (1, state_size).

        'Queues' reads 'QLen(Current,Last)' of queue counters 1-4 and 'Delay'
        multiplies 'VehDelay(Current,Last,All)' of delay measurements 1-4 with
        'QStops(Current,Last)' of queue counters 1-4 (keys 1-4 are West, South,
        East, North). Missing values (None) count as 0. The remaining state
        types are not implemented and return None.
        """
        approach_keys = (1, 2, 3, 4)    # West, South, East, North
        if state_type == 'Queues':
            # Obtain Queue Values (average value over the last period)
            state = [Vissim.Net.QueueCounters.ItemByKey(key).AttValue('QLen(Current,Last)') for key in approach_keys]
            # Treat missing values as 0, like the 'Delay' state and get_reward do
            state = [0 if value is None else value for value in state]
            state = np.reshape(state, [1,state_size])
            return(state)
        elif state_type == 'Delay':
            # Obtain Delay Values (average delay in lane * nr cars in queue)
            lane_delays = []
            lane_stops = []
            for key in approach_keys:
                lane_delays.append(Vissim.Net.DelayMeasurements.ItemByKey(key).AttValue('VehDelay(Current,Last,All)'))
                lane_stops.append(Vissim.Net.QueueCounters.ItemByKey(key).AttValue('QStops(Current,Last)'))

            state = [(0 if delay is None else delay) * (0 if stops is None else stops)
                     for delay, stops in zip(lane_delays, lane_stops)]
            state = np.reshape(state, [1,state_size])
            return(state)
        elif state_type == 'MaxFlow':
            pass
        elif state_type == 'FuelConsumption':
            pass
        elif state_type == 'NOx':
            pass
        elif state_type == "COM":
            pass

    # Add memory on the right, if over memory limit, pop leftmost item
    def remember(self, state, action, reward, next_state):
        """Store one (state, action, reward, next_state) experience in the replay memory."""
        experience = (state, action, reward, next_state)
        if self.PER_activated:
            self.memory.store(experience)
        else:
            self.memory.append(experience)

    # Choosing actions
    def act(self, state):
        """Pick an action epsilon-greedily and switch the controller to program action+1."""
        if np.random.rand() <= self.epsilon:
            action = random.randrange(self.action_size)
        else:
            act_values = self.model.predict(state)
            action = np.argmax(act_values[0])
        self.signal_controller.SetAttValue('ProgNo', int(action+1))
        return action

    def get_reward(self):
        """Reward = minus the sum of the entries of the newest state (None counts as 0)."""
        reward = -np.sum([0 if state is None else state for state in self.newstate[0]])

        self.episode_reward.append(reward)
        return reward

    def _td_target(self, reward, next_state):
        """Temporal-difference target for one experience."""
        next_state = np.reshape(next_state,(1,self.state_size))
        target_q = self.target_model.predict(next_state)
        if self.DoubleDQN:
            # Double DQN: the online network selects the action, the target
            # network evaluates it. (Selecting with the target network as well
            # is just the standard DQN target.)
            next_action = int(np.argmax(self.model.predict(next_state)[0]))
            return reward + self.gamma * target_q[0][next_action]
        # Fixed Q-Target
        return reward + self.gamma * np.max(target_q)

    def _finish_replay(self, episode):
        """Decay epsilon (never below epsilon_min) and refresh the target network when due."""
        # Exploration rate decay
        if self.epsilon > self.epsilon_min:
            self.epsilon = max(self.epsilon_min, self.epsilon + self.epsilon_decay)
        # Copy weights every copy_weights_frequency episodes
        if (episode+1) % self.copy_weights_frequency == 0 and episode != 0:
            self.copy_weights()

    def replay_single(self, batch_size, episode, loss):
        """Train on `batch_size` uniformly sampled experiences, one fit call per experience.

        `loss` is unused and only kept for interface compatibility.
        NOTE(review): samples with random.sample, which presumably only works
        for the deque memory (PER_activated False) -- confirm.
        """
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state in minibatch:
            target = self._td_target(reward, next_state)

            target_f = self.model.predict(state)
            target_f[0][action] = target

            self.model.fit(state, target_f, epochs=1, verbose=0)
            self.loss.append(self.model.history.history['loss'][0])

        self._finish_replay(episode)

    def replay_batch(self, batch_size, episode, loss):
        """Train on one minibatch with a single fit call; updates PER priorities if enabled.

        `loss` is unused and only kept for interface compatibility.
        """
        state_vector = []
        target_f_vector = []
        absolute_errors = []

        if self.PER_activated:
            tree_idx, minibatch, ISWeights_mb = self.memory.sample(batch_size)
            minibatch = [item[0] for item in minibatch]
        else:
            minibatch = random.sample(self.memory, batch_size)

        for state, action, reward, next_state in minibatch:
            target = self._td_target(reward, next_state)

            # This section incorporates the reward into the prediction and calculates the absolute error between old and new
            target_f = self.model.predict(state)
            # np.ravel copes with scalar and 1-element targets alike; indexing
            # the plain scalar of the non-Double branch with [0] raised IndexError.
            absolute_errors.append(np.ravel(np.abs(target_f[0][action] - target))[0])
            target_f[0][action] = target

            state_vector.append(state[0])
            target_f_vector.append(target_f[0])

        state_matrix = np.asarray(state_vector)
        target_f_matrix = np.asarray(target_f_vector)

        self.model.fit(state_matrix, target_f_matrix, epochs=1, verbose=0)
        self.loss.append(self.model.history.history['loss'])

        if self.PER_activated:
            #Update priority
            self.memory.batch_update(tree_idx, absolute_errors)

        self._finish_replay(episode)

    # Copy weights function
    def copy_weights(self):
        """Copy the online network's weights into the target network."""
        self.target_model.set_weights(self.model.get_weights())
        print("Weights succesfully copied to Target model.")  

In [109]:
# Load Alvaro DQN agent

import sys
sys.path.append("..")
from NParser import NetworkParser
import tensorflow as tf
from keras import backend as K
from keras.models import load_model, Sequential, Model
from keras.layers import merge, Dense, Input, Lambda
from keras.layers.core import Activation, Flatten
from keras.optimizers import Adam
import PER

# Parse the loaded Vissim network; the parser exposes the signal controllers and their ids.
npa = NetworkParser(Vissim)

# One Dueling (non-Double) DQN agent per signal controller.
# In Demo_Mode the starting exploration rate is forced to 0.
Agents = [
    DQNAgent(
        state_size, action_size, ID, state_type, npa, memory_size, gamma,
        0 if Demo_Mode else epsilon_start,
        epsilon_end, epsilon_decay, alpha, copy_weights_frequency, Vissim, PER_activated,
        DoubleDQN=False,
        Dueling=True,
    )
    for ID in npa.signal_controllers_ids
]

# Replace the first agent's freshly built online network with the saved one.
# Only Agents[0].model is swapped; its target network is left untouched.
from keras.models import load_model
model = load_model('Single_Cross_Straight_Episodes400_ProgramDQN_Agent0.h5')
Agents[0].model = model

Deploying instance of Dueling Deep Q Learning Agent(s)


In [110]:
# Run Alvaro DQN agent
import Simulator_Functions as SF 
import random

# Per-step metric recorded by run_simulation_episode below. It is bound through an alias so the
# metric can be swapped in one place; Get_Delay itself is not defined in this cell.
Get_Reward = Get_Delay

# Run a Single Episode for a set simulation length
def run_simulation_episode(Agents, Vissim, state_type, state_size, simulation_length, Demo_Mode, PER_activated, cycle_length=900):
    """Run one Vissim episode step by step and report the average metrics.

    Every step records ``Get_Reward()`` and ``Get_Total_Queue()`` (notebook
    globals), prints the running means and advances the simulation by a single
    step. Each time the internal counter reaches ``cycle_length`` every agent
    observes a new state, stores the transition, and selects its next action.

    Parameters
    ----------
    Agents : iterable
        Agents exposing ``get_state``, ``get_reward``, ``remember``, ``act``
        and the attributes ``state``, ``newstate``, ``action``, ``reward``.
    Vissim : COM object
        Running Vissim instance; only ``Vissim.Simulation`` is used here.
    state_type, state_size :
        Forwarded unchanged to ``agent.get_state``.
    simulation_length : int
        Number of single simulation steps to execute.
    Demo_Mode : bool
        When true, each agent's reward is printed at every decision point.
    PER_activated :
        Accepted for interface compatibility; not used by this function.
    cycle_length : int, optional
        Counter value at which the agents act (default 900, the value that
        was previously hard-coded). The counter is reset to 0 on the acting
        step, so decisions happen every ``cycle_length + 1`` steps.

    Returns
    -------
    tuple
        ``(mean of recorded rewards, mean of recorded queue values)``.

    Notes
    -----
    The simulation is stopped in a ``finally`` clause so that Vissim is not
    left running when a step raises.
    """
    cycle_t = 0
    Vissim.Simulation.SetAttValue('SimRes', 1)
    rewards = []
    Queues = []
    try:
        for time_t in range(simulation_length):
            if cycle_t == cycle_length:
                for agent in Agents:
                    # Observe, score and store the transition, then pick the next action.
                    agent.newstate = agent.get_state(state_type, state_size, Vissim)
                    agent.reward   = agent.get_reward()
                    agent.remember(agent.state, agent.action, agent.reward, agent.newstate)
                    agent.action = agent.act(agent.newstate)
                    if Demo_Mode:
                        print('Agent Reward in this cycle is : {}'.format(round(agent.reward,2)))

                    agent.state    = agent.newstate

                cycle_t = 0
            else:
                cycle_t += 1

            # The call is kept in case it has side effects; its result was never used here.
            Delay_Dictionary()
            rewards.append(Get_Reward())
            Queues.append(Get_Total_Queue())

            print(np.mean(rewards),np.mean(Queues))
            display.clear_output(wait=True)

            # Advance the simulation by one step.
            Vissim.Simulation.RunSingleStep()
    finally:
        # Always stop the simulation, even if a step above failed.
        Vissim.Simulation.Stop()

    return np.mean(rewards), np.mean(Queues)

# Stop any simulation that is still running before starting a new episode.
Vissim.Simulation.Stop()
# NOTE(review): this rebinds the notebook-wide simulation_length. With only 500 steps the
# 900-step decision counter in run_simulation_episode is never reached, so the agents never
# choose an action during this run - confirm that this is intended.
simulation_length = 500
run_simulation_episode(Agents, Vissim, state_type, state_size, simulation_length, Demo_Mode, PER_activated)

(-25.033817739753996, -21.594)

## Debug

1. Make Actions a dictionary
2. No model load functionality in Alvaro code

In [111]:
# Load Alvaro Parameters
from keras.models import load_model
# Saved Keras model, loaded from a path relative to the current working directory.
model = load_model('Single_Cross_Straight_Episodes400_ProgramDQN_Agent0.h5')

## Network Model Parameters
Random_Seed = 42
model_name  = 'Single_Cross_Straight'
#vissim_working_directory = 'C:\\Users\\acabrejasegea\\OneDrive - The Alan Turing Institute\\Desktop\\ATI\\0_TMF\\MLforFlowOptimisation\\Vissim\\'
# NOTE(review): absolute, user-specific path - must be edited on any other machine.
vissim_working_directory = 'C:\\Users\\nwalton\\OneDrive - The Alan Turing Institute\\Documents\\MLforFlowOptimisation\\Vissim\\'
program = 'DQN' # DQN, DuelingDQN, DDQN, DuelingDDQN
reward_type = 'Queues'
# Selects the branch used by DQNAgent.get_state ('Queues' and 'Delay' are the implemented ones).
state_type  = 'Queues'
# True -> agents store experiences in PER.Memory; False -> a plain deque sampled uniformly.
PER_activated = True
## Use of additional files?
flag_read_additionally  = False
## Load trained model?
# When Demo_Mode is True the agents are created with a starting exploration rate of 0.
Demo_Mode = False
load_trained = False
Quickmode = True
SaveResultsAgent = True
# Random demand
Random_Demand = False

## Data handling flags
# Flag for restarting the COM Server
reset_flag = True
#cache_flag = False
# If a fresh start is needed, all previous results from simulations are deleted
Start_Fresh = True
# Debug action
debug_action = False

## RL Hyperparameters
# Number of simulations, save every "n" episodes and copy weights with frequency "f"
episodes = 100
partial_save_at = 100
copy_weights_frequency = 5
reset_frequency = 101

# Timesteps per simulation (1 timestep = 0.1 sec), length for random population is a multiple of episode
# NOTE(review): run_simulation_episode sets 'SimRes' to 1, so the 0.1 sec figure may not hold there - confirm.
simulation_length = 3600*1 + 1
memory_population_length = simulation_length*5

## State-Action Parameters
# state_size must match the four queue counters read by DQNAgent.get_state.
state_size = 4
# Action a is applied as signal program number a + 1 (see DQNAgent.act).
action_size = 5

# Hyperparameters
batch_size = 64
memory_size = 1000
# Learning rate handed to the Adam optimizer.
alpha   = 0.0001
#alpha   = 0.001

# Discount factor.
gamma   = 0.95

# Exploration Schedule
epsilon_start = 1
epsilon_end   = 0.001
# Linear schedule: a negative increment that the agent ADDS to epsilon after each replay call.
# The factor 1.2 makes epsilon reach epsilon_end before the last episode.
# NOTE: divides by (episodes-1), so episodes == 1 raises ZeroDivisionError.
epsilon_decay = 1.2*(epsilon_end - epsilon_start)/(episodes-1)
#epsilon_decay = np.power(epsilon_end/epsilon_start, 1./(episodes+1)) # Geometric decay

# Demand Schedule
demands = [100, 200, 400, 600, 800, 1000]
# Session ID
Session_ID = 'Episodes'+str(episodes)+'_Program'+program

In [112]:
# ALvaro DQN agent
class DQNAgent:
    """Deep Q-Learning agent that drives one Vissim signal controller.

    The agent owns an online Keras network (``self.model``), a target network
    (``self.target_model``), a replay memory (a uniform ``deque`` or a
    ``PER.Memory``) and an epsilon-greedy exploration schedule. Actions are
    integers in ``[0, action_size)``; action ``a`` is applied by setting the
    controller attribute ``'ProgNo'`` to ``a + 1``.

    The class relies on names provided by the surrounding notebook: ``np``,
    ``random``, ``PER`` and the Keras symbols ``Sequential``, ``Model``,
    ``Input``, ``Dense``, ``Lambda``, ``Adam`` and ``K``.
    """

    def __init__(self, state_size, action_size, ID, state_type, npa, memory_size, gamma, epsilon_start, epsilon_end, epsilon_decay, alpha, copy_weights_frequency, Vissim, PER_activated, DoubleDQN, Dueling):
        """Build the networks, the replay memory and the initial transition.

        Parameters
        ----------
        state_size : int
            Length of the state vector fed to the networks.
        action_size : int
            Number of discrete actions (network outputs).
        ID :
            Key of this agent's controller in ``npa.signal_controllers``.
        state_type, Vissim :
            Accepted for interface compatibility; not stored by the constructor.
        npa :
            Network parser exposing ``signal_controllers``.
        memory_size : int
            Capacity of the replay memory.
        gamma : float
            Discount rate.
        epsilon_start, epsilon_end : float
            Initial and minimum exploration rate.
        epsilon_decay : float
            Increment ADDED to epsilon after each replay call (negative for a
            decaying schedule).
        alpha : float
            Learning rate of the Adam optimizer.
        copy_weights_frequency : int
            Episode period for syncing the target network.
        PER_activated : bool
            Use prioritized experience replay (``PER.Memory``) when true.
        DoubleDQN, Dueling : bool
            Architecture flags.
        """
        # Agent Junction ID and Controller ID
        self.signal_id = ID
        self.signal_controller = npa.signal_controllers[self.signal_id]

        # Number of states and size of the action space
        self.state_size = state_size
        self.action_size = action_size

        # Agent Hyperparameters
        self.gamma = gamma                    # discount rate
        self.epsilon = epsilon_start          # starting exploration rate
        self.epsilon_min = epsilon_end        # final exploration rate
        self.epsilon_decay = epsilon_decay    # additive change of the exploration rate
        self.learning_rate = alpha            # learning rate

        # Agent Architecture
        self.DoubleDQN = DoubleDQN            # Double Deep Q Network Flag
        self.Dueling = Dueling                # Dueling Q Networks Flag
        self.PER_activated = PER_activated    # Prioritized Experience Replay Flag

        # Online and target networks start from identical weights
        self.copy_weights_frequency = copy_weights_frequency
        self.model = self._build_model()
        self.target_model = self._build_model()
        self.target_model.set_weights(self.model.get_weights())

        # Architecture Debug Messages
        if self.DoubleDQN:
            if self.Dueling:
                print("Deploying instance of Dueling Double Deep Q Learning Agent(s)")
            else:
                print("Deploying instance of Double Deep Q Learning Agent(s)")
        else:
            if self.Dueling:
                print("Deploying instance of Dueling Deep Q Learning Agent(s)")
            else:
                print("Deploying instance of Standard Deep Q Learning Agent(s)")

        # Initial S, A, R, S_: all-zero states sized from state_size, so the
        # constructor also works when state_size is not 4.
        self.state = np.zeros((1, state_size), dtype=int)
        self.newstate = np.zeros((1, state_size), dtype=int)
        self.action = 0
        self.reward = 0

        # Metrics Storage Initialization
        self.episode_reward = []
        self.loss = []

        if self.PER_activated:
            # Prioritized replay: PER.Memory stores the experiences with their priorities
            self.memory = PER.Memory(memory_size)
        else:
            # Uniform replay: bounded deque that drops the oldest experience when full.
            # Imported locally so the class does not depend on a notebook-level import.
            from collections import deque
            self.memory = deque(maxlen=memory_size)

    def update_IDS(self, ID, npa):
        """Re-bind the agent to controller ``ID`` of the network parser ``npa``."""
        self.signal_id = ID
        self.signal_controller = npa.signal_controllers[self.signal_id]

    def _build_model(self):
        """Create and compile the Q-network (MSE loss, Adam optimizer).

        With ``self.Dueling`` the network splits into an advantage stream
        (``action_size`` outputs) and a value stream (1 output), which are
        recombined as ``V + (A - mean(A))``. Otherwise a plain two-hidden-layer
        Sequential network is built.
        """
        if self.Dueling:
            input_layer = Input(shape = (self.state_size,))
            dense1 = Dense(24, input_dim=self.state_size, activation='relu')(input_layer)
            # Advantage stream
            fc1 = Dense(48)(dense1)
            dueling_actions = Dense(self.action_size)(fc1)
            # State-value stream
            fc2 = Dense(48)(dense1)
            dueling_values = Dense(1)(fc2)

            def dueling_operator(duel_input):
                duel_v = duel_input[0]
                duel_a = duel_input[1]
                # Subtract the mean advantage before adding the state value
                return (duel_v + (duel_a - K.mean(duel_a, axis = 1, keepdims = True)))

            policy = Lambda(dueling_operator, name = 'policy')([dueling_values, dueling_actions])
            model = Model(inputs=[input_layer], outputs=[policy])
            model.compile(loss='mse', optimizer=Adam(lr=self.learning_rate))
            return(model)
        else:
            # Standard architecture (also used for the Double DQN variant)
            model = Sequential()
            model.add(Dense(24, input_dim=self.state_size, activation='relu'))
            model.add(Dense(48, activation='relu'))
            model.add(Dense(self.action_size, activation='linear'))
            model.compile(loss='mse', optimizer=Adam(lr=self.learning_rate))
            return model

    def get_state(self, state_type, state_size, Vissim):
        """Read the current state from Vissim as an array of shape ``(1, state_size)``.

        ``'Queues'`` returns the ``'QLen(Current,Last)'`` value of queue
        counters 1-4. ``'Delay'`` returns, per key 1-4, the delay measurement
        ``'VehDelay(Current,Last,All)'`` multiplied by the queue counter's
        ``'QStops(Current,Last)'``. Readings that come back as ``None`` are
        replaced by 0 in both modes, so the state is always numeric.

        Any other ``state_type`` (including the unimplemented ``'MaxFlow'``,
        ``'FuelConsumption'``, ``'NOx'`` and ``'COM'``) returns ``None``.
        """
        # Keys 1-4 are the West, South, East and North approaches by this notebook's naming.
        approach_keys = (1, 2, 3, 4)

        if state_type == 'Queues':
            queues = [Vissim.Net.QueueCounters.ItemByKey(key).AttValue('QLen(Current,Last)')
                      for key in approach_keys]
            # Missing readings used to leak through as None, giving an object
            # array that the network cannot consume.
            queues = [0 if queue is None else queue for queue in queues]
            return np.reshape(queues, [1, state_size])

        if state_type == 'Delay':
            weighted_delays = []
            for key in approach_keys:
                delay = Vissim.Net.DelayMeasurements.ItemByKey(key).AttValue('VehDelay(Current,Last,All)')
                stops = Vissim.Net.QueueCounters.ItemByKey(key).AttValue('QStops(Current,Last)')
                delay = 0 if delay is None else delay
                stops = 0 if stops is None else stops
                weighted_delays.append(delay * stops)
            return np.reshape(weighted_delays, [1, state_size])

        # Remaining state definitions are placeholders
        return None

    def remember(self, state, action, reward, next_state):
        """Store the transition ``(state, action, reward, next_state)`` in the replay memory."""
        experience = (state, action, reward, next_state)
        if self.PER_activated:
            self.memory.store(experience)
        else:
            # A full deque drops its oldest item automatically
            self.memory.append(experience)

    def act(self, state):
        """Choose an action epsilon-greedily and apply it to the signal controller.

        Returns the zero-based action index; the controller attribute
        ``'ProgNo'`` is set to ``action + 1``.
        """
        if np.random.rand() <= self.epsilon:
            # Explore: uniformly random action
            action = random.randrange(self.action_size)
        else:
            # Exploit: action with the highest predicted value
            act_values = self.model.predict(state)
            action = np.argmax(act_values[0])
        self.signal_controller.SetAttValue('ProgNo', int(action+1))
        return action

    def get_reward(self):
        """Return minus the sum of ``self.newstate[0]`` (``None`` counted as 0).

        The reward is also appended to ``self.episode_reward``.
        """
        reward = -np.sum([0 if state is None else state for state in self.newstate[0]])
        self.episode_reward.append(reward)
        return reward

    def _compute_target(self, reward, next_state):
        """Return the scalar TD target ``reward + gamma * Q_target(next_state, a*)``.

        Double DQN picks ``a*`` with the online network and evaluates it with
        the target network; otherwise the target network's maximum is used.
        """
        next_state = np.reshape(next_state, (1, self.state_size))
        target_q = self.target_model.predict(next_state)[0]
        if self.DoubleDQN:
            # Selecting with the online network is what separates Double DQN
            # from the fixed-target update below.
            best_action = int(np.argmax(self.model.predict(next_state)[0]))
            next_value = target_q[best_action]
        else:
            # Fixed Q-Target
            next_value = np.max(target_q)
        return float(reward + self.gamma * next_value)

    def _decay_and_sync(self, episode):
        """Apply one exploration-rate step and sync the target network when due."""
        if self.epsilon > self.epsilon_min:
            # Clamp so an additive step can never push epsilon below its floor
            self.epsilon = max(self.epsilon_min, self.epsilon + self.epsilon_decay)
        # Copy weights every `copy_weights_frequency` episodes (never on episode 0)
        if (episode+1) % self.copy_weights_frequency == 0 and episode != 0:
            self.copy_weights()

    def replay_single(self, batch_size, episode, loss):
        """Train on a uniformly sampled minibatch, one fit call per experience.

        ``loss`` is accepted for interface compatibility and not used; each
        fit's loss value is appended to ``self.loss``.
        Samples with ``random.sample`` directly from ``self.memory``.
        NOTE(review): presumably only usable with the deque memory, not with
        PER.Memory - confirm.
        """
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state in minibatch:
            target_f = self.model.predict(state)
            target_f[0][action] = self._compute_target(reward, next_state)

            self.model.fit(state, target_f, epochs=1, verbose=0)
            self.loss.append(self.model.history.history['loss'][0])

        self._decay_and_sync(episode)

    def replay_batch(self, batch_size, episode, loss):
        """Train on one minibatch with a single fit call.

        With PER the minibatch is drawn through ``self.memory.sample`` and the
        priorities are updated afterwards with the absolute TD errors;
        otherwise the minibatch is sampled uniformly. ``loss`` is accepted for
        interface compatibility and not used; the fit's loss history list is
        appended to ``self.loss``.
        """
        state_vector = []
        target_f_vector = []
        absolute_errors = []

        if self.PER_activated:
            tree_idx, minibatch, ISWeights_mb = self.memory.sample(batch_size)
            # Each sampled entry wraps the experience tuple in its first element
            minibatch = [item[0] for item in minibatch]
        else:
            minibatch = random.sample(self.memory, batch_size)

        for state, action, reward, next_state in minibatch:
            target = self._compute_target(reward, next_state)

            # Insert the target for the taken action and record how far the
            # old prediction was from it (used as the PER priority).
            target_f = self.model.predict(state)
            absolute_errors.append(abs(float(target_f[0][action]) - target))
            target_f[0][action] = target

            state_vector.append(state[0])
            target_f_vector.append(target_f[0])

        state_matrix = np.asarray(state_vector)
        target_f_matrix = np.asarray(target_f_vector)

        self.model.fit(state_matrix, target_f_matrix, epochs=1, verbose=0)
        self.loss.append(self.model.history.history['loss'])

        if self.PER_activated:
            # Update priority
            self.memory.batch_update(tree_idx, absolute_errors)

        self._decay_and_sync(episode)

    def copy_weights(self):
        """Copy the online network's weights into the target network."""
        self.target_model.set_weights(self.model.get_weights())
        print("Weights succesfully copied to Target model.")

In [117]:
# Load Alvaro DQN agent

import sys
sys.path.append("..")
from NParser import NetworkParser
import tensorflow as tf
from keras import backend as K
from keras.models import load_model, Sequential, Model
from keras.layers import merge, Dense, Input, Lambda
from keras.layers.core import Activation, Flatten
from keras.optimizers import Adam
import PER

# Parse the loaded Vissim network; the parser exposes the signal controllers and their ids.
npa = NetworkParser(Vissim)

# One Dueling (non-Double) DQN agent per signal controller.
# In Demo_Mode the starting exploration rate is forced to 0.
Agents = [
    DQNAgent(
        state_size, action_size, ID, state_type, npa, memory_size, gamma,
        0 if Demo_Mode else epsilon_start,
        epsilon_end, epsilon_decay, alpha, copy_weights_frequency, Vissim, PER_activated,
        DoubleDQN=False,
        Dueling=True,
    )
    for ID in npa.signal_controllers_ids
]

# Replace the first agent's freshly built online network with the saved one.
# Only Agents[0].model is swapped; its target network is left untouched.
from keras.models import load_model
model = load_model('PER_DuelingDDQN_400ep_queues_fixed_linear/Single_Cross_Straight_Episodes400_ProgramDuelingDDQN_Agent0.h5')
Agents[0].model = model

Deploying instance of Dueling Deep Q Learning Agent(s)


In [118]:
# Run Alvaro DQN agent
import Simulator_Functions as SF 
import random

# Per-step metric recorded by run_simulation_episode below. It is bound through an alias so the
# metric can be swapped in one place; Get_Delay itself is not defined in this cell.
Get_Reward = Get_Delay

# Run a Single Episode for a set simulation length
def run_simulation_episode(Agents, Vissim, state_type, state_size, simulation_length, Demo_Mode, PER_activated, cycle_length=900):
    """Run one Vissim episode step by step and report the average metrics.

    Every step records ``Get_Reward()`` and ``Get_Total_Queue()`` (notebook
    globals), prints the running means and advances the simulation by a single
    step. Each time the internal counter reaches ``cycle_length`` every agent
    observes a new state, stores the transition, and selects its next action.

    Parameters
    ----------
    Agents : iterable
        Agents exposing ``get_state``, ``get_reward``, ``remember``, ``act``
        and the attributes ``state``, ``newstate``, ``action``, ``reward``.
    Vissim : COM object
        Running Vissim instance; only ``Vissim.Simulation`` is used here.
    state_type, state_size :
        Forwarded unchanged to ``agent.get_state``.
    simulation_length : int
        Number of single simulation steps to execute.
    Demo_Mode : bool
        When true, each agent's reward is printed at every decision point.
    PER_activated :
        Accepted for interface compatibility; not used by this function.
    cycle_length : int, optional
        Counter value at which the agents act (default 900, the value that
        was previously hard-coded). The counter is reset to 0 on the acting
        step, so decisions happen every ``cycle_length + 1`` steps.

    Returns
    -------
    tuple
        ``(mean of recorded rewards, mean of recorded queue values)``.

    Notes
    -----
    The simulation is stopped in a ``finally`` clause so that Vissim is not
    left running when a step raises.
    """
    cycle_t = 0
    Vissim.Simulation.SetAttValue('SimRes', 1)
    rewards = []
    Queues = []
    try:
        for time_t in range(simulation_length):
            if cycle_t == cycle_length:
                for agent in Agents:
                    # Observe, score and store the transition, then pick the next action.
                    agent.newstate = agent.get_state(state_type, state_size, Vissim)
                    agent.reward   = agent.get_reward()
                    agent.remember(agent.state, agent.action, agent.reward, agent.newstate)
                    agent.action = agent.act(agent.newstate)
                    if Demo_Mode:
                        print('Agent Reward in this cycle is : {}'.format(round(agent.reward,2)))

                    agent.state    = agent.newstate

                cycle_t = 0
            else:
                cycle_t += 1

            # The call is kept in case it has side effects; its result was never used here.
            Delay_Dictionary()
            rewards.append(Get_Reward())
            Queues.append(Get_Total_Queue())

            print(np.mean(rewards),np.mean(Queues))
            display.clear_output(wait=True)

            # Advance the simulation by one step.
            Vissim.Simulation.RunSingleStep()
    finally:
        # Always stop the simulation, even if a step above failed.
        Vissim.Simulation.Stop()

    return np.mean(rewards), np.mean(Queues)

# Stop any simulation that is still running before starting a new episode.
Vissim.Simulation.Stop()
# NOTE(review): this rebinds the notebook-wide simulation_length. With only 500 steps the
# 900-step decision counter in run_simulation_episode is never reached, so the agents never
# choose an action during this run - confirm that this is intended.
simulation_length = 500
run_simulation_episode(Agents, Vissim, state_type, state_size, simulation_length, Demo_Mode, PER_activated)

(-25.033817739753996, -21.594)

In [1]:
# Debug probe: show the first signal controller of the loaded network.
# Needs a live `Vissim` object in the kernel (e.g. the one returned by Load_Vissim); the
# recorded run of this cell raised NameError because `Vissim` was not defined.
Vissim.Net.SignalControllers.GetAll()[0]

NameError: name 'Vissim' is not defined