In [8]:
# for loading client
import win32com.client as com
import os
# standard libraries 
import numpy as np
import math
import time
from IPython import display
# For Q-function
from collections import defaultdict
# For saving
import datetime
import dill
import pickle

'''
This loads up a Vissim instance 
    -- Required: inpx file , layx file
    -- warning: quite flakey loading but once loaded okay
'''
def Load_Vissim(End_of_simulation = 10000, Quick_Mode=1):
    Vissim = None
    # Load Vissim
    Vissim = com.gencache.EnsureDispatch("Vissim.Vissim")
    # Load file
    Path_of_COM_Basic_Commands_network = 'C:\\Users\\Public\\Documents\\PTV Vision\\PTV Vissim 11\\Examples Training\\COM\\Basic Commands\\'
    inpx_Filename                = os.path.join(Path_of_COM_Basic_Commands_network, 'COM Basic Commands.inpx')
    flag_read_additionally  = False # you can read network(elements) additionally, in this case set "flag_read_additionally" to true
    Vissim.LoadNet(inpx_Filename, flag_read_additionally)
    # Load a Layout:
    layx_Filename = os.path.join(Path_of_COM_Basic_Commands_network, 'COM Basic Commands.layx')
    Vissim.LoadLayout(layx_Filename)
    # Configure non-GUI for training
    Vissim.Simulation.SetAttValue('UseMaxSimSpeed', True)
    Vissim.Simulation.AttValue('UseAllCores')
    Vissim.Graphics.CurrentNetworkWindow.SetAttValue("QuickMode",Quick_Mode)
    # Set a long simulation time
    Vissim.Simulation.SetAttValue('SimPeriod', End_of_simulation)
    return Vissim

# '''
# Set up Vissim and the parameters for optimizing
# (Worth commenting out after set up)
# if ERROR like ''has no attribute 'CLSIDToClassMap' ''
# DELETE folders:
# C:\Users\nwalton\AppData\Local\Temp\gen_py
# C:\Users\nwalton\AppData\Local\Temp\VISSIM
# Then Restart PC...
# '''

# Vissim = Load_Vissim()

In [31]:
# Get simulation parameters
'''
Set up the parameters of the optimization
'''

'''
makes a dictionary for all signals and thier positions
'''
def Get_Signal_Positions(Signal_Groups):
    Signal_Positions = dict()
    for SG in Signal_Groups:
        for SH in SG.SigHeads:
            Lane = SH.AttValue('Lane')
            Position = SH.AttValue('Pos')
            Signal_Positions[Lane] = Position
    return Signal_Positions

# Lanes for detection
Lane_List = ['3-1','3-2','17-1','17-2','8-1']
# which lane signals can be green at the same time
actions = [(1,1,1,0,0),\
            (0,1,1,1,0),\
            (0,1,0,1,1),\
            (1,1,0,0,1)]
# Define the Q-function
# Q_fn = Q_function(actions)

# round the state space
rounding = 10.
sim_steps = 100 # number of simulation steps before update
# set the load to be light
number_of_inputs = len(Vissim.Net.VehicleInputs)
new_volume = 100
for key in range(1,number_of_inputs+1):
    Vissim.Net.VehicleInputs.ItemByKey(key).SetAttValue('Volume(1)', new_volume)
    
# get the list of signal controllers
Signal_Controller = Vissim.Net.SignalControllers.GetAll()[0]
Signal_Groups = Signal_Controller.SGs.GetAll()
Signal_Positions = Get_Signal_Positions(Signal_Groups)

# These are states and rewards which are global variables 
# Assigned None for now

Q_Size = None # Queue sizes at junctions
delays = dict() # Total delay and change in delay for each vehicle

In [33]:
# Get state and reward info
'''
This code gets the average delay from 1000 steps under MaxWeight
'''
def Get_Q_Size(Lane_List=Lane_List, rounding=rounding):

    # initialize with zero queues
    Qsum = 0
    Q_sizes = dict.fromkeys(Lane_List)
    for key in Q_sizes.keys():
        Q_sizes[key]=0

    # initialize with zero numbers of non-waiting cars
    nonQsum = 0
    nonQ_sizes = dict.fromkeys(Lane_List)
    for key in nonQ_sizes.keys():
        nonQ_sizes[key]=0

    # get all Q lengths    
    All_Vehicles = Vissim.Net.Vehicles.GetAll() 
    for Veh in All_Vehicles:
        lane = Veh.AttValue('Lane')
        if lane in Lane_List : 
            if Veh.AttValue('InQueue') == 1 :
                Q_sizes[lane] += 1
            else : 
                nonQ_sizes[lane] += 1

    state = []

    for lane in Lane_List :
        state.append(math.ceil(Q_sizes[lane] / rounding))
        
    return tuple(state)

'''
state is now the closest vehicle to the junction
reward is now the total delay
'''


def Get_First_Vehicle(Lane_List=Lane_List, rounding=1.):
    All_Vehicles = Vissim.Net.Vehicles.GetAll()
    
    lane_state = dict()

    for cnt_Veh in range(len(All_Vehicles)):
        veh_position = All_Vehicles[cnt_Veh].AttValue('Pos')
        veh_lane = All_Vehicles[cnt_Veh].AttValue('Lane')

        if veh_lane in Signal_Positions.keys():

            rel_position = rounding * math.ceil((Signal_Positions[veh_lane] - veh_position) / rounding) 

            if  rel_position >= 0 :

                if veh_lane in lane_state.keys():    
                    if rel_position < lane_state[veh_lane]:
                        lane_state[veh_lane] = rel_position
                else :
                    lane_state[veh_lane] = rel_position
    
    state = []
    for lane in Lane_List:    
        if lane in lane_state.keys():
            state.append(lane_state[lane])
        else:
            state.append(np.nan)
    return tuple(state)


'''
Gets the delays of all vehicles in the network:
    -- dictionary keys are vehicle numbers
    -- 1st entry is delay
    -- 2nd entry is change in delay
'''
state = None

def Delay_Dictionary(Current_Dict=delays):
    # make sure current state is defined
    try:
        delays
    except NameError:
        delays = dict()

    Delay_Dict= dict()
    All_Vehicles = Vissim.Net.Vehicles.GetAll() # get all vehicles in the network at the actual simulation second
    for cnt_Veh in range(len(All_Vehicles)):
        veh_number      = All_Vehicles[cnt_Veh].AttValue('No')
        delay           = All_Vehicles[cnt_Veh].AttValue('DelayTm')  

        if veh_number in Current_Dict.keys():
            old_delay = Current_Dict[veh_number][0]
            Delay_Dict[veh_number] = [delay,delay-old_delay]
        else :
            Delay_Dict[veh_number] = [delay,0.]
    return Delay_Dict

'''
state is now the closest vehicle to the junction
reward is now the total delay
'''

def Get_Delay(delays=delays):
    total_delay = 0
    for key, val in delays.items():
        total_delay += val[1]
    return -total_delay

def Get_Total_Queue(Q_Size=Q_size):
    return -sum(Q_Size)

In [27]:
# Do an action
def Do_Action(action,Signal_Groups):
    for i, sg in enumerate(Signal_Groups):
        if action[i] == 1:
            new_state = "GREEN"
        else :
            new_state = "RED"
        sg.SetAttValue("SigState", new_state)

In [54]:
# Controllers / Learners
'''
MaxWeight
'''
def MaxWeight(state=Q_Size,actions=actions):
    opt_val = 0
    for action in actions : 
        val = np.dot(action,state)
        if val >= opt_val :
            opt_val = val
            opt_act = action
    return opt_act

'''
Easy Q_learner Q_Function
'''
class Q_function():
    def __init__(self, actions = actions):
        # Q function
        self.Q = defaultdict(lambda: defaultdict(float))
        # number of visits
        self.N = defaultdict(lambda: defaultdict(float))
        self.actions = actions

    def Check(self,state,actions=None):
        if actions is None :
            actions = self.actions
        
        if state not in self.Q.keys():
            for action in actions:
                self.Q[state][action] = 0

    def Max(self,state):
        Q_maximum = np.max(list(self.Q[state].values()))
        return Q_maximum

    def Action(self,state,epsilon=0):
        if np.random.rand() < epsilon :
            idx = np.random.randint(len(actions))
            action = actions[idx]
        else :
            self.Check(state,actions)
            action = max(self.Q[state], key=self.Q[state].get)
        return action

    def Learn(self,state,action,reward,next_state,learning_rate=0.1,discount_factor=0.5):
        # Check if state,action and next_state are in Q
        self.Check(state)
        self.Check(next_state)
        self.N_update(state,action)

        dQ = reward \
            + discount_factor * self.Max(next_state) \
            - self.Q[state][action]
        self.Q[state][action] = self.Q[state][action] + learning_rate * dQ 
        
        return self.Q

    def N_update(self,state,action,actions=None):
        if actions is None :
            actions = self.actions
        
        if state not in self.N.keys():
            for action in actions:
                self.N[state][action] = 0 
        self.N[state][action] = self.N[state][action] + 1
        return self.N[state][action]

    def Print(self):
        for state in Q_fn.Q.keys():
            for action in Q_fn.Q[state].keys():
                print(state,action,Q_fn.N[state][action],Q_fn.Q[state][action])

In [58]:
# MaxWeight implementation
# Defined above: MaxWeight(state=Q_Size,actions=actions) 

Get_State = Get_Q_Size
Get_Reward = Get_Delay

Vissim.Graphics.CurrentNetworkWindow.SetAttValue("QuickMode",False)
Vissim.Simulation.SetAttValue('UseMaxSimSpeed', True)
Vissim.Simulation.AttValue('UseAllCores')
delays = dict()
rewards = []
for _ in range(10):
    if Vissim.Simulation.AttValue('SimSec') == 0.0 :
        for _ in range(sim_steps):
            Vissim.Simulation.RunSingleStep()
    Q_size = Get_Q_Size(Lane_List, rounding)
    delays = Delay_Dictionary(delays)
    state = Get_State(Lane_List, rounding)
    action = MaxWeight(Q_size,actions)
    Do_Action(action,Signal_Groups)
    for _ in range(sim_steps):              # Take a few simulation steps
        Vissim.Simulation.RunSingleStep()
    reward = Get_Reward()              # Get the reward
    rewards.append(reward)
    print(np.mean(rewards))
    display.clear_output(wait=True)

0.0


In [34]:
# Easy Q learner 
# -- 
'''
Set up the parameters of the optimization
'''


Get_State = Get_Q_Size
Get_Reward = Get_Total_Queue

Vissim.Graphics.CurrentNetworkWindow.SetAttValue("QuickMode",True)
#comment below gives lower resolution to simulation
#Vissim.Simulation.Stop()
#Vissim.Simulation.SetAttValue("SimRes",1)

Q_fn = Q_function(actions)

if __name__ == "__main__": 
    sars = []
    start_time = time.time()
    for iters in range(10000):
        # Take a few sets if at the start of simluation 
        # (This corrects a bug)
        if Vissim.Simulation.AttValue('SimSec') == 0.0 :
            for _ in range(sim_steps):
                Vissim.Simulation.RunSingleStep()

        state = Get_State(Lane_List, rounding)  # Get the current state
        action = Q_fn.Action(state,0.1)         # Get the current action
        Do_Action(action,Signal_Groups)         # Implement the action
        for _ in range(sim_steps):              # Take a few simulation steps
            Vissim.Simulation.RunSingleStep()
        reward = Get_Reward(state)              # Get the reward
        next_state = Get_State(Lane_List, rounding)     # Get next state
        Q_fn.Learn(state,action,reward,next_state)      # Apply Q-Learning
        sars.append([state,action,reward,next_state])    # Save data
        Q_fn.N_update(state,action)
        print(iters, int(time.time()-start_time))
        display.clear_output(wait=True)
        
        

9999 1674


In [35]:
# Pickle Q_Learner & SARS
'''
This save the Q-function (and the time of training)
'''
now = str(datetime.datetime.now())
now = now.replace(" ","")
now = now.replace(":","-")
pickle.dump(sars, open( "SARS"+now+".p", "wb" ))
dill.dump(Q_fn, open( "Q"+now+".p", "wb" ))

In [55]:
# Easy Q learner - sars learner 
sars = pickle.load( open("SARS.p", "rb" ))
Q_fn_load = Q_function()

for state, action, reward, next_state in sars:
    Q_fn_load.Learn(state,action,reward,next_state) 
    Q_fn_load.N_update(state,action)



In [57]:
Q_fn_load.Print()

(0, 0, 0, 0, 0) (1, 1, 1, 0, 0) 5537 -0.6427781776479715
(0, 0, 0, 0, 0) (0, 1, 1, 1, 0) 8214 -0.6739121010482179
(0, 0, 0, 0, 0) (0, 1, 0, 1, 1) 10380 -0.6276343968464432
(0, 0, 0, 0, 0) (1, 1, 0, 0, 1) 7873 -0.5119867186993312
(0, 0, 0, 1, 0) (1, 1, 1, 0, 0) 347 -1.814467106979124
(0, 0, 0, 1, 0) (0, 1, 1, 1, 0) 3300 -1.4690427772484382
(0, 0, 0, 1, 0) (0, 1, 0, 1, 1) 2178 -1.5758603560601323
(0, 0, 0, 1, 0) (1, 1, 0, 0, 1) 295 -1.8529934902166978
(1, 0, 0, 0, 1) (1, 1, 1, 0, 0) 167 -2.9099556148013823
(1, 0, 0, 0, 1) (0, 1, 1, 1, 0) 138 -3.2459373987836107
(1, 0, 0, 0, 1) (0, 1, 0, 1, 1) 174 -2.822362404049054
(1, 0, 0, 0, 1) (1, 1, 0, 0, 1) 739 -2.4995378343144155
(0, 0, 0, 0, 1) (1, 1, 1, 0, 0) 263 -1.9129771456882716
(0, 0, 0, 0, 1) (0, 1, 1, 1, 0) 264 -1.8749185372346406
(0, 0, 0, 0, 1) (0, 1, 0, 1, 1) 948 -1.6526088311399376
(0, 0, 0, 0, 1) (1, 1, 0, 0, 1) 3415 -1.4121280135724399
(1, 0, 0, 0, 0) (1, 1, 1, 0, 0) 4049 -1.5978687883166183
(1, 0, 0, 0, 0) (0, 1, 1, 1, 0) 336 -1.85

In [98]:
# First Vehicle Q-learner
# Good for light load

Get_State = Get_First_Vehicle
Get_Reward = Get_Delay
sim_steps = 20 # needs higher sim steps (I think).
rounding = 5.
Q_fn = Q_function(actions)

Vissim.Graphics.CurrentNetworkWindow.SetAttValue("QuickMode",True)
#comment below gives lower resolution to simulation
#Vissim.Simulation.Stop()
#Vissim.Simulation.SetAttValue("SimRes",1)


if __name__ == "__main__": 
    sars = []
    rewards = []
    start_time = time.time()
    delays = dict()
    for iters in range(10000):
        # Take a few sets if at the start of simluation 
        # (This corrects a bug)
        if Vissim.Simulation.AttValue('SimSec') == 0.0 :
            for _ in range(sim_steps):
                Vissim.Simulation.RunSingleStep()

        state = Get_State(Lane_List, rounding)  # Get the current state
        action = Q_fn.Action(state,0.1)         # Get the current action
        delays = Delay_Dictionary(delays)
        Do_Action(action,Signal_Groups)         # Implement the action
        for _ in range(sim_steps):              # Take a few simulation steps
            Vissim.Simulation.RunSingleStep()
        reward = Get_Reward(state)              # Get the reward
        rewards.append(reward)
        next_state = Get_State(Lane_List, rounding)     # Get next state
        Q_fn.Learn(state,action,reward,next_state)      # Apply Q-Learning
        sars.append([state,action,reward,next_state])    # Save data
        Q_fn.N_update(state,action)
        print(iters, int(time.time()-start_time), np.mean(rewards))
        display.clear_output(wait=True)
        
# dill.dump(Q_fn, open( "Q_First_Vehicle_Learner.p", "wb" ))

KeyboardInterrupt: 

In [102]:
Vissim.Graphics.CurrentNetworkWindow.SetAttValue("QuickMode",False)
Vissim.Simulation.SetAttValue('UseMaxSimSpeed', True)
Vissim.Simulation.AttValue('UseAllCores')

for _ in range(50):
    if Vissim.Simulation.AttValue('SimSec') == 0.0 :
        for _ in range(sim_steps):
            Vissim.Simulation.RunSingleStep()

    state = Get_State(Lane_List,rounding)
    action = Q_fn.Action(state,0.0)
    Do_Action(action,Signal_Groups)
    for _ in range(10):              # Take a few simulation steps
        Vissim.Simulation.RunSingleStep()

In [102]:
Vissim.Graphics.CurrentNetworkWindow.SetAttValue("QuickMode",True)
Vissim.Simulation.SetAttValue('UseMaxSimSpeed', True)
Vissim.Simulation.AttValue('UseAllCores')

for _ in range(50):
    if Vissim.Simulation.AttValue('SimSec') == 0.0 :
        for _ in range(sim_steps):
            Vissim.Simulation.RunSingleStep()

    state = Get_State(Lane_List,rounding)
    action = Q_fn.Action(state,0.0)
    Do_Action(action,Signal_Groups)
    for _ in range(10):              # Take a few simulation steps
        Vissim.Simulation.RunSingleStep()

In [94]:
for _ in range(10):              # Take a few simulation steps
    Vissim.Simulation.RunSingleStep()

In [79]:
Q_fn.Print()

(0, 0, 0, 0, 0) (1, 1, 1, 0, 0) 3126 -1.0553189776624292
(0, 0, 0, 0, 0) (0, 1, 1, 1, 0) 1801 -3.718708558527273
(0, 0, 0, 0, 0) (0, 1, 0, 1, 1) 1954 -4.3204038861669645
(0, 0, 0, 0, 0) (1, 1, 0, 0, 1) 4223 -3.4327596182501097
(10.0, nan, nan, nan, nan) (1, 1, 1, 0, 0) 376.0 -32.280520009642146
(10.0, nan, nan, nan, nan) (0, 1, 1, 1, 0) 90.0 -36.6245321989089
(10.0, nan, nan, nan, nan) (0, 1, 0, 1, 1) 116.0 -27.124160827895118
(10.0, nan, nan, nan, nan) (1, 1, 0, 0, 1) 1314.0 -1.9710427017050007
(1, 0, 0, 0, 0) (1, 1, 1, 0, 0) 297 -24.522309166118823
(1, 0, 0, 0, 0) (0, 1, 1, 1, 0) 212 -24.459918843901804
(1, 0, 0, 0, 0) (0, 1, 0, 1, 1) 212 -24.181520723163544
(1, 0, 0, 0, 0) (1, 1, 0, 0, 1) 1981 -2.326945040166538
(nan, nan, nan, nan, nan) (1, 1, 1, 0, 0) 1928.0 -0.9223359939075648
(nan, nan, nan, nan, nan) (0, 1, 1, 1, 0) 1106.0 -0.9563402646561419
(nan, nan, nan, nan, nan) (0, 1, 0, 1, 1) 2644.0 -0.3966694241829665
(nan, nan, nan, nan, nan) (1, 1, 0, 0, 1) 9798.0 -0.9569658418286954

(10.0, nan, 0.0, nan, 10.0) (1, 1, 0, 0, 1) 0.0 0
(nan, 30.0, nan, nan, nan) (1, 1, 1, 0, 0) 16.0 -3.675152350197308
(nan, 30.0, nan, nan, nan) (0, 1, 1, 1, 0) 14.0 -3.2838186895851846
(nan, 30.0, nan, nan, nan) (0, 1, 0, 1, 1) 20.0 -3.686753494900705
(nan, 30.0, nan, nan, nan) (1, 1, 0, 0, 1) 88.0 -0.683743188859829
(10.0, nan, nan, 30.0, nan) (1, 1, 1, 0, 0) 6.0 -7.62909326886906
(10.0, nan, nan, 30.0, nan) (0, 1, 1, 1, 0) 6.0 -4.419233092115454
(10.0, nan, nan, 30.0, nan) (0, 1, 0, 1, 1) 2.0 -4.372897919799299
(10.0, nan, nan, 30.0, nan) (1, 1, 0, 0, 1) 4.0 -7.946299787760401
(10.0, nan, 10.0, 70.0, nan) (1, 1, 1, 0, 0) 4.0 -6.872712363457092
(10.0, nan, 10.0, 70.0, nan) (0, 1, 1, 1, 0) 4.0 -11.135893706840243
(10.0, nan, 10.0, 70.0, nan) (0, 1, 0, 1, 1) 6.0 -9.525999522007334
(10.0, nan, 10.0, 70.0, nan) (1, 1, 0, 0, 1) 6.0 -9.080691964775909
(10.0, nan, 10.0, 10.0, 10.0) (1, 1, 1, 0, 0) 0.0 0
(10.0, nan, 10.0, 10.0, 10.0) (0, 1, 1, 1, 0) 0.0 0
(10.0, nan, 10.0, 10.0, 10.0) (0, 1, 

(nan, nan, nan, nan, 3.0) (1, 1, 0, 0, 1) 0.0 0
(nan, nan, 20.0, nan, 2.0) (1, 1, 1, 0, 0) 0.0 0
(nan, nan, 20.0, nan, 2.0) (0, 1, 1, 1, 0) 0.0 0
(nan, nan, 20.0, nan, 2.0) (0, 1, 0, 1, 1) 0.0 0
(nan, nan, 20.0, nan, 2.0) (1, 1, 0, 0, 1) 0.0 0
(37.0, 68.0, 1.0, 1.0, nan) (1, 1, 1, 0, 0) 0.0 0
(37.0, 68.0, 1.0, 1.0, nan) (0, 1, 1, 1, 0) 0.0 0
(37.0, 68.0, 1.0, 1.0, nan) (0, 1, 0, 1, 1) 0.0 0
(37.0, 68.0, 1.0, 1.0, nan) (1, 1, 0, 0, 1) 0.0 0
(nan, nan, 1.0, 1.0, nan) (1, 1, 1, 0, 0) 0.0 0
(nan, nan, 1.0, 1.0, nan) (0, 1, 1, 1, 0) 0.0 0
(nan, nan, 1.0, 1.0, nan) (0, 1, 0, 1, 1) 0.0 0
(nan, nan, 1.0, 1.0, nan) (1, 1, 0, 0, 1) 0.0 0
(nan, 2.0, 1.0, nan, nan) (1, 1, 1, 0, 0) 0.0 0
(nan, 2.0, 1.0, nan, nan) (0, 1, 1, 1, 0) 0.0 0
(nan, 2.0, 1.0, nan, nan) (0, 1, 0, 1, 1) 0.0 0
(nan, 2.0, 1.0, nan, nan) (1, 1, 0, 0, 1) 0.0 0
(24.0, nan, 1.0, nan, nan) (1, 1, 1, 0, 0) 0.0 0
(24.0, nan, 1.0, nan, nan) (0, 1, 1, 1, 0) 0.0 0
(24.0, nan, 1.0, nan, nan) (0, 1, 0, 1, 1) 0.0 0
(24.0, nan, 1.0, nan, nan

(nan, nan, 17.0, nan, nan) (1, 1, 0, 0, 1) 0.0 0
(nan, 3.0, nan, 1.0, nan) (1, 1, 1, 0, 0) 0.0 0
(nan, 3.0, nan, 1.0, nan) (0, 1, 1, 1, 0) 0.0 0
(nan, 3.0, nan, 1.0, nan) (0, 1, 0, 1, 1) 0.0 0
(nan, 3.0, nan, 1.0, nan) (1, 1, 0, 0, 1) 0.0 0
(nan, nan, 31.0, nan, nan) (1, 1, 1, 0, 0) 0.0 0
(nan, nan, 31.0, nan, nan) (0, 1, 1, 1, 0) 0.0 0
(nan, nan, 31.0, nan, nan) (0, 1, 0, 1, 1) 0.0 0
(nan, nan, 31.0, nan, nan) (1, 1, 0, 0, 1) 0.0 0
(nan, nan, 14.0, nan, nan) (1, 1, 1, 0, 0) 0.0 0
(nan, nan, 14.0, nan, nan) (0, 1, 1, 1, 0) 0.0 0
(nan, nan, 14.0, nan, nan) (0, 1, 0, 1, 1) 0.0 0
(nan, nan, 14.0, nan, nan) (1, 1, 0, 0, 1) 0.0 0
(nan, nan, nan, 28.0, nan) (1, 1, 1, 0, 0) 0.0 0
(nan, nan, nan, 28.0, nan) (0, 1, 1, 1, 0) 0.0 0
(nan, nan, nan, 28.0, nan) (0, 1, 0, 1, 1) 0.0 0
(nan, nan, nan, 28.0, nan) (1, 1, 0, 0, 1) 0.0 0
(72.0, nan, 1.0, nan, nan) (1, 1, 1, 0, 0) 0.0 0
(72.0, nan, 1.0, nan, nan) (0, 1, 1, 1, 0) 0.0 0
(72.0, nan, 1.0, nan, nan) (0, 1, 0, 1, 1) 0.0 0
(72.0, nan, 1.0, nan, na

(37.0, 68.0, nan, 1.0, nan) (1, 1, 0, 0, 1) 0.0 0
(24.0, nan, nan, nan, nan) (1, 1, 1, 0, 0) 0.0 0
(24.0, nan, nan, nan, nan) (0, 1, 1, 1, 0) 0.0 0
(24.0, nan, nan, nan, nan) (0, 1, 0, 1, 1) 0.0 0
(24.0, nan, nan, nan, nan) (1, 1, 0, 0, 1) 0.0 0
(nan, nan, nan, 31.0, 2.0) (1, 1, 1, 0, 0) 0.0 0
(nan, nan, nan, 31.0, 2.0) (0, 1, 1, 1, 0) 0.0 0
(nan, nan, nan, 31.0, 2.0) (0, 1, 0, 1, 1) 0.0 0
(nan, nan, nan, 31.0, 2.0) (1, 1, 0, 0, 1) 0.0 0
(nan, nan, 20.0, 1.0, 1.0) (1, 1, 1, 0, 0) 0.0 0
(nan, nan, 20.0, 1.0, 1.0) (0, 1, 1, 1, 0) 0.0 0
(nan, nan, 20.0, 1.0, 1.0) (0, 1, 0, 1, 1) 0.0 0
(nan, nan, 20.0, 1.0, 1.0) (1, 1, 0, 0, 1) 0.0 0
(72.0, nan, nan, nan, nan) (1, 1, 1, 0, 0) 0.0 0
(72.0, nan, nan, nan, nan) (0, 1, 1, 1, 0) 0.0 0
(72.0, nan, nan, nan, nan) (0, 1, 0, 1, 1) 0.0 0
(72.0, nan, nan, nan, nan) (1, 1, 0, 0, 1) 0.0 0
(4.0, nan, nan, nan, 1.0) (1, 1, 1, 0, 0) 0.0 0
(4.0, nan, nan, nan, 1.0) (0, 1, 1, 1, 0) 0.0 0
(4.0, nan, nan, nan, 1.0) (0, 1, 0, 1, 1) 0.0 0
(4.0, nan, nan, nan, 1

(nan, nan, nan, 35.0, 1.0) (1, 1, 0, 0, 1) 0.0 0
(2.0, nan, 1.0, 51.0, nan) (1, 1, 1, 0, 0) 0.0 0
(2.0, nan, 1.0, 51.0, nan) (0, 1, 1, 1, 0) 0.0 0
(2.0, nan, 1.0, 51.0, nan) (0, 1, 0, 1, 1) 0.0 0
(2.0, nan, 1.0, 51.0, nan) (1, 1, 0, 0, 1) 0.0 0
(13.0, nan, nan, 2.0, nan) (1, 1, 1, 0, 0) 0.0 0
(13.0, nan, nan, 2.0, nan) (0, 1, 1, 1, 0) 0.0 0
(13.0, nan, nan, 2.0, nan) (0, 1, 0, 1, 1) 0.0 0
(13.0, nan, nan, 2.0, nan) (1, 1, 0, 0, 1) 0.0 0
(nan, 4.0, 2.0, nan, nan) (1, 1, 1, 0, 0) 0.0 0
(nan, 4.0, 2.0, nan, nan) (0, 1, 1, 1, 0) 0.0 0
(nan, 4.0, 2.0, nan, nan) (0, 1, 0, 1, 1) 0.0 0
(nan, 4.0, 2.0, nan, nan) (1, 1, 0, 0, 1) 0.0 0
(2.0, nan, nan, 1.0, 1.0) (1, 1, 1, 0, 0) 0.0 0
(2.0, nan, nan, 1.0, 1.0) (0, 1, 1, 1, 0) 0.0 0
(2.0, nan, nan, 1.0, 1.0) (0, 1, 0, 1, 1) 0.0 0
(2.0, nan, nan, 1.0, 1.0) (1, 1, 0, 0, 1) 0.0 0
(25.0, nan, nan, 2.0, nan) (1, 1, 1, 0, 0) 0.0 0
(25.0, nan, nan, 2.0, nan) (0, 1, 1, 1, 0) 0.0 0
(25.0, nan, nan, 2.0, nan) (0, 1, 0, 1, 1) 0.0 0
(25.0, nan, nan, 2.0, nan) (

(10.0, 40.0, nan, 60.0, 10.0) (1, 1, 1, 0, 0) 1 -5.994710633472489
(10.0, 40.0, nan, 60.0, 10.0) (0, 1, 1, 1, 0) 0 0
(10.0, 40.0, nan, 60.0, 10.0) (0, 1, 0, 1, 1) 0 0
(10.0, 40.0, nan, 60.0, 10.0) (1, 1, 0, 0, 1) 1 0
(nan, 20.0, nan, 10.0, nan) (1, 1, 1, 0, 0) 1 -4.000000000000057
(nan, 20.0, nan, 10.0, nan) (0, 1, 1, 1, 0) 2 -4.508078180638321
(nan, 20.0, nan, 10.0, nan) (0, 1, 0, 1, 1) 2 -0.3839721708260748
(nan, 20.0, nan, 10.0, nan) (1, 1, 0, 0, 1) 5 -0.7559942679863008
(nan, 50.0, nan, 10.0, nan) (1, 1, 1, 0, 0) 1 -5.410221901271564
(nan, 50.0, nan, 10.0, nan) (0, 1, 1, 1, 0) 4 -3.360496461307709
(nan, 50.0, nan, 10.0, nan) (0, 1, 0, 1, 1) 4 -2.6725793212793123
(nan, 50.0, nan, 10.0, nan) (1, 1, 0, 0, 1) 19 -2.635556452748905
(nan, nan, 20.0, 50.0, nan) (1, 1, 1, 0, 0) 4 -2.2042150587750156
(nan, nan, 20.0, 50.0, nan) (0, 1, 1, 1, 0) 3 -0.5320936909521112
(nan, nan, 20.0, 50.0, nan) (0, 1, 0, 1, 1) 2 -1.1477021272787364
(nan, nan, 20.0, 50.0, nan) (1, 1, 0, 0, 1) 3 -1.806646007652

(nan, 60.0, 60.0, nan, nan) (1, 1, 1, 0, 0) 1 -0.0016745647363166861
(nan, 60.0, 60.0, nan, nan) (0, 1, 1, 1, 0) 2 -0.001984100647228582
(nan, 60.0, 60.0, nan, nan) (0, 1, 0, 1, 1) 2 -0.003674621864334616
(nan, 60.0, 60.0, nan, nan) (1, 1, 0, 0, 1) 1 0
(40.0, nan, nan, nan, 0.0) (1, 1, 1, 0, 0) 2 -0.039084861857655306
(40.0, nan, nan, nan, 0.0) (0, 1, 1, 1, 0) 0 0
(40.0, nan, nan, nan, 0.0) (0, 1, 0, 1, 1) 0 0
(40.0, nan, nan, nan, 0.0) (1, 1, 0, 0, 1) 2 -0.08661002823706898
(20.0, nan, nan, 50.0, nan) (1, 1, 1, 0, 0) 1 -0.24991392830763215
(20.0, nan, nan, 50.0, nan) (0, 1, 1, 1, 0) 2 -0.3327865496964034
(20.0, nan, nan, 50.0, nan) (0, 1, 0, 1, 1) 2 -0.34406265935214875
(20.0, nan, nan, 50.0, nan) (1, 1, 0, 0, 1) 1 0
(40.0, 10.0, nan, nan, nan) (1, 1, 1, 0, 0) 2 -0.18421359741829968
(40.0, 10.0, nan, nan, nan) (0, 1, 1, 1, 0) 2 -0.049914218170296114
(40.0, 10.0, nan, nan, nan) (0, 1, 0, 1, 1) 1 -0.557615345462958
(40.0, 10.0, nan, nan, nan) (1, 1, 0, 0, 1) 1 0
(70.0, nan, nan, 40.0, n

In [None]:
# Delay based Q-learner  

# First Vehicle Q-learner
# Good for light load

Get_State = Get_Q_Size
Get_Reward = Get_Delay

Vissim.Graphics.CurrentNetworkWindow.SetAttValue("QuickMode",True)
#comment below gives lower resolution to simulation
#Vissim.Simulation.Stop()
#Vissim.Simulation.SetAttValue("SimRes",1)


if __name__ == "__main__": 
    sars = []
    rewards = []
    start_time = time.time()
    delays = dict()
    for iters in range(10000):
        # Take a few sets if at the start of simluation 
        # (This corrects a bug)
        if Vissim.Simulation.AttValue('SimSec') == 0.0 :
            for _ in range(sim_steps):
                Vissim.Simulation.RunSingleStep()

        state = Get_State(Lane_List, rounding)  # Get the current state
        action = Q_fn.Action(state,0.1)         # Get the current action
        delays = Delay_Dictionary(delays)
        Do_Action(action,Signal_Groups)         # Implement the action
        for _ in range(sim_steps):              # Take a few simulation steps
            Vissim.Simulation.RunSingleStep()
        reward = Get_Reward(state)              # Get the reward
        rewards.append(reward)
        next_state = Get_State(Lane_List, rounding)     # Get next state
        Q_fn.Learn(state,action,reward,next_state)      # Apply Q-Learning
        sars.append([state,action,reward,next_state])    # Save data
        Q_fn.N_update(state,action)
        print(iters, int(time.time()-start_time), np.mean(rewards))
        display.clear_output(wait=True)

In [None]:
class Q_function():
    def __init__(self, actions = None):
        # Q function
        self.Q = defaultdict(lambda: defaultdict(float))
        # number of visits
        self.N = defaultdict(lambda: defaultdict(float))
        self.actions = actions

    def Check(self,state,actions=None):
        if actions is None :
            actions = self.actions
        
        if state not in self.Q.keys():
            for action in actions:
                self.Q[state][action] = 0

    def Max(self,state):
        Q_maximum = np.max(list(self.Q[state].values()))
        return Q_maximum

    def Action(self,state,epsilon=0):
        if np.random.rand() < epsilon :
            idx = np.random.randint(len(actions))
            action = actions[idx]
        else :
            self.Check(state,actions)
            action = max(self.Q[state], key=self.Q[state].get)
        return action

    def Learn(self,state,action,reward,next_state,learning_rate=0.1,discount_factor=0.5):
        # Check if state,action and next_state are in Q
        self.Check(state)
        self.Check(next_state)
        self.N_update(state,action)

        dQ = reward \
            + discount_factor * self.Max(next_state) \
            - self.Q[state][action]
        self.Q[state][action] = self.Q[state][action] + learning_rate * dQ 
        
        return self.Q

    def N_update(self,state,action,actions=None):
        if actions is None :
            actions = self.actions
        
        if state not in self.N.keys():
            for action in actions:
                self.N[state][action] = 0 
        self.N[state][action] = self.N[state][action] + 1
        return self.N[state][action]

    def Print(self):
        for state in Q_fn.Q.keys():
            for action in Q_fn.Q[state].keys():
                print(state,action,Q_fn.N[state][action],Q_fn.Q[state][action])

In [None]:
#
'''
Set up the parameters of the optimization
'''

# Lanes for detection
Lane_List = ['3-1','3-2','17-1','17-2','8-1']
# which lane signals can be green at the same time
actions = [(1,1,1,0,0),\
            (0,1,1,1,0),\
            (0,1,0,1,1),\
            (1,1,0,0,1)]
# Define the Q-function
Q_fn = Q_function(actions)

# round the state space to the nearest 4 cars
rounding = 10.
sim_steps = 100 # number of simulation steps before update
# set the load to be light
number_of_inputs = len(Vissim.Net.VehicleInputs)
#new_volume = 100
# for key in range(1,number_of_inputs+1):
#    Vissim.Net.VehicleInputs.ItemByKey(key).SetAttValue('Volume(1)', new_volume)
    
# get the list of signal controllers
Signal_Controller = Vissim.Net.SignalControllers.GetAll()[0]
Signal_Groups = Signal_Controller.SGs.GetAll()
Signal_Positions = Get_Signal_Positions(Signal_Groups)

In [None]:
'''
This is the main training loop
(1000 iterations takes about 10 mins)
'''

# Ensure GUI is not running

Vissim.Graphics.CurrentNetworkWindow.SetAttValue("QuickMode",True)
#comment below gives lower resolution to simulation
#Vissim.Simulation.Stop()
#Vissim.Simulation.SetAttValue("SimRes",1)

delays = dict()
rewards = []

if __name__ == "__main__": 
    sars = []
    start_time = time.time()
    for iters in range(10000):
        # Take a few sets if at the start of simluation 
        # (This corrects a bug)
        if Vissim.Simulation.AttValue('SimSec') == 0.0 :
            for _ in range(sim_steps):
                Vissim.Simulation.RunSingleStep()

        state = Get_State(Lane_List, rounding)  # Get the current state
        delays = Delay_Dictionary(delays)
        action = Q_fn.Action(state,0.1)         # Get the current action
        Do_Action(action,Signal_Groups)         # Implement the action
        for _ in range(sim_steps):              # Take a few simulation steps
            Vissim.Simulation.RunSingleStep()
        reward = Get_Reward(state,delays)              # Get the reward
        rewards.append(reward)
        next_state = Get_State(Lane_List, rounding)     # Get next state
        Q_fn.Learn(state,action,reward,next_state)      # Apply Q-Learning
        sars.append([state,action,reward,next_state])    # Save data
        Q_fn.N_update(state,action)
        print(iters, int(time.time()-start_time),np.mean(rewards))
        if iters % 1000 == 0:
            rewards = []
        display.clear_output(wait=True)

In [None]:
'''
This is the main training loop
(1000 iterations takes about 10 mins)
'''

# Ensure GUI is not running

Vissim.Graphics.CurrentNetworkWindow.SetAttValue("QuickMode",True)
#comment below gives lower resolution to simulation
#Vissim.Simulation.Stop()
#Vissim.Simulation.SetAttValue("SimRes",1)

delays = dict()
rewards = []

if __name__ == "__main__": 
    sars = []
    start_time = time.time()
    for iters in range(10000):
        # Take a few sets if at the start of simluation 
        # (This corrects a bug)
        if Vissim.Simulation.AttValue('SimSec') == 0.0 :
            for _ in range(sim_steps):
                Vissim.Simulation.RunSingleStep()

        state = Get_Q_Size(Lane_List,rounding)  # Get the current state
        delays = Delay_Dictionary(delays)
        action = Q_fn.Action(state,0.1)         # Get the current action
        Do_Action(action,Signal_Groups)         # Implement the action
        for _ in range(sim_steps):              # Take a few simulation steps
            Vissim.Simulation.RunSingleStep()
        reward = Get_Reward(state,delays)              # Get the reward
        rewards.append(reward)
        next_state = Get_State(Lane_List, 1.)     # Get next state
        Q_fn.Learn(state,action,reward,next_state)      # Apply Q-Learning
        sars.append([state,action,reward,next_state])    # Save data
        Q_fn.N_update(state,action)
        print(iters, int(time.time()-start_time),np.mean(rewards))
        if iters % 1000 == 0:
            rewards = []
        display.clear_output(wait=True)

In [None]:
'''
This save the Q-function (and the time of training)
'''
now = str(datetime.datetime.now())
now = now.replace(" ","")
now = now.replace(":","-")
#pickle.dump(sars, open( "save2.p", "wb" ))
dill.dump(Q_fn, open( "Q"+now+".p", "wb" ))

In [None]:
dill.dump(sars, open( "SARS"+now+".p", "wb" ))

In [None]:
'''
Print the Current Q-value
'''
Q_fn.Print()

In [None]:
for _ in range(sim_steps):              # Take a few simulation steps
    Vissim.Simulation.RunSingleStep()

In [86]:
Vissim.Graphics.CurrentNetworkWindow.SetAttValue("QuickMode",False)
Vissim.Simulation.SetAttValue('UseMaxSimSpeed', True)
Vissim.Simulation.AttValue('UseAllCores')
for _ in range(50):
    state = Get_State(Lane_List,rounding)
    action = Q_fn.Action(state,0.0)
    Do_Action(action,Signal_Groups)
    for _ in range(10):              # Take a few simulation steps
        Vissim.Simulation.RunSingleStep()

com_error: (-2147023170, 'The remote procedure call failed.', None, None)

In [None]:
'''
finish the session
'''
#Vissim = None

In [None]:
Get_Q_Size(Lane_List,rounding)

In [None]:
state

## Debug

In [None]:
def Get_Q_Size(Lane_List, rounding):

    # initialize with zero queues
    Qsum = 0
    Q_sizes = dict.fromkeys(Lane_List)
    for key in Q_sizes.keys():
        Q_sizes[key]=0

    # initialize with zero numbers of non-waiting cars
    nonQsum = 0
    nonQ_sizes = dict.fromkeys(Lane_List)
    for key in nonQ_sizes.keys():
        nonQ_sizes[key]=0

    # get all Q lengths    
    All_Vehicles = Vissim.Net.Vehicles.GetAll() 
    for Veh in All_Vehicles:
        lane = Veh.AttValue('Lane')
        if lane in Lane_List : 
            if Veh.AttValue('InQueue') == 1 :
                Q_sizes[lane] += 1
            else : 
                nonQ_sizes[lane] += 1

    state = []

    for lane in Lane_List :
        state.append(math.ceil(Q_sizes[lane] / rounding))
        
    return tuple(state)

In [None]:
def MaxWeight(state,actions):
    opt_val = 0
    for action in actions : 
        val = np.dot(action,state)
        if val >= opt_val :
            opt_val = val
            opt_act = action
    return opt_act

In [None]:
for _ in range(sim_steps):              # Take a few simulation steps
    Vissim.Simulation.RunSingleStep()

In [None]:
Vissim.Graphics.CurrentNetworkWindow.SetAttValue("QuickMode",True)
Vissim.Simulation.SetAttValue('UseMaxSimSpeed', True)
Vissim.Simulation.AttValue('UseAllCores')
delays = dict()
rewards = []
for _ in range(1000):
    Q_size = Get_Q_Size(Lane_List, rounding)
    delays = Delay_Dictionary(delays)
    state = Get_State(Lane_List, rounding)
    action = MaxWeight(Q_size,actions)
    Do_Action(action,Signal_Groups)
    for _ in range(sim_steps):              # Take a few simulation steps
        Vissim.Simulation.RunSingleStep()
    reward = Get_Reward(state,delays)              # Get the reward
    rewards.append(reward)
    print(np.mean(rewards))
    display.clear_output(wait=True)

In [None]:
Get_State(Lane_List, rounding)

In [None]:
for _ in range(sim_steps):              # Take a few simulation steps
    Vissim.Simulation.RunSingleStep()

In [None]:
for key in range(1,number_of_inputs+1):
    print(Vissim.Net.VehicleInputs.ItemByKey(key).AttValue('Volume(1)'))