In [1]:
## VISSIM Libraries
import win32com.client as com
import os

## RL Libraries
import tensorflow as tf
if tf.test.gpu_device_name():
    print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))
else:
    print("ERROR: GPU DEVICE NOT FOUND.")
from keras import backend as K
from keras.models import load_model
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

## Data Management Libraries
import pickle
from collections import deque

## Other Libraries
import numpy as np
import random
import matplotlib.pyplot as plt
%matplotlib inline

Default GPU Device: /device:GPU:0


Using TensorFlow backend.


In [2]:
## Network Model Parameters
Random_Seed = 42
model_name  = 'Single_Cross_Straight'
vissim_working_directory = 'C:\\Users\\acabrejasegea\\OneDrive - The Alan Turing Institute\\Desktop\\ATI\\0_TMF\\MLforFlowOptimisation\\Vissim\\'
program = 'DQN'
reward_type = 'Delay'
state_type  = 'Queues' 
## Use of additional files?
flag_read_additionally  = False
## Load trained model?
load_trained = False
Quickmode = True
# Random demand
Random_Demand = True

In [3]:
## Data handling flags
# Flag for restarting the COM Server
reset_flag = True
# If a fresh start is needed, all previous results from simulations are deleted
Start_Fresh = True
# Debug action
debug_action = False

In [4]:
## State-Action Parameters
state_size = 4
action_size = 5

In [5]:
## RL Hyperparamenters
# Number of simulations
episodes = 1000
# Timesteps per simulation (1 timestep = 0.1 sec)
simulation_length = 36000*5
# Memory Size
memory_size = 1000
# Learning Rate
alpha   = 0.9
# Discount Factor
gamma   = 0.3
# Exploration Schedule
epsilon_start = 1
epsilon_end   = 0.01
epsilon_decay = 0.955
# Demand Schedule
demands = [100,200, 400, 600, 800, 1000, 1200]

In [6]:
## Basic User Defined Functions
# Function to convert a nested tuple to a nested list
def toList(NestedTuple):
    return list(map(toList, NestedTuple)) if isinstance(NestedTuple, (list, tuple)) else NestedTuple
print ('Loaded user defined functions')

Loaded user defined functions


In [7]:
## DQN Agent Class
# To access memory of agent i for data about time t:
# s_t     = Agents[i].memory[t][0]
# a_t     = Agents[i].memory[t][1]
# r_t     = Agents[i].memory[t][2]
# s_(t+1) = Agents[i].memory[t][3]

class DQNAgent:
    def __init__(self, state_size, action_size, ID):
        self.signal_id = ID
        self.signal_controller = npa.signal_controllers[self.signal_id]
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=memory_size)
        self.gamma = gamma                    # discount rate
        self.epsilon = epsilon_start          # starting exploration rate
        self.epsilon_min = epsilon_end        # final exploration rate
        self.epsilon_decay = epsilon_decay    # decay of exploration rate
        self.learning_rate = alpha            # learning rate
        self.model = self._build_model()
        
        self.state = np.reshape([0,0,0,0], [1,4])
        self.newstate = np.reshape([0,0,0,0], [1,4])
        self.action = 0
        self.reward = 0
        
        self.episode_reward = []
        
    def update_IDS(self, ID):
        self.signal_id = ID
        self.signal_controller = npa.signal_controllers[self.signal_id]
    
    # DNN definition
    def _build_model(self):
        # Neural Net for Deep-Q learning Model
        model = Sequential()
        model.add(Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(Dense(48, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=Adam(lr=self.learning_rate))
        return model
    
    # Obtain the state based on different state definitions
    def get_state(self, state_type = state_type):
        if state_type == 'Queues':
            #Obtain Queue Values (average value over the last period)
            East_Queue  = Vissim.Net.QueueCounters.ItemByKey(1).AttValue('QLen(Current,Last)')
            South_Queue = Vissim.Net.QueueCounters.ItemByKey(2).AttValue('QLen(Current,Last)')
            West_Queue  = Vissim.Net.QueueCounters.ItemByKey(3).AttValue('QLen(Current,Last)')
            North_Queue = Vissim.Net.QueueCounters.ItemByKey(4).AttValue('QLen(Current,Last)')
            state = [East_Queue, South_Queue, West_Queue, North_Queue]
            state = np.reshape(state, [1,4])
            return(state)
        elif state_type == 'Delay':
            pass
        elif state_type == 'MaxFlow':
            pass
        elif state_type == 'FuelConsumption':
            pass
        elif state_type == 'NOx':
            pass
        elif state_type == "COM":
            pass
    
    # Add memory on the right, if over memory limit, pop leftmost item
    def remember(self, state, action, reward, next_state):
        self.memory.append((state, action, reward, next_state))
        return(self.memory)
    
    # Choosing actions
    def act(self, state):
        if np.random.rand() <= self.epsilon:
            action = random.randrange(self.action_size) 
            self.signal_controller.SetAttValue('ProgNo', int(action+1))
            #print('Chosen Random Action {}'.format(action+1))
            return action
        act_values = self.model.predict(state)
        action = np.argmax(act_values[0]) 
        self.signal_controller.SetAttValue('ProgNo', int(action+1))
        #print('Chosen Not-Random Action {}'.format(action+1))
        return action  # returns action
    
    def get_reward(self):
        reward = -np.absolute((self.newstate[0][0]-self.newstate[0][2])-(self.newstate[0][1]-self.newstate[0][3]))
        self.episode_reward.append(reward)
        return reward
    
    def replay(self, batch_size):
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state in minibatch:
            target = reward
            if not done:
                target = reward + self.gamma * \
                       np.amax(self.model.predict(next_state)[0])
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

In [8]:
## Network Parser (Crawler) class definition
class NetworkParser:
    
    ######################################################################################################################
    ## Nested data structure:
    ## 
    ## Signal Controllers = signal_controllers[signal_controller_ids]
    ## Signal Groups      = signal_groups     [signal_controller_ids] [signal_group_id]
    ## Signal Heads       = signal_heads      [signal_controller_ids] [signal_heads_id]
    ## Lanes              = lanes             [signal_controller_ids] [signal_heads_id] [lane_id]
    ##
    ######################################################################################################################
    ##
    ## Accessing attributes:
    ##
    ## AttValue('AttName(X,Y,bla)')
    ##
    ## X = Simulation Number.      Values: 1,2,3.. 'Current' [single case], Avg, StdDev, Min, Max [over several sims]
    ## Y = Time Interval Number    Values: 1,2,3, 'Current', 'Last', Avg, StdDev, Min, Max, Total
    ## All = All vehicle classes   Values: 10, 20, All
    ######################################################################################################################

    def __init__(self):
        ## Get all SignalControllers
        self.signal_controllers     = toList(Vissim.Net.SignalControllers.GetAll())
        self.signal_controllers_ids = range(len(self.signal_controllers)) #Vissim count starts at 1
                 
        ## Create SignalGroupContainers and unpack the SignalGroups into a list by SignalController
        self.signal_groups = [[] for _ in self.signal_controllers_ids]
        for SC in self.signal_controllers_ids:
            for SG in range(1,self.signal_controllers[SC].SGs.Count+1):
                self.signal_groups[SC].append(self.signal_controllers[SC].SGs.ItemByKey(SG))
                
        ## Create SignalHeadsCollection and unpack the SignalHeads into a list by SignalController
        self.signal_heads = [[] for _ in self.signal_controllers_ids]
        for SC in self.signal_controllers_ids:
            for SG in range(self.signal_controllers[SC].SGs.Count):
                self.signal_heads[SC].append(toList(self.signal_groups[SC][SG].SigHeads.GetAll())[0])
                
        self.lanes = [[[] for b in range(len(self.signal_heads[a])) ] for a in self.signal_controllers_ids]
        for SC in self.signal_controllers_ids:
            for SH in range(len(self.signal_heads[SC])):
                self.lanes[SC][SH].append(self.signal_heads[SC][SH].Lane)

In [9]:
def COMServerDispatch(reset_flag):
    ## Connecting the COM Server => Open a new Vissim Window:
    # Server should only be dispatched in first run. Otherwise reload model.
    # Setting Working Directory
    vissim_working_directory = 'C:\\Users\\acabrejasegea\\OneDrive - The Alan Turing Institute\\Desktop\\ATI\\0_TMF\\MLforFlowOptimisation\\Vissim\\'
    print ('Working Directory set to: ' + vissim_working_directory)
    # Check Chache
    try:
        print ('Checking Presence of Pregenerated Cache.')
        cache_flag
    # Re-generate Cache
    except:
        print ('Cache NOT Present.')
        print ('Generating Cache...')
        Vissim = com.gencache.EnsureDispatch("Vissim.Vissim") 
        print ('Cache generated.\n')
        cache_flag = True
        print ('****************************')
        print ('*   COM Server dispatched  *')
        print ('****************************\n')
    # Dispatch without re-generating Cache.
    else:
        print ('Previous Cache Found. Dispatching...\n')
        Vissim = com.Dispatch("Vissim.Vissim")
        print ('****************************')
        print ('*   COM Server dispatched  *')
        print ('****************************\n')

    ## Load the Network:
    Filename = os.path.join(vissim_working_directory, model_name, (model_name+'.inpx'))
    print ('Model File: ' + model_name+'.inpx')

    # Additional Files
    if flag_read_additionally == False:
        print ('No additional files will be loaded')
    print ('Loading...')
    Vissim.LoadNet(Filename, flag_read_additionally)
    print ('Load process successful')

    ## Setting Simulation End
    Vissim.Simulation.SetAttValue('SimPeriod', simulation_length)
    print ('Simulation length set to '+str(simulation_length/10) + ' seconds.')
    
    ## If a fresh start is needed
    if reset_flag == True:
        if Start_Fresh == True:
            # Delete all previous simulation runs first:
            for simRun in Vissim.Net.SimulationRuns:
                Vissim.Net.SimulationRuns.RemoveSimulationRun(simRun)
            print ('Results from Previous Simulations: Deleted. Fresh Start Available.')

    #Pre-fetch objects for stability
    Simulation = Vissim.Simulation
    print ('Fetched and containerized Simulation Object')
    Network = Vissim.Net
    print ('Fetched and containerized Network Object \n')
    print ('*******************************************************')
    print ('*                                                     *')
    print ('*                 SETUP COMPLETE                      *')
    print ('*                                                     *')
    print ('*******************************************************\n')
    return(Vissim,Simulation,Network, cache_flag)

def COMServerReload(Vissim, reset_flag):
    ## Connecting the COM Server => Open a new Vissim Window:
    # Server should only be dispatched in first run. Otherwise reload model.
    # Setting Working Directory
    vissim_working_directory = 'C:\\Users\\acabrejasegea\\OneDrive - The Alan Turing Institute\\Desktop\\ATI\\0_TMF\\MLforFlowOptimisation\\Vissim\\'
    ## Load the Network:
    Filename = os.path.join(vissim_working_directory, model_name, (model_name+'.inpx'))

    print('Reoading...')
    Vissim.LoadNet(Filename, flag_read_additionally)

    ## Setting Simulation End
    Vissim.Simulation.SetAttValue('SimPeriod', simulation_length)

    ## If a fresh start is needed
    if reset_flag == True:
        if Start_Fresh == True:
            # Delete all previous simulation runs first:
            for simRun in Vissim.Net.SimulationRuns:
                Vissim.Net.SimulationRuns.RemoveSimulationRun(simRun)
    
    #Pre-fetch objects for stability
    Simulation = Vissim.Simulation
    Network = Vissim.Net
    return(Simulation,Network)

In [11]:
if __name__ == "__main__":
    # Initialize simulation
    if 'Vissim' not in globals() or Vissim == None:
        Vissim, Simulation, Network, cache_flag = COMServerDispatch(reset_flag = True)
    else:
        Simulation, Network = COMServerReload(Vissim, reset_flag = True)
        
    # Setting Random Seed
    Vissim.Simulation.SetAttValue('RandSeed', Random_Seed)
    print ('Random seed set in simulator. Random Seed = '+str(Random_Seed))

    # Deploy Network Parser (crawl network)
    npa = NetworkParser()
    print('NetworkParser has succesfully crawled the model network.')
    
    # Initialize agents
    Agents = [DQNAgent(state_size, action_size, ID) for ID in npa.signal_controllers_ids] 
    
    # Load previous trained data
    if load_trained:
        print('Loading Pre-Trained Data')
        for index, agent in enumerate(Agents):
            Filename = os.path.join(vissim_working_directory, model_name, 'Agent'+str(index)+'_'+model_name+'.h5')
            agent.model = load_model(Filename)
    
    # Iterations of the simulation
    for e in range(episodes):
        done = False
        # If not the first episode, reset state at the start
        if e != 0:
            Simulation, Network = COMServerReload(Vissim, reset_flag = False)
            npa = NetworkParser() 
            for index, agent in enumerate(Agents):
                agent.update_IDS(npa.signal_controllers_ids[index])
                agent.episode_reward = []
        
        # Change demand for every episode
        if Random_Demand:
            for vehicle_input in range(1,5):
                Vissim.Net.VehicleInputs.ItemByKey(vehicle_input).SetAttValue('Volume(1)', demands[np.random.randint(0,6)])
        
        # Use max speed for Simulator
        if Quickmode:
            # Set speed parameters in Vissim
            Vissim.Simulation.SetAttValue('UseMaxSimSpeed', True)
            Vissim.Graphics.CurrentNetworkWindow.SetAttValue("QuickMode",1)
            Vissim.SuspendUpdateGUI()

        # Set cycle time to start of cycle
        cycle_t = 0

        # time_t represents each timestep of the simulation
        for time_t in range(simulation_length+1):
            # If the cycle for the current program is over
            if cycle_t == 900:
                for agent in Agents:
                    agent.newstate = agent.get_state()
                    agent.action   = agent.act(agent.newstate)
                    agent.reward   = agent.get_reward()
                    agent.memory   = agent.remember(agent.state, agent.action, agent.reward, agent.newstate)
                    agent.state    = agent.newstate
                cycle_t = 0
            else:
                cycle_t += 1

            # Advance the game to the next frame based on the action.
            Vissim.Simulation.RunSingleStep()

        # Stop the simulation    
        Vissim.Simulation.Stop()

        # Calculate episode average reward
        average_reward = []
        for agent in Agents:
            average_agent_reward = np.average(agent.episode_reward)
            average_reward.append(average_agent_reward)
        average_reward = np.average(average_reward)

        if len(Agents)>1:
            # Print the score and break out of the loop
            print("Episode: {}/{}, Average reward: {}".format(e+1, episodes, average_reward))
            for agent in enumerate(Agents):
                print("Agent {}, Average agent reward: {}".format(agent, average_agent_reward[agent]))
        else:
            print("Episode: {}/{}, Average reward: {}".format(e+1, episodes, average_reward))
        
        done = True
        # Train agent with experience of episode (indicated batch size)
        agent.replay(32)
        
        if e%200 == 0:
            for index, agent in enumerate(Agents):
                Filename = os.path.join(vissim_working_directory, model_name, 'PartialSave_'+str(e)+'_Agent'+str(index)+'_'+model_name+'.h5')
                agent.model.save(Filename)


    #Saving agents memory, weights and optimizer
    for index,agent in enumerate(Agents):    
        Filename = os.path.join(vissim_working_directory, model_name, 'Agent'+str(index)+'_'+model_name+'.h5')
        print('Saving architecture, weights and optimizer state for agent{}'.format(index))
        agent.model.save(Filename)
    print('Model Trained and Saved. Succesfully Terminated.')
    
    # Close Vissim
    Vissim = None

Working Directory set to: C:\Users\acabrejasegea\OneDrive - The Alan Turing Institute\Desktop\ATI\0_TMF\MLforFlowOptimisation\Vissim\
Checking Presence of Pregenerated Cache.
Cache NOT Present.
Generating Cache...
Cache generated.

****************************
*   COM Server dispatched  *
****************************

Model File: Single_Cross_Straight.inpx
No additional files will be loaded
Loading...
Load process successful
Simulation length set to 18000.0 seconds.
Results from Previous Simulations: Deleted. Fresh Start Available.
Fetched and containerized Simulation Object
Fetched and containerized Network Object 

*******************************************************
*                                                     *
*                 SETUP COMPLETE                      *
*                                                     *
*******************************************************

Random seed set in simulator. Random Seed = 42
NetworkParser has succesfully crawled the model

Episode: 111/1000, Average reward: -150.05794849196616
Reoading...
Episode: 112/1000, Average reward: -8.434443929392893
Reoading...
Episode: 113/1000, Average reward: -27.37796189730452
Reoading...
Episode: 114/1000, Average reward: -47.10864213193855
Reoading...
Episode: 115/1000, Average reward: -13.52950105127025
Reoading...
Episode: 116/1000, Average reward: -13.818504997491576
Reoading...
Episode: 117/1000, Average reward: -187.22944030041526
Reoading...
Episode: 118/1000, Average reward: -148.93763363342
Reoading...
Episode: 119/1000, Average reward: -61.570963318634696
Reoading...
Episode: 120/1000, Average reward: -34.86729558822377
Reoading...
Episode: 121/1000, Average reward: -225.04916765301837
Reoading...
Episode: 122/1000, Average reward: -32.05216432288999
Reoading...
Episode: 123/1000, Average reward: -26.904188443073828
Reoading...
Episode: 124/1000, Average reward: -211.37027488173104
Reoading...
Episode: 125/1000, Average reward: -43.057800847139795
Reoading...
Epis

Episode: 235/1000, Average reward: -238.51325373659768
Reoading...
Episode: 236/1000, Average reward: -200.4949435091604
Reoading...
Episode: 237/1000, Average reward: -222.8104955747995
Reoading...
Episode: 238/1000, Average reward: -21.746278806028933
Reoading...
Episode: 239/1000, Average reward: -215.5653209594323
Reoading...
Episode: 240/1000, Average reward: -17.396279672435927
Reoading...
Episode: 241/1000, Average reward: -12.749217969028372
Reoading...
Episode: 242/1000, Average reward: -18.246622934376035
Reoading...
Episode: 243/1000, Average reward: -12.356212319631867
Reoading...
Episode: 244/1000, Average reward: -31.971252833360598
Reoading...
Episode: 245/1000, Average reward: -27.184743575382274
Reoading...
Episode: 246/1000, Average reward: -10.060917231234736
Reoading...
Episode: 247/1000, Average reward: -9.049799964034085
Reoading...
Episode: 248/1000, Average reward: -18.77806497344904
Reoading...
Episode: 249/1000, Average reward: -10.793560654700986
Reoading...


Episode: 359/1000, Average reward: -14.874485794090768
Reoading...
Episode: 360/1000, Average reward: -5.766534066348461
Reoading...
Episode: 361/1000, Average reward: -6.461649292876534
Reoading...
Episode: 362/1000, Average reward: -229.39450553532805
Reoading...
Episode: 363/1000, Average reward: -5.837369518709666
Reoading...
Episode: 364/1000, Average reward: -7.286323614103021
Reoading...
Episode: 365/1000, Average reward: -6.072270582789088
Reoading...
Episode: 366/1000, Average reward: -6.873588553536047
Reoading...
Episode: 367/1000, Average reward: -11.95544119414678
Reoading...
Episode: 368/1000, Average reward: -235.93853126842802
Reoading...
Episode: 369/1000, Average reward: -238.57979977849675
Reoading...
Episode: 370/1000, Average reward: -221.28888782898946
Reoading...
Episode: 371/1000, Average reward: -221.39677700090886
Reoading...
Episode: 372/1000, Average reward: -7.254906434603169
Reoading...
Episode: 373/1000, Average reward: -238.73216494788656
Reoading...
Epi

com_error: (-2147023170, 'The remote procedure call failed.', None, None)

In [None]:
 np.reshape([0,0,0,0], [1,4])

In [31]:
a =[100,0,100,0]
b = np.reshape(a,[1,4])
a1=Agents[0].model.predict(b)
a1

array([[-186.65543, -200.45123, -114.64673, -199.88553, -198.01128]],
      dtype=float32)

In [29]:
a1=Agents[0].model.predict(np.reshape([0,0,0,0], [1,4]))
a2=Agents[0].model.predict(np.reshape([0,30,0,30], [1,4]))
a1-a2

array([[0., 0., 0., 0., 0.]], dtype=float32)

In [28]:
c = np.array([0,20,0,20])
b.shape

(1, 4)

In [None]:
minibatch = random.sample(Agents[0].memory, 3)
for state, action, reward, next_state in minibatch:
    st = state
    st = np.reshape(st, [1,4])

print(type(st))
st


In [None]:
Vissim = None

In [None]:
#Saving agents memory
    for index,agent in enumerate(Agents):    
        Filename = os.path.join(vissim_working_directory, model_name, 'Agent'+str(index)+'_'+model_name+'.h5')
        print('Saving architecture, weights and optimizer state for agent{}'.format(index))
        agent.model.save(Filename)
    print('Model Trained and Saved. Succesfully Terminated.')
