In [None]:
## VISSIM Modules
import win32com.client as com
import os

## RL Modules
import tensorflow as tf
# Report whether TensorFlow can see a GPU. This is informational only:
# execution continues on either branch.
if not tf.test.gpu_device_name():
    print("ERROR: GPU DEVICE NOT FOUND.")
else:
    print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))

from keras.models import load_model
    
## Data Management Modules
import pickle

## User Defined Modules
import Simulator_Functions as SF
from RLAgents import DQNAgent
from NParser import NetworkParser
from COMServer import COMServerDispatch, COMServerReload
from TupleToList import toList
from Utilities import log_progress, pltlive
## Other Modules
import numpy as np
import random
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
## Operation mode (selects functionalities)
# "training" = training agents, maximum speed, frozen UI, mid amount of messages
# "debug"    = trains for 1 episode, minimum speed, working UI, all messages
# "demo"     = loads pretrained agent, minimum speed, working UI
# "test"     = executes evaluation, maximum speed
mode = "debug"
# Fail fast on a typo: an unknown mode would otherwise fall through to the
# memory pre-population branch of the main cell and then exit silently.
if mode not in ("training", "debug", "demo", "test"):
    raise ValueError("Unrecognized operation mode: {!r}".format(mode))

## Network Model Parameters

model_name  = 'Single_Cross_Straight'
# Folder holding the Vissim model files. The default is the original author's
# machine-specific path; set the VISSIM_WORKING_DIRECTORY environment variable
# to run elsewhere without editing the notebook. Keep the trailing path
# separator, as the default has one.
vissim_working_directory = os.environ.get(
    'VISSIM_WORKING_DIRECTORY',
    'C:\\Users\\acabrejasegea\\OneDrive - The Alan Turing Institute\\Desktop\\ATI\\0_TMF\\MLforFlowOptimisation\\Vissim\\')
agent_type = 'DQN' # DQN, DuelingDQN, DDQN, DuelingDDQN
reward_type = 'Queues'
state_type  = 'Queues'
# Initial simulator seed; the training loop increments it once per episode.
Random_Seed = 42

# NOTE(review): flag_read_additionally, Demo_Mode, Quickmode, Random_Demand and
# reward_type are not read by any cell visible in this notebook; they are kept
# in case code outside this view relies on them.
## Use of additional files?
flag_read_additionally  = False
## Load trained model?
Demo_Mode = False
# When True (and mode is neither "demo" nor "debug"), saved agents are loaded
# instead of pre-populating the replay memory.
load_trained = False
Quickmode = True
# Save agents every `partial_save_at` episodes and at the end of training.
SaveResultsAgent = True
# Random demand
Random_Demand = False

In [None]:
## RL Hyperparameters
# Number of simulations, save every "n" episodes and copy weights with frequency "f"
episodes = 400
partial_save_at = 100
copy_weights_frequency = 5

# Timesteps per simulation (1 timestep = 0.1 sec), length for random population is a multiple of episode
timesteps_per_second = 10
seconds_per_update = 3
simulation_length = 3600*1 + 1
memory_population_length = (simulation_length-1)*5+1

## State-Action Parameters
action_type = "phases"        # options are "phases" and "programs"
state_size = 4
action_size = 2

# Hyperparameters
PER_activated = True          # selects the PER pre-population path in the main cell
batch_size = 64
memory_size = 1000
alpha   = 0.0001              # NOTE(review): presumably the learning rate - confirm in DQNAgent
gamma   = 0.95                # NOTE(review): presumably the discount factor - confirm in DQNAgent

# Exploration Schedule ("linear" or "geometric")
# epsilon_sequence holds episodes+1 values because the training loop reads
# epsilon_sequence[episode+1] after every episode.
exploration_schedule = "geometric"
epsilon_start = 1
epsilon_end   = 0.001
# Guard the single-episode case, where episodes-1 would divide by zero.
decay_steps = max(episodes - 1, 1)
if exploration_schedule == "linear":
    # The 1.2 factor makes the line cross zero before the last episode;
    # negative values are clamped to 0 (not to epsilon_end).
    epsilon_decay = 1.2*(epsilon_end - epsilon_start)/decay_steps
    epsilon_sequence = [max(0.0, epsilon_start + epsilon_decay * entry) for entry in range(episodes+1)]
elif exploration_schedule == "geometric":
    epsilon_decay = np.power(epsilon_end/epsilon_start, 1./decay_steps) # Geometric decay
    epsilon_sequence = [epsilon_start * epsilon_decay ** entry for entry in range(episodes+1)]
else:
    # Raise instead of printing: later cells would otherwise fail with a
    # NameError on the undefined epsilon_sequence.
    raise ValueError("ERROR: Unrecognized choice of exploration schedule.")

# Demand Schedule
high_demand = 600
low_demand = 300

# Session ID: label built from the episode count and the agent type, passed
# to the save/load helpers in the main cell.
Session_ID = 'Episodes{}_Program{}'.format(episodes, agent_type)

In [None]:
# Plotting exploration schedule
# One point per training episode; the extra trailing entry of
# epsilon_sequence is left out of the plot.
x_series = np.arange(1, episodes + 1)
y_series = epsilon_sequence[:episodes]

schedule_ax = plt.figure(figsize=(8,4.5)).gca()
schedule_ax.plot(x_series, y_series, '-b')
schedule_ax.set_xlabel('Episodes')
schedule_ax.set_ylabel('Ratio of random exploration')
schedule_ax.set_title('Exploration schedule')
plt.show()

In [None]:
if __name__ == "__main__":
    # Main driver: starts Vissim through COM, deploys one agent per signal
    # controller, then runs the branch selected by `mode` / `load_trained`.
    # All names assigned here are notebook globals read by later cells
    # (reward_storage, Agents, Vissim).

    # Initialize storage
    reward_storage = []
    best_agent_weights = []
    reward_plot = np.zeros([episodes,])
    loss_plot = np.zeros([episodes,])

    # Initialize simulation
    Vissim, Simulation, Network, cache_flag = COMServerDispatch(
        model_name, vissim_working_directory,
        memory_population_length, timesteps_per_second,
        delete_results = True, verbose = True)

    # Setting Random Seed
    Vissim.Simulation.SetAttValue('RandSeed', Random_Seed)
    print ('Random seed set in simulator. Random Seed = '+str(Random_Seed))

    # Deploy Network Parser (crawl network)
    npa = NetworkParser(Vissim)
    print('NetworkParser has succesfully crawled the model network.')

    # Initialize agents
    # Every supported agent type is the same DQNAgent class; the types differ
    # only in these two constructor flags.
    agent_variants = {
        "DQN":         {"DoubleDQN": False, "Dueling": False},
        "DuelingDQN":  {"DoubleDQN": False, "Dueling": True},
        "DDQN":        {"DoubleDQN": True,  "Dueling": False},
        "DuelingDDQN": {"DoubleDQN": True,  "Dueling": True},
    }
    if agent_type not in agent_variants:
        # Drop the COM handle before aborting, and raise rather than calling
        # quit(): if quit() returned, execution reached the check below with
        # agents_deployed undefined.
        Vissim = None
        raise ValueError("Incorrect Agent Class selected. Deployment could not be completed.")
    Agents = [DQNAgent(state_size, action_size, action_type, ID, state_type, npa, memory_size,
                       gamma, epsilon_sequence[0], alpha, copy_weights_frequency, Vissim, PER_activated,
                       **agent_variants[agent_type]) for ID in npa.signal_controllers_ids]
    agents_deployed = True
    print("Deployed {} agent(s) of the Class {}.".format(len(Agents), agent_type))

    ## Execution of Demonstration
    if mode == "demo":
        timesteps_per_second = 10
        Vissim.Simulation.SetAttValue('SimRes', timesteps_per_second)
        Agents = SF.load_agents(vissim_working_directory, model_name, Agents, Session_ID, best = True)
        # No random exploration during the demonstration
        for agent in Agents:
            agent.epsilon = 0
        SF.run_simulation_episode(Agents, Vissim, state_type, state_size, memory_population_length,
                                  timesteps_per_second, seconds_per_update, mode, PER_activated)
        Vissim = None

    elif mode == "debug":
        timesteps_per_second = 10
        Vissim.Simulation.SetAttValue('SimRes', timesteps_per_second)
        SF.run_simulation_episode(Agents, Vissim, state_type, state_size, memory_population_length,
                                  timesteps_per_second, seconds_per_update, mode, PER_activated)

    # Load previous trained data
    elif load_trained:
        Agents = SF.load_agents(vissim_working_directory, model_name, Agents, Session_ID, best = False)
    # If previous data isn't to be loaded, have an initial longer random run to populate memory
    else:
        print('Pre-Populating memory with Random Actions....')
        SF.Set_Quickmode(Vissim, timesteps_per_second)
        if PER_activated:
            # `memory` is not read again in the visible cells; kept as a
            # notebook global for interactive inspection.
            memory = SF.PER_prepopulate_memory(Agents, Vissim, state_type, state_size, memory_size,
                                               vissim_working_directory, model_name)
        else:
            SF.run_simulation_episode(Agents, Vissim, state_type, state_size, memory_population_length,
                                      timesteps_per_second, seconds_per_update, mode, PER_activated)
        print('Memory pre-populated. Starting Training.\n')

    if mode == "training":
        # Iterations of the simulation
        for episode in log_progress(range(episodes), every=1):

            # Reload the model before every episode except the first
            if episode != 0:
                Simulation, Network = COMServerReload(Vissim, model_name, vissim_working_directory,
                                                      simulation_length, timesteps_per_second,
                                                      delete_results = True)

            # Run Network Parser and ensure agents are linked to their intersections
            npa = NetworkParser(Vissim)
            for index, agent in enumerate(Agents):
                agent.update_IDS(npa.signal_controllers_ids[index], npa)
                agent.episode_reward = []

            # Change the random seed
            # NOTE: this mutates the notebook-level Random_Seed, so re-running
            # this cell without re-running the configuration cell continues
            # from the last seed used.
            Random_Seed += 1
            Vissim.Simulation.SetAttValue('RandSeed', Random_Seed)

            # Run Episode at maximum speed
            SF.Set_Quickmode(Vissim, timesteps_per_second)
            SF.run_simulation_episode(Agents, Vissim, state_type, state_size, simulation_length,
                                      timesteps_per_second, seconds_per_update, mode, PER_activated)

            # Calculate episode average reward
            reward_storage, average_reward = SF.average_reward(reward_storage, Agents, episode, episodes)
            best_agent_weights = SF.best_agent(reward_storage, average_reward, best_agent_weights,
                                               vissim_working_directory, model_name, Agents, Session_ID)

            # Train agent with experience of episode and copy weights when necessary
            # Update exploration rate (epsilon_sequence has episodes+1 entries,
            # so index episode+1 is valid on the last episode)
            for agent in Agents:
                agent.replay_batch(batch_size, episode)
                agent.epsilon = epsilon_sequence[episode+1]

            # Security save for long trainings
            if SaveResultsAgent and (episode+1) % partial_save_at == 0:
                SF.save_agents(vissim_working_directory, model_name, Agents, Session_ID, reward_storage)
                print('Saved Partial results at the end of episode {}.'.format(episode+1))

        #Saving agents memory, weights and optimizer
        if SaveResultsAgent:
            SF.save_agents(vissim_working_directory, model_name, Agents, Session_ID, reward_storage)
            # Adjacent literals instead of a backslash inside the string, which
            # embedded the next line's indentation in the printed message.
            print("Model, architecture, weights, optimizer, memory and training results succesfully saved. "
                  "Succesfully Terminated.")

    # Close Vissim
    # Only this notebook-level name is dropped here.
    Vissim = None

In [None]:
# Plotting training progress
plt.figure(figsize=(8,4.5))
x_series = range(1,len(reward_storage)+1)
plt.plot(x_series, reward_storage, '-b', label='Episode Reward')
# A linear trend needs at least two points. reward_storage stays empty in
# "debug" and "demo" modes, where np.polyfit would raise.
if len(reward_storage) >= 2:
    fit = np.polyfit(x_series,reward_storage,1)
    fit_fn = np.poly1d(fit)
    plt.plot(x_series, fit_fn(x_series), '--r', label='Linear Trend')
plt.xlabel('Episodes')
plt.ylabel('Average agent reward in episode')
plt.title('Training evolution and trend')
plt.legend()
plt.show()

# Plotting training loss
# Only the first agent's loss history is shown.
plt.figure(figsize=(8,4.5))
x_series = range(1,len(Agents[0].loss)+1)
plt.plot(x_series,Agents[0].loss, '-b')
plt.xlabel('Training Epoch')
plt.ylabel('Loss')
plt.title('Model Loss')
# ('Loss') is a plain string, which made the legend read "L";
# the trailing comma makes it a one-element tuple.
plt.gca().legend(('Loss',))
plt.show()

In [None]:
# Scratch check: shape of an episode axis with one sample per episode.
np.linspace(1, episodes, num=episodes).shape

In [None]:
# Manual debugging aid: advance the simulation by one step.
# The main cell ends with `Vissim = None`, so on a top-to-bottom run the
# handle is gone by the time this cell executes; report that instead of
# failing with an AttributeError.
if Vissim is not None:
    Vissim.Simulation.RunSingleStep()
else:
    print("Vissim handle has been released (Vissim is None); "
          "re-dispatch the COM server before stepping the simulation.")


In [None]:
# Manual debugging aid: set the signal group at index 2 of the first agent
# to green.
# NOTE(review): whether this COM object is still usable after the main cell
# has released its Vissim handle cannot be told from this notebook - confirm.
debug_signal_group = Agents[0].signal_groups[2]
debug_signal_group.SetAttValue("SigState", "GREEN")