In [None]:
## VISSIM Modules
import win32com.client as com
import os

## RL Modules
import tensorflow as tf
# Ask TensorFlow once for the default GPU device name (empty string when no
# GPU is visible) and report the outcome. Execution continues either way.
_gpu_device = tf.test.gpu_device_name()
if not _gpu_device:
    print("ERROR: GPU DEVICE NOT FOUND.")
else:
    print('Default GPU Device: {}'.format(_gpu_device))
from keras import backend as K
from keras.models import load_model
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

## Data Management Modules
import pickle

## User Defined Modules
import Simulator_Functions as SF
from RLAgents import DQNAgent
from NParser import NetworkParser
from COMServer import COMServerDispatch, COMServerReload
from TupleToList import toList
## Other Modules
import numpy as np
import random
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
## Network Model Parameters
# Seed written to the simulator's 'RandSeed' attribute by the main cell.
Random_Seed = 42
# Name of the VISSIM model handed to COMServerDispatch / COMServerReload.
model_name  = 'Single_Cross_Straight'
# Folder passed to the COM helpers and to SF.save_agents / SF.load_agents.
# The machine-specific default can be overridden without editing the notebook
# by setting the VISSIM_WORKING_DIRECTORY environment variable.
# NOTE(review): the default ends with a path separator; presumably the helpers
# concatenate file names onto it, so keep the trailing separator in overrides.
vissim_working_directory = os.environ.get(
    'VISSIM_WORKING_DIRECTORY',
    'C:\\Users\\acabrejasegea\\OneDrive - The Alan Turing Institute\\Desktop\\ATI\\0_TMF\\MLforFlowOptimisation\\Vissim\\',
)
# Agent class deployed by the main cell: 'DQN' or 'DDQN'.
program = 'DDQN'
# reward_type is not read by the visible cells; state_type is forwarded to the
# agents and to SF.run_simulation_episode.
reward_type = 'Queues'
state_type  = 'Queues'
## Use of additional files?
flag_read_additionally  = False
## Load trained model? When False the main cell populates the replay memory
## with a long random-action run instead of calling SF.load_agents.
load_trained = False
# Run the training episodes in the simulator's quick mode.
Quickmode = True
# Save the agents once after the last episode. This is a different flag from
# `SaveResultsAgent` (with an extra "s"), which gates the periodic save inside
# the episode loop.
SaveResultAgent = True
# Random demand: draw a new volume from `demands` for every episode.
Random_Demand = False

In [None]:
## Data handling flags

# Wipe previous simulation results for a fresh start; forwarded to
# COMServerDispatch / COMServerReload by the main cell.
Start_Fresh = True

# Restart the COM server. The visible main cell passes literal
# `reset_flag = True/False` keyword values and does not read this variable.
# (`cache_flag` is not set here; it is returned by COMServerDispatch.)
reset_flag = True

# Periodic "security save" of the agents inside the episode loop. This is NOT
# the end-of-training save, which is controlled by `SaveResultAgent`.
SaveResultsAgent = False

# Debug switch for actions; not read by the visible cells.
debug_action = False

In [None]:
## RL Hyperparameters

# --- Run lengths -----------------------------------------------------------
# Number of training episodes (one simulation each).
episodes = 30
# Simulator timesteps per episode (1 timestep = 0.1 sec).
simulation_length = 36000 * 1
# Length of the initial random-action run that fills the replay memory.
memory_population_length = 5 * simulation_length

# --- State / action dimensions ----------------------------------------------
state_size = 4
action_size = 5

# --- Replay memory and optimisation -----------------------------------------
memory_size = 5000   # replay memory capacity handed to each agent
batch_size = 128     # batch size passed to agent.replay
alpha = 0.0005       # learning rate
gamma = 0.9          # discount factor

# --- Exploration schedule -----------------------------------------------------
epsilon_start = 1
epsilon_end = 0.01
# Geometric decay factor: multiplying epsilon_start by it (episodes + 1) times
# gives epsilon_end.
epsilon_decay = np.power(epsilon_end / epsilon_start, 1.0 / (episodes + 1))

# --- Demand schedule ---------------------------------------------------------
# Candidate vehicle-input volumes used when Random_Demand is enabled.
demands = [100, 200, 400, 600, 800, 1000, 1200]

In [None]:
if __name__ == "__main__":
    # Training driver.
    #   1. Validate the requested agent class.
    #   2. Make sure a VISSIM COM session exists and seed the simulator.
    #   3. Crawl the network and deploy one agent per signal controller.
    #   4. Either load previously trained agents or populate the replay memory
    #      with one long random-action run.
    #   5. Run `episodes` training episodes, replaying a batch after each one.
    #   6. Optionally save the agents, then plot the reward history.
    # Reads the configuration variables defined in the cells above.

    # Fail fast, before launching VISSIM, if the agent class is unknown.
    # Raising is used instead of quit(), which would tear down the kernel.
    if program not in ("DQN", "DDQN"):
        raise ValueError("Incorrect Agent Class selected. Deployment could not be completed.")

    # Initialize storage
    reward_storage = []

    # Initialize simulation.
    # A COM session is needed whether or not trained agents are loaded, because
    # the seed, the network parser and the agents all use `Vissim` below.
    # Dispatch a new server when none is alive (fresh kernel, or a previous run
    # ended with `Vissim = None`). An existing session is re-dispatched and
    # reloaded only when the memory-population run is about to start.
    vissim_missing = globals().get('Vissim') is None
    startup_length = simulation_length if load_trained else memory_population_length
    if vissim_missing:
        Vissim, Simulation, Network, cache_flag = COMServerDispatch(
            model_name, vissim_working_directory, startup_length, Start_Fresh, reset_flag = True)
    elif not load_trained:
        Vissim = com.Dispatch("Vissim.Vissim")
        Simulation, Network = COMServerReload(
            Vissim, model_name, vissim_working_directory, memory_population_length, Start_Fresh, reset_flag = True)

    # Setting Random Seed
    # NOTE(review): only the simulator is seeded here; the NumPy generator used
    # below for random demand is not seeded in this cell.
    Vissim.Simulation.SetAttValue('RandSeed', Random_Seed)
    print ('Random seed set in simulator. Random Seed = '+str(Random_Seed))

    # Deploy Network Parser (crawl network)
    npa = NetworkParser(Vissim)
    print('NetworkParser has succesfully crawled the model network.')

    # Initialize agents: one DQNAgent per signal controller. "DQN" and "DDQN"
    # differ only in the DoubleDQN flag.
    Agents = [DQNAgent(state_size, action_size, ID, state_type, npa, memory_size, gamma, epsilon_start,
                       epsilon_end, epsilon_decay, alpha, Vissim, DoubleDQN = (program == "DDQN"))
              for ID in npa.signal_controllers_ids]
    agents_deployed = True
    print("Deployed {} agent(s) of the Class {}.".format(len(Agents), program))

    if load_trained:
        # Load previous trained data
        Agents = SF.load_agents(vissim_working_directory, model_name, Agents)
    else:
        # No previous data: have an initial longer random run to populate memory.
        # Quick mode is always used for this run, independent of `Quickmode`.
        print('Populating memory with Random Actions....')
        SF.Set_Quickmode(Vissim)
        SF.run_simulation_episode(Agents, Vissim, state_type, state_size, memory_population_length)

    # Iterations of the simulation
    for episode in range(episodes):
        # Reload through COMServerReload before every episode (including the
        # first), then re-crawl the network and hand the new parser to each agent.
        Simulation, Network = COMServerReload(
            Vissim, model_name, vissim_working_directory, simulation_length, Start_Fresh, reset_flag = False)
        npa = NetworkParser(Vissim)
        for index, agent in enumerate(Agents):
            agent.update_IDS(npa.signal_controllers_ids[index], npa)
            agent.episode_reward = []

        # Change demand for every episode
        if Random_Demand:
            for vehicle_input in range(1,5):
                # np.random.randint excludes its upper bound, so the bound is
                # len(demands) to make every demand level selectable. Indexing
                # the list keeps the value a plain Python int for the COM call.
                demand = demands[np.random.randint(0, len(demands))]
                Vissim.Net.VehicleInputs.ItemByKey(vehicle_input).SetAttValue('Volume(1)', demand)

        # Use max speed for Simulator
        if Quickmode:
            SF.Set_Quickmode(Vissim)

        # Run Episode
        SF.run_simulation_episode(Agents, Vissim, state_type, state_size, simulation_length)

        # Calculate episode average reward
        reward_storage, average_reward = SF.average_reward(reward_storage, Agents, episode, episodes)

        # Train agent with experience of episode (indicated batch size)
        for agent in Agents:
            agent.replay(batch_size, episode)

        # Security save for long trainings (every 200th episode, starting at 0).
        # The loop variable is `episode`; the previous `e` was undefined.
        if SaveResultsAgent and episode % 200 == 0:
            SF.save_agents(vissim_working_directory, model_name, Agents)

    # Saving agents memory, weights and optimizer
    if SaveResultAgent:
        SF.save_agents(vissim_working_directory, model_name, Agents)

    # Plotting training progress: per-episode reward plus a first-degree
    # least-squares trend line.
    x_series = range(1,len(reward_storage)+1)
    fit = np.polyfit(x_series,reward_storage,1)
    fit_fn = np.poly1d(fit)
    plt.plot(x_series,reward_storage, '-b', x_series, fit_fn(x_series), '--r')
    plt.xlabel('Episodes')
    plt.ylabel('Average ageng reward in episode')
    plt.title('Training evolution and trend')
    plt.gca().legend(('Episode Reward','Linear Trend'))
    plt.show()

    # Close Vissim: drop the COM reference so the next run dispatches a new server.
    Vissim = None