In [1]:
## VISSIM Modules
import win32com.client as com
import os

## RL Modules
import tensorflow as tf
# Report which GPU TensorFlow will use; an empty device name means that
# TensorFlow did not find a GPU.
if not tf.test.gpu_device_name():
    print("ERROR: GPU DEVICE NOT FOUND.")
else:
    print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))

from keras.models import load_model
    
## Data Management Modules
import pickle

## User Defined Modules
import Simulator_Functions as SF

from RLAgents import DQNAgent
from NParser import NetworkParser
from COMServer import COMServerDispatch, COMServerReload
from TupleToList import toList
## Other Modules
import numpy as np
import random
import matplotlib.pyplot as plt
%matplotlib inline

Default GPU Device: /device:GPU:0


Using TensorFlow backend.


In [2]:
## Network model parameters
# Seed handed to the simulator ('RandSeed' attribute) by the training cell.
Random_Seed = 42

# Vissim model to load and the folder that contains it.
model_name = 'Single_Cross_Straight'
vissim_working_directory = (
    'C:\\Users\\acabrejasegea\\OneDrive - The Alan Turing Institute\\Desktop'
    '\\ATI\\0_TMF\\MLforFlowOptimisation\\Vissim\\'
)

# Agent flavour; one of: DQN, DuelingDQN, DDQN, DuelingDDQN
program = 'DuelingDDQN'

# Reward and state definitions passed on to the agents / simulator helpers.
reward_type = 'Queues'
state_type = 'Queues'
PER = True

## Use of additional files?
flag_read_additionally = False

## Run modes
# Demo_Mode: replay the best saved agent instead of exploring.
# load_trained: resume from previously saved agents.
Demo_Mode = False
load_trained = False
Quickmode = True
SaveResultsAgent = True

# Randomise the vehicle-input demand at the start of every episode?
Random_Demand = False

In [3]:
## Data handling flags
# Start_Fresh: when a fresh start is needed, all results from previous
# simulations are deleted.
Start_Fresh = True
# reset_flag: flag for restarting the COM Server.
# (cache_flag is not set here; it is returned by COMServerDispatch.)
reset_flag = True
# debug_action: debug switch for agent actions.
debug_action = False

In [None]:
## RL hyperparameters
# Training schedule:
#   episodes               - number of simulations to run
#   partial_save_at        - save a checkpoint every "n" episodes
#   copy_weights_frequency - copy weights with frequency "f"
#   reset_frequency        - re-dispatch the COM server every N episodes
episodes = 800
partial_save_at = 10
copy_weights_frequency = 5
reset_frequency = 10

# Simulation horizon in timesteps (1 timestep = 0.1 sec). The random
# memory-population run is a whole multiple of one episode.
simulation_length = 36000 * 1
memory_population_length = 5 * simulation_length

## State-action dimensions
state_size = 4
action_size = 5

# Learning hyperparameters
batch_size = 128
memory_size = 10000
alpha = 1e-4
gamma = 0.95

# Exploration schedule: linear step from epsilon_start towards epsilon_end,
# scaled by 1.2.
epsilon_start = 1
epsilon_end = 0.001
epsilon_decay = 1.2 * (epsilon_end - epsilon_start) / (episodes - 1)
# Geometric alternative:
# epsilon_decay = np.power(epsilon_end/epsilon_start, 1./(episodes+1))

# Demand levels available to the random-demand schedule
demands = [100, 200, 400, 600, 800, 1000]
# Session identifier: encodes the episode count and the agent program.
Session_ID = ''.join(['Episodes', str(episodes), '_Program', program])

In [None]:
if __name__ == "__main__":
    # Training driver. In order: obtain a Vissim COM handle, seed the simulator,
    # crawl the network, build one agent per signal controller, then either run
    # a demo, load saved agents, or populate the replay memory with a random
    # run. Afterwards it loops over the training episodes, saves the agents and
    # plots reward and loss.

    # Initialize storage
    reward_storage = []
    best_agent_weights = []
    loss = []

    # Initialize simulation.
    # A COM handle is needed by everything below, so dispatch whenever none is
    # available, including when load_trained is set (that case previously left
    # Vissim undefined on a fresh kernel). An already-available handle is only
    # reloaded when a memory-population run is going to follow.
    if 'Vissim' not in globals() or Vissim is None:
        Vissim, Simulation, Network, cache_flag = COMServerDispatch(model_name, vissim_working_directory,
                                                                    memory_population_length, Start_Fresh,
                                                                    reset_flag = True, verbose = True)
    elif not load_trained:
        Vissim = com.Dispatch("Vissim.Vissim")
        Simulation, Network = COMServerReload(Vissim, model_name, vissim_working_directory,
                                              memory_population_length, Start_Fresh, reset_flag = True)

    # Setting Random Seed
    Vissim.Simulation.SetAttValue('RandSeed', Random_Seed)
    print ('Random seed set in simulator. Random Seed = '+str(Random_Seed))

    # Deploy Network Parser (crawl network)
    npa = NetworkParser(Vissim)
    print('NetworkParser has succesfully crawled the model network.')

    # Initialize agents.
    # Every supported program is the same DQNAgent with a different combination
    # of the DoubleDQN / Dueling switches.
    agent_variants = {
        "DQN":         {"DoubleDQN": False, "Dueling": False},
        "DuelingDQN":  {"DoubleDQN": False, "Dueling": True},
        "DDQN":        {"DoubleDQN": True,  "Dueling": False},
        "DuelingDDQN": {"DoubleDQN": True,  "Dueling": True},
    }
    if program not in agent_variants:
        # Raise instead of calling quit(): the cell must stop here, otherwise
        # the code below would run without any agents.
        raise ValueError("Incorrect Agent Class selected. Deployment could not be completed.")

    # In demo mode the agents start with an exploration rate of 0.
    initial_epsilon = 0 if Demo_Mode else epsilon_start
    Agents = [DQNAgent(state_size, action_size, ID, state_type, npa, memory_size, gamma, initial_epsilon,
                       epsilon_end, epsilon_decay, alpha, copy_weights_frequency, Vissim,
                       **agent_variants[program])
              for ID in npa.signal_controllers_ids]
    agents_deployed = True
    print("Deployed {} agent(s) of the Class {}.".format(len(Agents), program))

    if Demo_Mode:
        Agents, loss = SF.load_agents(vissim_working_directory, model_name, Agents, Session_ID, loss, best = True)
        SF.run_simulation_episode(Agents, Vissim, state_type, state_size, memory_population_length, Demo_Mode)
        # NOTE(review): the handle is dropped here but the training loop below
        # still runs and passes Vissim=None to COMServerReload. Confirm whether
        # demo mode is meant to stop after this run.
        Vissim = None
    # Load previous trained data
    elif load_trained:
        Agents, loss = SF.load_agents(vissim_working_directory, model_name, Agents, Session_ID, loss, best = False)
    # If previous data isn't to be loaded, have an initial longer random run to populate memory
    else:
        print('Populating memory with Random Actions....')
        SF.Set_Quickmode(Vissim)
        SF.run_simulation_episode(Agents, Vissim, state_type, state_size, memory_population_length, Demo_Mode)

    # Iterations of the simulation
    for episode in range(episodes):
        # Completely re-dispatch server every N iterations for performance
        if episode % reset_frequency == 0 and episode != 0:
            Vissim = None
            Vissim, Simulation, Network, cache_flag = COMServerDispatch(model_name, vissim_working_directory,
                                                                        simulation_length, Start_Fresh,
                                                                        reset_flag = True, verbose = False)
            print("Redispatched")
        else:
            # Otherwise reload the model on the existing server to reset its state
            Simulation, Network = COMServerReload(Vissim, model_name, vissim_working_directory,
                                                  simulation_length, Start_Fresh, reset_flag = True)

        # Re-crawl the (re)loaded network and hand the fresh parser to each agent
        npa = NetworkParser(Vissim)
        for index, agent in enumerate(Agents):
            agent.update_IDS(npa.signal_controllers_ids[index], npa)
            agent.episode_reward = []

        # Change demand for every episode
        if Random_Demand:
            for vehicle_input in range(1,5):
                # randint's upper bound is exclusive, so len(demands) is needed
                # for the last demand level to be reachable.
                random_demand = demands[np.random.randint(0, len(demands))]
                Vissim.Net.VehicleInputs.ItemByKey(vehicle_input).SetAttValue('Volume(1)', random_demand)

        # Run Episode at maximum speed
        SF.Set_Quickmode(Vissim)
        SF.run_simulation_episode(Agents, Vissim, state_type, state_size, simulation_length, Demo_Mode)

        # Calculate episode average reward
        reward_storage, average_reward = SF.average_reward(reward_storage, Agents, episode, episodes)
        best_agent_weights = SF.best_agent(reward_storage, average_reward, best_agent_weights, vissim_working_directory, model_name, Agents, Session_ID)

        # Train agent with experience of episode (indicated batch size)
        for agent in Agents:
            loss = agent.replay_batch(batch_size, episode, loss)

        # Security save for long trainings
        if SaveResultsAgent and (episode+1) % partial_save_at == 0:
            SF.save_agents(vissim_working_directory, model_name, Agents, Session_ID, reward_storage, loss)
            print('Saved Partial results at the end of episode {}.'.format(episode+1))

    # Saving agents memory, weights and optimizer
    if SaveResultsAgent:
        SF.save_agents(vissim_working_directory, model_name, Agents, Session_ID, reward_storage, loss)
        print("Model, architecture, weights, optimizer, memory and training results succesfully saved. Succesfully Terminated.")

    # Plotting training progress: per-episode reward with a first-degree polynomial fit
    plt.figure(figsize=(8,4.5))
    x_series = range(1,len(reward_storage)+1)
    fit = np.polyfit(x_series,reward_storage,1)
    fit_fn = np.poly1d(fit)
    plt.plot(x_series,reward_storage, '-b', x_series, fit_fn(x_series), '--r')
    plt.xlabel('Episodes')
    plt.ylabel('Average ageng reward in episode')
    plt.title('Training evolution and trend')
    plt.gca().legend(('Episode Reward','Linear Trend'))
    plt.show()

    # Plotting training loss
    plt.figure(figsize=(8,4.5))
    x_series = range(1,len(loss)+1)
    plt.plot(x_series,loss, '-b')
    plt.xlabel('Training Epoch')
    plt.ylabel('Loss')
    plt.title('Model Loss')
    # One-element tuple: a bare string would be read as a sequence of
    # single-character labels.
    plt.gca().legend(('Loss',))
    plt.show()

    # Close Vissim
    Vissim = None

Working Directory set to: C:\Users\acabrejasegea\OneDrive - The Alan Turing Institute\Desktop\ATI\0_TMF\MLforFlowOptimisation\Vissim\
Generating Cache...
Cache generated.

****************************
*   COM Server dispatched  *
****************************

Loading Model File: Single_Cross_Straight.inpx ...
Load process successful
Simulation length set to 18000.0 seconds.
Results from Previous Simulations: Deleted. Fresh Start Available.
Fetched and containerized Simulation Object
Fetched and containerized Network Object 

*******************************************************
*                                                     *
*                 SETUP COMPLETE                      *
*                                                     *
*******************************************************

Random seed set in simulator. Random Seed = 42
NetworkParser has succesfully crawled the model network.
Instructions for updating:
Colocations handled automatically by placer.
Deploying in

Episode: 2/800, Epsilon:1.0, Average reward: -242.33
Prediction for [500,0,500,0] is: [[-222.40158   -291.4055       7.9351196    2.5771484 -363.84937  ]]
New best agent found. Saved in C:\Users\acabrejasegea\OneDrive - The Alan Turing Institute\Desktop\ATI\0_TMF\MLforFlowOptimisation\Vissim\Single_Cross_Straight\Single_Cross_Straight_Episodes800_ProgramDuelingDDQN_BestAgent0_Memory.p
[array([ 6.51121372, 22.0533308 ,  0.25562089, 15.27820096]), array([ 8.02053245, 17.27123601, 11.7728664 ,  6.25122048]), array([142.48380933,   0.        , 148.70406767,   0.58450791]), array([206.6365808 ,  10.06487251, 208.68266506,  16.38328842]), array([2.34140056e+02, 1.38459040e-01, 2.39257858e+02, 0.00000000e+00]), array([161.40952956,   6.07675412, 132.7839447 ,   2.3278031 ]), array([175.25258581,   6.20859941,  20.17160574,   1.45302714]), array([23.39262964,  3.49502336, 19.18418723,  0.        ]), array([ 61.79778163,   0.        , 144.27070385,   0.        ]), array([200.76542384,   0.     

Episode: 3/800, Epsilon:1.0, Average reward: -158.03
Prediction for [500,0,500,0] is: [[-230.27002    -297.92197       0.76327515   -8.450043   -371.0028    ]]
New best agent found. Saved in C:\Users\acabrejasegea\OneDrive - The Alan Turing Institute\Desktop\ATI\0_TMF\MLforFlowOptimisation\Vissim\Single_Cross_Straight\Single_Cross_Straight_Episodes800_ProgramDuelingDDQN_BestAgent0_Memory.p
[array([1.88375156e+02, 9.47540620e-02, 1.88433487e+02, 1.90327713e+00]), array([36.84780339,  0.        , 38.24392645,  0.        ]), array([197.75147343,   9.60004569,  94.25523202,  20.98565232]), array([ 94.2908584 ,   0.        , 198.46925285,   1.55228481]), array([234.73316521,   0.        , 234.57898793,   0.        ]), array([ 6.01060729,  3.47909838, 20.47050477,  4.68411038]), array([99.89402195,  1.55101195, 27.46897179,  0.9383114 ]), array([212.39636293,   0.51169096, 175.43447654,   0.        ]), array([148.48083364,   0.8920642 , 219.96882803,   0.        ]), array([171.42391736,  11.

Episode: 4/800, Epsilon:1.0, Average reward: -218.98
Prediction for [500,0,500,0] is: [[-237.26466   -304.93768     -6.3681793  -19.473267  -378.46967  ]]
[array([242.75712396,   0.        , 199.28069186,   0.54958683]), array([1.86117858e+02, 1.22390004e+00, 5.82000865e+01, 1.71405802e-01]), array([150.51748088,   0.        , 121.29260542,   0.31239186]), array([203.2087042 ,   6.30742603, 202.57672907,  11.05528639]), array([133.26252889,  12.80080143, 154.4366742 ,  15.16425546]), array([125.39097398,  10.87973753,   4.10290823,   4.40761474]), array([7.39205181, 3.81094229, 1.28240367, 8.17644416]), array([148.81076832,  11.16648182, 118.48821532,  19.57389885]), array([38.61215512,  0.39039731, 30.18529475,  0.        ]), array([130.44334088,   0.62176888,  48.57468823,   0.        ]), array([36.13383034,  0.        , 52.97602292,  0.        ]), array([129.12281052,   6.84127102,  55.96736942,   6.51063203]), array([ 78.04262538,   0.        , 227.38839895,   2.13549873]), array([

Episode: 5/800, Epsilon:0.99, Average reward: -187.53
Prediction for [500,0,500,0] is: [[-245.32483  -312.95615   -14.017502  -28.720383 -384.36163 ]]
[array([ 65.65123267,   4.67168419, 124.71568795,   6.86265088]), array([88.77822391, 10.98565314,  8.51500158,  6.19978418]), array([27.33150506,  8.74996017, 21.05085512,  5.69560762]), array([156.13186081,   6.8640114 ,  25.95639729,  21.8615472 ]), array([ 4.40458889,  5.22673662,  4.48388753, 18.5699448 ]), array([125.14379009,  29.63715912, 119.20141542,   4.32288229]), array([ 94.2908584 ,   0.        , 198.46925285,   1.55228481]), array([ 5.8433486 ,  4.55815944, 72.79266301,  3.11189992]), array([15.91444958,  3.48323836, 10.25194812,  3.12140987]), array([56.3426856 ,  1.88223378, 15.05361152,  7.76344451]), array([181.51472606,   0.        ,  26.0869345 ,   0.        ]), array([111.75325155,   1.29266103,  71.87413448,   1.56950543]), array([234.33524725,   0.79883789, 230.13485001,   1.4003892 ]), array([89.80993566,  1.2343

Episode: 6/800, Epsilon:0.99, Average reward: -234.66
Prediction for [500,0,500,0] is: [[-254.31026  -320.3914    -22.437256  -37.122177 -390.38925 ]]
[array([244.9423217 ,   1.77663121, 183.42203832,   0.        ]), array([227.01402391,   7.24114715, 230.06057061,   4.09387873]), array([ 77.62485616,   0.        , 160.76985579,   0.        ]), array([36.37529347,  2.90607286, 27.96047882,  2.28249462]), array([103.58949699,   0.        , 163.48048873,   0.        ]), array([234.33524725,   0.79883789, 230.13485001,   1.4003892 ]), array([ 4.40458889,  5.22673662,  4.48388753, 18.5699448 ]), array([132.10112352,   0.        ,  47.01487118,   0.47975884]), array([ 79.37286549,  12.39169531, 111.42517534,  10.40861663]), array([163.8700989 ,   0.        ,  93.77134514,   0.        ]), array([2.47742573e+02, 0.00000000e+00, 2.45829471e+02, 1.70561589e-01]), array([236.89117068,   1.24369589, 235.96205956,   0.        ]), array([223.84541839,   1.37031033,  96.82387168,   0.        ]), arr

Episode: 7/800, Epsilon:0.99, Average reward: -232.05
Prediction for [500,0,500,0] is: [[-262.699    -327.65802   -31.637756  -44.98349  -397.60645 ]]
[array([208.60796291,   2.39931015, 133.67414138,   1.26921907]), array([177.53048936,  16.05158353, 175.81510434,   7.68414876]), array([151.80862589,   4.38594876, 146.97295633,   1.75662775]), array([244.9423217 ,   1.77663121, 183.42203832,   0.        ]), array([164.26986682,   2.58287062, 168.8807236 ,   8.38796539]), array([23.39262964,  3.49502336, 19.18418723,  0.        ]), array([135.493628  ,   8.35767399,  92.23433707,   0.79013596]), array([243.6757181 ,   0.        , 240.79689191,   0.        ]), array([88.77822391, 10.98565314,  8.51500158,  6.19978418]), array([151.40665554,   7.00010974, 155.82680779,  22.35834498]), array([98.39017609,  1.50114318, 68.36624179,  0.24185387]), array([241.31040042,   0.        , 244.98577044,   0.        ]), array([35.68227565,  0.4596905 , 21.40450784,  2.54401595]), array([76.06467734,

Episode: 8/800, Epsilon:0.99, Average reward: -326.3
Prediction for [500,0,500,0] is: [[-271.60645  -334.51874   -40.12523   -52.671707 -404.66046 ]]
[array([186.05428428,   2.11355578, 188.31211293,   0.76652742]), array([92.24566502,  0.        , 90.24898754,  0.        ]), array([122.66362176,  13.45319032, 149.39251987,   8.0779223 ]), array([31.6714132 , 13.33059801, 18.30335391, 16.03776842]), array([ 1.73721664, 11.26487934,  4.58402916, 16.61758054]), array([ 30.2920798 ,   3.75512394, 186.12895766,   3.48915952]), array([ 2.20187524,  9.03614614,  2.41888811, 11.09298653]), array([222.22322728,   0.        , 212.39937165,   0.        ]), array([ 3.1338251 ,  8.74254619,  1.37475445, 23.05294042]), array([217.87849807,   5.58097656, 188.10590102,   8.67725969]), array([163.42349242,   4.58112397, 165.10701285,   1.82645487]), array([177.53048936,  16.05158353, 175.81510434,   7.68414876]), array([43.77248698,  0.87148363, 53.53312194,  0.54543889]), array([25.50092255,  9.06193

Episode: 9/800, Epsilon:0.99, Average reward: -153.79
Prediction for [500,0,500,0] is: [[-280.33868  -342.0857    -48.26828   -60.271484 -410.9668  ]]
New best agent found. Saved in C:\Users\acabrejasegea\OneDrive - The Alan Turing Institute\Desktop\ATI\0_TMF\MLforFlowOptimisation\Vissim\Single_Cross_Straight\Single_Cross_Straight_Episodes800_ProgramDuelingDDQN_BestAgent0_Memory.p
[array([161.51580811,  11.16624028, 163.06828932,  19.57445038]), array([ 7.44330602, 11.59847342,  6.51831599,  4.81181679]), array([26.49746994,  5.19800482, 57.98777444,  8.69702482]), array([162.66179572,  11.60430081, 138.66339053,   4.81181679]), array([155.35383173,  14.63391031,  39.69716397,  16.91843812]), array([182.04176897,   7.43791558,  43.60434975,   0.43123015]), array([7.39205181, 3.81094229, 1.28240367, 8.17644416]), array([147.30634292,   0.        , 111.38327492,   0.99396649]), array([233.20208055,   0.        , 132.31015031,   0.23938599]), array([19.39388291,  3.62531456,  9.99272908, 

Episode: 10/800, Epsilon:0.99, Average reward: -303.2
Prediction for [500,0,500,0] is: [[-288.6933   -349.97232   -57.101532  -68.04068  -416.8734  ]]
[array([190.75404972,   6.30742603, 204.33127712,  11.05528639]), array([244.59142047,   0.69873562, 247.65408939,   0.61754262]), array([195.15733978,   0.45605268, 199.44113585,   4.34994494]), array([26.49746994,  5.19800482, 57.98777444,  8.69702482]), array([104.71660965,   0.90313725,  40.72293402,   0.        ]), array([24.8037214 , 29.69494337, 12.40100594,  4.32288229]), array([29.85382167,  0.06252576, 18.48509053,  2.23340587]), array([98.39017609,  1.50114318, 68.36624179,  0.24185387]), array([223.84541839,   1.37031033,  96.82387168,   0.        ]), array([234.07719221,   0.62176888, 206.5360285 ,   0.        ]), array([170.18730783,  11.7330662 , 137.30856696,   3.25985601]), array([203.2087042 ,   6.30742603, 202.57672907,  11.05528639]), array([69.77484315,  0.79097287, 82.37515153,  0.52971365]), array([163.97744896,   

Dumping Training Results into pickle file.
Dumping Loss Results into pickle file.
Saved Partial results at the end of episode 10.
Sever Redispatched.
Redispatched
Episode: 11/800, Epsilon:0.98, Average reward: -239.8
Prediction for [500,0,500,0] is: [[-296.47327 -357.17642  -67.08606  -76.42438 -423.2653 ]]
[array([139.12427987,  10.35093398, 151.47587749,  11.01835724]), array([233.13620947,   0.        , 218.76279456,   0.77837142]), array([71.22347266,  0.        , 74.99712141,  0.        ]), array([ 1.27314883, 17.96557616, 56.74720893,  8.43728987]), array([139.53188998,   1.14308555, 123.43509497,  14.00658659]), array([150.28815545,   1.23631608, 154.73660025,   3.33848039]), array([150.51748088,   0.        , 121.29260542,   0.31239186]), array([165.20793227,   3.44124309, 164.79348924,  17.65670266]), array([79.11436388,  0.        , 25.80856054,  0.        ]), array([197.45660985,  10.16686899, 173.48345647,   0.74142055]), array([167.35346152,   5.41655342, 171.03474185,  12

Episode: 12/800, Epsilon:0.98, Average reward: -220.02
Prediction for [500,0,500,0] is: [[-304.58224 -363.74133  -76.31673  -85.50235 -429.2934 ]]
[array([81.98893487,  8.57815716, 11.99274264,  0.85440769]), array([192.18592618,   2.44234718, 181.23419318,   2.05498861]), array([153.74484426,   0.        ,  99.48475976,   0.3706096 ]), array([ 71.86081629,   0.7366836 , 179.59600559,   0.8167163 ]), array([204.37880397,  18.94999119, 197.56357945,  12.38410788]), array([11.17906349,  0.        , 11.18334803, 11.37571452]), array([147.83814501,   5.87428774,  43.71175102,   1.20263324]), array([24.8037214 , 29.69494337, 12.40100594,  4.32288229]), array([ 2.20187524,  9.03614614,  2.41888811, 11.09298653]), array([139.53188998,   1.14308555, 123.43509497,  14.00658659]), array([89.80993566,  1.23437294, 48.42109635,  1.08119384]), array([247.89481332,   0.936134  , 249.38019147,   0.80466485]), array([158.68371268,  19.30426342, 163.89789505,   0.61177863]), array([56.62804842,  0.    

Episode: 13/800, Epsilon:0.98, Average reward: -246.04
Prediction for [500,0,500,0] is: [[-312.02878 -370.74078  -85.82146  -94.44357 -436.00888]]
[array([ 8.36660925,  0.98789063,  3.21974715, 13.29078335]), array([174.72712593,   0.76720707, 186.5328893 ,   3.37137046]), array([146.34233979,   0.        , 171.44749452,   6.6156325 ]), array([92.24566502,  0.        , 90.24898754,  0.        ]), array([38.9261052 ,  1.11161443, 78.5870937 ,  0.        ]), array([234.83081802,   0.        , 234.84288533,   1.68598756]), array([151.40665554,   7.00010974, 155.82680779,  22.35834498]), array([157.21567921,   0.        ,  32.6616577 ,   0.47975884]), array([22.21099304,  0.63401521, 30.25770303,  3.09870459]), array([108.57922681,  14.5124262 ,  34.80834881,   7.09405094]), array([11.51618103,  2.23226557, 10.31240044, 20.19691916]), array([150.66549118,   0.6497967 , 189.73852728,   0.4481072 ]), array([150.51748088,   0.        , 121.29260542,   0.31239186]), array([46.45103746,  4.0849

Episode: 14/800, Epsilon:0.98, Average reward: -205.01
Prediction for [500,0,500,0] is: [[-318.0204  -377.91782  -96.07605 -104.41499 -442.428  ]]
[array([158.02890458,   3.2085289 , 155.65588232,  10.32722713]), array([4.01817522e+01, 6.79536508e-02, 9.38954727e+01, 1.71040932e+00]), array([238.60605084,   1.17969485, 239.51277575,   2.28217466]), array([162.29725568,  11.16648182, 161.85652966,  19.57389885]), array([163.33221558,   0.        , 163.43630983,   8.30188193]), array([26.55470284,  4.16308196, 61.84181753, 10.82341463]), array([230.91353542,   1.11161443, 231.65154227,   0.        ]), array([179.96002018,   0.        , 115.53504951,  11.64452565]), array([236.7894881 ,   1.38296959, 239.51524302,   0.        ]), array([17.03607107,  0.57415953, 18.08797682,  1.99070798]), array([25.26827397, 10.05975318, 15.5216719 ,  4.67340151]), array([235.50547865,   0.        , 236.03822983,   1.0275312 ]), array([31.51210288, 17.84458944, 41.64278104, 10.88890462]), array([136.3907

Episode: 15/800, Epsilon:0.98, Average reward: -116.0
Prediction for [500,0,500,0] is: [[-323.74054  -385.81946  -106.85812  -114.456055 -448.36502 ]]
New best agent found. Saved in C:\Users\acabrejasegea\OneDrive - The Alan Turing Institute\Desktop\ATI\0_TMF\MLforFlowOptimisation\Vissim\Single_Cross_Straight\Single_Cross_Straight_Episodes800_ProgramDuelingDDQN_BestAgent0_Memory.p
[array([1.47336098e+02, 7.16285635e-01, 5.13577489e+01, 7.01873670e-02]), array([89.80993566,  1.23437294, 48.42109635,  1.08119384]), array([162.67076259,   1.2980321 , 150.13516514,   3.3966744 ]), array([36.13383034,  0.        , 52.97602292,  0.        ]), array([182.11201412,   3.47909838, 179.76230961,   4.68411038]), array([ 1.51197999, 19.73644168,  5.82660452,  0.6837718 ]), array([1.74654946, 3.19909821, 5.36621994, 0.96970119]), array([189.1291344 ,   0.        , 202.19897457,   0.        ]), array([ 3.439313  ,  0.17417766,  1.97945376, 20.35489415]), array([146.34233979,   0.        , 171.4474945

Episode: 16/800, Epsilon:0.98, Average reward: -149.75
Prediction for [500,0,500,0] is: [[-331.1714   -393.9541   -116.370224 -124.64157  -453.53888 ]]
[array([17.03607107,  0.57415953, 18.08797682,  1.99070798]), array([98.39017609,  1.50114318, 68.36624179,  0.24185387]), array([25.26827397, 10.05975318, 15.5216719 ,  4.67340151]), array([ 1.51197999, 19.73644168,  5.82660452,  0.6837718 ]), array([172.2133328 ,  13.68707683, 129.77115795,   2.41879198]), array([18.76688644,  2.23226557, 38.5996254 , 20.19691916]), array([35.04730096,  0.        , 39.14413317,  0.89592375]), array([0.62961474, 3.0619443 , 0.12677328, 0.90904725]), array([125.09498929,   4.74683632,  37.9708251 ,   3.75186396]), array([200.15574521,   0.        , 158.26041902,   0.95252597]), array([142.68352702,   3.6785731 ,  31.270495  ,   3.57768783]), array([2.31147756e+02, 2.07037796e-01, 2.37795537e+02, 1.73189299e+00]), array([177.15666305,   1.20958037, 173.37499697,  14.62596678]), array([231.76838397,   4.2

Episode: 17/800, Epsilon:0.98, Average reward: -269.91
Prediction for [500,0,500,0] is: [[-339.7193  -400.95496 -125.49107 -134.76982 -458.9167 ]]
[array([25.42788482,  4.10477125, 54.32396104, 10.54731905]), array([ 8.36660925,  0.98789063,  3.21974715, 13.29078335]), array([195.15733978,   0.45605268, 199.44113585,   4.34994494]), array([108.60602079,   0.2687408 ,  55.15632843,   0.        ]), array([ 4.34608851, 19.94077354,  7.80669614,  6.65513261]), array([99.54957879,  0.        , 41.75738644,  0.        ]), array([64.89905411,  0.        , 98.64971203,  0.87796873]), array([227.65439373,   0.        , 238.90220114,   0.31239186]), array([188.46250193,  10.16686899,  81.85153214,   0.74142055]), array([234.36988733,   0.        , 236.97586135,   0.        ]), array([223.47667977,   0.        , 205.97586795,   0.        ]), array([187.3813312 ,   0.19552471, 188.08044875,   2.94346631]), array([1.47336098e+02, 7.16285635e-01, 5.13577489e+01, 7.01873670e-02]), array([122.87651531

Episode: 18/800, Epsilon:0.97, Average reward: -153.55
Prediction for [500,0,500,0] is: [[-348.01852 -407.53763 -135.23222 -144.24869 -464.64343]]
[array([197.4193528 ,   2.11355578, 109.39148292,   0.76652742]), array([223.84541839,   1.37031033,  96.82387168,   0.        ]), array([13.67510116, 29.63715912,  5.34649374,  4.32288229]), array([68.40142644,  3.03541812, 80.99314598,  4.44146823]), array([ 3.73424627, 15.03409803,  9.29774676,  7.60700451]), array([ 6.07922687,  1.24221827, 10.39609309,  1.80392664]), array([19.80573518,  2.13045824, 54.98689264,  4.95704355]), array([89.80993566,  1.23437294, 48.42109635,  1.08119384]), array([ 5.49263487,  1.50114318, 48.21019346,  0.24185387]), array([124.28575555,   7.6820914 , 134.03829441,   9.98895529]), array([16.98430387,  2.62937183, 78.81897946,  1.1281837 ]), array([17.8767219 , 11.16648182, 10.66971808, 19.57389885]), array([206.6365808 ,  10.06487251, 208.68266506,  16.38328842]), array([36.52368711, 14.29212019, 31.1240283

Episode: 19/800, Epsilon:0.97, Average reward: -291.21
Prediction for [500,0,500,0] is: [[-355.8456  -414.76907 -144.28564 -153.82663 -470.29572]]
[array([164.71234005,   4.67091151, 178.60272672,   4.14536955]), array([31.71680039,  6.20873852, 20.95946361,  1.45302714]), array([117.7771914 ,   7.83663465, 171.24491992,   6.75501571]), array([17.03607107,  0.57415953, 18.08797682,  1.99070798]), array([28.21901099,  4.14204471, 48.17778055, 12.34052383]), array([151.80862589,   4.38594876, 146.97295633,   1.75662775]), array([70.03454101,  0.        , 61.71069558,  0.42170357]), array([1.88375156e+02, 9.47540620e-02, 1.88433487e+02, 1.90327713e+00]), array([28.83073302,  0.        , 69.67451668, 11.37571452]), array([46.48483161,  6.07368929, 38.04174559, 10.65972404]), array([33.51445026,  0.        , 59.23907793,  4.12468308]), array([17.03607107,  0.57415953, 18.08797682,  1.99070798]), array([34.92466147,  6.74253056, 34.36048607,  0.        ]), array([192.18592618,   2.44234718, 

Episode: 20/800, Epsilon:0.97, Average reward: -217.68
Prediction for [500,0,500,0] is: [[-364.0261  -421.66284 -153.0841  -164.13214 -475.85095]]
[array([7.62730133e+01, 0.00000000e+00, 1.77557188e+02, 8.64667942e-02]), array([132.56882244,   1.07659073,  99.96786399,  13.37959636]), array([140.37942018,   0.        , 114.58103326,   8.30188193]), array([39.80275809,  0.5922528 , 20.43519326,  1.30628624]), array([125.09498929,   4.74683632,  37.9708251 ,   3.75186396]), array([139.93216964,   0.87148363, 125.91344046,   0.50896342]), array([46.51251703,  0.64801144, 72.99609511,  0.        ]), array([116.09331598,  11.33659918,  35.88660486,   4.83014828]), array([160.85543251,   8.47055818, 165.40890948,   5.38344051]), array([139.53188998,   1.14308555, 123.43509497,  14.00658659]), array([ 94.47917195,   0.        , 128.82903196,   0.        ]), array([199.13614577,   7.98787984, 206.16472498,   5.48133689]), array([92.83026561,  0.        , 65.16744932,  0.84855526]), array([158.

Sever Redispatched.
Redispatched
Episode: 21/800, Epsilon:0.97, Average reward: -167.15
Prediction for [500,0,500,0] is: [[-371.55157 -428.62384 -160.92932 -175.06662 -480.9803 ]]
[array([19.19081193,  1.14642244,  9.73004373,  2.18043966]), array([231.76838397,   4.25988615, 236.39220763,   0.95849824]), array([229.7456706 ,   0.        , 211.17057155,   0.        ]), array([139.93216964,   0.87148363, 125.91344046,   0.50896342]), array([142.37596431,   0.        , 115.83940968,   6.6156325 ]), array([190.46885476,   3.29904016, 189.73959837,  16.97352656]), array([197.15635882,   1.1455401 , 106.99793689,   0.        ]), array([230.24128119,   2.40061068, 217.26401545,   8.46348416]), array([62.25691465,  7.82851609, 14.68634033,  4.5740533 ]), array([124.28588686,   0.        ,  99.73414372,   1.6864975 ]), array([ 2.94627432, 17.83775009, 20.96801423, 10.88144392]), array([187.66408035,   3.8853251 , 185.14003341,   3.88934816]), array([12.79524083,  3.53800747,  9.55056017,  2.99

Episode: 22/800, Epsilon:0.97, Average reward: -249.54
Prediction for [500,0,500,0] is: [[-379.0917  -437.16452 -168.75966 -184.83815 -486.40894]]
[array([ 44.47923975,   0.        , 101.55713268,   1.5517727 ]), array([16.98430387,  2.62937183, 78.81897946,  1.1281837 ]), array([ 85.53865184,   0.54092422, 104.63088391,   0.55917621]), array([21.47767626,  2.15901862, 22.38189848,  3.09215601]), array([108.57922681,  14.5124262 ,  34.80834881,   7.09405094]), array([173.77706644,   1.56167107, 167.89819435,   0.30250782]), array([25.35894427,  4.58112397, 41.74814515,  1.82645487]), array([ 80.97235523,   0.93743776, 109.66065402,   0.        ]), array([162.43531024,   0.98789063, 171.50674849,  13.29092799]), array([157.68733342,  16.38444032, 154.09766289,   7.89772391]), array([163.09725271,   3.72488965, 159.92510779,  13.89877946]), array([47.15459488,  3.94593718,  9.27979532,  8.37308902]), array([162.79916971,  11.7330662 , 143.93647983,   3.25985601]), array([181.51472606,   

Episode: 23/800, Epsilon:0.97, Average reward: -248.91
Prediction for [500,0,500,0] is: [[-386.71976 -445.11502 -177.40472 -194.91957 -492.16403]]
[array([161.73982714,   6.59743579, 168.28260723,   2.45626305]), array([161.66113051,   1.35974812, 177.27983584,   3.45489214]), array([161.40952956,   6.07675412, 132.7839447 ,   2.3278031 ]), array([59.2667273 ,  9.39444772, 84.35069661,  4.3949434 ]), array([217.87849807,   5.58097656, 188.10590102,   8.67725969]), array([183.93151189,   2.06827489, 178.23922715,   2.74225422]), array([25.98249443,  9.39444772, 29.18027705,  4.39501803]), array([245.9586061 ,   0.25803993, 243.09275713,   0.33332067]), array([28.36217766, 10.16686899, 23.91522578,  0.74142055]), array([248.46662693,   0.        , 247.52631641,   0.        ]), array([ 79.37286549,  12.39169531, 111.42517534,  10.40861663]), array([3.6487479 , 1.18296218, 6.19139818, 6.90271217]), array([ 87.48904446,   0.        , 117.63655527,   0.        ]), array([103.16129594,   0.  

Episode: 24/800, Epsilon:0.97, Average reward: -296.37
Prediction for [500,0,500,0] is: [[-394.5612  -453.89908 -186.37753 -204.71793 -498.43973]]
[array([89.80993566,  1.23437294, 48.42109635,  1.08119384]), array([162.29026715,   5.95119952, 148.48864145,   7.19381612]), array([40.96407526,  0.        , 47.37534768,  0.57105711]), array([58.65146679,  1.44685433, 39.14064749,  0.91661908]), array([142.37596431,   0.        , 115.83940968,   6.6156325 ]), array([79.11443501,  1.79194665, 25.80856054,  0.        ]), array([ 2.89629123, 12.54009534,  6.37260839, 14.72749899]), array([197.51176796,   2.60117225, 197.81323525,   2.15483475]), array([124.28575555,   7.6820914 , 134.03829441,   9.98895529]), array([36.13383034,  0.        , 52.97602292,  0.        ]), array([194.03635669,   4.56271663, 188.43431272,   7.85407294]), array([16.69507339,  8.22805744, 10.86889311,  9.02814157]), array([139.93216964,   0.87148363, 125.91344046,   0.50896342]), array([182.25337611,   6.65429412, 

Episode: 25/800, Epsilon:0.96, Average reward: -268.3
Prediction for [500,0,500,0] is: [[-402.8002  -462.1353  -194.34195 -214.58105 -505.464  ]]
[array([147.7098035 ,   6.59785643,  37.78889242,   2.45616796]), array([1.47336098e+02, 7.16285635e-01, 5.13577489e+01, 7.01873670e-02]), array([89.80993566,  1.23437294, 48.42109635,  1.08119384]), array([ 5.44934986,  4.1531667 , 57.04785597,  7.55436753]), array([59.2439501 ,  0.51694812, 33.49528259,  4.53343443]), array([82.47249456,  1.35816795, 34.09834842,  0.73727965]), array([127.08057554,   8.3344274 ,  79.32104094,  16.80393274]), array([182.25337611,   6.65429412, 160.26732464,   5.36717165]), array([101.1789317 ,   1.56167107, 152.92612221,   0.30250782]), array([156.21975799,  17.17797742, 150.52147835,   0.        ]), array([245.24711934,   0.        , 247.14723873,   0.31239186]), array([116.09331598,  11.33659918,  35.88660486,   4.83014828]), array([29.98577894,  2.43419661, 71.28936854,  1.05835659]), array([ 3.11694392, 

Episode: 26/800, Epsilon:0.96, Average reward: -276.31
Prediction for [500,0,500,0] is: [[-410.86893 -469.9418  -202.12607 -223.92938 -512.6565 ]]
[array([8.53504326, 6.34537511, 3.3805853 , 7.83884824]), array([223.47667977,   0.        , 205.97586795,   0.        ]), array([226.21977936,   0.        , 228.16835892,   0.        ]), array([ 87.67521166,   0.        , 135.84751545,   8.02588423]), array([125.09498929,   4.74683632,  37.9708251 ,   3.75186396]), array([217.13411832,   1.23115893, 109.65628049,   0.        ]), array([169.03285897,   6.08135229, 113.30528625,  10.65947274]), array([165.36596069,   5.40932862, 168.49661416,   2.8721687 ]), array([229.17362758,   0.        , 237.72288417,   0.83613795]), array([17.03607107,  0.57415953, 18.08797682,  1.99070798]), array([178.01244589,   0.        , 184.52295107,   0.        ]), array([204.37880397,  18.94999119, 197.56357945,  12.38410788]), array([231.15413734,   0.        , 232.28204242,   0.        ]), array([59.2667273 ,

Episode: 27/800, Epsilon:0.96, Average reward: -136.43
Prediction for [500,0,500,0] is: [[-419.7363  -477.06042 -210.11961 -234.1499  -519.11975]]
[array([209.40575792,   0.80181916, 103.7154549 ,   0.32363198]), array([ 27.77659901,   2.85323143, 152.58599353,   2.64667964]), array([203.78335595,   3.38773213, 236.77153569,   0.        ]), array([80.23828734,  9.76998549, 30.01912551,  0.6752392 ]), array([188.92222861,   1.33607199, 196.83357316,   1.68586136]), array([17.73508546,  5.40932862,  7.32499027,  2.8721687 ]), array([24.9744608 , 10.66526163, 25.36304192, 28.31991828]), array([178.80924126,   6.17170871, 167.17345442,   8.67744274]), array([205.03955128,   6.68067701, 161.54160274,   1.33688371]), array([36.13383034,  0.        , 52.97602292,  0.        ]), array([ 3.11694392, 12.80080143,  6.88950692, 15.16425546]), array([6.31711089, 6.47905928, 3.01883229, 8.05385894]), array([116.30624843,   1.20752895, 190.15470509,   0.        ]), array([ 9.10861854,  2.5941012 ,  4

Episode: 28/800, Epsilon:0.96, Average reward: -196.68
Prediction for [500,0,500,0] is: [[-429.30576 -484.2303  -219.08455 -245.05623 -525.6539 ]]
[array([208.72617476,   0.        , 190.49617713,   0.        ]), array([108.57922681,  14.5124262 ,  34.80834881,   7.09405094]), array([ 51.55375962,   0.        , 101.42047496,   1.58859118]), array([223.99444992,   0.62176888, 129.8329622 ,   0.        ]), array([53.49186372,  0.        , 58.71813446,  0.09641957]), array([161.50883771,  12.46687329, 158.8470422 ,  12.67545616]), array([187.96759218,   2.18498528, 190.92831607,   0.        ]), array([17.03607107,  0.57415953, 18.08797682,  1.99070798]), array([218.83857534,   0.        , 215.20484846,   0.        ]), array([75.29085662,  3.62531456, 39.09285939,  3.64380357]), array([191.94433814,   0.        , 185.07905729,   0.99137261]), array([47.15459488,  3.94593718,  9.27979532,  8.37308902]), array([201.45280079,   6.07368929, 196.80936498,  10.65972404]), array([226.69247557,   

Episode: 29/800, Epsilon:0.96, Average reward: -160.36
Prediction for [500,0,500,0] is: [[-440.30508 -491.465   -227.88162 -255.81378 -532.60645]]
[array([89.80993566,  1.23437294, 48.42109635,  1.08119384]), array([236.89117068,   1.24369589, 235.96205956,   0.        ]), array([19.19081193,  1.14642244,  9.73004373,  2.18043966]), array([80.32107125, 16.68595577, 93.80187371, 12.66745954]), array([241.31040042,   0.        , 244.98577044,   0.        ]), array([203.78335595,   3.38773213, 236.77153569,   0.        ]), array([3.92303597, 1.59670796, 1.85826445, 4.03600224]), array([26.55470284,  4.16308196, 61.84181753, 10.82341463]), array([191.16236275,   2.13036462, 177.7092904 ,   3.06313575]), array([208.72617476,   0.        , 190.49617713,   0.        ]), array([158.84975715,  19.30426342, 166.23306   ,   0.61177863]), array([52.44448773,  0.27309931, 36.15072371,  7.40637879]), array([2.46251572e+02, 1.37465498e-01, 2.46054859e+02, 0.00000000e+00]), array([4.91651714e+01, 7.47

Episode: 30/800, Epsilon:0.96, Average reward: -188.48
Prediction for [500,0,500,0] is: [[-451.81433 -499.17386 -236.46997 -267.32187 -538.666  ]]
[array([ 3.58853016,  1.70302667,  1.86156366, 17.7658412 ]), array([25.26827397, 10.05975318, 15.5216719 ,  4.67340151]), array([178.80924126,   6.17170871, 167.17345442,   8.67744274]), array([2.46917475e+02, 2.15918653e-01, 2.40201824e+02, 0.00000000e+00]), array([69.77484315,  0.79097287, 82.37515153,  0.52971365]), array([89.80993566,  1.23437294, 48.42109635,  1.08119384]), array([13.58377775,  6.07985379, 93.5901908 ,  3.09571904]), array([198.15571941,   2.53145553, 203.33621324,   5.02507183]), array([177.53048936,  16.05158353, 175.81510434,   7.68414876]), array([194.99778628,   3.8853251 , 186.50569563,   3.88934816]), array([62.33744761,  2.16205644, 15.05623906,  5.2717229 ]), array([165.36596069,   5.40932862, 168.49661416,   2.8721687 ]), array([130.34744343,   0.25359061,  45.6272956 ,   1.27367327]), array([46.59171233, 13.

Sever Redispatched.
Redispatched
Episode: 31/800, Epsilon:0.95, Average reward: -254.47
Prediction for [500,0,500,0] is: [[-461.66534 -507.23578 -244.75864 -277.88385 -544.61957]]
[array([246.3816307 ,   0.        , 247.80223559,   0.        ]), array([183.88215866,  13.67460946,  92.73328736,   2.41843715]), array([167.90599964,  19.94077354, 122.49940367,   2.06504746]), array([61.63367738,  2.10034041, 30.19317304,  5.13570961]), array([216.47018189,   1.0050236 , 228.31390212,   0.60935115]), array([39.80275809,  0.5922528 , 20.43519326,  1.30628624]), array([197.4193528 ,   2.11355578, 109.39148292,   0.76652742]), array([198.80856254,   1.55569206,  84.35993996,   0.        ]), array([147.14722004,   0.53603879,  46.78291493,   0.        ]), array([ 1.51197999, 19.73644168,  5.82660452,  0.6837718 ]), array([89.80993566,  1.23437294, 48.42109635,  1.08119384]), array([ 6.98661775, 14.53745806, 13.09774911,  0.        ]), array([133.83504308,   6.37948677, 182.36138571,   0.      

Episode: 32/800, Epsilon:0.95, Average reward: -190.93
Prediction for [500,0,500,0] is: [[-470.958   -515.7163  -253.36047 -288.91864 -550.51697]]
[array([55.74779071, 19.94077354, 15.96653664,  2.06504746]), array([228.08601921,   0.88175645, 226.49442573,   3.2922475 ]), array([24.9744608 , 10.66526163, 25.36304192, 28.31991828]), array([202.29418941,   4.14204471, 171.18429003,  12.34052383]), array([148.48083364,   0.8920642 , 219.96882803,   0.        ]), array([155.43749847,   2.87161574, 144.7427331 ,   7.79450478]), array([28.55331011,  2.06355491, 54.66207321,  0.        ]), array([152.64198746,  12.54009534, 156.98066494,  14.72749899]), array([166.48563569,   3.33625212, 171.18322194,   1.03035513]), array([132.10112352,   0.        ,  47.01487118,   0.47975884]), array([169.77187239,   7.33604791, 126.68956328,   7.75064625]), array([210.1281585 ,   0.        , 207.07486372,   1.88458625]), array([ 1.89999062, 14.01429056,  2.30020931, 15.85319424]), array([11.49551264,  6.

Episode: 33/800, Epsilon:0.95, Average reward: -244.11
Prediction for [500,0,500,0] is: [[-480.3235  -524.38074 -261.31528 -299.87964 -557.18146]]
[array([10.15537272,  6.35569576,  8.58159962,  4.80307828]), array([239.05290739,   0.        , 235.45820234,   0.        ]), array([107.9503405 ,   2.34977794, 148.55301426,   2.16344722]), array([233.30091879,   1.91150684, 236.04910569,   1.14556521]), array([150.0179471 ,  25.31689043,  57.41554292,   2.79304227]), array([68.83095151,  6.3367992 , 22.66065303,  2.39193083]), array([163.33221558,   0.        , 163.43630983,   8.30188193]), array([216.18634585,   0.        , 240.59560211,   0.46480478]), array([131.2929676 ,   0.        ,  96.80388743,   1.06920638]), array([124.93232327,   0.        , 177.75333629,   1.82780225]), array([ 1.2970537 ,  4.22057353, 14.75395552, 14.6969582 ]), array([ 77.62485616,   0.        , 160.76985579,   0.        ]), array([ 26.43129149,   0.        , 164.07262616,   3.80061557]), array([182.11201412

In [None]:
# Demo cell: connect to the simulator, build one agent per signal controller, load each
# agent's saved network and replay memory from disk, and run a single demonstration episode.
Demo_Mode = True
if __name__ == "__main__":
    # Initialize storage
    reward_storage = []

    # The simulator is only (re)connected here when load_trained is False.
    # NOTE(review): with load_trained = True this cell relies on a Vissim handle left behind
    # by an earlier cell - confirm that is intended.
    # NOTE(review): Start_Fresh is forwarded unchanged; verify it does not remove the saved
    # agent files that are loaded further down.
    if not load_trained:
        # Initialize simulation: dispatch a new COM server when no handle exists yet,
        # otherwise reattach to the running one and reload the model.
        if 'Vissim' not in globals() or Vissim is None:
            Vissim, Simulation, Network, cache_flag = COMServerDispatch(
                model_name, vissim_working_directory,
                memory_population_length, Start_Fresh, reset_flag = True)
        else:
            Vissim = com.Dispatch("Vissim.Vissim")
            Simulation, Network = COMServerReload(
                Vissim, model_name, vissim_working_directory,
                memory_population_length, Start_Fresh, reset_flag = True)

    # Setting Random Seed
    Vissim.Simulation.SetAttValue('RandSeed', Random_Seed)
    print ('Random seed set in simulator. Random Seed = '+str(Random_Seed))

    # Deploy Network Parser (crawl network)
    npa = NetworkParser(Vissim)
    print('NetworkParser has succesfully crawled the model network.')

    # Initialize agents. In demo mode the starting epsilon is forced to 0.
    # NOTE(review): the DQN/DDQN calls do not pass copy_weights_frequency while the Dueling
    # call does - check both against the DQNAgent signature.
    demo_epsilon_start = 0 if Demo_Mode else epsilon_start
    if program == "DQN":
        Agents = [DQNAgent(state_size, action_size, ID, state_type, npa, memory_size, gamma, demo_epsilon_start,
                           epsilon_end, epsilon_decay, alpha, Vissim, DoubleDQN = False, Dueling = False)
                  for ID in npa.signal_controllers_ids]
    elif program == "DDQN":
        Agents = [DQNAgent(state_size, action_size, ID, state_type, npa, memory_size, gamma, demo_epsilon_start,
                           epsilon_end, epsilon_decay, alpha, Vissim, DoubleDQN = True, Dueling = False)
                  for ID in npa.signal_controllers_ids]
    elif program in ("DuelingDQN", "DuelingDDQN"):
        # "DuelingDDQN" is one of the documented program options and must deploy here.
        # "DuelingDQN" keeps the DoubleDQN = True setting it had before.
        # NOTE(review): confirm whether plain "DuelingDQN" should use DoubleDQN = False.
        Agents = [DQNAgent(state_size, action_size, ID, state_type, npa, memory_size, gamma, demo_epsilon_start,
                           epsilon_end, epsilon_decay, alpha, copy_weights_frequency, Vissim,
                           DoubleDQN = True, Dueling = True)
                  for ID in npa.signal_controllers_ids]
    else:
        # Raise instead of calling quit(), so the cell stops without shutting down the kernel.
        raise ValueError("Incorrect Agent Class selected. Deployment could not be completed.")

    agents_deployed = True
    print("Deployed {} agent(s) of the Class {}.".format(len(Agents), program))

    if Demo_Mode:
        # Session_ID is not set in this cell; it has to exist from an earlier cell.
        for index, agent in enumerate(Agents):
            agent_file_stem = model_name+'_'+ Session_ID + '_Agent'+str(index)
            Filename = os.path.join(vissim_working_directory, model_name, agent_file_stem+'.h5')
            agent.model = load_model(Filename)
            Memory_Filename = os.path.join(vissim_working_directory, model_name, agent_file_stem+'_Memory'+'.p')
            # pickle can execute arbitrary code while loading: only open memory files that
            # were written by your own training runs.
            with open(Memory_Filename, 'rb') as memory_file:
                agent.memory = pickle.load(memory_file)
        print('Items successfully loaded.')
        SF.run_simulation_episode(Agents, Vissim, state_type, state_size, memory_population_length, Demo_Mode = True)
        # Drop the handle so that the next run of this cell dispatches a new COM server.
        Vissim = None

In [None]:
 # Plotting training progress
# Draws the per-episode rewards in reward_storage against the episode number and, when at
# least two episodes are available, overlays a degree-1 least-squares trend line.
# The statements sit at top level because this cell has no enclosing block.
if len(reward_storage) == 0:
    # np.polyfit cannot fit an empty series, so there is nothing to draw yet.
    print('No episode rewards recorded yet; nothing to plot.')
else:
    plt.figure(figsize=(8,4.5))
    x_series = range(1,len(reward_storage)+1)
    plt.plot(x_series, reward_storage, '-b')
    legend_labels = ['Episode Reward']
    if len(reward_storage) > 1:
        # A straight-line fit needs at least two points to be determined.
        fit = np.polyfit(x_series,reward_storage,1)
        fit_fn = np.poly1d(fit)
        plt.plot(x_series, fit_fn(x_series), '--r')
        legend_labels.append('Linear Trend')
    plt.xlabel('Episodes')
    plt.ylabel('Average agent reward in episode')
    plt.title('Training evolution and trend')
    plt.gca().legend(legend_labels)
    plt.show()
    

In [None]:
# Plotting training loss
# Scales the recorded loss values by their maximum and plots the square root of the scaled
# series against its 1-based position.
# NOTE(review): `loss` comes from an earlier cell; it is coerced to an array here so that a
# plain Python list also works. A maximum of 0 would still divide by zero.
bo = np.asarray(loss)/np.max(loss)
plt.figure(figsize=(16,9))
x_series = range(1,len(bo)+1)
plt.plot(x_series,np.sqrt(bo), '-b')
plt.xlabel('Training Epoch')
plt.ylabel('Loss')
plt.title('Model Loss')
# One-element tuple: a bare ('Loss') is just the string 'Loss', which legend() would treat
# as a sequence of one-character labels.
plt.gca().legend(('Loss',))
plt.show()

In [None]:
# Show the first agent's model output for the single hand-picked state [0, 5, 0, 5],
# passed as a batch of one row with four columns.
probe_state = np.array([0, 5, 0, 5]).reshape(1, 4)
Agents[0].model.predict(probe_state)[0]

In [None]:
# Draw two entries at random from the first agent's memory, then display the first element
# of the first field of the first sampled entry.
first_agent_memory = Agents[0].memory
minibatch = random.sample(first_agent_memory, 2)
first_sample = minibatch[0]
first_sample[0][0]

In [None]:
# Scratch experiment: fit the first agent's model for one epoch on two hand-written rows.
# Inputs are two 4-value states, targets are two 5-value rows.
sta = [(5, 0, 5, 0), (6, 0, 6, 0)]
st = np.asarray(sta)
print(st)

tar = [(5, 0, 5, 0, 5), (6, 0, 6, 0, 6)]
tg = np.asarray(tar)

# Note that this calls fit() on the deployed agent's own model.
Agents[0].model.fit(x=st, y=tg, epochs=1, verbose=0)

In [None]:
# Scratch experiment: convert `state` and `target` to arrays and fit `b.model` on them for
# one silent epoch.
# NOTE(review): `state`, `target` and `b` are not assigned anywhere in the visible cells;
# this cell only runs if they were left in the kernel by other code - confirm or remove.
st = np.asarray(state)
tg = np.asarray(target)
b.model.fit(st,tg,epochs=1,verbose=0)

In [None]:
# Linear epsilon step: 1.2 times the total epsilon change, spread over the (episodes - 1)
# intervals between episodes. The explicit `*` is required - `1.2(...)` would try to call
# the float.
# NOTE(review): this overwrites the epsilon_decay hyperparameter, and the value is negative
# whenever epsilon_end < epsilon_start - confirm the sign convention the agents expect.
epsilon_decay = 1.2 * (epsilon_end - epsilon_start) / (episodes - 1)
epsilon_decay

In [None]:
# Zero-filled array with batch_size rows and 4 columns. The shape has to be a single tuple:
# the second positional argument of np.zeros is the dtype, not another dimension.
np.zeros((batch_size, 4))