In [None]:
from Vissim_env_class import environment
from Actor_critic_class import ACAgent
from MasterAC_Agent import MasterAC_Agent
from MasterDQN_Agent import MasterDQN_Agent

# Network Specific Libraries
from Balance_Functions import balance_dictionary

# General Libraries
import numpy as np 
import pylab as plt
import os
import shutil
import csv
import pandas as pd
import json

%matplotlib inline
%load_ext autoreload
%autoreload 2

# Balance RL DQN Partial Training

In [None]:
# --- Model / file locations -------------------------------------------------
model_name = "Balance_integrated"
# Absolute, machine-specific Vissim folder; adjust when running on another machine.
vissim_working_directory = (
    'C:\\Users\\acabrejasegea\\OneDrive - The Alan Turing Institute'
    '\\Desktop\\ATI\\0_TMF\\MLforFlowOptimisation\\Vissim\\'
)
# Alternative location (kept for reference):
#vissim_working_directory = "E:\\OneDrive - University of Warwick\\Desktop\\ATI\\0_TMF\\MLforFlowOptimisation\\Vissim"

# Internal IDs of the intersections used during training, according to topology.
# One entry per agent; an ID that appears several times (e.g. "1_2_4") is listed
# once for every agent that shares it.
intersection_ids = [
    "1_2_4", "1_2_4", "3", "1_2_4", "5", "6", "7",
    "8", "9", "10", "11_12", "11_12", "13", "14",
]

# --- Simulation parameters --------------------------------------------------
Random_Seed = 44
sim_length = 9001
agent_type = "DDQN"
actions = "default_actions"     # 'default_actions' or 'all_actions'

# --- DQN hyperparameters ----------------------------------------------------
# All of these are forwarded unchanged to MasterDQN_Agent further down.
episodes = 500
copy_weights_frequency = 10
timesteps_per_second = 1
PER_activated = True
memory_size = 5000
batch_size = 128
batches_per_episode = 10

alpha = 0.00005
gamma = 0.95

# Balance dictionary for the selected agent type (built by Balance_Functions.balance_dictionary)
Balance_dictionary = balance_dictionary(agent_type)

# Session identifier: <model>_<actions>_<episodes>_<sim_length - 1>_<agent_type>
Session_ID = "_".join([model_name, actions, str(episodes), str(sim_length - 1), agent_type])
print("Session ID: {}".format(Session_ID))

In [None]:
# Exploration settings consumed by choose_schedule.
exploration_schedule = "geometric"  # "linear" or "geometric"
epsilon_start = 0.2   # first value of the schedule
epsilon_end = 0.01    # value the decay is aimed at
epsilon_min = 0.01    # floor: schedule entries below this are replaced by it

def choose_schedule(exploration_schedule, espilon_start, epsilon_end, epsilon_min, episodes):
    """Build the per-episode exploration (epsilon) sequence and plot it.

    Parameters
    ----------
    exploration_schedule : str
        "linear" or "geometric".
    espilon_start : float
        Epsilon used for the first episode. The misspelt parameter name is
        kept on purpose so that existing keyword callers keep working.
        Must be non-zero for the geometric schedule.
    epsilon_end : float
        Value the decay is aimed at.
    epsilon_min : float
        Lower bound; any schedule entry below it is replaced by it.
    episodes : int
        Number of training episodes.

    Returns
    -------
    list of float
        ``episodes + 1`` epsilon values (one spare entry past the last episode).

    Raises
    ------
    NotImplementedError
        If ``exploration_schedule`` is "entropy" (recognised but not implemented).
    ValueError
        If ``exploration_schedule`` is not a recognised name.
    """
    # Use the argument that was actually passed in, not whatever global
    # happens to be called `epsilon_start` in the notebook.
    epsilon_start = espilon_start
    # Avoid a division by zero when a single episode is requested.
    decay_steps = max(episodes - 1, 1)

    if exploration_schedule == "linear":
        # The 1.2 factor makes the slope 20% steeper than needed to hit
        # epsilon_end on the last episode; the floor below clips the overshoot.
        epsilon_decay = 1.2 * (epsilon_end - epsilon_start) / decay_steps
        raw_sequence = [epsilon_start + epsilon_decay * entry for entry in range(episodes + 1)]
    elif exploration_schedule == "geometric":
        # Constant ratio chosen so that entry `episodes - 1` equals epsilon_end.
        epsilon_decay = np.power(epsilon_end / epsilon_start, 1. / decay_steps)
        raw_sequence = [epsilon_start * epsilon_decay ** entry for entry in range(episodes + 1)]
    elif exploration_schedule == "entropy":
        raise NotImplementedError("The 'entropy' exploration schedule is not implemented.")
    else:
        raise ValueError(
            "ERROR: Unrecognized choice of exploration schedule: {!r}".format(exploration_schedule)
        )

    # Apply the floor.
    epsilon_sequence = [max(entry, epsilon_min) for entry in raw_sequence]

    # Plotting exploration schedule (only the entries that belong to an episode)
    plt.figure(figsize=(8, 4.5))
    x_series = np.array(range(1, episodes + 1))
    y_series = epsilon_sequence[0:episodes]
    plt.plot(x_series, y_series, '-b')
    plt.xlabel('Episodes')
    plt.ylabel('Ratio of random exploration')
    plt.title('Exploration schedule')
    plt.show()
    return epsilon_sequence

# Build (and plot) the epsilon schedule; the resulting list is later handed to MasterDQN_Agent.
epsilon_sequence = choose_schedule(exploration_schedule, epsilon_start, epsilon_end, epsilon_min, episodes)


In [None]:
## Gather files from pretrained Balance_intX runs

# Model folder names of the single-intersection runs, one per entry of intersection_ids
single_intersection_agent_ids = ["Balance_int" + int_id for int_id in intersection_ids]

# Session IDs of those runs; this assumes they were trained with the same
# actions / episodes / sim_length / agent_type as configured in this notebook.
_session_suffix = "_".join([actions, str(episodes), str(sim_length - 1), agent_type])
single_intersection_session_ids = [
    agent_id + "_" + _session_suffix for agent_id in single_intersection_agent_ids
]

# Folder holding the trained files of each single-intersection run
agents_sources_origin = [
    os.path.join(vissim_working_directory, agent_id, "Agents_Results", agent_type, session_id)
    for agent_id, session_id in zip(single_intersection_agent_ids, single_intersection_session_ids)
]

# Destination inside the integrated model: one "Agent<idx>" folder per agent, so
# intersections that trained together are split into individual folders.
# NOTE(review): the folder literal is "Balance_Integrated" while model_name is
# 'Balance_integrated' - harmless on a case-insensitive file system, confirm elsewhere.
agents_sources_destination = [
    os.path.join(vissim_working_directory, "Balance_Integrated", "Agents_source",
                 agent_type, Session_ID, "Agent{}".format(idx))
    for idx in range(len(single_intersection_session_ids))
]

# Make sure every destination folder exists.
# exist_ok=True creates missing folders and leaves existing ones untouched,
# without the separate exists-then-create check.
for folder in agents_sources_destination:
    os.makedirs(folder, exist_ok=True)

# Copy the contents of every pretrained agent folder into its folder in the
# Balance_integrated archive.
source_destination_pairs = zip(agents_sources_origin, agents_sources_destination)
for idx, (origin, destination) in enumerate(source_destination_pairs):
    print("Agent {}".format(idx))

    # Every entry found in the origin folder is copied
    files_in_folder = os.listdir(origin)
    print("Copying files...")

    for file in files_in_folder:
        source_path = os.path.join(origin, file)
        shutil.copy(source_path, destination)
print("All Files Successfully Copied.")

In [None]:
# Instantiate the multi-agent DQN controller for the integrated model.
# Arguments are positional, so their order must match MasterDQN_Agent's signature.
Balance_integrated_MultiDQN_Agents = MasterDQN_Agent(
    model_name,
    vissim_working_directory,
    sim_length,
    Balance_dictionary,
    actions,
    gamma,
    alpha,
    agent_type,
    memory_size,
    PER_activated,
    batch_size,
    batches_per_episode,
    copy_weights_frequency,
    epsilon_sequence,
    Random_Seed,
    timesteps_per_second,
    Session_ID,
    verbose=True,
)

In [None]:
# Presumably loads the separately pretrained single-intersection agents into the
# integrated model - see MasterDQN_Agent.load_isolated for the exact behaviour.
# NOTE(review): 500 looks like the episode count of the pretrained runs and equals
# `episodes` in the configuration cell - confirm, and consider passing `episodes`.
Balance_integrated_MultiDQN_Agents.load_isolated(500, best = True)

In [None]:
# Run the demo of the integrated multi-agent model (behaviour defined in MasterDQN_Agent.demo).
Balance_integrated_MultiDQN_Agents.demo()