In [None]:
import os, sys
import sumolib
import libtraci as traci
import time
import pandas as pd
import xml.etree.ElementTree as ET
from xml.etree.ElementTree import Element, SubElement, ElementTree
import numpy as np
import shutil
import traci.constants
from multiprocessing import Pool
import gym
from gym import spaces
import random
from tensorforce.environments import Environment
from tensorforce.agents import Agent
from tensorforce.execution import Runner

In [None]:
## Simulation environment
class TrafficSimEnv(Environment):
    """Tensorforce environment whose state is a vector of 28 demand weights.

    Each call to :meth:`execute` writes an edge-data XML file scaled by the
    current weights, runs ``demand/routeSampler.py`` on it, runs a SUMO
    simulation through TraCI, scores the resulting edge data against the
    baseline speed/volume tables, and finally lowers one randomly chosen
    weight by the (rounded) action value.

    All file paths are relative to the current working directory and live
    under ``demand/``.
    """

    # Number of demand weights (and of target edges below).
    NUM_EDGES = 28

    # Hour columns [start, end) used for each demand level.
    HOUR_WINDOWS = {'low': (2, 4), 'middle': (5, 7), 'high': (17, 19)}

    # Upper bound of the reward computed in execute(): 100 * 0.08 + 100 * 0.02.
    MAX_REWARD = 10.0
    # An episode terminates once the reward reaches this fraction of MAX_REWARD.
    TERMINAL_FRACTION = 0.9

    # SUMO executable and number of simulation steps per run.
    SUMO_BINARY = "/usr/bin/sumo"
    SIM_STEPS = 7200

    # Network edge ids whose demand is written to the edge-data file.
    TARGET_EDGES = [
        1220032803, 1220030002, 1220032903, 1220030102, 1220025002, 1220025103, 1220014302, 1220014201,
        1220026302, 1220026202, 1220023902, 1220023801, 1220026103, 1220026003, 1220027402, 1220027502,
        1220027702, 1220027602, 1220036102, 1220036003, 1220029302, 1220029201, 1220034602, 1220034702,
        1220003403, 1220003302, 1220020902, 1220020802,
    ]

    def __init__(self):
        super().__init__()
        self.dtype = 'high'
        self.demand = np.ones(self.NUM_EDGES, dtype='float')

    def states(self):
        """Return the state specification: one float per demand weight."""
        return dict(type='float', shape=(self.NUM_EDGES,))

    def actions(self):
        """Return the action specification: a single bounded float."""
        return dict(type='float', shape=(1,), min_value = 0.05, max_value= 0.059)

    def close(self):
        super().close()

    def reset(self):
        """Start a new episode with all demand weights set to 1.

        The internal weights are re-initialised as well, so the state handed
        to the agent matches the demand that the next simulation will use
        (previously the weights silently carried over between episodes).
        """
        self.demand = np.ones(self.NUM_EDGES, dtype='float')
        return self.demand.copy()

    def execute(self, actions):
        """Simulate the current demand, score it, then apply the action.

        Args:
            actions: sequence whose first element is the amount (rounded to
                two decimals) subtracted from one randomly chosen weight.

        Returns:
            Tuple ``(next_state, terminal, reward)``. ``next_state`` is a copy
            of the updated weights. Note that the action is applied *after*
            the evaluation, so ``reward`` describes the demand as it was
            before this step's action.
        """
        self.make_add_edge_xml()
        self.run_simulation()
        eval_speed, eval_volume = self.simulation_evaluation()
        reward = (100 - abs(eval_speed - 100)) * 0.08 + (100 - abs(eval_volume - 100)) * 0.02
        print("reward : {:.2f}".format(reward))
        print("demand : {}".format(self.demand))
        print("Speed_acc : {}".format(eval_speed) + " Volume_acc : {}".format(eval_volume))

        # Lower one random weight; clamp at zero because a negative weight
        # would produce negative vehicle counts in the edge-data file.
        idx = random.randint(0, len(self.demand) - 1)
        self.demand[idx] = max(self.demand[idx] - round(actions[0], 2), 0.0)

        # NOTE(review): the original compared against a bare 0.9 although the
        # reward spans 0..MAX_REWARD, which ended practically every episode
        # after a single step. The threshold is now 90% of the maximum;
        # confirm that this is the intended stopping criterion.
        terminal = bool(reward >= self.TERMINAL_FRACTION * self.MAX_REWARD)
        if terminal:
            print('check')

        # Return a copy so later in-place updates cannot alter states that
        # the caller has already stored.
        return self.demand.copy(), terminal, reward

    def _hour_window(self):
        """Return ``(start, end)`` hour bounds for ``self.dtype``.

        Raises:
            ValueError: if ``self.dtype`` is not a key of ``HOUR_WINDOWS``.
        """
        try:
            return self.HOUR_WINDOWS[self.dtype]
        except KeyError:
            raise ValueError("unknown demand type: {!r}".format(self.dtype)) from None

    def make_add_edge_xml(self):
        """Write ``demand/<dtype>.xml`` and run routeSampler on it.

        For every hour in the window of ``self.dtype`` an ``<interval>`` is
        written that contains one ``<edge>`` per target edge; its ``entered``
        attribute is the value from ``demand/volume(r).csv`` multiplied by the
        matching demand weight and rounded.
        """
        import subprocess

        start, end = self._hour_window()

        mapping = pd.read_csv('demand/mapping.csv')
        df = pd.read_csv('demand/volume(r).csv')
        mapping = mapping[mapping['network'].isin(self.TARGET_EDGES)].reset_index(drop=True)

        def _pretty_print(current, parent=None, index=-1, depth=0):
            # Recursively set text/tail whitespace so the file is tab-indented.
            for i, node in enumerate(current):
                _pretty_print(node, current, i, depth + 1)
            if parent is not None:
                if index == 0:
                    parent.text = '\n' + ('\t' * depth)
                else:
                    parent[index - 1].tail = '\n' + ('\t' * depth)
                if index == len(parent) - 1:
                    current.tail = '\n' + ('\t' * (depth - 1))

        root = Element("data")
        for cnt, hour in enumerate(range(start, end)):
            elem = SubElement(root, "interval")
            elem.set("id", str(cnt))
            elem.set("begin", str(cnt * 3600))
            elem.set("end", str((cnt + 1) * 3600))

            hour_volume = df[str(hour % 24)]
            # Row j of the volume table is paired with the j-th filtered
            # mapping row and the j-th demand weight.
            for j, (plink, w) in enumerate(zip(mapping['network'], self.demand)):
                sub_elem = SubElement(elem, "edge")
                sub_elem.set("id", str(plink))
                sub_elem.set("entered", str(round(hour_volume[j] * w)))

        _pretty_print(root)

        file_path = 'demand/' + self.dtype + '.xml'
        with open(file_path, "wb") as file:
            ElementTree(root).write(file, encoding='utf-8', xml_declaration=True)

        # Plain subprocess call instead of the IPython-only "!" shell escape,
        # using the interpreter that runs this kernel. The edge-data input is
        # the file written above (it used to be hard-coded to demand/high.xml).
        # NOTE(review): the output name 'demand/hight.xml' is kept verbatim;
        # confirm it is the route file the .sumocfg files reference.
        cmd = [
            sys.executable, 'demand/routeSampler.py',
            '-r', 'demand/sampleRoutes.rou.xml',
            '--edgedata-files', file_path,
            '-o', 'demand/hight.xml',
            '--threads', '40',
        ]
        result = subprocess.run(cmd)
        if result.returncode != 0:
            # Surface the failure; the following simulation would otherwise
            # silently run on whatever route file is already on disk.
            print("routeSampler exited with status {}".format(result.returncode), file=sys.stderr)

    def run_simulation(self):
        """Run ``demand/osm.<dtype>.sumocfg`` for ``SIM_STEPS`` TraCI steps.

        The connection is closed even when a step raises, so a failed run does
        not leave the "sim" connection open for the next call.
        """
        sumo_cmd = [self.SUMO_BINARY, "-c", 'demand/osm.' + self.dtype + '.sumocfg']

        traci.start(sumo_cmd, label="sim")
        try:
            for _ in range(self.SIM_STEPS):
                traci.simulationStep()
        finally:
            # close() acts on the currently active connection; it does not
            # take a label, so none is passed.
            traci.close()

    @staticmethod
    def _min_max_scale(value, low, high):
        """Scale ``value`` into [0, 1] by ``low``/``high``; 0.0 if the range is empty."""
        span = high - low
        if span == 0:
            return 0.0
        return (value - low) / span

    @staticmethod
    def _ratio_score(observed, reference):
        """Return ``round(smaller / larger, 2) * 100`` for the two values.

        Two zeros count as a perfect match (100.0) instead of evaluating 0/0,
        which previously turned the whole score into NaN.
        """
        if observed > reference:
            return round(reference / observed, 2) * 100
        if reference == 0:
            return 100.0 if observed == 0 else 0.0
        return round(observed / reference, 2) * 100

    def simulation_evaluation(self):
        """Score simulated edge data against the baseline tables.

        Reads ``demand/edgeData_<dtype>/edge<i>.xml`` for ``i`` in 0..1,
        aggregates volume and speed (multiplied by 3.6) per ``topis`` id via
        ``demand/mapping.csv`` and compares them with ``demand/v_base.csv`` and
        ``demand/s_base.csv`` for the hour columns of ``self.dtype``.

        Returns:
            Tuple ``(spd_map, vol_map)`` of scores rounded to two decimals.
        """
        mapping = pd.read_csv('demand/mapping.csv')
        base_speed = pd.read_csv('demand/s_base.csv', index_col='Unnamed: 0')
        base_volume = pd.read_csv('demand/v_base.csv', index_col='Unnamed: 0')

        start, end = self._hour_window()
        hours = [str(h) for h in range(start, end)]

        n_links = len(base_speed.index)

        # Number of mapping rows per baseline link; used to average the sums.
        # It does not depend on the edge file, so it is computed once.
        nu = np.zeros(n_links)
        for j in range(n_links):
            nu[j] = sum(1 for s in mapping['topis'] if base_speed.index[j] == s)

        # First mapping row wins for duplicated network ids, matching the
        # original first-match scan. Edges absent from the mapping are
        # skipped (the original detected this with a hard-coded row count).
        topis_of_edge = {}
        for net_id, topis_id in zip(mapping['network'], mapping['topis']):
            topis_of_edge.setdefault(str(net_id), str(topis_id))

        for i in range(2):
            vol = {str(base_volume.index[j]): 0 for j in range(n_links)}
            speed = {str(base_speed.index[j]): 0 for j in range(n_links)}

            tree = ET.parse('demand/edgeData_' + self.dtype + '/edge' + str(i) + '.xml')
            for edge in tree.getroot().iter('edge'):
                key = topis_of_edge.get(edge.get('id'))
                if key is None:
                    continue
                vol[key] += int((int(edge.get('entered')) + int(edge.get('left'))) / 2)
                speed[key] += float(edge.get('speed')) * 3.6

            # Turn the sums into per-link averages.
            for j in range(n_links):
                vol_key = str(base_volume.index[j])
                if vol[vol_key] != 0:
                    speed_key = str(base_speed.index[j])
                    vol[vol_key] = round(vol[vol_key] / nu[j])
                    speed[speed_key] = round(speed[speed_key] / nu[j], 2)

            # NOTE(review): m and n are re-initialised for every edge file, so
            # only the last file contributes to the returned scores, and that
            # file is compared with every hour column. Kept as in the original;
            # confirm whether file i was meant to be paired with hours[i].
            m = 0
            n = 0
            observed = list(vol.values())
            min_v = np.min(observed)
            max_v = np.max(observed)
            for t in hours:
                min_bv = np.min(base_volume[t])
                max_bv = np.max(base_volume[t])

                for col in base_speed.index:
                    if vol[str(col)] == 0:
                        continue
                    scaled_base_volume = self._min_max_scale(base_volume[t][col], min_bv, max_bv)
                    scaled_vol = self._min_max_scale(vol[str(col)], min_v, max_v)

                    m += self._ratio_score(speed[str(col)], base_speed[t][col])
                    n += self._ratio_score(scaled_vol, scaled_base_volume)

        spd_map = round(m / (len(base_speed.index) * 2), 2)
        vol_map = round(n / (len(base_volume.index) * 2), 2)
        return spd_map, vol_map

    

In [None]:
# Wrap the simulation environment; episodes are capped at 280 timesteps.
environment = Environment.create(
    environment=TrafficSimEnv, max_episode_timesteps=280
)

agent = Agent.create(
    agent='ddpg', environment=environment,
    memory=10000,
    batch_size=10, learning_rate=1e-3
)

# Act/observe training loop. The try/finally makes sure the agent and the
# environment are closed even when a step raises part-way through training.
try:
    for episode in range(500):
        states = environment.reset()
        terminal = False
        while not terminal:
            actions = agent.act(states=states)
            states, terminal, reward = environment.execute(actions=actions)
            agent.observe(terminal=terminal, reward=reward)
finally:
    agent.close()
    environment.close()