# Main imports

In [1]:
import gc
import math
import os
import random
import sys
import types
import _pickle as cPickle

import gym
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import city_simulation

In [2]:
# TraCI ships inside the SUMO installation, so SUMO's `tools` folder has to be
# put on sys.path before `traci` can be imported. Abort if SUMO_HOME is unset.
if 'SUMO_HOME' not in os.environ:
    sys.exit("please declare environment variable 'SUMO_HOME'")

tools = os.path.join(os.environ['SUMO_HOME'], 'tools')
sys.path.append(tools)
import traci

# Env setup

In [3]:
# Instantiate the custom Gym environment used by the agent.
# NOTE(review): the 'city_simulation-v0' id is presumably registered as a side
# effect of `import city_simulation` — confirm.
env = gym.make('city_simulation-v0')

In [3]:
# Command line passed to traci.start(): the SUMO GUI binary followed by the
# simulation configuration file.
# NOTE(review): the binary path is absolute and machine-specific, and the config
# path is relative to the notebook's working directory — adjust per machine.
sumoCmd = ["/usr/bin/sumo/bin/sumo-gui", "-c", "../sumo_simulation/sim_config/osm.sumocfg"]

In [5]:
def runSimulationSteps(self):
    '''
    Run one block of SUMO simulation steps and return the emission and fuel
    consumption summary.

    Demo variant meant to be attached to the environment: in addition to
    advancing the simulation it saves a GUI screenshot ("images/<i>.png")
    after every step.

    The simulation is advanced by at most ``self.steps_in_hour`` steps. If
    SUMO reports that no vehicles are left or expected, the episode is marked
    as finished (``self.done = 1``) and the TraCI connection is closed.

    In both cases the current lane subscription results are loaded into a
    DataFrame and their ``self.umap.transform`` projection is stored in
    ``self.state`` — this is the state the agent perceives.

    Returns:
        pandas.Series: ``sim_results.T.mean()``, i.e. the mean of every row of
        the subscription DataFrame, used by the caller for the reward
        calculation.

    NOTE(review): the original code called a bare ``collect()`` that is never
    defined or imported in this notebook (NameError at run time). It is assumed
    to be ``gc.collect`` — confirm against the city_simulation package.
    '''

    def read_lane_results():
        # Snapshot all lane subscriptions and refresh the agent-visible state.
        results = pd.DataFrame.from_dict(traci.lane.getAllSubscriptionResults())
        self.state = self.umap.transform(results.values)
        return results

    for i in range(self.steps_in_hour):
        # Stop early once every vehicle has left the simulation.
        if traci.simulation.getMinExpectedNumber() == 0:
            self.done = 1
            sim_results = read_lane_results()
            traci.close()
            gc.collect()
            # State means for the reward calculation.
            return sim_results.T.mean()

        traci.simulationStep()
        # File names restart at 0 on every call, so earlier frames are overwritten.
        traci.gui.screenshot("View #0", "images/"+str(i)+".png")
        gc.collect()

    sim_results = read_lane_results()
    gc.collect()

    return sim_results.T.mean()

# PyTorch setup

In [6]:
# Hyperparameters for the DQN agent.
# NOTE(review): BATCH_SIZE, GAMMA and TARGET_UPDATE are not referenced anywhere
# else in this notebook; they presumably mirror the training script (minibatch
# size, discount factor, target-network sync period) — confirm.
BATCH_SIZE = 32
GAMMA = 0.999
# Epsilon-greedy schedule used by select_action:
#   eps = EPS_END + (EPS_START - EPS_END) * exp(-steps_done / EPS_DECAY)
# i.e. EPS_START is the threshold at step 0, EPS_END its asymptote and
# EPS_DECAY the decay constant measured in select_action calls.
EPS_START = 0.9
EPS_END = 0.05
EPS_DECAY = 200
TARGET_UPDATE = 10

In [7]:
def select_action(state):
    """Choose an action vector with an epsilon-greedy policy.

    The exploration threshold decays exponentially from EPS_START towards
    EPS_END as the module-level counter ``steps_done`` grows (time constant
    EPS_DECAY). The counter is incremented on every call.

    Args:
        state: tensor that can be viewed as ``(-1, 12)``; it is fed to
            ``policy_net`` as a batch of 12-feature rows.

    Returns:
        numpy.ndarray: when exploiting, the raw ``policy_net`` output for the
        batch (2-D, one row per state); when exploring, a 1-D array of
        ``n_actions`` uniform random values in [0, 1). Note that the two
        branches therefore return arrays of different rank.
    """
    global steps_done
    # The counter is shared module state; start it at zero the first time the
    # function is used so a fresh kernel does not fail with a NameError.
    globals().setdefault('steps_done', 0)

    eps_threshold = EPS_END + (EPS_START - EPS_END) * math.exp(-1. * steps_done / EPS_DECAY)
    sample = random.random()
    steps_done += 1

    if sample > eps_threshold:
        # Exploit: return the network's full output vector (no argmax is taken).
        with torch.no_grad():
            return policy_net(state.view([-1,12])).cpu().numpy()

    # Explore: random vector with the same width as the network output.
    return np.random.random(n_actions)

In [8]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [9]:
class DQN(nn.Module):
    """Fully connected Q-network.

    Layer widths: inputs -> 32 -> 64 -> 128 -> 256 -> 256 -> outputs, with a
    ReLU after every hidden layer and a purely linear output head.

    Args:
        outputs: width of the output layer.
        inputs: width of the input features. Defaults to 12, the value the
            network was originally hard-coded to.
    """

    def __init__(self, outputs, inputs=12):
        super().__init__()
        # The attribute names are the state_dict keys of saved checkpoints;
        # renaming them would break loading existing weight files.
        self.mlp1 = nn.Linear(inputs, 32)
        self.mlp2 = nn.Linear(32, 64)
        self.mlp3 = nn.Linear(64, 128)
        self.mlp4 = nn.Linear(128, 256)
        self.mlp5 = nn.Linear(256, 256)
        self.head = nn.Linear(256, outputs)

    def forward(self, x):
        """Map features of shape ``(..., inputs)`` to outputs of shape ``(..., outputs)``."""
        for hidden in (self.mlp1, self.mlp2, self.mlp3, self.mlp4, self.mlp5):
            x = F.relu(hidden(x))

        # nn.Linear acts on the last dimension, so no reshape is needed. The
        # previous `x.view(x.size(0), -1)` was a no-op for 2-D batches and
        # broke unbatched (1-D) inputs.
        return self.head(x)


In [10]:
# Width of the network's output vector; passed as `outputs` to both DQN instances.
n_actions = 46114

In [11]:
# Build the policy and target networks and restore the trained weights.
policy_net = DQN(n_actions).to(device)
target_net = DQN(n_actions).to(device)

# map_location remaps the stored tensors onto `device`, so a checkpoint that
# was saved on a GPU also loads on a CPU-only machine.
# NOTE: torch.load unpickles the file — only load checkpoints you trust.
checkpoint = torch.load(
    'output_weights/target/target_net_weights_2019-06-27 09:02:37_ep_11.pt',
    map_location=device,
)
policy_net.load_state_dict(checkpoint)

# Both networks start from the same weights; the target network is only used
# for inference, hence eval mode.
target_net.load_state_dict(policy_net.state_dict())
target_net.eval()

DQN(
  (mlp1): Linear(in_features=12, out_features=32, bias=True)
  (mlp2): Linear(in_features=32, out_features=64, bias=True)
  (mlp3): Linear(in_features=64, out_features=128, bias=True)
  (mlp4): Linear(in_features=128, out_features=256, bias=True)
  (mlp5): Linear(in_features=256, out_features=256, bias=True)
  (head): Linear(in_features=256, out_features=46114, bias=True)
)

# Main simulation loop for demo

In [10]:
# Monkey-patch the environment for the demo: swap in the screenshot-taking
# runSimulationSteps defined above and point it at the GUI command line.
# A plain function stored on an *instance* is not bound automatically, so it is
# wrapped in types.MethodType; otherwise env.runSimulationSteps() would be
# called without `self` and raise a TypeError.
# NOTE(review): if gym.make returns a wrapper around the real environment, the
# patch may need to be applied to env.unwrapped instead — confirm.
env.runSimulationSteps = types.MethodType(runSimulationSteps, env)
env.sumoCmd = sumoCmd

In [3]:
# Create the folder for the emissions-view screenshots. Unlike the shell
# `mkdir`, this is portable and does not fail when the cell is re-run.
os.makedirs('images_demo_emissions', exist_ok=True)

In [4]:
def simulation_for_demo(img_path, screenshot_every=50):
    """Run the whole SUMO simulation once and save periodic GUI screenshots.

    Starts SUMO through TraCI with the module-level ``sumoCmd``, steps the
    simulation until no vehicles are left or expected, and requests a
    screenshot of view "View #0" every ``screenshot_every`` steps. Files are
    named ``<img_path>/<step>.png``.

    Args:
        img_path: folder that receives the screenshots; created if missing.
        screenshot_every: number of simulation steps between screenshots
            (default 50, the previously hard-coded value).
    """
    # Make sure the target folder exists before SUMO is asked to write into it.
    os.makedirs(img_path, exist_ok=True)

    traci.start(sumoCmd)
    try:
        i = 0
        while traci.simulation.getMinExpectedNumber() > 0:
            traci.simulationStep()
            if i % screenshot_every == 0:
                traci.gui.screenshot("View #0", "{}/{}.png".format(img_path, i))
            i += 1
    finally:
        # Always release the TraCI connection, even if a step fails, so the
        # next traci.start() does not collide with a stale connection.
        traci.close()

# Generate demo video

In [7]:
# Run the full SUMO GUI simulation, saving every 50th step as images_demo/<step>.png.
simulation_for_demo('images_demo')

 Retrying in 1 seconds


# Demo video

In [8]:
import imageio

In [9]:
# Stitch the screenshots saved every 50 steps (0 .. 7100) into an animated GIF.
frame_template = 'images_demo/{}.png'
with imageio.get_writer('demo_2.gif', mode='I') as gif:
    for step in range(0,7150, 50):
        gif.append_data(imageio.imread(frame_template.format(step)))

In [13]:
# Same GIF assembly for the emissions-view screenshots (steps 0 .. 7100, every 50th).
emission_frames = ['images_demo_emissions/{}.png'.format(step) for step in range(0,7150, 50)]
with imageio.get_writer('demo.gif', mode='I') as gif:
    for frame_path in emission_frames:
        gif.append_data(imageio.imread(frame_path))

# Plotly radar charts

In [11]:
import plotly
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

In [24]:
import numpy as np

In [12]:
# Initialise plotly's offline notebook mode; must run before any iplot() call.
# NOTE(review): connected=True presumably loads plotly.js from the CDN instead
# of embedding it in the notebook — confirm against the plotly docs.
init_notebook_mode(connected=True)

In [29]:
# Placeholder values for the three radar traces: six uniform random numbers in
# [0, 1) per scenario, NOT simulation measurements. They are drawn from a
# locally seeded generator so the figure is identical on every run and the
# global NumPy random state is left untouched.
radar_rng = np.random.RandomState(42)
no_reg = radar_rng.random_sample(6)
current = radar_rng.random_sample(6)
agent = radar_rng.random_sample(6)

In [36]:
# Axis labels shared by all three radar traces.
radar_axes = ['$CO_2$','$CO$','$PM_x$', '$NO_x$', 'Ruido', 'Consumo de Gasolina']

# (values, legend label) per regulation scenario, in drawing order.
scenarios = [
    (no_reg, 'Sin Regulación'),
    (current, 'Regulación Actual'),
    (agent, 'Regulación Propuesta'),
]

# One filled polar trace per scenario; each gets its own copy of the labels.
data = [
    go.Scatterpolar(r=values, theta=list(radar_axes), fill='toself', name=label)
    for values, label in scenarios
]

# Fixed radial range [0, 1] so the three scenarios are drawn on the same scale.
layout = go.Layout(
    polar={'radialaxis': {'visible': True, 'range': [0, 1]}},
    showlegend=True,
)

fig = go.Figure(data=data, layout=layout)
iplot(fig, filename = "radar/basic")