# Simulation results and road aggregation

This notebook contains the code to generate the results and the figure from the simulation of the baseline. It also contains the code to aggregate the edges of the road network into roads.

(The first time, execute all the cells. Afterwards, if you only want to analyze the results, you can skip cell group 1.)

In [None]:
import json
from result_utils import *
import sumolib
import geopandas as gpd
from shapely.geometry import Point
import pandas as pd
import html
from sklearn.preprocessing import MinMaxScaler
import osmnx as ox
import networkx as nx
import matplotlib as mpl
import folium
import numpy as np
from tqdm.auto import tqdm

#### parameters

In [None]:
# road network path
road_network_path = "../data/road_net/Milano_big/Milano_big_road_network.net.xml"

city_boundaries_path = '../data/shapes/Milano_big_boundaries.geojson'
path_road_edge_mapping = '../data/road_net/Milano_big/Milano_big_road_edge_map.csv'

folder_experiments = "../data/simulations/Milano_big/baseline/sumo_out/"
nav_str = "baseline"

# output paths
path_results = "../data/simulations/Milano_big/baseline/results/"
path_plots = "../data/simulations/Milano_big/baseline/plots/"

Create a dictionary that associates configuration and filenames

In [None]:
dict_exps = create_dict_exps(folder_experiments, "baseline")

## 1. Road aggregation

Aggregation of the edges by the road name recorded in the road network. Because not all the edges have a road name, the edges without a name were assigned one using reverse geocoding from ArcGIS.

In [None]:
road_network = sumolib.net.readNet(road_network_path, withInternal=True)

In [None]:
print('#Edges road net: '+str(len(road_network.getEdges())))

In [None]:
# Build a dictionary keyed by road name. Each value is a list whose first element is the
# cumulative length of the road in meters; the remaining elements are the edge ids of the road.

dict_name_edges = {}

for edge in road_network.getEdges():
    edge_id = edge.getID()
    edge_length = edge.getLength()
    if edge.getFunction() == 'internal':
        # Internal edges take their name from a neighbouring edge.
        name_in = list(edge.getIncoming())[0].getName()
        name_out = list(edge.getOutgoing())[0].getName()
        # The incoming name wins whenever it is not ''; the outgoing name is only a fallback.
        # When both names are set and differ, the policy is to keep the incoming one
        # because it doesn't affect the total emissions.
        edge_name = name_in if name_in != '' else name_out
    else:
        # Classic edge
        edge_name = edge.getName()

    if edge_name not in dict_name_edges:
        dict_name_edges[edge_name] = [edge_length, edge_id]
    else:
        dict_name_edges[edge_name].append(edge_id)
        dict_name_edges[edge_name][0] += edge_length

In [None]:
# The first element of every dict_name_edges value is the cumulative road length,
# so it must not be counted as an edge. Fall back to an empty entry when the
# network has no unnamed edges at all.
unnamed_entry = dict_name_edges.get('', [0])
print('#roads: '+str(len(dict_name_edges)))
print('Edges without name: '+str(len(unnamed_entry) - 1))
print('Meters of edges without name: '+str(unnamed_entry[0]))

In [None]:
# Total length of every road (first element of each entry), rounded to 2 decimals, longest first
road_lengths = sorted((round(entry[0], 2) for entry in dict_name_edges.values()), reverse=True)

In [None]:
# Bar chart of the road lengths, one bar per road (roads are sorted longest first)
plt.figure(figsize=(10, 3))
plt.bar(range(len(road_lengths)), road_lengths, width=1)
plt.title('Length per road')
plt.xlabel('road_id')
plt.ylabel('length (m)')
plt.show()

In [None]:
plt.figure(figsize=(10, 3))
plt.hist(road_lengths, bins=100)
plt.title('Road histogram')
plt.xlabel('Road length (m)')
plt.ylabel('#road')
plt.show()

### 1.1 Merge unnamed roads using reverse geocoding

$\approx 5$ min with 500 unnamed roads <br>
$\approx 2$ hours 15 min with 12k unnamed roads

In [None]:
%%time

# Maps each street name returned by the geocoder to the list of unnamed edge ids found on it
unnamed_road_edge = {}
for edge in dict_name_edges[''][1:]:  # position 0 holds the cumulative length, not an edge id
    sumo_edge = road_network.getEdge(edge)
    # Midpoint between the start and end nodes of the edge (network coordinates)
    xy_from = sumo_edge.getFromNode().getCoord()
    xy_to = sumo_edge.getToNode().getCoord()
    mid_x = round((xy_from[0] + xy_to[0])/2, 2)
    mid_y = round((xy_from[1] + xy_to[1])/2, 2)
    # Convert the midpoint to lon/lat
    lonlat_midpoint = road_network.convertXY2LonLat(mid_x, mid_y)

    # One reverse-geocoding request per edge (arcgis provider)
    response = gpd.tools.reverse_geocode(Point(lonlat_midpoint), provider='arcgis')
    address = response['address'][0]
    # Keep only the part of the address before the first comma
    street = address.split(',')[0]
    # Drop a trailing house number, if any
    tokens = street.split(' ')
    if tokens[-1].isnumeric():
        street = ' '.join(tokens[:-1])
    unnamed_road_edge.setdefault(street, []).append(edge)

In [None]:
print('#founded roadname: '+str(len(unnamed_road_edge)))
print('Previous #road: '+str(len(dict_name_edges)))

In [None]:
# Add the geocoded edges to the road they were assigned to, updating the road length.
# NOTE(review): the geocoded edges are not removed from dict_name_edges[''], so they
# remain listed under that key as well - confirm this is intended.
for roadname, edges in unnamed_road_edge.items():
    # Length of the edges being added; each edge must be looked up by its own id
    # (the previous code used the stale `edge_id` left over from an earlier loop).
    length = sum(road_network.getEdge(edge).getLength() for edge in edges)
    if roadname in dict_name_edges:
        dict_name_edges[roadname] = dict_name_edges[roadname] + edges
        dict_name_edges[roadname][0] += length
    else:
        dict_name_edges[roadname] = [length] + edges

In [None]:
# Move the entry stored under the empty road name to the explicit 'unnamed' key
dict_name_edges['unnamed'] = dict_name_edges.pop('')

In [None]:
print('#road after reverse geocoding: '+str(len(dict_name_edges)))

### 1.2 Saving road-edge map csv

In [None]:
# Column-oriented mapping: one row per (road, edge) pair with the length of the edge
road_edge_map = {'road': [], 'edge_id': [], 'edge_len': []}

for roadname, road_entry in dict_name_edges.items():
    # road_entry[0] is the road length; the edge ids start at position 1
    edge_ids = road_entry[1:]
    road_edge_map['road'].extend([roadname] * len(edge_ids))
    road_edge_map['edge_id'].extend(edge_ids)
    road_edge_map['edge_len'].extend(road_network.getEdge(edge_id_).getLength() for edge_id_ in edge_ids)

In [None]:
pd.DataFrame(road_edge_map).drop_duplicates().to_csv(path_road_edge_mapping, index=False)

### 1.3 Assign city to roads with the same name

If you need to download the boundaries of a city, you can use [overpass turbo](https://overpass-turbo.eu/#), an OSM tool for querying OSM maps at different levels. In this case the administrative boundaries of a city are needed, so the following query was used:


```
/*
This has been generated by the overpass-turbo wizard.
*/

[out:json][timeout:25];

// gather results
(
  // query part for: “boundary=administrative and admin_level=8”
  relation["boundary"="administrative"]["admin_level"="8"]({{bbox}});
);

// print results
out body;
>;
out skel qt;
```

In [None]:
road_edge_map = pd.read_csv(path_road_edge_mapping)

In [None]:
# Load the city shapes, keep only the rows that have a 'boundary' value,
# and retain just the name and geometry columns
boundaries = (gpd.read_file(city_boundaries_path)
              .dropna(subset=['boundary'])
              [['name', 'geometry']])

In [None]:
boundaries.head()

In [None]:
# One row per road with the list of its edge ids
grouped_road = (road_edge_map.groupby('road')
                .agg({'edge_id': lambda x: x.tolist()})
                .reset_index())

$\approx 20 min$ with 5000 roads

In [None]:
# Roads whose name starts with one of these prefixes may exist in several cities,
# so they are disambiguated by appending the city that contains each edge.
prefix_road = ['Piazza', 'Via', 'Viale', 'Corso', 'Largo', 'Piazzale',
               'Rotatoria', 'Vicolo', 'Parco', 'Ponte', 'unnamed']

pbar = tqdm(total=len(grouped_road))

for index, row in grouped_road.iterrows():

    # Not split road like 'Tangenziale' or 'A1', etc.
    if row['road'].split(' ')[0] in prefix_road:
        # bound_road[i] is the boundary containing the i-th edge of the road, or None when
        # no boundary contains it. Keeping one entry per edge keeps the list aligned with
        # row['edge_id'] (appending only on a match shifted every following assignment).
        bound_road = []

        for edge in row['edge_id']:
            # find the midpoint of the edge
            coord_node_from = road_network.getEdge(edge).getFromNode().getCoord()
            coord_node_to = road_network.getEdge(edge).getToNode().getCoord()
            coord_midpoint = (round((coord_node_from[0] + coord_node_to[0])/2,2), round((coord_node_from[1] + coord_node_to[1])/2,2))
            lon_mid, lat_mid = road_network.convertXY2LonLat(coord_midpoint[0], coord_midpoint[1])
            p = Point(lon_mid, lat_mid)

            # find the administrative boundaries of that edge
            edge_bound = None
            for index_b, row_b in boundaries.iterrows():
                if row_b['geometry'].contains(p):
                    edge_bound = row_b['name']
                    break
            bound_road.append(edge_bound)

        # check how many administrative boundaries were assigned to each road
        found_bounds = {b for b in bound_road if pd.notna(b)}
        if len(found_bounds) > 1:
            for edge, edge_bound in zip(row['edge_id'], bound_road):
                # edges outside every boundary keep the plain road name
                if pd.isna(edge_bound):
                    continue
                # Restrict the update to this road's row: the same edge id can be listed
                # under more than one road, and each row must get its suffix only once.
                mask = (road_edge_map['edge_id'] == edge) & (road_edge_map['road'] == row['road'])
                road_edge_map.loc[mask, 'road'] += ', ' + edge_bound

    pbar.update(1)

pbar.close()

In [None]:
road_edge_map.to_csv(path_road_edge_mapping, index=False)

In [None]:
# Total length per road, longest first. Only 'edge_len' is summed: a bare .sum() would
# also try to aggregate the string 'edge_id' column.
road_len = (road_edge_map.groupby('road', as_index=False)['edge_len'].sum()
            .sort_values(by=['edge_len'], ascending=False))

In [None]:
# Bar chart of the total length of each road (sorted longest first), saved to the plots folder
plt.figure(figsize=(10, 3))
plt.bar(range(len(road_len)), road_len['edge_len'], width=1)
plt.title('Length per road')
plt.xlabel('road_id')
plt.ylabel('length (m)')
plt.savefig(path_plots+'length_roads.png', bbox_inches ="tight")
plt.show()

In [None]:
plt.figure(figsize=(10, 3))
plt.hist(road_len['edge_len'], bins=100)
plt.title('Road histogram')
plt.xlabel('Road length (m)')
plt.ylabel('#road')
plt.savefig(path_plots+'road_hist.png', bbox_inches ="tight")
plt.show()

In [None]:
#for road, edges in dict_name_edges.items():
#    if edges[0]>=1200:
#        print(road)

### 1.4 Road_measures.csv

Aggregate the results of the simulation for each road

In [None]:
road_edge_map = pd.read_csv(path_road_edge_mapping)

In [None]:
def create_road_measures(dict_exps, folder_experiments, main_experiment_name, road_edge_map):
    """Aggregate the per-edge measures of every experiment into per-road measures.

    For each experiment folder listed in ``dict_exps[main_experiment_name]`` the file
    ``edge_measures.csv`` is read, joined with ``road_edge_map`` on ``edge_id`` and
    grouped by ``road``. The edge ids of a road are collected in a list, while
    ``edge_len``, ``total_co2``, ``total_nox``, ``total_fuel`` and ``total_v_edge``
    are summed. The result is written to ``road_measures.csv`` in the same folder.

    Parameters
    ----------
    dict_exps : dict
        Maps an experiment name to a dict ``{exp_id: exp_folder_name}``.
    folder_experiments : str
        Path prefix of the experiment folders (concatenated as-is, so it must
        end with a path separator).
    main_experiment_name : str
        Key of ``dict_exps`` to process.
    road_edge_map : pandas.DataFrame
        Frame with the columns ``road``, ``edge_id`` and ``edge_len``.
    """
    summed_columns = ['edge_len', 'total_co2', 'total_nox', 'total_fuel', 'total_v_edge']
    aggregation = {'edge_id': lambda ids: ids.tolist()}
    aggregation.update({column: 'sum' for column in summed_columns})

    for exp_folder_name in dict_exps[main_experiment_name].values():
        exp_dir = folder_experiments + exp_folder_name
        edge_measures = pd.read_csv(exp_dir + "/edge_measures.csv")
        per_road = (pd.merge(road_edge_map, edge_measures, on=['edge_id'])
                    .groupby('road')
                    .agg(aggregation)
                    .reset_index())
        per_road.to_csv(exp_dir + "/road_measures.csv", index=False)

In [None]:
create_road_measures(dict_exps, folder_experiments, 'baseline', road_edge_map)

## 2. Results by roads

Analysis of the results of the simulation by road with plots

In [None]:
road_edge_map = pd.read_csv(path_road_edge_mapping)

In [None]:
def create_dict_total_per_road(dict_exps, folder_experiments, main_experiment_name, total_of):
    """Mean and standard deviation of a per-road measure across all experiments.

    Reads ``road_measures.csv`` from every experiment folder listed in
    ``dict_exps[main_experiment_name]`` and collects, for each road, the values
    of the ``total_of`` column.

    Returns
    -------
    pandas.DataFrame
        One row per road with the columns ``road``, ``mean`` and ``std``
        (numpy defaults, i.e. population standard deviation), in order of
        first appearance of the road.
    """
    totals_by_road = {}
    for exp_folder_name in dict_exps[main_experiment_name].values():
        road_measures = pd.read_csv(folder_experiments+exp_folder_name+"/road_measures.csv")
        # iterate over the two needed columns only instead of building a Series per row
        for road, total in zip(road_measures['road'], road_measures[total_of]):
            totals_by_road.setdefault(road, []).append(total)

    records = [[road, np.array(values).mean(), np.array(values).std()]
               for road, values in totals_by_road.items()]
    return pd.DataFrame(records, columns=['road', 'mean', 'std'])

#### 1. Total CO2 per road

Overall emissions of CO2 per road

In [None]:
df_total_co2_road = create_dict_total_per_road(dict_exps, folder_experiments, 'baseline', 'total_co2')

In [None]:
print('Total CO2: {:.3e}'.format(df_total_co2_road['mean'].sum()))

In [None]:
df_total_co2_road.sort_values(by=['mean', 'std'], ascending=False, inplace=True)

In [None]:
plt.figure(figsize=(10, 3))
#plt.bar(range(len(mean_list)), mean_list, yerr=std_list)
plt.bar(range(df_total_co2_road.shape[0]), df_total_co2_road['mean'], yerr=df_total_co2_road['std'])
plt.title('CO2 per road')
plt.xlabel('road_id')
plt.ylabel('CO2 (mg)')
plt.show()

In [None]:
plt.figure(figsize=(10, 3))
plt.bar(df_total_co2_road['road'].iloc[:50].apply(html.unescape), df_total_co2_road['mean'].iloc[:50], yerr=df_total_co2_road['std'].iloc[:50])
plt.title('CO2 per road')
plt.xlabel('roadname')
plt.ylabel('CO2 (mg)')
plt.xticks(rotation='vertical')
plt.savefig(path_plots+'CO2_first50.png', bbox_inches ="tight")
plt.show()

In [None]:
first_50 = df_total_co2_road['road'].iloc[:50]

Compute the CO2 per meter for each road

In [None]:
df_co2_len = pd.merge(road_edge_map.groupby('road')['edge_len'].sum(), df_total_co2_road, on=['road'])

In [None]:
df_co2_len['mean_len'] = df_co2_len['mean']/df_co2_len['edge_len']
df_co2_len['std_len'] = df_co2_len['std']/df_co2_len['edge_len']

In [None]:
df_co2_len.sort_values(by=['mean_len', 'std_len'], ascending=False, inplace=True)

In [None]:
plt.figure(figsize=(10, 3))
plt.bar(df_co2_len['road'].iloc[:50].apply(html.unescape), df_co2_len['mean_len'].iloc[:50], yerr=df_co2_len['std_len'].iloc[:50])
plt.title('CO2/length per road')
plt.xlabel('roadname')
plt.ylabel('CO2/length (mg/m)')
plt.xticks(rotation='vertical')
plt.savefig(path_plots+'CO2_m_first50.png', bbox_inches ="tight")
plt.show()

In [None]:
#df_co2_len['mean_len'] = MinMaxScaler().fit_transform(df_co2_len['mean_len'].values.reshape(-1,1))
#df_co2_len['mean'] = MinMaxScaler().fit_transform(df_co2_len['mean'].values.reshape(-1,1))
#df_co2_len['mean_len'] = df_co2_len['mean_len']*df_co2_len['mean'].max()

In [None]:
# Work on a copy: a plain assignment would alias df_co2_len, so unescaping the road
# names for display would also rewrite them in df_co2_len and they would no longer
# match the (escaped) names stored in the other dataframes.
df_co2_len2 = df_co2_len.copy()
df_co2_len2['road'] = df_co2_len2['road'].apply(html.unescape)
_ = df_co2_len2.iloc[:50].plot(kind='bar', x='road', y=['mean', 'mean_len'], secondary_y='mean_len',
                               width=0.6, rot=90, figsize=(10,3))
plt.title('CO2 vs CO2/length')
ax1, ax2 = plt.gcf().get_axes()
ax1.set_ylabel('CO2 mean (mg)')
# '/' instead of '\': '\l' and '\m' are invalid escape sequences in a normal string
ax2.set_ylabel('CO2/length (mg/m)')
plt.savefig(path_plots+'CO2_vs_first50.png', bbox_inches ="tight")
plt.show()

In [None]:
first_50_normalized = df_co2_len['road'].iloc[:50]

In [None]:
print('Different roads bewteen the two metrics: ' + str(len(np.setdiff1d(first_50, first_50_normalized))))

In [None]:
df_co2_len.sort_values(by=['mean', 'std'], ascending=False, inplace=True)

In [None]:
plt.errorbar(df_co2_len['edge_len'], df_co2_len['mean'], yerr=df_co2_len['std'], ms=4, fmt='o', ecolor='r', elinewidth=1)
#plt.yscale('log')
#plt.xscale('log')
#for i in range(20):
#    plt.annotate(html.unescape(df_co2_len['road'].iloc[i]), (df_co2_len['edge_len'].iloc[i], df_co2_len['mean'].iloc[i] + 0.2), size=6)
plt.title('CO2 - road length correlation')
plt.ylabel('CO2 (mg)')
plt.xlabel('Road length (m)')
plt.savefig(path_plots+'CO2_length_corr.png', bbox_inches ="tight")
#plt.savefig(path_plots+'CO2_length_corr_name.png', bbox_inches ="tight")
plt.show()

#### 2. Gini index CO2 per road

In [None]:
def gini(array):
    """Gini coefficient of a 1-D collection of non-negative values.

    Uses the closed form on the sorted values, which avoids the pairwise
    differences of the textbook definition.

    Parameters
    ----------
    array : array-like
        One-dimensional values >= 0. The input is not modified.

    Returns
    -------
    float
        The Gini coefficient (``nan`` for an empty input).
    """
    # np.sort returns a sorted copy, so the caller's data keeps its original order
    # (the previous in-place array.sort() silently reordered the argument).
    values = np.sort(np.asarray(array, dtype=float))
    # values can't be zero
    values = values + 0.0000001
    n = values.shape[0]
    index = np.arange(1, n + 1)
    return (np.sum((2 * index - n - 1) * values)) / (n * np.sum(values))

In [None]:
def gini_coefficient(x):
    """Gini coefficient of an array of values, computed from pairwise differences.

    Sums ``|x_i - x_j|`` over every pair ``i < j`` and divides by
    ``len(x)**2 * mean(x)``.
    """
    total_abs_diff = 0
    for offset, value in enumerate(x[:-1], start=1):
        # differences between this value and all the values that follow it
        pairwise = np.abs(value - x[offset:])
        total_abs_diff += np.sum(pairwise)
    normaliser = len(x)**2 * np.mean(x)
    return total_abs_diff / normaliser

In [None]:
def create_dict_gini(dict_exps, folder_experiments, main_experiment_name, gini_of):
    """Gini coefficient of a per-road measure for every experiment.

    For each experiment folder listed in ``dict_exps[main_experiment_name]`` the
    ``gini_of`` column of ``road_measures.csv`` is read and passed to ``gini``.

    Returns
    -------
    dict
        ``{exp_id: gini coefficient}``.
    """
    def _experiment_gini(exp_folder_name):
        road_measures = pd.read_csv(folder_experiments+exp_folder_name+"/road_measures.csv")
        return gini(np.array(road_measures[gini_of]))

    return {exp_id: _experiment_gini(exp_folder_name)
            for exp_id, exp_folder_name in dict_exps[main_experiment_name].items()}

In [None]:
dict_gini_co2 = create_dict_gini(dict_exps, folder_experiments, 'baseline', 'total_co2')

with open(path_results+'co2_gini.json', 'w') as fp:
    json.dump(dict_gini_co2, fp)

In [None]:
gini_co2 = np.array(list(dict_gini_co2.values()))
print('Gini coefficient CO2 per road mean: '+str(gini_co2.mean()))
print('Gini coefficient CO2 per road std: '+str(gini_co2.std()))   

#### 3. CO2 plot OSMnx

In [None]:
road_network = sumolib.net.readNet(road_network_path, withInternal=False)

In [None]:
len(road_network.getEdges())

In [None]:
G = nx.Graph()
#G = ox.graph_from_address('Milano, Lombardia, Italia', dist=5000, network_type='drive')
#G = ox.graph_from_point((45.469262, 9.182007), dist=4000, network_type='drive') #Milano
G = ox.graph_from_point((45.469262, 9.182007), dist=12000, network_type='drive') #Milano_big

In [None]:
ox.plot_graph(G, bgcolor='white', node_size=0.1, edge_linewidth=0.5)

In [None]:
def map_sumo_osm_edges(road_net, osm_graph):
    """Map each SUMO edge id to the nearest edge of an OSMnx graph.

    The association is done by computing the lon/lat of the midpoint between the
    start and end nodes of every SUMO edge and asking OSMnx for the nearest
    graph edge to that point.

    Returns
    -------
    pandas.DataFrame
        Columns ``edge_id`` (SUMO edge id) and ``edge_osmnx`` (the edge returned
        by ``ox.nearest_edges``).
    """
    midpoints = []
    for edge in road_net.getEdges():
        xy_from = edge.getFromNode().getCoord()
        xy_to = edge.getToNode().getCoord()
        # midpoint in network coordinates
        mid_x = round((xy_from[0] + xy_to[0])/2, 2)
        mid_y = round((xy_from[1] + xy_to[1])/2, 2)
        lon, lat = road_net.convertXY2LonLat(mid_x, mid_y)
        # round sumo lonlat for the comparison with osmnx lonlat
        midpoints.append((edge.getID(), round(lon, 7), round(lat, 7)))

    list_edge = [edge_id for edge_id, _, _ in midpoints]
    list_lon = [lon for _, lon, _ in midpoints]
    list_lat = [lat for _, _, lat in midpoints]

    osmnx_edges = ox.nearest_edges(osm_graph, list_lon, list_lat)
    return pd.DataFrame(list(zip(list_edge, osmnx_edges)), columns =['edge_id', 'edge_osmnx'])

In [None]:
map_edge_sumo_osmnx = map_sumo_osm_edges(road_network, G)

In [None]:
def plot_df(df_exps, road_edge_map, sumo_osm_edge_map):
    """Build the dataframe used to plot per-road values on the OSMnx graph.

    Parameters
    ----------
    df_exps : pandas.DataFrame
        Per-road values; must contain a ``road`` column.
    road_edge_map : pandas.DataFrame
        Mapping between ``road`` and SUMO ``edge_id``.
    sumo_osm_edge_map : pandas.DataFrame
        Mapping between SUMO ``edge_id`` and the OSMnx edge.

    Returns
    -------
    pandas.DataFrame
        The three inputs joined on ``road`` and then on ``edge_id``; edges whose
        id starts with ':' are excluded.
    """
    road_edge_map_no_intern = road_edge_map[~road_edge_map['edge_id'].astype(str).str.startswith(':')]
    road_edge_mean_map = pd.merge(road_edge_map_no_intern, df_exps, on=['road'])
    # use the mapping passed by the caller (the previous code read a global variable
    # and silently ignored this argument)
    return pd.merge(road_edge_mean_map, sumo_osm_edge_map, on=['edge_id'])

In [None]:
df_co2_road = create_dict_total_per_road(dict_exps, folder_experiments, 'baseline', 'total_co2')

df_co2_plot = plot_df(df_co2_road, road_edge_map, map_edge_sumo_osmnx)

In [None]:
def add_attribute_to_graph(graph, df_plot, attr_name):
    """Set an edge attribute on ``graph`` from the per-road means in ``df_plot``.

    Every edge of ``graph`` first gets ``attr_name = None``. Then each edge listed
    in ``df_plot['edge_osmnx']`` (indexed as ``(u, v, key)``) receives the matching
    value of ``df_plot['mean']``; a mean of 0 is stored as ``None``. When the same
    graph edge appears more than once in ``df_plot``, the last value wins.

    The graph is modified in place; nothing is returned.
    """
    # Initialize the attribute on the graph that was passed in (the previous code
    # wrote to the global `G`, so any other graph was left uninitialized)
    for edge in graph.edges:
        graph[edge[0]][edge[1]][edge[2]][attr_name] = None

    # Set the attribute based on mean per road
    for edge, mean in zip(df_plot['edge_osmnx'], df_plot['mean']):
        graph[edge[0]][edge[1]][edge[2]][attr_name] = None if mean == 0 else mean

In [None]:
add_attribute_to_graph(G, df_co2_plot, 'co2')

In [None]:
# colormap
ec = ox.plot.get_edge_colors_by_attr(G, attr='co2', cmap='autumn_r', na_color='lightgrey')

In [None]:
fig, ax = ox.plot_graph(G, bgcolor='white', node_size=0.1, edge_linewidth=1, edge_color=ec, show=False)

# colorbar: scale between the smallest and largest CO2 value set on the graph edges
co2_values = [v for v in nx.get_edge_attributes(G, 'co2').values() if v is not None]
norm = mpl.colors.Normalize(vmin=min(co2_values), vmax=max(co2_values))
mappable = plt.cm.ScalarMappable(norm=norm, cmap='autumn_r')
cbar = fig.colorbar(mappable, ax=ax, shrink=0.5)
cbar.set_label('CO2 emission (mg)')
plt.title('CO2 emission per road')
#plt.savefig(path_plots+'OSMnx_CO2.png', bbox_inches ="tight")
plt.show()

#### 4. CO2 plot folium

In [None]:
def plot_df_folium(df_exp, road_net, road_edge_map, value_of_cmap, cmap_name):
    """Generate a dataframe to plot a SUMO network in folium.

    Parameters
    ----------
    df_exp : pandas.DataFrame
        Dataframe with roadname, mean, std.
    road_net : sumolib net
        Road network used to look up the end nodes of each edge and convert them to lon/lat.
    road_edge_map : pandas.DataFrame
        Dataframe with the map roadname - sumo edge_id.
    value_of_cmap : str
        Column whose values drive the colormap.
    cmap_name : str
        Name of the colormap.

    Returns
    -------
    pandas.DataFrame
        ``road_edge_map`` (without edges whose id starts with ':') joined with
        ``df_exp`` on ``road``, plus the columns ``from`` and ``to`` ((lat, lon) of the
        edge end nodes) and ``color`` (hex colour with alpha).
    """
    # merge roadname - edge_id with values to plot
    road_edge_map_no_intern = road_edge_map[~road_edge_map['edge_id'].astype(str).str.startswith(':')]
    df = pd.merge(road_edge_map_no_intern, df_exp, on=['road'])

    # compute the colormap; the lower bound ignores zero values
    values = df[value_of_cmap]
    norm = mpl.colors.Normalize(vmin=values[values != 0].min(), vmax=values.max())
    # matplotlib.cm.get_cmap was removed in Matplotlib 3.9; prefer the colormap
    # registry when the installed version provides it.
    if hasattr(mpl, 'colormaps'):
        cmap = mpl.colormaps[cmap_name]
    else:
        cmap = plt.cm.get_cmap(cmap_name)
    colors = [mpl.colors.to_hex(c, keep_alpha=True) for c in cmap(norm(values))]

    # compute the edge lonlat to be plotted in folium
    list_from = []
    list_to = []

    for edge in df['edge_id']:
        # Compute lat and lon, from and to, for each edge
        sumo_edge = road_net.getEdge(edge)
        coord_node_from = sumo_edge.getFromNode().getCoord()
        coord_node_to = sumo_edge.getToNode().getCoord()
        lon_from, lat_from = road_net.convertXY2LonLat(coord_node_from[0], coord_node_from[1])
        lon_to, lat_to = road_net.convertXY2LonLat(coord_node_to[0], coord_node_to[1])

        # folium expects (lat, lon)
        list_from.append((lat_from, lon_from))
        list_to.append((lat_to, lon_to))

    # update the dataframe
    df['from'] = list_from
    df['to'] = list_to
    df['color'] = colors

    return df

In [None]:
df_plot_co2 = plot_df_folium(df_co2_road, road_network, road_edge_map, 'mean', 'autumn_r')

In [None]:
m = folium.Map(location=[45.469262, 9.182007],
               tiles='CartoDB Positron',
               zoom_start=13,
               attr='CartoDB')
               #png_enabled=True)

In [None]:
for index, row in df_plot_co2.iterrows():
    folium.PolyLine([row['from'], row['to']],
                     tooltip=row['road']+' - CO2: {:.3e}'.format(row['mean']),
                     color=row['color'], weight=1.5).add_to(m)

In [None]:
m

In [None]:
# Export map as png

#import io
#from PIL import Image

#img_data = m._to_png(5)
#img = Image.open(io.BytesIO(img_data))
#img.save(path_plots+'folium_map_CO2.png')

In [None]:
# Export map as html
#m.save(path_plots+'folium_map_CO2.html')

## 3. Select roads to be removed

Create a csv with the road to be removed and the associated edge_id

In [None]:
removed_path = '../data/simulations/Milano_big'

In [None]:
df_co2_len.sort_values(by=['mean_len', 'std_len'], ascending=False, inplace=True)

Create a dictionary that maps each road-length bin to the roads falling in that bin. It is used to draw random roads from the same bin as each removed road.

In [None]:
road_len = road_edge_map.groupby(by=['road']).agg({'edge_len': 'sum'}).reset_index()

In [None]:
road_len.shape

In [None]:
plt.figure(figsize=(10, 3))
plt.hist(road_len[road_len['edge_len']<10000]['edge_len'], bins=100)
plt.title('Road histogram')
plt.xlabel('Road length (m)')
plt.ylabel('#road')
plt.xticks(np.arange(0, 10000, step=1000))
plt.show()

In [None]:
# double grain bin
#bins = list(np.arange(0, 1000, 200))+list(np.arange(1000, 9000, 500))
bins = list(np.arange(0, 1000, 100))+list(np.arange(1000, 2000, 200))+list(np.arange(2000, 90000, 2000))
road_len['bin'], b = pd.cut(x=road_len['edge_len'], bins=bins, retbins=True, include_lowest=True)

In [None]:
road_len_dict = {(k.left, k.right): [] for k in road_len['bin'].unique()}

In [None]:
for idx, row in road_len.iterrows():
    b = row['bin']
    road_len_dict[(b.left, b.right)].append(row['road'])

In [None]:
# check if all roads are in the dictionary
t = 0
for k, v in road_len_dict.items():
    t+=len(v)
    
t

In [None]:
def road_to_remove(df, flag, value, rand=False, eps=0.05, max_tries=100000):
    """Select the top roads of ``df`` and, optionally, a random set of similar total length.

    ``df`` must already be sorted by the measure of interest, because the first
    rows are the ones selected.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the columns ``road`` and ``edge_len``.
    flag : {'perc', 'abs'}
        With 'perc', ``value`` is the percentage of rows to select; with 'abs'
        it is the number of rows.
    value : int or float
        Amount to select, interpreted according to ``flag``.
    rand : bool, default False
        If True, also return a random set of distinct rows whose total length
        lies in ``[len_top - len_top*eps, len_top + len_top*eps]``.
    eps : float, default 0.05
        Relative tolerance on the total length of the random set.
    max_tries : int, default 100000
        Maximum number of random draws before giving up.

    Returns
    -------
    numpy.ndarray or tuple of numpy.ndarray
        The top road names, or ``(top roads, random roads)`` when ``rand`` is True.

    Raises
    ------
    ValueError
        If ``flag`` is neither 'perc' nor 'abs'.
    RuntimeError
        If no random set within the tolerance is found in ``max_tries`` draws.
    """
    n = df.shape[0]
    if flag == 'perc':
        selected = round(n*value/100)
    elif flag == 'abs':
        selected = value
    else:
        raise ValueError("flag must be 'perc' or 'abs', got {!r}".format(flag))

    top_road = np.array(df['road'].iloc[:selected])

    if not rand:
        return top_road

    edge_len = df[df['road'].isin(top_road)]['edge_len'].sum()
    lower = edge_len-(edge_len*eps)
    upper = edge_len+(edge_len*eps)

    lengths = df['edge_len'].to_numpy()
    # never ask for more rows than were actually selected as top roads
    sample_size = len(top_road)
    for _ in range(max_tries):
        # Positions are drawn over the whole frame without replacement, so every row
        # (including the last one) can be picked and no road is picked twice.
        index_rand = np.random.choice(n, size=sample_size, replace=False)
        edge_len_rand = lengths[index_rand].sum()
        if lower <= edge_len_rand <= upper:
            # positional lookup: works whatever the index labels of df are
            return top_road, np.array(df['road'].iloc[index_rand])

    raise RuntimeError('no random selection with total length in [{}, {}] found after {} tries'
                       .format(lower, upper, max_tries))

In [None]:
def road_to_remove_v2(df, value, rand_dict, bins, n_rand=1):
    """Select the top ``value`` roads and draw random roads from the same length bins.

    ``df`` must already be sorted, because the first ``value`` rows are the ones
    selected as top roads.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the columns ``road`` and ``edge_len``.
    value : int
        Number of top roads to select.
    rand_dict : dict
        Maps a bin, given as the tuple ``(left, right)``, to the list of roads
        from which the random road is extracted.
    bins : list
        Bin edges used to create ``rand_dict``.
    n_rand : int, default 1
        Number of random extractions for the top roads.

    Returns
    -------
    tuple
        ``(top roads as numpy.ndarray, list of n_rand lists of random roads)``.
        Within one extraction no road is drawn twice.
    """
    top_road = np.array(df['road'].iloc[:value])

    def _candidate_pool(road):
        # roads sharing the length bin of `road`
        lengths = df[df['road']==road]['edge_len'].values
        interval = pd.cut(lengths, bins, include_lowest=True)[0]
        return rand_dict[(interval.left, interval.right)]

    # the pool of each top road does not depend on the extraction, so look it up once
    pools = [_candidate_pool(road) for road in top_road] if n_rand > 0 else []

    rand_road_list = []
    for _ in range(n_rand):
        drawn = []
        for pool in pools:
            candidate = np.random.choice(pool)
            # redraw until the road is new for this extraction
            while candidate in drawn:
                candidate = np.random.choice(pool)
            drawn.append(candidate)
        rand_road_list.append(drawn)

    return top_road , rand_road_list

In [None]:
#road_tbr, road_tbr_rand = road_to_remove(df_co2_len, 'abs', 15, rand=True, eps=0.05)

In [None]:
topk = 90

In [None]:
road_tbr, road_tbr_rand_list = road_to_remove_v2(df_co2_len, topk, road_len_dict, bins, n_rand=5)

In [None]:
print('Top roads: '+str(len(road_tbr)))
for i in range(len(road_tbr_rand_list)):
    print('Random roads '+str(i)+': '+str(len(road_tbr_rand_list[i])))

In [None]:
print('Top removed meters: '+str(df_co2_len[df_co2_len['road'].isin(road_tbr)]['edge_len'].sum()))
for i in range(len(road_tbr_rand_list)):
    print('Rand removed meters '+str(i)+': '+str(df_co2_len[df_co2_len['road'].isin(road_tbr_rand_list[i])]['edge_len'].sum()))

In [None]:
road_edge_map = pd.read_csv(path_road_edge_mapping)

In [None]:
# Create csv with roadname of the road to be removed and the list fo edges associated to each edge

road_edge_tbr = road_edge_map[road_edge_map['road'].isin(road_tbr)].groupby('road').agg({'edge_id': lambda x: x.tolist()})
road_edge_tbr.reset_index(inplace=True)

In [None]:
road_edge_tbr.to_csv('../data/simulations/Milano_big/top'+str(topk)+'/Milano_big_road_top'+str(topk)+'.csv', index=False)

In [None]:
for i in range(len(road_tbr_rand_list)):
    road_edge_tbr_rand = road_edge_map[road_edge_map['road'].isin(road_tbr_rand_list[i])].groupby('road').agg({'edge_id': lambda x: x.tolist()})
    road_edge_tbr_rand.reset_index(inplace=True)
    road_edge_tbr_rand.to_csv('../data/simulations/Milano_big/top'+str(topk)+'/rand/Milano_big_road_rand'+str(topk)+'_'+str(i)+'.csv', index=False)

#### Test removed roads

In [None]:
#road_network = sumolib.net.readNet(road_network_path, withInternal=True)

In [None]:
#road_edge_tbr_tmp = road_edge_map[road_edge_map['road'].isin(road_tbr)]#.groupby('road').agg({'edge_id': lambda x: x.tolist()})
#road_edge_tbr_tmp.reset_index(inplace=True)

In [None]:
#len(road_edge_tbr_tmp)

In [None]:
#m = folium.Map(location=[45.469262, 9.182007],
#               tiles='CartoDB Positron',
#               zoom_start=11,
#               attr='CartoDB')
               #png_enabled=True)

In [None]:
#for index, row in road_edge_tbr_tmp.iterrows():
#    if not row['edge_id'].startswith(':'):
#        edge = row['edge_id']
        # Compute lat and lon, from and to, for each edge
#        coord_node_from = road_network.getEdge(edge).getFromNode().getCoord()
#        coord_node_to = road_network.getEdge(edge).getToNode().getCoord()
#        lon_from, lat_from = road_network.convertXY2LonLat(coord_node_from[0], coord_node_from[1])
#        lon_to, lat_to = road_network.convertXY2LonLat(coord_node_to[0], coord_node_to[1])

#        folium.PolyLine([(lat_from, lon_from), (lat_to, lon_to)],
#                         tooltip=row['road']+' - '+row['edge_id'],
#                         color='red', weight=3).add_to(m)

In [None]:
#m

In [None]:
#road_edge_map[road_edge_map['edge_id'].str.startswith('1022350')]

In [None]:
#road_edge_map[road_edge_map['road'].str.startswith('Via Cartesio')]

In [None]:
#for idx, row in road_edge_map[road_edge_map['road'].str.startswith('Via Mario del')].iterrows():
#    road_edge_map.loc[road_edge_map['edge_id']==row['edge_id'], 'road'] = 'Via Mario Del Monaco'

In [None]:
#road_edge_map.loc[road_edge_map['edge_id']=='-62257426', 'road'] = 'Via Sempione, Rho'

In [None]:
#road_edge_map.to_csv(path_road_edge_mapping, index=False)