# Computation of K_road, K_source, betweenness centrality

This notebook contains the code to compute K_road, K_source, and the betweenness centrality of the road network. It also contains an analysis of the correlations between all the metrics and visualizations of the different types of roads in the road network, clustered by K_road and betweenness.

In [None]:
import sumolib
import pandas as pd
import numpy as np
import json
import os
from ast import literal_eval
import networkx as nx
from networkx.algorithms import bipartite
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from matplotlib import colors as mcolors
import seaborn as sns
from distfit import distfit
from kneed import KneeLocator
import scipy
import osmnx as ox
from shapely.geometry import LineString, Polygon
import igraph as ig
from result_utils import *
import html
from skmob.tessellation import tilers
from skmob.utils import constants
import geopandas as gpd
import folium

#### parameters

In [None]:
# Identifiers of the city and of the experiment folder; every path below is derived from them.
city = 'Milano_big'
fold_prefix = 'baseline'

# SUMO road network file
road_network_path = f"../data/road_net/{city}/{city}_road_network.net.xml"

# CSV with the road-edge mapping
path_road_edge_mapping = f'../data/road_net/{city}/{city}_road_edge_map.csv'

# JSON with the tile-edge mapping
dict_tile_edges_path = f"../data/OD_matrices/{city}_tile_edges_h3_8.json"

# folder with the routed paths of each experiment
routed_paths_folder = f'../data/simulations/{city}/{fold_prefix}/routed_paths/'

# folder with the experiment results
folder_experiments = f'../data/simulations/{city}/{fold_prefix}/sumo_out/'

# shapefile of the city
shapefile_path = f'../data/shapes/{city}_shape.geojson'

# output folders (results as JSON, plots as PNG)
path_results = f"../data/simulations/{city}/{fold_prefix}/results/"
path_plots = f"../data/simulations/{city}/{fold_prefix}/plots/"

## 1. Compute MDS and K_road

Load tile_id - list of edges (without internal)

In [None]:
# Load the tile -> edges mapping (JSON object: tile id -> list of edge ids).
with open(dict_tile_edges_path, 'r') as f:
    dict_tile_edges = json.load(f)

In [None]:
# Invert the tile -> edges mapping so that each edge id points to its tile.
# If an edge is listed under several tiles, the last tile encountered wins.
dict_edge_tile = {
    edge_id: tile_id
    for tile_id, tile_edges in dict_tile_edges.items()
    for edge_id in tile_edges
}

Compute dictionary MDS

In [None]:
def compute_driver_sources(rou_path, edge_tile_dict, origin=True):
    """Count, for every edge, how many routed paths crossing it come from each tile.

    Every route read from the file is assigned a single tile: the tile of its
    first edge when ``origin`` is True (driver sources), or the tile of its last
    edge when ``origin`` is False (driver sinks). That tile's counter is then
    incremented once for each edge occurrence in the route.

    Parameters
    ----------
    rou_path : str
        Path of the routed paths file (.rou.xml); its 'route' elements are parsed
        with ``sumolib.xml.parse``.
    edge_tile_dict : dict
        Mapping edge_id -> tile_id. It must contain the first (or last) edge of
        every route, otherwise a KeyError is raised.
    origin : bool
        True to use the origin of the paths, False to use the destination.

    Returns
    -------
    dict
        Mapping edge_id -> {tile_id: number of paths}.
    """
    # Position of the edge that decides the tile of the whole route.
    reference_pos = 0 if origin else -1

    ds_dict = {}
    for route in list(sumolib.xml.parse(rou_path, 'route')):
        route_edges = route.edges.split(' ')
        tile = edge_tile_dict[route_edges[reference_pos]]
        for edge in route_edges:
            tile_counts = ds_dict.setdefault(edge, {})
            tile_counts[tile] = tile_counts.get(tile, 0) + 1

    return ds_dict

In [None]:
def compute_MDS(ds_dict, traffic_threshold):
    """Select, for each edge, its major driver sources (MDS).

    The driver sources of an edge are sorted by decreasing flow and added one at
    a time while the flow accumulated so far is lower than or equal to
    ``traffic_threshold`` times the total flow of the edge.

    Parameters
    ----------
    ds_dict : dict
        Mapping edge_id -> {tile_id: flow}, as returned by compute_driver_sources.
    traffic_threshold : float
        Fraction of the edge flow that the selected sources have to cover (e.g. 0.8).

    Returns
    -------
    dict
        Mapping edge_id -> {tile_id: flow} restricted to the major driver sources.
        An edge without driver sources maps to an empty dictionary.
    """
    mds_dict = {}

    for edge, ds in ds_dict.items():
        # driver sources sorted by flow
        ds_list = sorted(ds.items(), key=lambda x: x[1], reverse=True)
        flow_target = sum(flow for _, flow in ds_list)*traffic_threshold

        mds_edge_dict = {}
        tmp_sum = 0
        # Iterating over the list (instead of indexing with a free counter) bounds
        # the selection: a threshold >= 1 or an edge without sources no longer
        # runs past the end of ds_list.
        for tile, flow in ds_list:
            # NOTE: the comparison is inclusive, so a source is still added when the
            # accumulated flow equals the target exactly.
            if tmp_sum > flow_target:
                break
            mds_edge_dict[tile] = flow
            tmp_sum += flow

        mds_dict[edge] = mds_edge_dict

    return mds_dict
        

In [None]:
def mds2graph(mds):
    """Build the bipartite graph of the major driver sources.

    Edge ids (keys of ``mds``) become nodes with attribute ``bipartite=0``, tile
    ids become nodes with ``bipartite=1``, and every edge id is linked to each of
    its major driver source tiles.
    """
    B = nx.Graph()
    for road_edge, sources in mds.items():
        tiles = list(sources.keys())
        B.add_node(road_edge, bipartite=0)
        B.add_nodes_from(tiles, bipartite=1)
        B.add_edges_from((road_edge, tile) for tile in tiles)

    return B

In [None]:
#B = mds2graph(mds)
#nx.is_bipartite(B)

In [None]:
def plot_mds_graph(B):
    """Draw the bipartite graph B with a graphviz layout.

    Nodes with ``bipartite=0`` are drawn in blue, nodes with ``bipartite=1`` in red.
    """
    palette = {0: 'tab:blue', 1: 'tab:red'}
    layout = nx.nx_agraph.graphviz_layout(B)
    node_colors = [palette[attrs['bipartite']] for _, attrs in B.nodes(data=True)]
    nx.draw_networkx(B, layout, with_labels=False, node_size=1, node_color=node_colors)

In [None]:
# 7 min
#%%time
#
#plot_mds_graph(B)

In [None]:
def compute_k_road(mds):
    """Return, for each edge, the number of its major driver sources (K_road)."""
    return {edge: len(sources) for edge, sources in mds.items()}

In [None]:
def compute_k_source(mds):
    """Return, for each tile, the number of edges it is a major driver source of (K_source)."""
    k_source = {}
    for sources in mds.values():
        for tile in sources:
            k_source[tile] = k_source.get(tile, 0) + 1

    return k_source

In [None]:
def append_dictionaries(dict_in, dict_out):
    """Append each value of ``dict_in`` to the list stored under the same key in ``dict_out``.

    Keys missing from ``dict_out`` are created with a one-element list.
    ``dict_out`` is modified in place and also returned.
    """
    for key, value in dict_in.items():
        dict_out.setdefault(key, []).append(value)

    return dict_out

In [None]:
def compute_mean_k_road_and_source(rou_path_folder, edge_tile_dict, origin=True, threshold=0.8):
    """Average K_road and K_source over all the routed-path files of a folder.

    For every file in ``rou_path_folder`` the driver sources, the major driver
    sources, K_road and K_source are computed; the per-file values are then summed
    and divided by the number of files. An edge (or tile) missing from one file
    therefore contributes 0 for that file.

    Parameters
    ----------
    rou_path_folder : str
        Folder containing one routed paths file (.rou.xml) per experiment.
        Sub-directories (e.g. ``.ipynb_checkpoints``) are ignored.
    edge_tile_dict : dict
        Mapping edge_id -> tile_id.
    origin : bool
        True to use the origin tile of each path, False to use the destination tile.
    threshold : float
        Fraction of the edge flow covered by the major driver sources.

    Returns
    -------
    (dict, dict)
        ``k_road_mean`` (edge_id -> mean K_road) and ``k_source_mean``
        (tile_id -> mean K_source).
    """
    # List the folder once, in a deterministic order; the same list is used for the
    # loop and for the number of experiments, so the two can never disagree.
    rou_files = sorted(
        name for name in os.listdir(rou_path_folder)
        if os.path.isfile(os.path.join(rou_path_folder, name))
    )

    k_road_lists = {}
    k_source_lists = {}
    for rou_file in rou_files:
        # os.path.join also works when the folder is given without a trailing slash.
        ds = compute_driver_sources(os.path.join(rou_path_folder, rou_file), edge_tile_dict, origin=origin)
        mds = compute_MDS(ds, threshold)
        append_dictionaries(compute_k_road(mds), k_road_lists)
        append_dictionaries(compute_k_source(mds), k_source_lists)

    n_exp = len(rou_files)
    k_road_mean = {edge: np.sum(values)/n_exp for edge, values in k_road_lists.items()}
    k_source_mean = {tile: np.sum(values)/n_exp for tile, values in k_source_lists.items()}

    return k_road_mean, k_source_mean

K_road origin

In [None]:
# Mean K_road / K_source over all routed-path files, using the origin tile of each path.
k_road_o, k_source_o = compute_mean_k_road_and_source(routed_paths_folder, dict_edge_tile, origin=True, threshold=0.8)

In [None]:
# Store the origin-based metrics as JSON files in the results folder.
for file_prefix, k_values in (('kroad_O_', k_road_o), ('ksource_O_', k_source_o)):
    with open(path_results+file_prefix+fold_prefix+'.json', 'w') as outf:
        json.dump(k_values, outf)

K_road destination

In [None]:
# Mean K_road / K_source over all routed-path files, using the destination tile of each path.
k_road_d, k_source_d = compute_mean_k_road_and_source(routed_paths_folder, dict_edge_tile, origin=False, threshold=0.8)

In [None]:
# Store the destination-based metrics as JSON files in the results folder.
for file_prefix, k_values in (('kroad_D_', k_road_d), ('ksource_D_', k_source_d)):
    with open(path_results+file_prefix+fold_prefix+'.json', 'w') as outf:
        json.dump(k_values, outf)

In [None]:
fig, axs = plt.subplots(3, 2, figsize=(10,6), gridspec_kw={"height_ratios":[0.01,1,1]})
# The title follows the `city` parameter instead of a hard-coded city name.
fig.suptitle(city+' k_road - k_source histograms', fontweight='bold')

# One column per metric: (header, origin values, destination values, x label, y label, color).
# k_road has one value per edge while k_source has one value per tile, so the
# k_source histograms count tiles, not edges.
hist_columns = [
    ('K_road', k_road_o, k_road_d, 'k_road', '#edges', None),
    ('K_source', k_source_o, k_source_d, 'k_source', '#tiles', 'tab:orange'),
]

for col, (header, values_o, values_d, xlabel, ylabel, color) in enumerate(hist_columns):
    # The thin first row only carries the column header.
    axs[0,col].axis('off')
    axs[0,col].set_title(header)

    axs[1,col].hist(list(values_o.values()), bins=30, color=color)
    axs[1,col].set_title('origin', fontsize=10)
    axs[1,col].set_ylabel(ylabel)

    axs[2,col].hist(list(values_d.values()), bins=30, color=color)
    axs[2,col].set_title('destination', fontsize=10)
    axs[2,col].set_ylabel(ylabel)
    axs[2,col].set_xlabel(xlabel)

fig.subplots_adjust(hspace=0.4)
plt.savefig(path_plots+'k_road_hist.png', bbox_inches ="tight", dpi=150)
plt.show()

## 2. Aggregate K_road

Aggregation of the K_road at road level

In [None]:
# Road-edge mapping; the columns 'edge_id', 'road' and 'edge_len' are used below.
road_edge_map = pd.read_csv(path_road_edge_mapping)

In [None]:
# Edge-level K_road tables (origin and destination), one row per edge.
k_road_o_df = pd.DataFrame({
    'edge_id': list(k_road_o.keys()),
    'k_road_o': list(k_road_o.values()),
})

k_road_d_df = pd.DataFrame({
    'edge_id': list(k_road_d.keys()),
    'k_road_d': list(k_road_d.values()),
})

# Keep only the edges that have both an origin and a destination value.
k_road_od_df = pd.merge(k_road_o_df, k_road_d_df, on='edge_id')

In [None]:
# K_road aggregated at road level: average over the edges of each road, weighted by edge length.

k_road_edge_df = pd.merge(k_road_od_df, road_edge_map, on='edge_id', how='left')
# The weights are looked up through the index of each group, so they stay aligned with the values.
weighted_avg = lambda x: np.average(x, weights=k_road_edge_df.loc[x.index, "edge_len"])
k_road_df = (
    k_road_edge_df
    .groupby(by=['road'])
    .agg({'k_road_o': weighted_avg, 'k_road_d': weighted_avg})
    .reset_index()
)

In [None]:
# Tile-level K_source tables (origin and destination), one row per tile.
k_source_o_df = pd.DataFrame({
    'tile_id': list(k_source_o.keys()),
    'k_source_o': list(k_source_o.values()),
})

k_source_d_df = pd.DataFrame({
    'tile_id': list(k_source_d.keys()),
    'k_source_d': list(k_source_d.values()),
})

# Keep only the tiles that have both an origin and a destination value.
k_source_df = pd.merge(k_source_o_df, k_source_d_df, on='tile_id')

In [None]:
fig, axs = plt.subplots(1, 2, figsize=(15,4))
# The title follows the `city` parameter instead of a hard-coded city name.
fig.suptitle(city+' k_road histograms', fontweight='bold')

# (axis, column, title, upper bound of the x ticks); the tick bounds are tuned on the Milano_big data.
panels = [
    (axs[0], 'k_road_o', 'k_road origin', 48),
    (axs[1], 'k_road_d', 'k_road destination', 30),
]
for ax, column, title, tick_end in panels:
    ax.hist(k_road_df[column], bins=40)
    ax.set_title(title, fontsize=10)
    ax.set_xticks(np.arange(0,tick_end,2))
    ax.set_xlabel('k_road')
    ax.set_ylabel('#roads')

plt.savefig(path_plots+'k_road_agg_hist.png', bbox_inches ="tight", dpi=150)
plt.show()

In [None]:
fig, axs = plt.subplots(3, 2, figsize=(15,6), gridspec_kw={"height_ratios":[0.02,1,1]})
# The title follows the `city` parameter instead of a hard-coded city name.
fig.suptitle(city+' k_road - k_source histograms', fontweight='bold')

# Left column: road-level K_road, one observation per road.
axs[1,0].hist(k_road_df['k_road_o'], bins=60)
axs[1,0].set_title('origin', fontsize=10)
axs[1,0].set_xticks(np.arange(0,50,2))
axs[1,0].set_ylabel('#roads')

axs[2,0].hist(k_road_df['k_road_d'], bins=60)
axs[2,0].set_title('destination', fontsize=10)
axs[2,0].set_xticks(np.arange(0,30,1))
axs[2,0].set_ylabel('#roads')
axs[2,0].set_xlabel('k_road')

# Right column: K_source, one observation per tile, so the histograms count tiles.
# The tick bounds are tuned on the Milano_big data.
for row, column, title, tick_end in [(1, 'k_source_o', 'origin', 7600), (2, 'k_source_d', 'destination', 7100)]:
    axs[row,1].hist(k_source_df[column], bins=80, color='tab:orange')
    axs[row,1].set_title(title, fontsize=10)
    axs[row,1].set_xticks(np.arange(0,tick_end,500))
    axs[row,1].tick_params(axis='x', labelsize=8)
    axs[row,1].set_ylabel('#tiles')
axs[2,1].set_xlabel('k_source')

# The thin first row only carries the column headers.
axs[0,0].axis('off')
axs[0,0].set_title('K_road')
axs[0,1].axis('off')
axs[0,1].set_title('K_source')

fig.subplots_adjust(hspace=0.4)
plt.savefig(path_plots+'k_road_hist2.png', bbox_inches ="tight", dpi=150)
plt.show()

Fit the distribution of K_road

In [None]:
# Fit the 'popular' set of candidate distributions to the road-level K_road (origin) values.
dfit = distfit(distr='popular')
results = dfit.fit_transform(k_road_df['k_road_o'], verbose=0)

In [None]:
# Plot the fit of the K_road (origin) distribution and save the figure.
dfit.plot(n_top=1, figsize=(9,3), fontsize=10,
          pdf_properties={'color': 'tab:orange', 'linewidth': 2},
          emp_properties=None,
          cii_properties={'linewidth': 1})
#plt.axvline(np.percentile(k_road_df['k_road_o'], 75))
plt.savefig(path_plots+'k_road_o_dist.png', bbox_inches ="tight", dpi=150)

In [None]:
# Fit the 'popular' set of candidate distributions to the road-level K_road (destination) values.
# `dfit` and `results` from the origin fit are overwritten here.
dfit = distfit(distr='popular')
results = dfit.fit_transform(k_road_df['k_road_d'], verbose=0)

In [None]:
# Plot the fit of the K_road (destination) distribution and save the figure.
dfit.plot(n_top=1, figsize=(9,3), fontsize=10,
          pdf_properties={'color': 'tab:orange', 'linewidth': 2},
          emp_properties=None,
          cii_properties={'linewidth': 1})
#plt.axvline(np.percentile(k_road_df['k_road_d'], 75))
plt.savefig(path_plots+'k_road_d_dist.png', bbox_inches ="tight", dpi=150)

## 3. Betweenness centrality

Compute the betweenness centrality and aggregate it at road level

In [None]:
# Read the SUMO road network, excluding internal edges (withInternal=False).
road_network = sumolib.net.readNet(road_network_path, withInternal=False)

In [None]:
def from_sumo_to_igraph_network(road_network):
    """
    Converts a SUMO road network to a directed igraph network.

    A SUMO node whose incoming edges are all connected to all its outgoing edges
    ("fully connected") becomes a single vertex. Any other node is split into one
    vertex per incoming edge (``<edge>_to``) and one vertex per outgoing edge
    (``<edge>_from``); each allowed connection is then added as an extra graph edge
    between those vertices, with id "connection", length 1e-10, speed_limit -1 and
    traveltime 0.

    Parameters:
    -----------
    road_network : SUMO road network
        A SUMO road network object.

    Returns:
    --------
    G : igraph graph
        An igraph graph representing the road network. Its edges carry the
        attributes "id", "length", "speed_limit" and "traveltime"; the graph
        attribute "edge_sumo_ig" maps edge ids to igraph edge indices.
    """
    attr_names = ("id", "length", "speed_limit", "traveltime")

    nodes_dict = {}
    edges_dict = {}
    connections_list = []
    conn_attr = {name: [] for name in attr_names}

    def attach_endpoint(sumo_edge, endpoint, vertex_id):
        # Record the vertex the edge starts from ("from") or arrives to ("to").
        # The first time an edge is seen, its id, length, speed limit and
        # travel time (length/speed) are stored as well.
        edge_id = sumo_edge.getID()
        if edge_id in edges_dict:
            edges_dict[edge_id][endpoint] = vertex_id
            return
        edges_dict[edge_id] = {
            endpoint: vertex_id,
            "id": edge_id,
            "length": sumo_edge.getLength(),
            "speed_limit": sumo_edge.getSpeed(),
            "traveltime": sumo_edge.getLength()/sumo_edge.getSpeed(),
        }

    for node in road_network.getNodes():
        incoming = list(node.getIncoming())
        outgoing = list(node.getOutgoing())

        # Distinct (from edge, to edge) pairs allowed at this node.
        allowed_pairs = {
            (conn.getFrom().getID(), conn.getTo().getID())
            for conn in node.getConnections()
        }

        if len(incoming)*len(outgoing) == len(allowed_pairs):
            # Fully connected node: keep it as a single vertex.
            junction_id = node.getID()
            nodes_dict[junction_id] = {"in": incoming, "out": outgoing, "fc": 1}

            for sumo_edge in incoming:
                attach_endpoint(sumo_edge, "to", junction_id)
            for sumo_edge in outgoing:
                attach_endpoint(sumo_edge, "from", junction_id)
        else:
            # Partially connected node: one vertex per incoming/outgoing edge.
            for sumo_edge in incoming:
                split_id = sumo_edge.getID()+"_to"
                nodes_dict[split_id] = {"in": [sumo_edge.getID()], "fc": 0}
                attach_endpoint(sumo_edge, "to", split_id)
            for sumo_edge in outgoing:
                split_id = sumo_edge.getID()+"_from"
                nodes_dict[split_id] = {"out": [sumo_edge.getID()], "fc": 0}
                attach_endpoint(sumo_edge, "from", split_id)

            # Link the split vertices only where a connection is allowed.
            for conn in node.getConnections():
                connections_list.append([conn.getFrom().getID()+"_to", conn.getTo().getID()+"_from"])
                conn_attr["id"].append("connection")
                conn_attr["length"].append(1e-10)
                conn_attr["speed_limit"].append(-1)
                conn_attr["traveltime"].append(0)

    # Road edges first (in order of first appearance), then the connection edges.
    edges_list = [(record["from"], record["to"]) for record in edges_dict.values()]
    edges_attr = {name: [record[name] for record in edges_dict.values()] for name in attr_names}

    G_igraph_new = ig.Graph(directed=True)
    G_igraph_new.add_vertices(list(nodes_dict.keys()))
    G_igraph_new.add_edges(edges_list, edges_attr)
    G_igraph_new.add_edges(connections_list, conn_attr)

    G_igraph_new["edge_sumo_ig"] = {e["id"]: e.index for e in G_igraph_new.es}

    return G_igraph_new

In [None]:
#G_igraph_conn = from_sumo_to_igraph_network_v2(road_network)

In [None]:
#G_igraph_conn.ecount()

In [None]:
#%%time
# 1h15min 146k edges
# 3min 60k edges
#bc_conn = G_igraph_conn.edge_betweenness(weights='length')

In [None]:
#edge_bc_igraph_conn = {}

#for p in list(zip(G_igraph_conn.es['id'], bc_conn)):
#    if p[0] != 'connection':
#        edge_bc_igraph_conn[p[0]] = p[1]

In [None]:
#len(edge_bc_igraph_conn)

In [None]:
#with open(path_results+'bc_igraph_'+fold_prefix+'.json', 'w') as outf:
#    json.dump(edge_bc_igraph_conn, outf)

In [None]:
#node_sample = np.random.choice(G.nodes, 20000)
#G_sample = G.subgraph(node_sample)

In [None]:
#%%time
#pos = nx.nx_agraph.graphviz_layout(G_sample)
#nx.draw_networkx(G_sample, pos,
#                 with_labels=False,
#                 node_size=1)

In [None]:
#%%time
# 10min 10k edges
# 1h35min 46k edges
#bc = nx.edge_betweenness_centrality(G, weight='length')

In [None]:
#edge_bc_map = {}
#for k,v in bc.items():
    # k[2] == edge_id
#    edge_bc_map[k[2]] = v

In [None]:
#with open(path_results+'bc_'+fold_prefix+'.json', 'w') as outf:
#    json.dump(edge_bc_map, outf)

In [None]:
#G = nx.MultiDiGraph()

#for edge in road_network.getEdges():
#    node_from = edge.getFromNode().getID()
#    node_to = edge.getToNode().getID()
#    geom = [list(x) for x in edge.getShape()]
#    G.add_edge(node_from, node_to, key=edge.getID(), length=edge.getLength(), geometry=LineString(geom))

#print(G.number_of_edges())

In [None]:
#G_igraph = ig.Graph.from_networkx(G)
#print(G_igraph.ecount())

In [None]:
#%%time
# 2min 46k edges
#bc = G_igraph.edge_betweenness(weights='length')

In [None]:
#edge_bc_igraph = {}

#for p in list(zip(G_igraph.es['_nx_multiedge_key'], bc)):
#    edge_bc_igraph[p[0]] = p[1]
    
#print(len(edge_bc_igraph))

In [None]:
#with open(path_results+'bc_'+fold_prefix+'.json', 'r') as f:
#    edge_bc_map = json.load(f)

In [None]:
# Load the edge betweenness saved in the results folder (edge id -> betweenness).
# The file is read, not recomputed: the cells that produce it are commented out above.
with open(path_results+'bc_igraph_'+fold_prefix+'.json', 'r') as f:
    edge_bc_map = json.load(f)

In [None]:
# Min-max normalization of the betweenness values
val = list(edge_bc_map.values())
max_val = np.max(val)
min_val = np.min(val)
edge_bc_map = {
    edge_id: (bc - min_val)/(max_val - min_val)
    for edge_id, bc in edge_bc_map.items()
}

In [None]:
#diff_bc = {}
#for k,v in edge_bc_map_igraph.items():
#    diff_bc[k] = v - edge_bc_igraph[k]

In [None]:
#plt.bar(np.arange(len(diff_bc)), diff_bc.values())
#plt.show()

In [None]:
fig, ax = plt.subplots(figsize=(8,3))

# Histogram of the normalized edge-level betweenness.
ax.hist(np.array(list(edge_bc_map.values())), bins=100)
# The title follows the `city` parameter instead of a hard-coded city name.
ax.set_title(city+' betweenness histogram')
ax.set_xticks(np.arange(0, 1.1, 0.1))
#ax.tick_params(axis='x', labelsize=9)
ax.set_xlabel('bc')
ax.set_ylabel('#edges')

plt.savefig(path_plots+'bc_hist.png', bbox_inches ="tight", dpi=150)
plt.show()

In [None]:
# Edge-level betweenness table, one row per edge.
bc_edge_df = pd.DataFrame({
    'edge_id': list(edge_bc_map.keys()),
    'bc': list(edge_bc_map.values()),
})

In [None]:
# Betweenness aggregated at road level: average over the edges of each road, weighted by edge length.
bc_road_edge_df = pd.merge(bc_edge_df, road_edge_map, on='edge_id', how='left')
# The weights are looked up through the index of each group, so they stay aligned with the values.
weighted_avg = lambda x: np.average(x, weights=bc_road_edge_df.loc[x.index, "edge_len"])
bc_df = (
    bc_road_edge_df
    .groupby(by=['road'])
    .agg({'bc': weighted_avg})
    .reset_index()
)

In [None]:
# Preview of the road-level betweenness table.
bc_df.head()

In [None]:
fig, ax = plt.subplots(figsize=(8,3))

# Histogram of the road-level (length-weighted) betweenness.
ax.hist(bc_df['bc'], bins=100)
# The title follows the `city` parameter instead of a hard-coded city name.
ax.set_title(city+' betweenness histogram')
ax.set_xticks(np.arange(0, 1.01, 0.1))
#ax.tick_params(axis='x', labelsize=9)
ax.set_xlabel('bc')
ax.set_ylabel('#road')

plt.savefig(path_plots+'bc_hist2.png', bbox_inches ="tight", dpi=150)
plt.show()

Fit the distribution of betweenness centrality

In [None]:
# Fit the 'popular' set of candidate distributions to the road-level betweenness values.
# `results` is read again below to rebuild the fitted distribution.
dfit = distfit(distr='popular')
results = dfit.fit_transform(bc_df['bc'], verbose=0)

In [None]:
# Plot the fit of the betweenness distribution and save the figure.
dfit.plot(n_top=1, figsize=(9,3), fontsize=10,
          pdf_properties={'color': 'tab:orange', 'linewidth': 1},
          emp_properties=None,
          cii_properties={'linewidth': 1})
#plt.axvline(np.percentile(bc_df['bc'], 65))
plt.savefig(path_plots+'bc_dist.png', bbox_inches ="tight", dpi=150)

In [None]:
# Rebuild the fitted distribution as a frozen scipy generalized extreme value distribution,
# using the shape, loc and scale stored in the distfit results.
# NOTE(review): genextreme is hard-coded; this presumably matches the best model that distfit
# selected for bc_df['bc'] on this dataset. Confirm before running on other data.
ln = scipy.stats.genextreme(results['model']['params'][0], loc=results['model']['loc'], scale=results['model']['scale'])

In [None]:
# Locate the knee of the fitted pdf (convex, decreasing curve) on 5000 points in [0.0002, 0.7].
x = np.linspace(0.0002, 0.7, 5000)
#plt.plot(x, ln.pdf(x))
kneedle = KneeLocator(x, ln.pdf(x), curve='convex', direction='decreasing')
kneedle.plot_knee()

In [None]:
# Drop the edge-level tables and the fitting objects; only the road-level tables
# (bc_df, k_road_df) and k_source_df are kept. road_edge_map is read again in section 4.1.
del bc_edge_df, dfit, edge_bc_map, kneedle, ln, results, road_edge_map, val, x
del k_road_d, k_road_d_df, k_road_o, k_road_o_df, k_road_od_df
del k_source_d, k_source_d_df, k_source_o, k_source_o_df

## 4. Correlations

Plot the correlation between K_road, betweenness centrality, CO2 emissions

In [None]:
# One row per road with both betweenness and K_road; roads missing from either table are dropped (inner merge).
corr_df = pd.merge(bc_df, k_road_df, on='road')

In [None]:
# Preview of the road-level table with betweenness and K_road.
corr_df.head()

In [None]:
fig, axs = plt.subplots(2, 1, figsize=(10,10))
# The title follows the `city` parameter; the "betwenness" typo is fixed.
fig.suptitle(city+' k_road - betweenness correlation', fontweight='bold', y=0.93)

# (axis, K_road column, title, upper bound of the x ticks); the tick bounds are tuned on the Milano_big data.
panels = [
    (axs[0], 'k_road_o', 'k_road origin', 48),
    (axs[1], 'k_road_d', 'k_road destination', 30),
]
for ax, column, title, tick_end in panels:
    ax.scatter(corr_df[column], corr_df['bc'], s=1)
    ax.set_title(title, fontsize=10)
    ax.set_xticks(np.arange(0,tick_end,2))
    ax.set_yscale('log')
    ax.set_xlabel('k_road')
    ax.set_ylabel('bc')

plt.savefig(path_plots+'k_road_bc_corr.png', bbox_inches ="tight", dpi=150)
plt.show()

Compute the categories of roads based on K_road and betweenness centrality

In [None]:
# Road categories from the 75th percentiles of K_road (origin) and betweenness:
#   red    = high K_road, high bc      green = low K_road, high bc
#   orange = high K_road, low bc       grey  = everything else
k_road_high = np.percentile(corr_df['k_road_o'], 75)
bc_high = np.percentile(corr_df['bc'], 75)

high_k_road = corr_df['k_road_o'] >= k_road_high
low_k_road = corr_df['k_road_o'] < k_road_high
high_bc = corr_df['bc'] >= bc_high
low_bc = corr_df['bc'] < bc_high

colors = np.select(
    [high_k_road & high_bc, low_k_road & high_bc, high_k_road & low_bc],
    ['red', 'green', 'orange'],
    default='grey',
).tolist()

corr_df['color_o'] = colors

In [None]:
# Road categories from the 75th percentiles of K_road (destination) and betweenness:
#   red    = high K_road, high bc      green = low K_road, high bc
#   orange = high K_road, low bc       grey  = everything else
k_road_high = np.percentile(corr_df['k_road_d'], 75)
bc_high = np.percentile(corr_df['bc'], 75)

high_k_road = corr_df['k_road_d'] >= k_road_high
low_k_road = corr_df['k_road_d'] < k_road_high
high_bc = corr_df['bc'] >= bc_high
low_bc = corr_df['bc'] < bc_high

colors = np.select(
    [high_k_road & high_bc, low_k_road & high_bc, high_k_road & low_bc],
    ['red', 'green', 'orange'],
    default='grey',
).tolist()

corr_df['color_d'] = colors

In [None]:
fig, axs = plt.subplots(2, 1, figsize=(10,10))
# The title follows the `city` parameter; the "betwenness" typo is fixed.
fig.suptitle(city+' k_road - betweenness correlation', fontweight='bold', y=0.93)

# Legend label of each road category color.
type_labels = {'red': 'connectors', 'orange': 'attractors', 'green': 'peripherals', 'grey': 'locals'}

# (axis, column suffix, title, upper bound of the x ticks); the tick bounds are tuned on the Milano_big data.
panels = [
    (axs[0], 'o', 'k_road origin', 48),
    (axs[1], 'd', 'k_road destination', 30),
]
for ax, suffix, title, tick_end in panels:
    # One scatter per category so that each one gets its own legend entry.
    for i, dff in corr_df.groupby('color_'+suffix):
        ax.scatter(dff['k_road_'+suffix], dff['bc'], s=1, c=dff['color_'+suffix], label=type_labels.get(i, ''))
    ax.set_title(title, fontsize=10)
    ax.set_xticks(np.arange(0,tick_end,2))
    ax.set_yscale('log')
    ax.set_xlabel('k_road')
    ax.set_ylabel('bc')
    ax.legend(loc='lower right', markerscale=5)

# Saved under its own name: 'k_road_bc_corr.png' is already used by the uncoloured
# scatter plot above and would be overwritten.
plt.savefig(path_plots+'k_road_bc_corr_types.png', bbox_inches ="tight", dpi=150)
plt.show()

In [None]:
# Drop the tables that have been merged into corr_df and the plotting leftovers.
del bc_df, colors, dff, k_road_df, k_source_df

### 4.1 Correlation with CO2

In [None]:
# Read the road-edge mapping again (it was deleted at the end of section 3); 'edge_len' is needed for CO2 per meter.
road_edge_map = pd.read_csv(path_road_edge_mapping)

In [None]:
# create_dict_exps is not defined in this notebook; presumably it comes from `result_utils` (star import).
# The result is indexed below as dict_exps[main_experiment_name] -> {experiment id: experiment folder name}.
# NOTE(review): 'baseline' is hard-coded here instead of using fold_prefix — confirm this is intended.
dict_exps = create_dict_exps(folder_experiments, 'baseline')

In [None]:
def create_dict_total_per_road(dict_exps, folder_experiments, main_experiment_name, total_of):
    """Aggregate a road measure over all the experiments of a main experiment.

    For every experiment folder listed in ``dict_exps[main_experiment_name]`` the
    file ``road_measures.csv`` is read and the values of the ``total_of`` column are
    collected per road; the mean and the (population) standard deviation of the
    collected values are then computed for each road.

    Parameters
    ----------
    dict_exps : dict
        Mapping main experiment name -> {experiment id: experiment folder name}.
    folder_experiments : str
        Folder containing the experiment folders (with trailing slash).
    main_experiment_name : str
        Key of ``dict_exps`` to aggregate.
    total_of : str
        Column of ``road_measures.csv`` to aggregate (e.g. 'total_co2').

    Returns
    -------
    pandas.DataFrame
        One row per road with columns 'road', 'mean' and 'std' (despite the name
        of the function, the result is a DataFrame, not a dictionary).
    """
    values_per_road = {}
    for exp_folder_name in dict_exps[main_experiment_name].values():
        exp_df = pd.read_csv(folder_experiments+exp_folder_name+"/road_measures.csv")

        for _, measures in exp_df.iterrows():
            values_per_road.setdefault(measures['road'], []).append(measures[total_of])

    summary_rows = [
        [road, np.array(values).mean(), np.array(values).std()]
        for road, values in values_per_road.items()
    ]

    return pd.DataFrame(summary_rows, columns=['road', 'mean', 'std'])

In [None]:
# Mean and std of 'total_co2' per road over the experiments listed under 'baseline'.
df_total_co2_road = create_dict_total_per_road(dict_exps, folder_experiments, 'baseline', 'total_co2')

In [None]:
# CO2 per unit of road length: mean and std of the road are divided by the total edge length of the road.
road_length = road_edge_map.groupby('road')['edge_len'].sum()
df_co2_len = pd.merge(road_length, df_total_co2_road, on=['road'])
df_co2_len['mean_len'] = df_co2_len['mean']/df_co2_len['edge_len']
df_co2_len['std_len'] = df_co2_len['std']/df_co2_len['edge_len']
# Roads with the highest CO2 per meter first.
df_co2_len = df_co2_len.sort_values(by=['mean_len', 'std_len'], ascending=False)

In [None]:
# Add betweenness, K_road and road categories to the CO2 table; roads with the highest CO2 per meter first.
corr_co2 = (
    pd.merge(df_co2_len, corr_df, on=['road'])
    .sort_values(by=['mean_len', 'std_len'], ascending=False)
)

In [None]:
# Preview of the roads with the highest CO2 per meter.
corr_co2.head()

In [None]:
#corr_co2[corr_co2['road'].str.startswith('A')]

In [None]:
# For each category color (origin), the running count of roads of that color:
# element i is the number of such roads among the first i+1 roads of corr_co2.
s = len(corr_co2['color_o'])
color_dict_o = {
    color: (corr_co2['color_o'] == color).cumsum().tolist()
    for color in ('red', 'orange', 'green', 'grey')
}
In [None]:
# For each category color (destination), the running count of roads of that color:
# element i is the number of such roads among the first i+1 roads of corr_co2.
s = len(corr_co2['color_d'])
color_dict_d = {
    color: (corr_co2['color_d'] == color).cumsum().tolist()
    for color in ('red', 'orange', 'green', 'grey')
}

Compute the cumulative sum of roads by type for CO2 emissions

In [None]:
fig, axs = plt.subplots(1, 2, figsize=(12,4))
fig.suptitle('Cumulative sum of roads for each type ordered by CO2', fontweight='bold')

# Category color -> legend label, in drawing order.
road_types = (('red', 'connectors'), ('orange', 'attractors'), ('green', 'peripherals'), ('grey', 'locals'))

for ax, running_counts, panel_title in ((axs[0], color_dict_o, 'origin'), (axs[1], color_dict_d, 'destination')):
    for color, type_label in road_types:
        ax.plot(np.arange(s), running_counts[color], c=color, linewidth=3, label=type_label)
    ax.set_title(panel_title)
    ax.set_xlabel('roads ordered by CO2/m')
    ax.set_ylabel('#roads of each type')
    ax.grid()
    ax.legend(loc='upper left')

plt.savefig(path_plots+'cumsum_corr_co2.png', bbox_inches ="tight", dpi=150)
plt.show()

In [None]:
fig, axs = plt.subplots(2, 1, figsize=(10,6))
fig.suptitle('Top 100 CO2/m roads with type of road', fontweight='bold')

# Category color -> legend label; any other color gets an empty label.
road_type_of = {'red': 'connectors', 'orange': 'attractors', 'green': 'peripherals', 'grey': 'locals'}

# One bar per road: colored by the origin-based category on the first axis and by
# the destination-based category on the second one.
for i, row in corr_co2.iloc[:100].iterrows():
    road_name = html.unescape(row['road'])
    axs[0].bar(road_name, row['mean_len'], yerr=row['std_len'], color=row['color_o'], label=road_type_of.get(row['color_o'], ''))
    axs[1].bar(road_name, row['mean_len'], yerr=row['std_len'], color=row['color_d'], label=road_type_of.get(row['color_d'], ''))

axs[0].set_title('origin')
axs[0].set_xlabel('road')
axs[0].set_ylabel('CO2/length (mg/m)')
axs[0].set_xticks([])
# Every bar carries a label: keep a single legend entry per road type.
handles, labels = axs[0].get_legend_handles_labels()
by_label = dict(zip(labels, handles))
axs[0].legend(by_label.values(), by_label.keys(), loc='upper right')

axs[1].set_title('destination')
axs[1].set_xlabel('roadname')
axs[1].set_ylabel('CO2/length (mg/m)')
axs[1].set_xticklabels(axs[1].get_xticklabels(), rotation='vertical', fontsize=5)
handles, labels = axs[1].get_legend_handles_labels()
by_label = dict(zip(labels, handles))
axs[1].legend(by_label.values(), by_label.keys(), loc='upper right')


plt.savefig(path_plots+'CO2_m_corr.png', bbox_inches ="tight")
plt.show()

In [None]:
# Row i holds the number of roads of each category (origin) among the first i roads
# of corr_co2 (ordered by CO2/m), so row 0 is all zeros.
# The rows are collected in a list and the DataFrame is built once: DataFrame.append
# was removed in pandas 2.0, and re-grouping corr_co2.iloc[:i] at every step was quadratic.
type_counts = {'red': 0, 'orange': 0, 'green': 0, 'grey': 0}
stacked_rows = []
for i, road_color in enumerate(corr_co2['color_o']):
    stacked_rows.append({'exp': i, **type_counts})
    if road_color in type_counts:
        type_counts[road_color] += 1

stacked_df_o = pd.DataFrame(stacked_rows, columns=['exp', 'red', 'orange', 'green', 'grey'])
stacked_df_o = stacked_df_o.astype(float)

In [None]:
# Row i holds the number of roads of each category (destination) among the first i roads
# of corr_co2 (ordered by CO2/m), so row 0 is all zeros.
# The rows are collected in a list and the DataFrame is built once: DataFrame.append
# was removed in pandas 2.0, and re-grouping corr_co2.iloc[:i] at every step was quadratic.
type_counts = {'red': 0, 'orange': 0, 'green': 0, 'grey': 0}
stacked_rows = []
for i, road_color in enumerate(corr_co2['color_d']):
    stacked_rows.append({'exp': i, **type_counts})
    if road_color in type_counts:
        type_counts[road_color] += 1

stacked_df_d = pd.DataFrame(stacked_rows, columns=['exp', 'red', 'orange', 'green', 'grey'])
stacked_df_d = stacked_df_d.astype(float)

In [None]:
fig, axs = plt.subplots(1, 2, figsize=(12,4))
fig.suptitle('Cumulative sum of roads for each type ordered by CO2', fontweight='bold')

# Stacking order of the categories; the labels match the other figures ('peripherals', not 'peripheral').
type_colors = ['red', 'orange', 'green', 'grey']
type_labels = ['connectors', 'attractors', 'peripherals', 'locals']

for ax, stacked, panel_title in ((axs[0], stacked_df_o, 'origin'), (axs[1], stacked_df_d, 'destination')):
    ax.stackplot(stacked['exp'], *[stacked[color] for color in type_colors],
                 colors=type_colors,
                 labels=type_labels)
    ax.set_title(panel_title)
    ax.set_xlabel('roads ordered by CO2/m')
    ax.set_ylabel('cumulative sum of roads')
    ax.grid(alpha=0.4)
    ax.legend(loc='upper left')

plt.savefig(path_plots+'cumsum2_corr_co2.png', bbox_inches ="tight", dpi=150)
plt.show()

In [None]:
# Number of roads of each category among the top-K roads by CO2/m, for each K.
# The rows are collected in lists and the DataFrames are built once
# (DataFrame.append was removed in pandas 2.0).
type_colors = ['red', 'orange', 'green', 'grey']
topk_values = [1,10,20,30,40,50,60,70,80,90,100]

rows_o = []
rows_d = []
for k in topk_values:
    top_roads = corr_co2.iloc[:k]
    counts_o = top_roads['color_o'].value_counts()
    counts_d = top_roads['color_d'].value_counts()
    rows_o.append({'exp': k, **{color: int(counts_o.get(color, 0)) for color in type_colors}})
    rows_d.append({'exp': k, **{color: int(counts_d.get(color, 0)) for color in type_colors}})

stacked_df2_o = pd.DataFrame(rows_o, columns=['exp']+type_colors)
stacked_df2_d = pd.DataFrame(rows_d, columns=['exp']+type_colors)

In [None]:
fig, axs = plt.subplots(1, 2, figsize=(12,4))
fig.suptitle('Road type for each topK experiment', fontweight='bold')

# Category color -> legend label, in stacking order (bottom to top).
road_types = (('red', 'connectors'), ('orange', 'attractors'), ('green', 'peripherals'), ('grey', 'locals'))
axis_ticks = np.concatenate(([1],np.arange(10,101,10)))

# The 'green' (peripherals) segment is not drawn on the destination panel,
# but its counts still contribute to the offset of the 'grey' segment.
panels = (
    (axs[0], stacked_df2_o, 'origin', ('red', 'orange', 'green', 'grey')),
    (axs[1], stacked_df2_d, 'destination', ('red', 'orange', 'grey')),
)

for ax, stacked, panel_title, drawn_colors in panels:
    offset = None  # bottom of the next segment; None lets the first one start from zero
    for color, type_label in road_types:
        if color in drawn_colors:
            p = ax.bar(stacked['exp'], stacked[color], 5, bottom=offset, color=color, label=type_label)
            ax.bar_label(p, label_type='center', fontsize=8)
        offset = stacked[color] if offset is None else offset+stacked[color]

    ax.set_title(panel_title)
    ax.set_xlabel('topK experiment')
    ax.set_ylabel('#removed roads')
    ax.set_xticks(axis_ticks)
    ax.set_yticks(axis_ticks)
    ax.legend(loc='upper left')

plt.savefig(path_plots+'topk_exp_corr.png', bbox_inches ="tight", dpi=150)
plt.show()

Computation of Spearman correlation between the metrics

In [None]:
# Log-log scatter of K_road (origin and destination variants) against corr_co2['mean_len'],
# which the axis labels as co2/m.
fig, axs = plt.subplots(1, 2, figsize=(12,4))
fig.suptitle('K_road - CO2 correlation', fontweight='bold')

for ax, k_road_col, panel_title in zip(axs, ('k_road_o', 'k_road_d'), ('origin', 'destination')):
    ax.scatter(corr_co2[k_road_col], corr_co2['mean_len'], s=1)
    ax.set_title(panel_title, fontsize=10)
    ax.set_xscale('log')
    ax.set_yscale('log')
    ax.set_xlabel('k_road')
    ax.set_ylabel('co2/m')

plt.savefig(path_plots+'k_road_co2_corr.png', bbox_inches ="tight", dpi=150)
plt.show()

In [None]:
# Spearman rank correlation of each K_road variant with corr_co2['mean_len']
for k_road_col, k_road_name in (('k_road_o', 'K_road_o'), ('k_road_d', 'K_road_d')):
    rho = scipy.stats.spearmanr(corr_co2[k_road_col], corr_co2['mean_len']).correlation
    print(k_road_name+' - CO2 Spearman corr: '+str(rho))

In [None]:
# Log-log scatter of betweenness centrality against corr_co2['mean_len'] (labelled co2/m)
fig, ax = plt.subplots(figsize=(6,4))

ax.scatter(corr_co2['bc'], corr_co2['mean_len'], s=1)
ax.set_title('Betweenness centrality - CO2 correlation', fontsize=10)
ax.set(xscale='log', yscale='log', xlabel='bc', ylabel='co2/m')

plt.savefig(path_plots+'co2_bc_corr.png', bbox_inches ="tight", dpi=150)
plt.show()

In [None]:
# Spearman rank correlation between betweenness centrality and corr_co2['mean_len']
bc_rho = scipy.stats.spearmanr(corr_co2['bc'], corr_co2['mean_len']).correlation
print('Betweenness - CO2 Spearman corr: '+str(bc_rho))

In [None]:
# For every pair of (K_road_o percentile, betweenness percentile) thresholds, split the
# roads into four quadrants and compute, inside each quadrant, the Spearman correlation
# between k_road_o and mean_len.
# corr_dict[(p_kroad, p_bc)] = [connectors, attractors, peripherals, locals]
percentiles = [10, 20, 30, 40, 50, 60, 70, 80, 90]
corr_dict = {}
for p_kroad in percentiles:
    # the K_road threshold only depends on the outer loop variable
    k_road_high = np.percentile(corr_co2['k_road_o'], p_kroad)
    k_road_is_high = corr_co2['k_road_o'] >= k_road_high
    k_road_is_low = corr_co2['k_road_o'] < k_road_high

    for p_bc in percentiles:
        bc_high = np.percentile(corr_co2['bc'], p_bc)
        bc_is_high = corr_co2['bc'] >= bc_high
        bc_is_low = corr_co2['bc'] < bc_high

        quadrant_masks = [
            k_road_is_high & bc_is_high,   # connectors
            k_road_is_low & bc_is_high,    # attractors
            k_road_is_high & bc_is_low,    # peripherals
            k_road_is_low & bc_is_low,     # locals
        ]

        corr_dict[(p_kroad, p_bc)] = []
        for quadrant_mask in quadrant_masks:
            tmp = corr_co2[quadrant_mask]
            sperman_corr = scipy.stats.spearmanr(tmp['k_road_o'], tmp['mean_len']).correlation
            corr_dict[(p_kroad, p_bc)].append(sperman_corr)

In [None]:
# Heatmaps of the k_road_o - CO2 Spearman correlation for each road type,
# as a function of the two percentile thresholds.
# The frame is float-initialised so that writing correlations does not need a dtype upcast.
df_corr = pd.DataFrame(0.0, index=percentiles[::-1], columns=percentiles)

fig, axs = plt.subplots(2, 2, figsize=(12,10))
fig.suptitle('Spearman correlation k_road_o - CO2 varying percentiles', fontweight='bold', y=0.94)

# (panel axis, position of the road type inside each corr_dict value, title, colormap)
heatmap_panels = [
    (axs[0,0], 0, 'Connectors', 'Reds'),
    (axs[0,1], 1, 'Attractors', 'Oranges'),
    (axs[1,0], 2, 'Peripherals', 'Greens'),   # this panel was previously mislabelled 'Locals'
    (axs[1,1], 3, 'Locals', 'Greys'),
]

for ax, type_pos, panel_title, cmap in heatmap_panels:
    # refill the shared frame with the correlations of the current road type:
    # rows = K_road_o percentile (k[0]), columns = betweenness percentile (k[1])
    for k,v in corr_dict.items():
        df_corr.loc[k[0], k[1]] = v[type_pos]
    sns.heatmap(df_corr, annot=True, annot_kws={"size": 7}, cmap=cmap, ax=ax)
    ax.set_title(panel_title, fontsize=10)
    ax.set_xlabel('Betweenness percentile')
    ax.set_ylabel('K_road_o percentile')

plt.savefig(path_plots+'k_road_o_spearman_all.png', bbox_inches ="tight", dpi=150)
plt.show()

In [None]:
# Same quadrant split as for K_road, but the Spearman correlation is computed between
# betweenness centrality (bc) and mean_len inside each quadrant.
# corr_dict[(p_kroad, p_bc)] = [connectors, attractors, peripherals, locals]
percentiles = [10, 20, 30, 40, 50, 60, 70, 80, 90]
corr_dict = {}
for p_kroad in percentiles:
    # the K_road threshold only depends on the outer loop variable
    k_road_high = np.percentile(corr_co2['k_road_o'], p_kroad)
    k_road_is_high = corr_co2['k_road_o'] >= k_road_high
    k_road_is_low = corr_co2['k_road_o'] < k_road_high

    for p_bc in percentiles:
        bc_high = np.percentile(corr_co2['bc'], p_bc)
        bc_is_high = corr_co2['bc'] >= bc_high
        bc_is_low = corr_co2['bc'] < bc_high

        quadrant_masks = [
            k_road_is_high & bc_is_high,   # connectors
            k_road_is_low & bc_is_high,    # attractors
            k_road_is_high & bc_is_low,    # peripherals
            k_road_is_low & bc_is_low,     # locals
        ]

        corr_dict[(p_kroad, p_bc)] = []
        for quadrant_mask in quadrant_masks:
            tmp = corr_co2[quadrant_mask]
            sperman_corr = scipy.stats.spearmanr(tmp['bc'], tmp['mean_len']).correlation
            corr_dict[(p_kroad, p_bc)].append(sperman_corr)

In [None]:
# Heatmaps of the betweenness - CO2 Spearman correlation for each road type,
# as a function of the two percentile thresholds.
# The frame is float-initialised so that writing correlations does not need a dtype upcast.
df_corr = pd.DataFrame(0.0, index=percentiles[::-1], columns=percentiles)

fig, axs = plt.subplots(2, 2, figsize=(12,10))
fig.suptitle('Spearman correlation betweenness - CO2 varying percentiles', fontweight='bold', y=0.94)

# (panel axis, position of the road type inside each corr_dict value, title, colormap)
heatmap_panels = [
    (axs[0,0], 0, 'Connectors', 'Reds'),
    (axs[0,1], 1, 'Attractors', 'Oranges'),
    (axs[1,0], 2, 'Peripherals', 'Greens'),   # this panel was previously mislabelled 'Locals'
    (axs[1,1], 3, 'Locals', 'Greys'),
]

for ax, type_pos, panel_title, cmap in heatmap_panels:
    # refill the shared frame with the correlations of the current road type:
    # rows = K_road_o percentile (k[0]), columns = betweenness percentile (k[1])
    for k,v in corr_dict.items():
        df_corr.loc[k[0], k[1]] = v[type_pos]
    sns.heatmap(df_corr, annot=True, annot_kws={"size": 7}, cmap=cmap, ax=ax)
    ax.set_title(panel_title, fontsize=10)
    ax.set_xlabel('Betweenness percentile')
    ax.set_ylabel('K_road_o percentile')

plt.savefig(path_plots+'bc_spearman_all.png', bbox_inches ="tight", dpi=150)
plt.show()

In [None]:
# Free the intermediate objects of the correlation analysis.
# NOTE(review): corr_df is not defined in the visible part of the notebook - confirm it exists upstream.
del by_label, labels, color_counter, color_dict_d, color_dict_o
del df_co2_len, df_total_co2_road
del stacked_df_d, stacked_df_o, stacked_df2_d, stacked_df2_o
del corr_df, corr_dict, percentiles, tmp, v

### 4.2 Visualization of road type on map

Plot the road network on a map, coloring each edge by its road type

In [None]:
# Parse the SUMO network file (withInternal=False: presumably skips junction-internal edges - confirm in sumolib docs)
road_network = sumolib.net.readNet(road_network_path, withInternal=False)

In [None]:
# Road-to-edge mapping; the cells below read its 'road' and 'edge_id' columns
road_edge_map = pd.read_csv(path_road_edge_mapping)

In [None]:
# Mirror the SUMO road network as a directed multigraph: one graph edge per SUMO edge,
# keyed by the SUMO edge id and carrying the edge length and its shape as a LineString.
G = nx.MultiDiGraph()

for sumo_edge in road_network.getEdges():
    G.add_edge(
        sumo_edge.getFromNode().getID(),
        sumo_edge.getToNode().getID(),
        key=sumo_edge.getID(),
        length=sumo_edge.getLength(),
        geometry=LineString([list(xy) for xy in sumo_edge.getShape()]),
    )

# graph-level CRS attribute, set before the graph is handed to ox.plot_graph
G.graph['crs'] = 'epsg:3857'

In [None]:
def plot_df(df_exps, road_edge_map, road_net):
    """Join per-road values to the edges of each road and add the graph key of every edge.

    Parameters
    ----------
    df_exps : DataFrame with a 'road' column and the per-road values to plot.
    road_edge_map : DataFrame with 'road' and 'edge_id' columns.
    road_net : object exposing getEdge(edge_id), whose result provides
        getFromNode()/getToNode() with getID() (e.g. a sumolib network).

    Returns
    -------
    DataFrame: road_edge_map without internal edges (ids starting with ':'),
    inner-merged with df_exps on 'road', plus an 'edge_graph' column holding
    (from_node_id, to_node_id, edge_id) tuples.
    """
    is_internal = road_edge_map['edge_id'].astype(str).str.startswith(':')
    road_edge_mean_map = pd.merge(road_edge_map[~is_internal], df_exps, on=['road'])

    def _graph_key(edge_id):
        # (u, v, key) triple identifying the edge in the MultiDiGraph
        sumo_edge = road_net.getEdge(edge_id)
        return (sumo_edge.getFromNode().getID(), sumo_edge.getToNode().getID(), edge_id)

    road_edge_mean_map['edge_graph'] = [_graph_key(edge_id) for edge_id in road_edge_mean_map['edge_id']]

    return road_edge_mean_map

In [None]:
# One row per (non-internal) edge of the roads in corr_co2, with its (u, v, key) graph identifier
corr_plot = plot_df(corr_co2, road_edge_map, road_network)

In [None]:
def add_attribute_to_graph(graph, df_plot, attr_name, plot_col):
    """Store a per-edge value from `df_plot` as an edge attribute of `graph` (in place).

    Parameters
    ----------
    graph : multigraph whose `edges` iterate as (node_from, node_to, key) triples,
        with key = edge id, and that supports graph[u][v][key] attribute dicts.
    df_plot : mapping/DataFrame with an 'edge_graph' column of (node_from, node_to, key)
        triples and a `plot_col` column with the value of each edge.
    attr_name : name of the edge attribute to write.
    plot_col : name of the column of `df_plot` holding the values.

    Every edge of `graph` gets the attribute; edges not listed in `df_plot` keep None.
    """
    # Initialise the attribute on every edge of the graph passed in.
    # The original code wrote to the global `G` here, which broke (or silently
    # touched another graph) whenever `graph` was not that global.
    for node_from, node_to, edge_id in graph.edges:
        graph[node_from][node_to][edge_id][attr_name] = None

    # Set the attribute for the edges that have a value
    for (node_from, node_to, edge_id), value in zip(df_plot['edge_graph'], df_plot[plot_col]):
        graph[node_from][node_to][edge_id][attr_name] = value

In [None]:
# copy the origin- and destination-based road-type colours onto the graph edges
for color_col in ('color_o', 'color_d'):
    add_attribute_to_graph(G, corr_plot, color_col, color_col)

In [None]:
# create colormap: one colour per graph edge, indexed by (node_from, node_to, key)

# iterating G.edges gives (node_from, node_to, key) with key = edge id
ind = list(G.edges)
val_o = []
val_d = []
for node_from, node_to, edge_id in ind:
    edge_attrs = G[node_from][node_to][edge_id]
    # edges without a road type (attribute left at None) are drawn white;
    # None is tested by identity rather than with ==
    val_o.append('white' if edge_attrs['color_o'] is None else edge_attrs['color_o'])
    val_d.append('white' if edge_attrs['color_d'] is None else edge_attrs['color_d'])

ec_o = pd.Series(val_o, index=pd.MultiIndex.from_tuples(ind))
ec_d = pd.Series(val_d, index=pd.MultiIndex.from_tuples(ind))


In [None]:
# Map of the network with every edge coloured by its origin-based road type
fig, ax = ox.plot_graph(G, bgcolor='lightgrey', node_size=0, edge_linewidth=1, edge_color=ec_o, show=False)

plt.title('Milano-big: type of road origin')

# one legend entry per road-type colour
road_type_legend = [('red', 'Connectors'), ('orange', 'Attractors'), ('green', 'Peripherals'), ('grey', 'Locals')]
legend_elements = [Line2D([0], [0], color=color, lw=4, label=label) for color, label in road_type_legend]

ax.legend(handles=legend_elements, bbox_to_anchor=(1.2, 1))
plt.savefig(path_plots+'corr_map_o.png', bbox_inches ="tight")
plt.show()

In [None]:
# Map of the network with every edge coloured by its destination-based road type
fig, ax = ox.plot_graph(G, bgcolor='lightgrey', node_size=0, edge_linewidth=1, edge_color=ec_d, show=False)

plt.title('Milano-big: type of road destination')

# one legend entry per road-type colour
road_type_legend = [('red', 'Connectors'), ('orange', 'Attractors'), ('green', 'Peripherals'), ('grey', 'Locals')]
legend_elements = [Line2D([0], [0], color=color, lw=4, label=label) for color, label in road_type_legend]

ax.legend(handles=legend_elements, bbox_to_anchor=(1.2, 1))
plt.savefig(path_plots+'corr_map_d.png', bbox_inches ="tight")
plt.show()

### 4.3 Plot of driver sources and destinations for a given edge

Visualization of the tiles that are driver sources and destinations for the edges of a selected road

In [None]:
# Driver sources (origin=True) and destinations (origin=False) of the baseline routed paths,
# each passed through compute_MDS with 0.8.
# NOTE(review): both helpers come from result_utils; 0.8 is presumably the share of
# traffic that defines the major driver sources - confirm there.
baseline_routes_file = routed_paths_folder+'Milano_big_TD_30k_0.rou.xml'

ds_o = compute_MDS(compute_driver_sources(baseline_routes_file, dict_edge_tile, origin=True), 0.8)
ds_d = compute_MDS(compute_driver_sources(baseline_routes_file, dict_edge_tile, origin=False), 0.8)

In [None]:
# City boundary used as the base shape of the tessellation
shape = gpd.GeoDataFrame.from_file(shapefile_path)
# skmob's tabulated average hexagon edge length for H3 resolution 8 (multiplied by 1000 below, so presumably km - confirm)
tile_h3_resolution = constants.H3_UTILS['average_hexagon_edge_length']['8']
# H3 tessellation of the shape; the next cells read its 'geometry' and 'tile_ID' columns
tessellation_hex = tilers.tiler.get('h3_tessellation', base_shape=shape, meters=tile_h3_resolution*1000)

In [None]:
# convert the tessellation to SUMO road network coordinates
# NOTE: this overwrites tessellation_hex['geometry'] in place, so the cell must be run only once
new_geom = [
    Polygon([
        road_network.convertLonLat2XY(lon, lat)
        for lon, lat in zip(*tile_polygon.exterior.coords.xy)
    ])
    for tile_polygon in tessellation_hex['geometry']
]

tessellation_hex['geometry'] = new_geom

In [None]:
# road to visualize: all the edges whose road name starts with the chosen prefix
on_selected_road = road_edge_map['road'].str.startswith('A50')
road_edges = list(road_edge_map[on_selected_road]['edge_id'])
#road_edges = list(road_edge_map[road_edge_map['road'].str.startswith('Via Lomellina, Milano')]['edge_id'])

In [None]:
# create colormap selected road: the selected edges keep their origin road-type colour,
# every other edge is drawn light grey
ind = list(G.edges)
val = [
    G[node_from][node_to][edge_id]['color_o'] if edge_id in road_edges else 'lightgrey'
    for node_from, node_to, edge_id in ind
]

ec = pd.Series(val, index=pd.MultiIndex.from_tuples(ind))

In [None]:
def _mean_tile_alpha(edges, driver_sources):
    """Average the per-tile values of `edges` and min-max scale them to [0, 1].

    `driver_sources` maps edge id -> {tile id: value}; edges missing from it are ignored.
    The result maps tile id -> alpha used as face transparency in the plots.

    Returns {} when none of the edges has an entry (the unguarded version raised
    on np.min of an empty list), and gives every tile alpha 1.0 when all the means
    are equal (the unguarded version divided by zero and produced NaN alphas).
    """
    values_per_tile = {}
    for edge in edges:
        if edge in driver_sources.keys():
            for tile, tile_value in driver_sources[edge].items():
                values_per_tile.setdefault(tile, []).append(tile_value)

    # mean over the selected edges of the value of each tile
    tile_means = {tile: np.mean(values) for tile, values in values_per_tile.items()}
    if not tile_means:
        return {}

    # min-max normalisation to obtain alpha values for the plot
    val_min = np.min(list(tile_means.values()))
    val_max = np.max(list(tile_means.values()))
    if val_max == val_min:
        return {tile: 1.0 for tile in tile_means}
    return {tile: (mean - val_min)/(val_max - val_min) for tile, mean in tile_means.items()}


tiles_o = _mean_tile_alpha(road_edges, ds_o)
tiles_d = _mean_tile_alpha(road_edges, ds_d)

In [None]:
# Selected road on the network, with its driver-source tiles shaded by their normalised value
fig, ax = ox.plot_graph(G, bgcolor='white', node_size=0, edge_linewidth=0.8, edge_color=ec, show=False)

# plot tiles: one hexagon per tile, face transparency = value stored in tiles_o
hx_o = tessellation_hex[tessellation_hex['tile_ID'].isin(tiles_o)]
for tile, alpha in tiles_o.items():
    tile_hexagon = hx_o[hx_o['tile_ID']==tile]
    tile_hexagon.plot(ax=ax, aspect=1, edgecolor='tab:blue', fc=[mcolors.to_rgba('tab:blue', alpha)], linewidth=0.8)

plt.title('Major driver sources of road')

plt.savefig(path_plots+'mds_o_map.png', bbox_inches ="tight")
plt.show()

In [None]:
# Selected road on the network, with its driver-destination tiles shaded by their normalised value
fig, ax = ox.plot_graph(G, bgcolor='white', node_size=0, edge_linewidth=0.8, edge_color=ec, show=False)

# plot tiles: one hexagon per tile, face transparency = value stored in tiles_d
hx_d = tessellation_hex[tessellation_hex['tile_ID'].isin(tiles_d)]
for tile, alpha in tiles_d.items():
    tile_hexagon = hx_d[hx_d['tile_ID']==tile]
    tile_hexagon.plot(ax=ax, aspect=1, edgecolor='tab:orange', fc=[mcolors.to_rgba('tab:orange', alpha)], linewidth=0.8)

plt.title('Major driver destinations of road')

plt.savefig(path_plots+'mds_d_map.png', bbox_inches ="tight")
plt.show()

In [None]:
# Free the objects created for the map visualisations
del corr_plot, df_corr
del ds_d, ds_o, tiles_d, tiles_o
del ec, ec_d, ec_o, ind, val_d, val_o
del hx_d, hx_o, new_geom, tessellation_hex

## 5. Path differences after road closure

Plot the differences in routed paths before and after the closure of a road

In [None]:
# <vehicle> elements of the baseline routed paths
baseline_path = list(sumolib.xml.parse(routed_paths_folder+'Milano_big_TD_30k_0.rou.xml', 'vehicle'))
# <vehicle> elements of the top30 experiment routed paths
experiment_path = list(sumolib.xml.parse('../data/simulations/'+city+'/top30/routed_paths/Milano_big_TD_30k_top30_0.rou.xml', 'vehicle'))
# roads of the top30 experiment with their edges ('tbr' presumably stands for "to be removed" - confirm);
# the cells below read its 'road' and 'edge_id' columns
road_edge_tbr = pd.read_csv('../data/simulations/'+city+'/top30/Milano_big_road_top30.csv')

In [None]:
# first five road names of the experiment
road_edge_tbr['road'].head(5)

In [None]:
# Flatten the stringified edge lists of all the roads into one duplicate-free list
all_removed_edges = list({
    edge_id
    for edge_list_repr in road_edge_tbr['edge_id']
    for edge_id in literal_eval(edge_list_repr)
})

In [None]:
# Vehicles whose baseline route uses more than `threshold_edge` edges of the selected road
roadname = 'Piazza Carlo Caneva'
selected_road_rows = road_edge_tbr[road_edge_tbr['road']==roadname]
edge_removed = set(literal_eval(selected_road_rows['edge_id'].values[0]))
threshold_edge = 1

vehicle_ids = [
    vehicle.id
    for vehicle in baseline_path
    if len(edge_removed.intersection(vehicle.route[0].edges.split(' '))) > threshold_edge
]

In [None]:
# peek at the first matching vehicle ids
vehicle_ids[:5]

In [None]:
# Edge sequence of one vehicle before (baseline) and after (experiment) the closure;
# a route stays [] when the vehicle id is not found in the corresponding file
v_id = 'vehicle_1434.0'

baseline_v_route = next(
    (vehicle.route[0].edges.split(' ') for vehicle in baseline_path if vehicle.id == v_id),
    [],
)

experiment_v_route = next(
    (vehicle.route[0].edges.split(' ') for vehicle in experiment_path if vehicle.id == v_id),
    [],
)

In [None]:
# NOTE(review): both objects are already loaded in section 4.2 with the same arguments;
# this reload is only needed when section 5 is run on its own.
road_network = sumolib.net.readNet(road_network_path, withInternal=False)
road_edge_map = pd.read_csv(path_road_edge_mapping)

In [None]:
def edge_list_to_gps_list(edge_list, road_network):
    """Convert a sequence of edge ids into a flat list of (lat, lon) points.

    Every edge contributes two points, its from-node and its to-node, in that order.
    Edge ids starting with ':' are skipped.

    Parameters
    ----------
    edge_list : iterable of edge id strings.
    road_network : object exposing getEdge(edge_id) and convertXY2LonLat(x, y)
        (e.g. a sumolib network).

    Returns
    -------
    list of (lat, lon) tuples.
    """
    def _lat_lon(node):
        # the network converts x/y to (lon, lat); the points are returned as (lat, lon)
        lon, lat = road_network.convertXY2LonLat(*node.getCoord())
        return (lat, lon)

    gps_points = []
    for edge_id in edge_list:
        if not edge_id.startswith(':'):
            sumo_edge = road_network.getEdge(edge_id)
            gps_points.extend((_lat_lon(sumo_edge.getFromNode()), _lat_lon(sumo_edge.getToNode())))

    return gps_points

In [None]:
# Interactive base map; the centre is hard-coded (presumably central Milan - adjust for other cities)
m = folium.Map(location=[45.469262, 9.182007],
               tiles='CartoDB Positron',
               zoom_start=14,
               attr='CartoDB')

In [None]:
# Draw the two routes of the vehicle edge by edge: baseline in blue, then experiment in orange
route_layers = [(baseline_v_route, 'blue'), (experiment_v_route, 'orange')]
for route, route_color in route_layers:
    for edge in route:
        gps_edge = edge_list_to_gps_list([edge], road_network)
        folium.PolyLine(gps_edge, color=route_color, weight=4).add_to(m)

#gps_edge = edge_list_to_gps_list(edge_removed, road_network)
#folium.PolyLine(gps_edge, color='red', weight=3, dash_array='5').add_to(m)

# Overlay all the removed edges in red, skipping the ids that start with ':'
for edge in all_removed_edges:
    if edge.startswith(':'):
        continue
    gps_edge = edge_list_to_gps_list([edge], road_network)
    folium.PolyLine(gps_edge, color='red', weight=2).add_to(m)

In [None]:
# display the interactive map
m

In [None]:
# Rebuild the multigraph from the SUMO network (this also drops the edge attributes
# added to the previous G): one graph edge per SUMO edge, keyed by the SUMO edge id.
G = nx.MultiDiGraph()

for sumo_edge in road_network.getEdges():
    edge_shape = LineString([list(xy) for xy in sumo_edge.getShape()])
    G.add_edge(
        sumo_edge.getFromNode().getID(),
        sumo_edge.getToNode().getID(),
        key=sumo_edge.getID(),
        length=sumo_edge.getLength(),
        geometry=edge_shape,
    )

# graph-level CRS attribute, set before the graph is handed to ox.plot_graph
G.graph['crs'] = 'epsg:3857'

In [None]:
# Colour every edge by its role; the first matching rule wins:
# removed road > baseline route > experiment route > everything else
for node_from, node_to, edge_id in G.edges:
    if edge_id in edge_removed:
        edge_color = 'tab:red'
    elif edge_id in baseline_v_route:
        edge_color = 'tab:blue'
    elif edge_id in experiment_v_route:
        edge_color = 'tab:orange'
    else:
        edge_color = 'white'
    G[node_from][node_to][edge_id]['color'] = edge_color

In [None]:
# create colormap: one colour per graph edge, indexed by (node_from, node_to, key)
ind = list(G.edges)
val = [G[node_from][node_to][edge_id]['color'] for node_from, node_to, edge_id in ind]

ec = pd.Series(val, index=pd.MultiIndex.from_tuples(ind))

In [None]:
# Static map of the rerouting example: removed road, baseline route and route after the closure
fig, ax = ox.plot_graph(G, bgcolor='lightgrey', node_size=0, edge_linewidth=1, edge_color=ec, show=False)

# title describing this figure (it previously carried the road-type map title)
plt.title('Milano-big: route before and after road closure')

# legend matching the 'color' attribute assigned to the edges of G
legend_elements = [Line2D([0], [0], color='tab:red', lw=4, label='Removed road'),
                   Line2D([0], [0], color='tab:blue', lw=4, label='Baseline route'),
                   Line2D([0], [0], color='tab:orange', lw=4, label='Route after closure')
                   ]
ax.legend(handles=legend_elements, bbox_to_anchor=(1.2, 1))

# zoom on a hard-coded window, expressed in the x/y coordinates of the plotted network
ax.set_ylim(14000, 16000)
ax.set_xlim(10000, 14000)
plt.show()