# Select the roads to remove by each type of road

This notebook contains the code to select which roads to remove for different closure strategies, based on the type of each road.

In [None]:
import sumolib
import pandas as pd
import numpy as np
import json
from pathlib import Path
import os
import matplotlib.pyplot as plt
from result_utils import *

#### parameters

In [None]:
# City to analyse and simulation fold whose results are read.
city = 'Milano_big'
fold_prefix = 'baseline'

# road network file
road_network_path = f"../data/road_net/{city}/{city}_road_network.net.xml"

# road -> edge mapping (CSV)
path_road_edge_mapping = f'../data/road_net/{city}/{city}_road_edge_map.csv'
# road -> cluster mapping (CSV)
path_road_cluster_mapping = f'../data/simulations/{city}/{fold_prefix}/results/road_clust_map.csv'


# folder with the experiment results
folder_experiments = f'../data/simulations/{city}/{fold_prefix}/sumo_out/'

# output folder
path_results = f"../data/simulations/{city}/{fold_prefix}/results/"

## 1. Load road categories from clustering

In [None]:
# Load the road-to-cluster mapping CSV; it is joined to the CO2 table on the 'road' column further down.
road_cluster_map = pd.read_csv(path_road_cluster_mapping)

## 2. Correlation with CO2

Merge the type of each road with its CO2/m.

In [None]:
# Load the road-to-edge mapping CSV; its 'road', 'edge_len' and 'edge_id' columns are used below.
road_edge_map = pd.read_csv(path_road_edge_mapping)

In [None]:
# Index the experiment output folders (create_dict_exps is presumably provided by result_utils).
# Below it is used as dict_exps[<experiment name>] -> {experiment id: folder name}.
dict_exps = create_dict_exps(folder_experiments, 'baseline')

In [None]:
def create_dict_total_per_road(dict_exps, folder_experiments, main_experiment_name, total_of):
    """Aggregate one road measure over all the runs of an experiment.

    Reads ``road_measures.csv`` from every run folder listed in
    ``dict_exps[main_experiment_name]`` and, for each road, computes the mean
    and the population standard deviation (NumPy default, ``ddof=0``) of the
    ``total_of`` column over all the rows found for that road.

    Args:
        dict_exps: mapping experiment name -> {experiment id: folder name}.
        folder_experiments: path prefix of the run folders. It is concatenated
            as-is with the folder name, so it must end with a path separator.
        main_experiment_name: key of ``dict_exps`` selecting the runs to read.
        total_of: name of the ``road_measures.csv`` column to aggregate.

    Returns:
        A DataFrame (not a dictionary, despite the function name) with the
        columns ``['road', 'mean', 'std']``, one row per road, in order of
        first appearance.
    """
    values_per_road = {}
    for exp_folder_name in dict_exps[main_experiment_name].values():
        exp_df = pd.read_csv(folder_experiments+exp_folder_name+"/road_measures.csv")

        # Iterate the two columns directly instead of DataFrame.iterrows():
        # iterrows() builds one Series per row, which upcasts integer road ids
        # to float when the other columns are numeric, and is much slower.
        for road, measure in zip(exp_df['road'], exp_df[total_of]):
            values_per_road.setdefault(road, []).append(measure)

    list_df = []
    for road, values in values_per_road.items():
        values = np.array(values)
        list_df.append([road, values.mean(), values.std()])

    return pd.DataFrame(list_df, columns=['road', 'mean', 'std'])

In [None]:
# Mean and std of the 'total_co2' measure per road over the runs of the 'baseline' experiment.
df_total_co2_road = create_dict_total_per_road(dict_exps, folder_experiments, 'baseline', 'total_co2')

In [None]:
# Join the total length of each road (sum of its edges) with the CO2 statistics,
# divide mean and std by the road length, and rank the roads from highest to lowest.
df_co2_len = (
    pd.merge(road_edge_map.groupby('road')['edge_len'].sum(), df_total_co2_road, on=['road'])
    .assign(
        mean_len=lambda table: table['mean'] / table['edge_len'],
        std_len=lambda table: table['std'] / table['edge_len'],
    )
    .sort_values(by=['mean_len', 'std_len'], ascending=False)
)

In [None]:
# Attach the cluster information to each road and keep the ranking by CO2 per unit length.
corr_co2 = (
    df_co2_len
    .merge(road_cluster_map, on=['road'])
    .sort_values(by=['mean_len', 'std_len'], ascending=False)
)

In [None]:
# Preview the first rows, i.e. the roads with the highest mean_len.
corr_co2.head()

## 3. Remove roads by type

In [None]:
# Base folder of the city's simulations, derived from `city` so that it follows
# the parameters cell instead of hard-coding the city name.
removed_path = '../data/simulations/' + city

In [None]:
# Work on a copy so that corr_co2 itself is left untouched.
df = corr_co2.copy()
# Alternative: restrict the selection to a single cluster label (here 'LE').
#df = corr_co2[corr_co2['clust_label'] == 'LE'].copy()

In [None]:
# Rank the roads from highest to lowest mean_len (ties broken by std_len);
# road_to_remove takes the first rows of df as the top roads.
df.sort_values(by=['mean_len', 'std_len'], ascending=False, inplace=True)

In [None]:
# Number of candidate roads (rows) and of columns.
df.shape

Create a dictionary that maps each road-length bin to the roads falling in that bin. It is used to draw random roads from the same bin as each removed road.

In [None]:
# Total length of each road (sum of its edges), limited to the candidate roads in df.
road_len = road_edge_map.groupby(by=['road'], as_index=False)['edge_len'].sum()
road_len = road_len.loc[road_len['road'].isin(df['road'])]

In [None]:
# Number of roads kept after restricting to the roads in df.
road_len.shape

In [None]:
# Histogram of the total road lengths.
fig, ax = plt.subplots(figsize=(10, 3))
ax.hist(road_len['edge_len'], bins=100)
ax.set_title('Road histogram')
ax.set_xlabel('Road length (m)')
ax.set_ylabel('#road')
#ax.set_xticks(np.arange(0, 10000, step=1000))
plt.show()

In [None]:
# Bin edges with more than one granularity: narrow bins for short roads, wider bins for long ones.
# Alternative edge sets, one per road category:
#bins = list(np.arange(0, 2000, 200))+list(np.arange(2000, 10000, 500))+list(np.arange(10000, 41000, 10000)) #HF
#bins = list(np.arange(0, 1500, 100))+list(np.arange(1500, 5000, 500))+list(np.arange(5000, 110000, 20000)) #HE
#bins = list(np.arange(0, 1000, 100))+list(np.arange(1000, 4000, 500))+list(np.arange(4000, 31000, 5000)) #LF
#bins = list(np.arange(0, 1000, 50))+list(np.arange(1000, 2000, 200))+list(np.arange(2000, 10000, 500)) #LE
# MIX: steps of 100 below 2000, of 500 up to 5000 and of 5000 afterwards.
# np.arange excludes its stop value, so the last edge is 90000.
bins = [
    *np.arange(0, 2000, 100),
    *np.arange(2000, 5000, 500),
    *np.arange(5000, 95000, 5000),
] #MIX
# Assign every road to its length bin; include_lowest makes the first bin include its left edge (0).
road_len['bin'], b = pd.cut(x=road_len['edge_len'], bins=bins, retbins=True, include_lowest=True)

In [None]:
#road_len.groupby(['bin']).count()

In [None]:
# One empty list per length bin present in road_len, keyed by the bin's (left, right) edges.
road_len_dict = {(interval.left, interval.right): list() for interval in road_len['bin'].unique()}

In [None]:
# File every road under the key of the length bin it falls in.
for road_name, length_bin in zip(road_len['road'], road_len['bin']):
    road_len_dict[(length_bin.left, length_bin.right)].append(road_name)

In [None]:
# Sanity check: the total number of roads stored in the dictionary
# (to be compared with the number of rows of road_len).
t = sum(len(roads) for roads in road_len_dict.values())

t

In [None]:
def road_to_remove(df, value, rand_dict, bins, n_rand=1):
    """Select the top roads and, for each, random roads of similar length.

    The first ``value`` rows of ``df`` are taken as the top roads, so ``df``
    must already be sorted by the ranking of interest. For each of the
    ``n_rand`` repetitions, one random road is drawn for every top road from
    the roads that share its length bin, never repeating a road inside the
    same repetition.

    Args:
        df: DataFrame with a ``road`` and an ``edge_len`` column, sorted so
            that the roads to remove come first. The length of a top road is
            read from its own row.
        value: number of top roads to select.
        rand_dict: mapping ``(bin_left, bin_right) -> list of roads`` whose
            length falls in that bin; the random roads are drawn from it.
        bins: list of bin edges used to create ``rand_dict``.
        n_rand: number of random extractions to generate for the top roads.

    Returns:
        A tuple ``(top_road, rand_road_list)``: ``top_road`` is a NumPy array
        with the selected top roads, ``rand_road_list`` is a list of
        ``n_rand`` lists, each holding one random road per top road.

    Raises:
        ValueError: if the length of a top road falls outside ``bins``, or if
            a bin does not hold enough distinct roads to draw without
            repetition (a plain rejection loop would never terminate then).
        KeyError: if the bin of a top road is missing from ``rand_dict``.
    """
    top_rows = df.iloc[:value]
    top_road = np.array(top_rows['road'])

    # The bin of a top road does not depend on the repetition, so the
    # candidate pools are resolved once instead of once per repetition.
    pools = []
    if len(top_rows) > 0:
        top_bins = pd.cut(top_rows['edge_len'], bins, include_lowest=True)
        for road, interval in zip(top_rows['road'], top_bins):
            if pd.isna(interval):
                raise ValueError(
                    'The length of road ' + repr(road) + ' falls outside the given bins'
                )
            pools.append(rand_dict[(interval.left, interval.right)])

    rand_road_list = []
    for _ in range(n_rand):
        rand_roads = []
        for pool in pools:
            random_road = np.random.choice(pool)
            if random_road in rand_roads:
                # Re-drawing can only succeed if the pool still has an unused road.
                if all(candidate in rand_roads for candidate in pool):
                    raise ValueError(
                        'Not enough distinct roads in a length bin to draw '
                        'random roads without repetition'
                    )
                while random_road in rand_roads:
                    random_road = np.random.choice(pool)
            rand_roads.append(random_road)
        rand_road_list.append(rand_roads)

    return top_road, rand_road_list

In [None]:
# Number of top-ranked roads to select for removal.
topk = 100

In [None]:
#n = int(np.ceil(topk/4))
#df = corr_co2.groupby(by=['clust_label'], group_keys=False).apply(lambda x: x.sort_values(by=['mean_len', 'std_len'], ascending=False).head(n))

In [None]:
#df.drop([1225, 326], inplace=True)
#len(df)
#road_tbr = df['road'].tolist()

In [None]:
# Select the topk roads of df plus 5 random selections drawn from the same length bins.
road_tbr, road_tbr_rand_list = road_to_remove(df, topk, road_len_dict, bins, n_rand=5)

In [None]:
# Report how many roads each selection contains.
print(f'Top roads: {len(road_tbr)}')
for i, rand_roads in enumerate(road_tbr_rand_list):
    print(f'Random roads {i}: {len(rand_roads)}')

In [None]:
# Report the total length (sum of edge_len) covered by each selection.
top_meters = df.loc[df['road'].isin(road_tbr), 'edge_len'].sum()
print(f'Top removed meters: {top_meters!s}')
for i, rand_roads in enumerate(road_tbr_rand_list):
    rand_meters = df.loc[df['road'].isin(rand_roads), 'edge_len'].sum()
    print(f'Rand removed meters {i}: {rand_meters!s}')

In [None]:
# Reload the road-to-edge mapping from disk (same file as loaded earlier in the notebook).
road_edge_map = pd.read_csv(path_road_edge_mapping)

In [None]:
# Build the table to be saved: one row per road to be removed, with the list of edge ids belonging to that road.
road_edge_tbr = (
    road_edge_map[road_edge_map['road'].isin(road_tbr)]
    .groupby('road')
    .agg({'edge_id': lambda edge_ids: edge_ids.tolist()})
    .reset_index()
)

In [None]:
# Label of the road category being processed; it is used in the output folder and file names.
category = 'MIX'

In [None]:
# Output folders for this selection. parents=True also creates any missing
# intermediate folder, so the cell does not stop with FileNotFoundError when
# the parent folder does not exist yet.
Path('../data/simulations/Milano_big/'+category+'_top'+str(topk)).mkdir(parents=True, exist_ok=True)
Path('../data/simulations/Milano_big/'+category+'_top'+str(topk)+'/rand').mkdir(parents=True, exist_ok=True)

In [None]:
# Save the top roads together with their edges.
road_edge_tbr.to_csv(
    f'../data/simulations/Milano_big/{category}_top{topk}/Milano_big_road_{category}_top{topk}.csv',
    index=False,
)

In [None]:
# Save every random selection: one CSV per selection, with one row per road and the list of its edge ids.
for i, rand_roads in enumerate(road_tbr_rand_list):
    road_edge_tbr_rand = (
        road_edge_map[road_edge_map['road'].isin(rand_roads)]
        .groupby('road')
        .agg({'edge_id': lambda edge_ids: edge_ids.tolist()})
        .reset_index()
    )
    road_edge_tbr_rand.to_csv(
        f'../data/simulations/Milano_big/{category}_top{topk}/rand/Milano_big_road_{category}_rand{topk}_{i}.csv',
        index=False,
    )