In [None]:
import os
import shutil
import geopandas as gpd
import pandas as pd
import numpy as np
from shapely.geometry import LineString
import json
import matplotlib.pyplot as plt
import seaborn as sns
from shapely.geometry import LineString

In [None]:
# Notebook configuration. Every path used below is assembled from these values.
# NOTE: the per-city output folders derived from them are deleted and recreated
# by check_and_create_dir on each run.

# Root directory that holds all input and output folders.
basedir = '/path/to/data'
# Sub-folder of basedir the road-graph parquet files are read from.
road_graph_folder = 'road_graph'
# Sub-folder of basedir the GeoPackage exports are written to.
spatial_data_folder = 'spatial_data'
# Sub-folder of basedir for the coverage statistics file and the figures.
results_folder = 'road_graph_data_exploration_results'
# Sub-folder of the per-city results folder where figures are saved.
images_folder = 'images'
# Placeholder: has to be filled in before running, since it is part of every path.
city = '' # london, madrid, melbourne

In [None]:
def check_and_create_dir(path: str):
    """Leave an empty directory at ``path``.

    If something already exists at ``path`` it is removed recursively first
    (all previous contents are lost); the directory, including any missing
    parents, is then created from scratch.

    Parameters
    ----------
    path : str
        Location of the directory to (re)create.

    Returns
    -------
    None
    """
    already_present = os.path.exists(path)
    if already_present:
        # Start from a clean slate: discard whatever a previous run left behind.
        shutil.rmtree(path)

    os.makedirs(path)

In [None]:
# Refuse to continue while `city` is still the empty placeholder: the path below
# would collapse to the shared parent folder, and check_and_create_dir removes
# an existing directory recursively before recreating it.
if not city.strip():
    raise ValueError("`city` is empty - set it in the configuration cell before running the notebook.")

# Per-city folder for the GeoPackage exports (recreated empty on every run).
spatial_data_city_path = f'{basedir}/{spatial_data_folder}/{city}'
check_and_create_dir(spatial_data_city_path)

In [None]:
# Per-city results folder. check_and_create_dir removes an existing folder first,
# so anything stored there by a previous run for this city is discarded.
road_graph_results_city_path = f"{basedir}/{results_folder}/{city}"
check_and_create_dir(road_graph_results_city_path)

In [None]:
# Folder for the saved figures, nested inside the per-city results folder.
road_graph_results_city_images_path = f"{road_graph_results_city_path}/{images_folder}"
check_and_create_dir(road_graph_results_city_images_path)

In [None]:
def preprocess_road_graph_edges(row: pd.Series):
    """Derive the line coordinates and counter coverage for one edge row.

    Parameters
    ----------
    row : pd.Series
        Edge record providing 'x_start', 'y_start', 'x_end', 'y_end' and the
        string fields 'counter_info_start' and 'counter_info_end'.

    Returns
    -------
    dict
        'coord_for_geometry': ``[(x_start, y_start), (x_end, y_end)]``.
        'num_nodes_with_counter_data': how many of the two counter_info fields
        are non-empty once surrounding whitespace is stripped (0, 1 or 2).
    """
    start_point = (row['x_start'], row['y_start'])
    end_point = (row['x_end'], row['y_end'])

    # One boolean per endpoint: True when its counter_info is not blank.
    endpoint_has_counter = [
        row[field].strip() != ''
        for field in ('counter_info_start', 'counter_info_end')
    ]

    return {
        'coord_for_geometry': [start_point, end_point],
        'num_nodes_with_counter_data': np.sum(endpoint_has_counter),
    }


In [None]:
def preprocess_road_graph_supersegments(node_id_list: list, road_graph_nodes_copy: pd.DataFrame):
    """Collect the coordinates and counter coverage of a supersegment's nodes.

    Parameters
    ----------
    node_id_list : list
        Node ids of the supersegment, in the order they should be connected.
    road_graph_nodes_copy : pd.DataFrame
        Node table indexed by node id, with columns 'x', 'y' and the string
        column 'counter_info'.

    Returns
    -------
    dict
        'coord_for_geometry': list of ``(x, y)`` tuples, one per node id.
        'num_nodes_with_counter_data': number of listed nodes whose
        counter_info is non-empty once surrounding whitespace is stripped.
    """
    node_points = []
    nodes_with_counter = 0

    for node_id in node_id_list:
        node_record = road_graph_nodes_copy.loc[node_id]
        node_points.append(tuple(node_record[['x', 'y']]))
        if node_record['counter_info'].strip() != '':
            nodes_with_counter += 1

    return {
        'coord_for_geometry': node_points,
        'num_nodes_with_counter_data': nodes_with_counter,
    }

In [None]:
# Load the three road-graph tables (nodes, edges, supersegments) of the selected city.
road_graph_city_path = f'{basedir}/{road_graph_folder}/{city}'

road_graph_nodes = pd.read_parquet(f'{road_graph_city_path}/road_graph_nodes.parquet')
road_graph_edges = pd.read_parquet(f'{road_graph_city_path}/road_graph_edges.parquet')
road_graph_supersegments = pd.read_parquet(f'{road_graph_city_path}/road_graph_supersegments.parquet')

In [None]:
# Flag nodes that carry counter information. Whitespace is stripped before the
# comparison so this flag follows the same "non-empty after strip" rule as
# preprocess_road_graph_edges / preprocess_road_graph_supersegments.
has_counter_info = road_graph_nodes['counter_info'].str.strip() != ''
road_graph_nodes['counter_data'] = np.where(has_counter_info, 'yes', 'no')

# Node table with one point geometry per node built from its x / y columns (crs 4326).
road_graph_nodes_gdf = gpd.GeoDataFrame(
    road_graph_nodes,
    crs=4326,
    geometry=gpd.points_from_xy(road_graph_nodes['x'], road_graph_nodes['y']),
)

In [None]:
# Export the node points as a GeoPackage file in the per-city spatial-data folder.
road_graph_nodes_gdf.to_file(f'{spatial_data_city_path}/road_graph_nodes.gpkg', driver='GPKG')

In [None]:
# Independent copy of the node table indexed by node_id; this is the lookup table
# handed to preprocess_road_graph_supersegments, which selects nodes with .loc.
road_graph_nodes_copy = road_graph_nodes.copy(deep=True).set_index('node_id')

In [None]:
# Attach the attributes of both endpoints to every edge.
# The first merge joins on the start node (u); the second joins on the end node (v).
# In the second merge the node columns collide with those added by the first one,
# so the suffixes rename them to *_start (from u) and *_end (from v).
node_attribute_columns = ['node_id', 'x', 'y', 'counter_info']

road_graph_edges = (
    road_graph_edges
    .merge(road_graph_nodes[node_attribute_columns],
           left_on='u',
           right_on='node_id')
    .merge(road_graph_nodes[node_attribute_columns],
           left_on='v',
           right_on='node_id',
           suffixes=('_start', '_end'))
)

In [None]:
# Run the per-edge helper on every row; result_type='expand' spreads the returned
# dict over columns, which are then stored under the two names below.
edge_derived_columns = ['coord_for_geometry', 'num_nodes_with_counter_data']

road_graph_edges[edge_derived_columns] = road_graph_edges.apply(
    preprocess_road_graph_edges,
    axis=1,
    result_type='expand',
)

In [None]:
# The endpoint columns brought in by the two merges have been consumed by
# preprocess_road_graph_edges, so remove them from the edge table again.
endpoint_helper_columns = [
    f'{attribute}_{endpoint}'
    for endpoint in ('start', 'end')
    for attribute in ('node_id', 'x', 'y', 'counter_info')
]
road_graph_edges = road_graph_edges.drop(columns=endpoint_helper_columns)

In [None]:
# Turn each [(x_start, y_start), (x_end, y_end)] pair into a LineString and use it
# as the geometry of the edge table; the raw coordinate column is left out.
geometry_edges = road_graph_edges['coord_for_geometry'].apply(LineString)

edge_attributes = road_graph_edges.drop(columns=['coord_for_geometry'])
road_graph_edges_gdf = gpd.GeoDataFrame(edge_attributes, crs=4326, geometry=geometry_edges)

In [None]:
# Export the edge lines as a GeoPackage file in the per-city spatial-data folder.
road_graph_edges_gdf.to_file(f'{spatial_data_city_path}/road_graph_edges.gpkg', driver='GPKG')

In [None]:
# Run the per-supersegment helper on every row: it receives the row's node id list
# plus the node_id-indexed node table and returns the coordinates and counter count.
supersegment_derived_columns = ['coord_for_geometry', 'num_nodes_with_counter_data']

road_graph_supersegments[supersegment_derived_columns] = road_graph_supersegments.apply(
    lambda supersegment: preprocess_road_graph_supersegments(
        node_id_list=supersegment['nodes'],
        road_graph_nodes_copy=road_graph_nodes_copy,
    ),
    axis=1,
    result_type='expand',
)

In [None]:
# Number of nodes per supersegment and the share of them that has counter data.
# Despite the "perc" prefix the value is a fraction (count / total), rounded to 2 decimals.
road_graph_supersegments['num_nodes'] = road_graph_supersegments['nodes'].apply(len)

counter_node_fraction = (
    road_graph_supersegments['num_nodes_with_counter_data']
    / road_graph_supersegments['num_nodes']
)
road_graph_supersegments['perc_nodes_with_counter_data'] = counter_node_fraction.round(2)

In [None]:
# Build one LineString per supersegment from its ordered node coordinates and use
# it as geometry; the raw coordinate and node-list columns are left out.
geometry_supersegments = road_graph_supersegments['coord_for_geometry'].apply(LineString)

supersegment_attributes = road_graph_supersegments.drop(columns=['coord_for_geometry', 'nodes'])
road_graph_supersegments_gdf = gpd.GeoDataFrame(
    supersegment_attributes,
    crs=4326,
    geometry=geometry_supersegments,
)

In [None]:
# Export the supersegment lines as a GeoPackage file in the per-city spatial-data folder.
road_graph_supersegments_gdf.to_file(f'{spatial_data_city_path}/road_graph_supersegments.gpkg', driver='GPKG')

In [None]:
# SOME STATISTICS
# Share (in percent, 3 decimals) of nodes, edges and supersegments with counter data,
# written to counter_coverage.txt in the per-city results folder.
# counter_info is stripped before the comparison so the node figure uses the same
# "non-empty after strip" rule as the edge and supersegment counts, which come
# from helpers that test counter_info.strip().
nodes_with_counter = road_graph_nodes['counter_info'].str.strip() != ''
# An edge / supersegment counts as covered when at least one of its nodes has counter data.
edges_with_counter = road_graph_edges['num_nodes_with_counter_data'] > 0
supersegments_with_counter = road_graph_supersegments['num_nodes_with_counter_data'] > 0

with open(f'{road_graph_results_city_path}/counter_coverage.txt', 'w') as report:
    report.write(f"Percentage of nodes with counter data: {round(np.sum(nodes_with_counter)/road_graph_nodes.shape[0]*100,3)}%\n")
    report.write(f"Percentage of edges with counter data: {round(np.sum(edges_with_counter)/road_graph_edges.shape[0]*100,3)}%\n")
    report.write(f"Percentage of supersegments with counter data (at least one node): {round(np.sum(supersegments_with_counter)/road_graph_supersegments.shape[0]*100,3)} %")

In [None]:
# Bar chart of how many edges fall into each road type.
# The list is indexed by the value of the `importance` column, so the position of
# a name is the code it stands for (assumes the codes are integers 0..5 - TODO confirm).
# NOTE: the identifier keeps its original spelling because the following plotting
# cell looks the list up under this name.
imortance_values = ['anything else', 'tertiary', 'secondary', 'primary', 'trunk', 'highway']
importance_value_counts = road_graph_edges['importance'].value_counts()
x_labels = [imortance_values[importance_code] for importance_code in importance_value_counts.index]

fig, ax = plt.subplots()
ax.bar(x_labels, importance_value_counts.values, color='skyblue')
ax.set_xlabel('Categories')
ax.set_ylabel('Count')
# Title spelling corrected ("Distributrion" -> "Distribution").
ax.set_title(f"Distribution of edges' types, {city.capitalize()}")
ax.tick_params(axis='x', labelrotation=25)
# The output file name is intentionally left as it was so that anything already
# pointing at the saved image keeps working.
fig.savefig(f'{road_graph_results_city_images_path}/distributrion_of_edge_types.png',  bbox_inches='tight')
plt.show()

In [None]:
# Label every edge by whether its counter_distance is exactly 0, then show, for each
# road type, the percentage of edges with and without counter data.
counter_label_by_flag = {True: 'with counter data', False: 'without counter data'}
road_graph_edges['counter_label'] = (road_graph_edges['counter_distance'] == 0).map(counter_label_by_flag)

# normalize='columns' makes every importance column sum to 1; scale to percent.
percentage_df = 100 * pd.crosstab(
    index=road_graph_edges['counter_label'],
    columns=road_graph_edges['importance'],
    normalize='columns',
)

# Bars and legend entries follow the order of importance_value_counts.index.
ax = percentage_df.plot(y=importance_value_counts.index, kind="bar", figsize=(13, 6), use_index=True, rot=0)
road_type_names = [imortance_values[code] for code in importance_value_counts.index]
ax.legend(title='Type', labels=road_type_names, loc='upper right')
ax.set_xlabel('Presence of counter data')
ax.set_ylabel('Percentage, %')
ax.set_title(f'Percentage of different road types with & without counter data, {city.capitalize()}')
plt.savefig(f'{road_graph_results_city_images_path}/percentage_of_different_road_types_with_&_without_counter_data.png',  bbox_inches='tight')
plt.show()

In [None]:
# One histogram per numeric edge column, stacked vertically in a single figure.
numeric_edge_columns = ['parsed_maxspeed', 'length_meters', 'counter_distance']
fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(8, 12))

for hist_axis, column in zip(axes, numeric_edge_columns):
    hist_axis.hist(road_graph_edges[column], bins='auto', edgecolor='black')
    hist_axis.set(
        xlabel='Value',
        ylabel='Frequency',
        title=f'Distribution of {column}',
    )

fig.tight_layout()
fig.savefig(f'{road_graph_results_city_images_path}/simple_dirtribution_of_numeric_columns_of_road_edges.png',  bbox_inches='tight')
plt.show()

In [None]:
# Side-by-side box plots of the supersegment size and of the share of its nodes
# that has counter data.
columns_to_plot = ['num_nodes', 'perc_nodes_with_counter_data']
fig, axes = plt.subplots(ncols=len(columns_to_plot))

for position, column in enumerate(columns_to_plot):
    axis = axes[position]
    sns.boxplot(data=road_graph_supersegments[column], ax=axis)
    axis.set_title(column)

fig.tight_layout()
fig.savefig(f'{road_graph_results_city_images_path}/simple_whiskerplots_graph_supersegments.png',  bbox_inches='tight')
plt.show()