# 04b - Nodes distance matrix

This notebook loads all previously downloaded nodes and edges from notebook 02 and creates a __matrix containing the distance traveling by edges from each node to all nodes.__ 

It was __optimized by using g.shortest_paths_dijkstra() directly.__

## __Import libraries__

In [1]:
from pathlib import Path

current_path = Path().resolve()

for parent in current_path.parents:
    if parent.name == "accesibilidad-urbana":
        project_root = parent
        break

print(project_root)

/home/observatorio/Documents/repos/accesibilidad-urbana


In [2]:
import os
import sys

import pandas as pd
import geopandas as gpd
import numpy as np

import matplotlib.pyplot as plt
from matplotlib import colors
import seaborn as sns

from tqdm import tqdm

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

module_path = os.path.abspath(os.path.join(project_root))
if module_path not in sys.path:
    print(f"Path found.")
    sys.path.append(module_path)
    import aup
else:
    print(f"Path already loaded.")
    import aup

repo_path = str(module_path)+"/"
print(f"Repo path: {repo_path}")

Path already loaded.
Repo path: /home/observatorio/Documents/repos/accesibilidad-urbana/


## __Notebook config__

In [3]:
# ----- ----- ----- Input configuration
project_path = repo_path + "data/projects/30_taojuan/"

# ----- ----- ----- Process configuration
projected_crs = "EPSG:32651"
hex_res = 11
project_schema = 'projects_research'
aoi_table = 'taoyuan_aoi'
nodes_table = 'taoyuan_nodes_drawnaoi'
edges_table = 'taoyuan_edges_drawnaoi'
hex_table = 'taoyuan_hexgrid'

# ----- ----- ----- Saving configuration
# save output to database?
save = True
if_exists='replace'

## __Load nodes and edges__

#### Load nodes

In [4]:
query = f"SELECT * FROM {project_schema}.{nodes_table}"
nodes_gdf = aup.gdf_from_query(query, geometry_col='geometry')
if nodes_gdf.crs != "EPSG:4326":
    nodes_gdf = nodes_gdf.to_crs("EPSG:4326")

# Show
print(nodes_gdf.crs)
print(nodes_gdf.shape)
nodes_gdf.head(2)

EPSG:4326
(245235, 5)


Unnamed: 0,osmid,x,y,street_count,geometry
0,25423153,121.421453,25.065326,3,POINT (121.42145 25.06533)
1,25423160,121.4395,25.069168,3,POINT (121.4395 25.06917)


#### Load edges

In [7]:
query = f"SELECT * FROM {project_schema}.{edges_table}"
edges_gdf = aup.gdf_from_query(query, geometry_col='geometry')
if edges_gdf.crs != "EPSG:4326":
    edges_gdf = edges_gdf.to_crs("EPSG:4326")

# Show
print(edges_gdf.crs)
print(edges_gdf.shape)
edges_gdf.head(2)

EPSG:4326
(638183, 18)


Unnamed: 0,u,v,key,osmid,oneway,lanes,name,highway,maxspeed,length,geometry,bridge,ref,junction,tunnel,access,width,service
0,4421143392,4397397103,0,285666853,False,,湖山街,unclassified,,30.375803,"LINESTRING (121.35037 24.98395, 121.35041 24.9...",,,,,,,
1,4421143392,2893294431,0,285666853,False,,湖山街,unclassified,,71.379208,"LINESTRING (121.35037 24.98395, 121.35027 24.9...",,,,,,,


## __Create igraph__

In [8]:
# Create g (network), weights (array with edge lengths) and node_mapping (series with osmid-index)
wght = 'length'
g, weights, node_mapping = aup.to_igraph(nodes_gdf,
                                         edges_gdf,
                                         wght=wght)

# Show
weights

array([ 30.37580327,  71.37920802, 142.2923363 , ..., 110.50357262,
        73.00347325, 177.38812404], shape=(638183,))

In [9]:
# Show
node_mapping

{np.int64(25423153): 0,
 np.int64(25423160): 1,
 np.int64(25423587): 2,
 np.int64(25423605): 3,
 np.int64(25423609): 4,
 np.int64(25423625): 5,
 np.int64(31365361): 6,
 np.int64(31365364): 7,
 np.int64(31365365): 8,
 np.int64(31365368): 9,
 np.int64(31365377): 10,
 np.int64(31365382): 11,
 np.int64(31365391): 12,
 np.int64(31365393): 13,
 np.int64(31365395): 14,
 np.int64(31365401): 15,
 np.int64(31365403): 16,
 np.int64(31365404): 17,
 np.int64(31365405): 18,
 np.int64(31365406): 19,
 np.int64(31365407): 20,
 np.int64(31365413): 21,
 np.int64(31365415): 22,
 np.int64(31365416): 23,
 np.int64(31365417): 24,
 np.int64(31365418): 25,
 np.int64(31365419): 26,
 np.int64(31365420): 27,
 np.int64(31365422): 28,
 np.int64(31365426): 29,
 np.int64(31365428): 30,
 np.int64(31365430): 31,
 np.int64(31365432): 32,
 np.int64(31365437): 33,
 np.int64(31365438): 34,
 np.int64(31365441): 35,
 np.int64(31365442): 36,
 np.int64(31365445): 37,
 np.int64(31365446): 38,
 np.int64(31365450): 39,
 np.int64(

## __Iterate separating the nodes as points of interest by batches, find shortest paths to them and save them__

In [105]:
# ----- ----- ----- PREPARATION ----- ----- -----
# Prepare nodes for seed mapping
if 'osmid' not in nodes_gdf.columns:
    nodes_gdf.reset_index(inplace=True)

# ----- ----- ----- PROCESS CONFIGURATION ----- ----- -----
# Set batch size - Number of points of interest to be calculated on loop
batch_size = 1000
# Set save_batch_size - When reaching this amount of processed pois, saves locally and to database. Deletes dataframe and starts again.
save_batch_size = 2000

# ----- ----- ----- PROCESS START ----- ----- -----

# Set first batch initial and final bounds
initial_bound = 0
final_bound = batch_size

# Set last saved value
last_saved_bound = 0

# Set dataframe that will store shortest paths
set_dataframe = True

#while initial_bound<len(nodes_gdf):
while initial_bound<2000:

    print(f"Processing from poi {initial_bound} to poi {final_bound}.")
    
    
    # Create an array of points of interest indexes
    seeds = aup.get_seeds(nodes_gdf.iloc[initial_bound:final_bound], #Current points of interest
                          node_mapping,
                          column_name='osmid') #Column with index to which relate nearest distance

    # Look for the shortest path between the nodes in g and the points of interest (seeds)
    shortest_paths = np.array(g.shortest_paths_dijkstra(seeds, weights=weights))
    
    # Reduce decimal places to save space
    shortest_paths = np.round(shortest_paths,2)

    # Create shortest path DataFrame
    if set_dataframe:
        shortest_paths_df = pd.DataFrame()
        # Turn off dataframe reset
        set_dataframe=False

    # Append data to shortest path DataFrame
    all_osmids = list(node_mapping.keys())
    poi_osmids = list(nodes_gdf.iloc[seeds].osmid.unique())
    batch_df = pd.DataFrame(shortest_paths, columns=all_osmids, index=poi_osmids)
    shortest_paths_df = pd.concat([shortest_paths_df, batch_df])

    del seeds #Save space
    del shortest_paths #Save space
    del batch_df #Save space
    
    if final_bound >= save_batch_size:
        print(f"Saving processing from poi {last_saved_bound} until poi {final_bound}.")
        shortest_paths_df.columns = shortest_paths_df.columns.map(str) #Turn cols to string
        shortest_paths_df.to_csv(project_path + f"nodes_distance_matrix/nodes_distance_matrix_batch{last_saved_bound}-{final_bound}.csv")
        print(f"Saved processing from poi {last_saved_bound} until poi {final_bound}.")
        last_saved_bound = final_bound
        # Turn on dataframe reset
        set_dataframe=True
        
    # Adjust bound for next iteration
    initial_bound = final_bound
    final_bound = final_bound+1000

Processing from poi 0 to poi 1000.


  shortest_paths = np.array(g.shortest_paths_dijkstra(seeds, weights=weights))


Processing from poi 1000 to poi 2000.


  shortest_paths = np.array(g.shortest_paths_dijkstra(seeds, weights=weights))


Saving processing from poi 0 until poi 2000.
Saved processing from poi 0 until poi 2000.
