# 03 - Nodes distance matrix

This notebook loads the nodes and edges previously downloaded in notebook 02 and builds a __matrix containing the network distance (traveled along edges) from each node to every other node.__

## __Import libraries__

In [1]:
from pathlib import Path

# Locate the repository root so the project package can be imported no matter
# which folder inside the repository the notebook is launched from.
REPO_NAME = "accesibilidad-urbana"
current_path = Path().resolve()

# `Path.parents` does not include the path itself, so the working directory is
# checked first; ancestors follow from nearest to farthest.
for candidate in (current_path, *current_path.parents):
    if candidate.name == REPO_NAME:
        project_root = candidate
        break
else:
    # Fail here with a clear message instead of a NameError further down.
    raise FileNotFoundError(
        f"Could not find a '{REPO_NAME}' directory at or above {current_path}."
    )

print(project_root)

/home/observatorio/Documents/repos/accesibilidad-urbana


In [2]:
import os
import sys

import pandas as pd
import geopandas as gpd
import numpy as np

import matplotlib.pyplot as plt
from matplotlib import colors
import seaborn as sns

from tqdm import tqdm

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Absolute path of the repository root, as a string (sys.path holds strings).
module_path = os.path.abspath(project_root)

# Make the repository importable, reporting whether the entry had to be added.
if module_path in sys.path:
    print("Path already loaded.")
else:
    print("Path found.")
    sys.path.append(module_path)

# Project package; importable only once the repository root is on sys.path.
import aup

# Repository root with a trailing slash, used as prefix for data paths.
repo_path = f"{module_path}/"
print(f"Repo path: {repo_path}")

Path already loaded.
Repo path: /home/observatorio/Documents/repos/accesibilidad-urbana/


## __Notebook config__

In [6]:
# ----- ----- ----- Input configuration
# Project folder inside the repository; local outputs of this notebook are written below it.
# NOTE(review): the folder is spelled "30_taojuan" while tables and files use "taoyuan" - confirm the folder name.
project_path = f"{repo_path}data/projects/30_taojuan/"

# ----- ----- ----- Saving configuration
# When True, the finished matrix is uploaded to the database.
save = True
# Forwarded as `if_exists` to the database upload.
if_exists = "replace"

## __Load nodes and edges__

In [4]:
# Load the nodes from the database.
# For a quick local test, the nodes can instead be read from
# project_path+"nodes_distance_matrix/taoyuan_nodes_test.gpkg" with gpd.read_file.
nodes_schema = 'projects_research'
nodes_table = 'nodes_taoyuan_aoi'

query = f"SELECT * FROM {nodes_schema}.{nodes_table}"
nodes_gdf = aup.gdf_from_query(query, geometry_col='geometry')

# Reproject only when the table is not already in EPSG:4326.
needs_reprojection = nodes_gdf.crs != "EPSG:4326"
if needs_reprojection:
    nodes_gdf = nodes_gdf.to_crs("EPSG:4326")

# Show CRS, shape and a two-row preview
print(nodes_gdf.crs, nodes_gdf.shape, sep="\n")
nodes_gdf.head(2)

EPSG:4326
(375762, 5)


Unnamed: 0,osmid,x,y,street_count,geometry
0,187365351,121.790021,24.836928,3,POINT (121.79002 24.83693)
1,187365420,121.789392,24.827145,3,POINT (121.78939 24.82714)


In [5]:
# Load the edges from the database.
# For a quick local test, the edges can instead be read from
# project_path+"nodes_distance_matrix/taoyuan_edges_test.gpkg" with gpd.read_file.
edges_schema = 'projects_research'
edges_table = 'edges_taoyuan_aoi'

query = f"SELECT * FROM {edges_schema}.{edges_table}"
edges_gdf = aup.gdf_from_query(query, geometry_col='geometry')

# Reproject only when the table is not already in EPSG:4326.
needs_reprojection = edges_gdf.crs != "EPSG:4326"
if needs_reprojection:
    edges_gdf = edges_gdf.to_crs("EPSG:4326")

# Show CRS, shape and a two-row preview
print(edges_gdf.crs, edges_gdf.shape, sep="\n")
edges_gdf.head(2)

EPSG:4326
(961739, 18)


Unnamed: 0,u,v,key,osmid,oneway,lanes,name,highway,maxspeed,length,geometry,bridge,ref,junction,tunnel,access,width,service
0,187365351,292780412,0,"[26696993, 906806115, 1036529873]",True,1,頭城交流道,motorway_link,,609.514944,"LINESTRING (121.79002 24.83693, 121.79014 24.8...",yes,,,,,,
1,187365351,187365420,0,"[1036529860, 1036529861, 1036529867, 103652986...",True,2,蔣渭水高速公路,motorway,90.0,1115.870543,"LINESTRING (121.79002 24.83693, 121.79013 24.8...",yes,5.0,,,,,


## __Create and save nodes distance matrix__

In [None]:
# Build the node-to-node distance matrix, one column per node.
#
# Each node is taken in turn as a point of interest (poi). The distance from all
# nodes to that poi is computed with aup.calculate_distance_nearest_poi and stored
# in a column labelled with the poi's osmid. Columns are accumulated in two frames:
#   - nodes_distance_matrix: every poi, uploaded to the database at the end.
#   - batch_gdf: the pois of the current batch, written to a local GeoPackage
#     every `batch_size` nodes so finished work survives an interrupted run.
#
# NOTE(review): each batch file gets one column per node in the batch and the
# final table one column per node overall. Confirm that the GeoPackage (SQLite)
# writer and the target database accept that many columns before a full run.

# ----- ----- ----- Batch configuration
batch_size = 10000  # Number of nodes (distance columns) per local batch file
output_dir = project_path + "nodes_distance_matrix/"
# Make sure the folder exists now, not when the first batch is written hours later
os.makedirs(output_dir, exist_ok=True)

# Identifier columns only; used as the starting point of the matrix and of every batch
nodes_base = nodes_gdf.drop(columns=['x','y','street_count'])

nodes_distance_matrix = nodes_base.copy()
batch_gdf = nodes_base.copy()
local_batch = 1
nodes_in_batch = 0


def _save_batch_locally(gdf, batch_number):
    """Write one batch to a GeoPackage in `output_dir`.

    Distance columns are labelled with the poi osmid, so all column labels are
    converted to str (in place on `gdf`) before writing.
    """
    gdf.columns = gdf.columns.map(str)
    gdf.to_file(f"{output_dir}nodes_distance_matrix_batch{batch_number}.gpkg")
    print(f"Saved batch {batch_number} locally.")


# Iterate over each node
for i in tqdm(range(len(nodes_gdf)), position=0, leave=True):

    # ----- ----- ----- Data preparation
    # Current node, treated as a point of interest (poi): Series -> one-row DataFrame
    node = nodes_gdf.iloc[i].copy()
    poi_gdf = pd.DataFrame(node).transpose()
    # Current node's unique ID (osmid)
    poi_id = poi_gdf.osmid.unique()[0]

    # ----- ----- ----- Calculation and formatting
    # Distance from all nodes to this poi
    nodes_distance_poi = aup.calculate_distance_nearest_poi(poi_gdf,
                                                            nodes_gdf,
                                                            edges_gdf,
                                                            amenity_name = poi_id, #Name of the poi
                                                            column_name='osmid', #nodes_gdf index
                                                            )
    # Drop the 'dist_' prefix so the column is labelled with the poi id alone,
    # then expose the index as a regular column for the merges below
    nodes_distance_poi = nodes_distance_poi.rename(columns={f'dist_{poi_id}':poi_id}).reset_index()
    poi_distances = nodes_distance_poi[['osmid',poi_id]]

    # ----- ----- ----- Concatenation
    # Left merges always keep all nodes: if there's an unconnected area,
    # distances to nodes in other areas will be NaNs
    nodes_distance_matrix = pd.merge(nodes_distance_matrix,
                                     poi_distances,
                                     on='osmid',
                                     how='left')
    batch_gdf = pd.merge(batch_gdf,
                         poi_distances,
                         on='osmid',
                         how='left')
    nodes_in_batch += 1

    # ----- ----- ----- Local batch saving
    # Flush once the batch holds exactly `batch_size` nodes. Counting processed
    # nodes (instead of testing i % batch_size == 0) keeps every batch the same
    # size; the previous test put batch_size + 1 nodes in the first batch.
    if nodes_in_batch == batch_size:
        _save_batch_locally(batch_gdf, local_batch)
        # Start the next batch from the identifier columns only
        batch_gdf = nodes_base.copy()
        nodes_in_batch = 0
        local_batch += 1

# ----- ----- ----- Final local batch saving
# Only write the remainder if it holds data; when the node count is a multiple
# of `batch_size` the last batch was already saved inside the loop.
if nodes_in_batch > 0:
    _save_batch_locally(batch_gdf, local_batch)
del batch_gdf

# ----- ----- ----- Final database saving
save_schema = 'projects_research'
nodes_save_table = 'taoyuan_nodes_distance'
if save:
    nodes_distance_matrix.columns = nodes_distance_matrix.columns.map(str) #Turn cols to string
    # Upload
    print("Uploading nodes_distance_matrix.")
    aup.gdf_to_db_slow(nodes_distance_matrix, nodes_save_table, save_schema, if_exists=if_exists)
    print("Uploaded nodes_distance_matrix.")

# Show
print(nodes_distance_matrix.crs)
print(nodes_distance_matrix.shape)
nodes_distance_matrix.head(2)

  0%|                                                                                                           | 27/375762 [01:10<274:30:09,  2.63s/it]