## Import libraries

In [1]:
import os
import sys

import pandas as pd
import geopandas as gpd
import osmnx as ox
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

module_path = os.path.abspath(os.path.join('../../'))
if module_path not in sys.path:
    sys.path.append(module_path)
    import aup



## 00 - Required data

In [2]:
# Location of area of interest (Required)
aoi_dir = "../../data/external/prox_latam/aoi_ags.gpkg"

# Name of points of interest (Required)
s = 'random_pois'
# Location of points of interest (Required)
pois_dir = "../../data/external/prox_latam/pois_ags.gpkg"

## 01 - Create network

In [3]:
# Read area of interest (aoi)
aoi = gpd.read_file(aoi_dir)

# Create OSMnx network for area of interest
G, nodes, edges = aup.create_osmnx_network(aoi,how='from_bbox',network_type='all_private')

Extracted min and max coordinates from the municipality. Polygon N:22.10033, S:21.62227, E-102.06451, W-102.59887.
Created OSMnx graph from bounding box.
Converted OSMnx graph to 60233 nodes and 142263 edges GeoDataFrame.
Filtered columns.
Column: osmid in nodes gdf, has a list in it, the column data was converted to string.
Column: lanes in nodes gdf, has a list in it, the column data was converted to string.
Column: name in nodes gdf, has a list in it, the column data was converted to string.
Column: highway in nodes gdf, has a list in it, the column data was converted to string.
Column: maxspeed in nodes gdf, has a list in it, the column data was converted to string.
Column: ref in nodes gdf, has a list in it, the column data was converted to string.


## 02 - Assign nearest node(osmid) to each point of interest

In [4]:
# Load pois
pois = gpd.read_file(pois_dir)
pois = pois.set_crs("EPSG:4326")

# Filter pois
pois_aoi = gpd.sjoin(pois,aoi,how='inner')

pois_aoi.head(2)

Unnamed: 0,code,geometry,index_right,city
0,611111,POINT (-102.27464 21.90191),0,Aguascalientes
1,611111,POINT (-102.26601 21.85971),0,Aguascalientes


In [5]:
# Format to calculate nearest
nodes_gdf = nodes.set_crs("EPSG:4326")
edges_gdf = edges.set_crs("EPSG:4326")
nodes_gdf = nodes_gdf.set_index('osmid')
edges_gdf = edges_gdf.set_index(["u", "v", "key"])

# Calculate nearest
nearest = aup.find_nearest(G, nodes_gdf, pois, return_distance= True)
print("Calculated distances from pois to nearest node.")

Calculated distances from pois to nearest node.


In [6]:
nearest

Unnamed: 0,code,geometry,osmid,distance_node
0,611111,POINT (-102.27464 21.90191),961580633,16.377978
1,611111,POINT (-102.26601 21.85971),2253747737,71.553289
2,611111,POINT (-102.28277 21.92966),1314259696,64.123810
3,611111,POINT (-102.28387 21.86783),8559538139,7.363815
4,611111,POINT (-102.30135 21.86116),4638354829,36.750632
...,...,...,...,...
20787,9321,POINT (-102.26780 22.07600),2168313153,74.797796
20788,9321,POINT (-102.25874 21.97471),8417685286,38.813478
20789,9321,POINT (-102.25767 22.03427),2117400623,44.197349
20790,9321,POINT (-102.24936 21.95630),4120694367,25.284346


## 03 - Calcular la distancia de cada nodo al poi más cercano

Filtro (preescolares) para disminuir la cantidad de datos y hacer una comparación al final

In [7]:
nearest = nearest.loc[(nearest.code==611111)|(nearest.code==611112)]
nearest

Unnamed: 0,code,geometry,osmid,distance_node
0,611111,POINT (-102.27464 21.90191),961580633,16.377978
1,611111,POINT (-102.26601 21.85971),2253747737,71.553289
2,611111,POINT (-102.28277 21.92966),1314259696,64.123810
3,611111,POINT (-102.28387 21.86783),8559538139,7.363815
4,611111,POINT (-102.30135 21.86116),4638354829,36.750632
...,...,...,...,...
301,611112,POINT (-102.24547 21.87010),1349274663,21.598502
302,611112,POINT (-102.31882 21.90120),8573661848,53.097210
303,611112,POINT (-102.25430 21.85272),1618371055,64.540709
304,611112,POINT (-102.25421 21.85283),1618371055,49.378704


In [14]:
# Format
edges_gdf['length'].fillna(edges_gdf['length'].mean(),inplace=True)

# ELEMENTS NEEDED OUTSIDE THE LOOP
# df_temp: Each column will store a batch of procesed nodes.
df_temp = nodes_gdf.copy()

# ELEMENTS NEEDED OUTSIDE THE LOOP
# nodes_analysis is a nodes_gdf (index reseted) used in aup.calculate_distance_nearest_poi
nodes_analysis = nodes_gdf.reset_index().copy()

# ELEMENTS NEEDED OUTSIDE THE LOOP
#nodes_distance: Minimum time/distance found in all batches will be added from df_min (within if/elif/else) to nodes_distance 
# (which keeps x,y and geometry data)
nodes_distance = nodes_gdf.copy()

if len(nearest) % 250:
    batch_size = len(nearest)/200
    for k in range(int(batch_size)+1):
        #---------------------------------------------------------------------------------------------------------------------------------------------
        print(f"Starting range k = {k+1} of {int(batch_size)+1} for source {s}.")
        #---------------------------------------------------------------------------------------------------------------------------------------------
        source_process = nearest.iloc[int(200*k):int(200*(1+k))].copy()
        nodes_distance_prep = aup.calculate_distance_nearest_poi(source_process, nodes_analysis, edges_gdf, s, 'osmid', wght='length')

        #A middle gdf is created whose columns will be the name of the amenity and the batch number it belongs to
        df_int = pd.DataFrame()
        df_int['dist_'+str(k)+s] = nodes_distance_prep['dist_'+s]
        
        #The middle gdf is merged into the previously created temporary gdf to store the data by node, each batch in a column.
        df_temp = df_temp.merge(df_int, left_index=True, right_index=True)

    # Once finished, drop the non-distance values from the temporary gdf
    df_temp.drop(['x', 'y', 'street_count','geometry'], inplace = True, axis=1)

    #We apply the min function to find the minimum value. This value is sent to a new df_min
    df_min = pd.DataFrame()
    df_min['dist_'+s] = df_temp.min(axis=1)

    #We merge df_min which contains the shortest distance to the POI with nodes_distance which will store all final data
    nodes_distance = nodes_distance.merge(df_min, left_index=True, right_index=True)

    #Final data gets converted to time, assuming a walking speed of 4km/hr
    nodes_time = nodes_distance.copy()
    nodes_time['time'] = (nodes_time['dist_'+s]*60)/4000

else:
    batch_size = len(nearest)/250
    for k in range(int(batch_size)+1):
        #---------------------------------------------------------------------------------------------------------------------------------------------
        print(f"Starting range k = {k+1} of {int(batch_size)+1} for source {s}.")
        #---------------------------------------------------------------------------------------------------------------------------------------------
        source_process = nearest.iloc[int(250*k):int(250*(1+k))].copy()
        nodes_distance_prep = aup.calculate_distance_nearest_poi(source_process, nodes_analysis, edges_gdf, s, 'osmid', wght='length')

        #A middle gdf is created whose columns will be the name of the amenity and the batch number it belongs to
        df_int = pd.DataFrame()
        df_int['dist_'+str(k)+s] = nodes_distance_prep['dist_'+s]

        #The middle gdf is merged into the previously created temporary gdf to store the data by node, each batch in a column.
        df_temp = df_temp.merge(df_int, left_index=True, right_index=True)

    # Once finished, drop the non-distance values from the temporary gdf
    df_temp.drop(['x', 'y', 'street_count','geometry'], inplace = True, axis=1)

    #We apply the min function to find the minimum value. This value is sent to a new df_min
    df_min = pd.DataFrame()
    df_min['dist_'+s] = df_temp.min(axis=1)

    #We merge df_min which contains the shortest distance to the POI with nodes_distance which will store all final data
    nodes_distance = nodes_distance.merge(df_min, left_index=True, right_index=True)

    #Final data gets converted to time, assuming a walking speed of 4km/hr
    nodes_time = nodes_distance.copy()
    nodes_time['time'] = (nodes_time['dist_'+s]*60)/4000

#Format nodes_time
nodes_time['source'] = s
nodes_time.reset_index(inplace=True)
nodes_time = nodes_time.set_crs("EPSG:4326")
nodes_time = nodes_time[['osmid','time','source','x','y','geometry']]

Starting range k = 1 of 2 for source random_pois.
Starting range k = 2 of 2 for source random_pois.


In [15]:
# Show
print(nodes_time.shape)
nodes_time.head(2)

(60233, 6)


Unnamed: 0,osmid,time,source,x,y,geometry
0,301189389,6.400245,random_pois,-102.342212,21.848544,POINT (-102.34221 21.84854)
1,301189406,17.55636,random_pois,-102.350222,21.850815,POINT (-102.35022 21.85082)


## 99 - Result comparison

In [16]:
# Filter current data for comparison
nodes_time_f = nodes_time[['osmid','time']]

# Show
print(nodes_time_f.shape)
nodes_time_f.head(2)

(60233, 2)


Unnamed: 0,osmid,time
0,301189389,6.400245
1,301189406,17.55636


In [11]:
# Location of baseline data
nodes_proximity_dir = "../../data/external/prox_latam/03_nodes_proximity_2020_concat.gpkg"

# Load baseline data
nodes_proximity = gpd.read_file(nodes_proximity_dir)

# Filter baseline data
nodes_proximity_comp = nodes_proximity.loc[nodes_proximity.source=='denue_preescolar']
nodes_proximity_comp.rename(columns={'time':'baselinetime'},inplace=True)
nodes_proximity_comp = nodes_proximity_comp[['osmid','baselinetime']]

# Show
print(nodes_proximity_comp.shape)
nodes_proximity_comp.head(2)

(60233, 2)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nodes_proximity_comp.rename(columns={'time':'baselinetime'},inplace=True)


Unnamed: 0,osmid,baselinetime
0,301189389,6.400245
1,301189406,17.55636


In [17]:
# Merge current and baseline data
comparison = pd.merge(nodes_time_f, nodes_proximity_comp, on='osmid')

# Show difference in times
comparison['diff'] = comparison['time']-comparison['baselinetime']
print(f"Current difference between both methodologies is {comparison['diff'].sum()} minutes total.")
comparison

Current difference between both methodologies is 0.0 minutes total.


Unnamed: 0,osmid,time,baselinetime,diff
0,301189389,6.400245,6.400245,0.0
1,301189406,17.556360,17.556360,0.0
2,301191695,6.655365,6.655365,0.0
3,301191702,11.940270,11.940270,0.0
4,395436249,22.370580,22.370580,0.0
...,...,...,...,...
60228,11216001345,32.346510,32.346510,0.0
60229,11216001346,31.217775,31.217775,0.0
60230,11216001347,30.739860,30.739860,0.0
60231,11216001348,30.299505,30.299505,0.0


## Script mode

In [18]:
def find_nodes_proximity(aoi_dir,pois_dir,pois_name):

    #-------------------------------------------------- Create OSMnx network

    # Read area of interest (aoi)
    aoi = gpd.read_file(aoi_dir)

    # Create OSMnx network for area of interest
    G, nodes, edges = aup.create_osmnx_network(aoi,how='from_bbox',network_type='all_private')

    #-------------------------------------------------- Assign nearest node(osmid) to each point of interest

    # Load pois
    pois = gpd.read_file(pois_dir)
    pois = pois.set_crs("EPSG:4326")

    # Filter pois
    pois_aoi = gpd.sjoin(pois,aoi,how='inner')

    # Format to calculate nearest
    nodes_gdf = nodes.set_crs("EPSG:4326")
    edges_gdf = edges.set_crs("EPSG:4326")
    nodes_gdf = nodes_gdf.set_index('osmid')
    edges_gdf = edges_gdf.set_index(["u", "v", "key"])

    # Calculate nearest
    nearest = aup.find_nearest(G, nodes_gdf, pois, return_distance= True)
    print("Calculated distances from pois to nearest node.")

    #-------------------------------------------------- Calculate distance from each node to nearest poi

    # Format
    edges_gdf['length'].fillna(edges_gdf['length'].mean(),inplace=True)

    # ELEMENTS NEEDED OUTSIDE THE LOOP
    # df_temp: Each column will store a batch of procesed nodes.
    df_temp = nodes_gdf.copy()

    # ELEMENTS NEEDED OUTSIDE THE LOOP
    # nodes_analysis is a nodes_gdf (index reseted) used in aup.calculate_distance_nearest_poi
    nodes_analysis = nodes_gdf.reset_index().copy()

    # ELEMENTS NEEDED OUTSIDE THE LOOP
    #nodes_distance: Minimum time/distance found in all batches will be added from df_min (within if/elif/else) to nodes_distance 
    # (which keeps x,y and geometry data)
    nodes_distance = nodes_gdf.copy()

    s = pois_name

    if len(nearest) % 250:
        batch_size = len(nearest)/200
        for k in range(int(batch_size)+1):
            #---------------------------------------------------------------------------------------------------------------------------------------------
            print(f"Starting range k = {k+1} of {int(batch_size)+1} for source {s}.")
            #---------------------------------------------------------------------------------------------------------------------------------------------
            source_process = nearest.iloc[int(200*k):int(200*(1+k))].copy()
            nodes_distance_prep = aup.calculate_distance_nearest_poi(source_process, nodes_analysis, edges_gdf, s, 'osmid', wght='length')

            #A middle gdf is created whose columns will be the name of the amenity and the batch number it belongs to
            df_int = pd.DataFrame()
            df_int['dist_'+str(k)+s] = nodes_distance_prep['dist_'+s]

            #The middle gdf is merged into the previously created temporary gdf to store the data by node, each batch in a column.
            df_temp = df_temp.merge(df_int, left_index=True, right_index=True)

        # Once finished, drop the non-distance values from the temporary gdf
        df_temp.drop(['x', 'y', 'street_count','geometry'], inplace = True, axis=1)

        #We apply the min function to find the minimum value. This value is sent to a new df_min
        df_min = pd.DataFrame()
        df_min['dist_'+s] = df_temp.min(axis=1)

        #We merge df_min which contains the shortest distance to the POI with nodes_distance which will store all final data
        nodes_distance = nodes_distance.merge(df_min, left_index=True, right_index=True)

        #Final data gets converted to time, assuming a walking speed of 4km/hr
        nodes_time = nodes_distance.copy()
        nodes_time['time'] = (nodes_time['dist_'+s]*60)/4000

    else:
        batch_size = len(nearest)/250
        for k in range(int(batch_size)+1):
            #---------------------------------------------------------------------------------------------------------------------------------------------
            print(f"Starting range k = {k+1} of {int(batch_size)+1} for source {s}.")
            #---------------------------------------------------------------------------------------------------------------------------------------------
            source_process = nearest.iloc[int(250*k):int(250*(1+k))].copy()
            nodes_distance_prep = aup.calculate_distance_nearest_poi(source_process, nodes_analysis, edges_gdf, s, 'osmid', wght='length')

            #A middle gdf is created whose columns will be the name of the amenity and the batch number it belongs to
            df_int = pd.DataFrame()
            df_int['dist_'+str(k)+s] = nodes_distance_prep['dist_'+s]

            #The middle gdf is merged into the previously created temporary gdf to store the data by node, each batch in a column.
            df_temp = df_temp.merge(df_int, left_index=True, right_index=True)

        # Once finished, drop the non-distance values from the temporary gdf
        df_temp.drop(['x', 'y', 'street_count','geometry'], inplace = True, axis=1)

        #We apply the min function to find the minimum value. This value is sent to a new df_min
        df_min = pd.DataFrame()
        df_min['dist_'+s] = df_temp.min(axis=1)

        #We merge df_min which contains the shortest distance to the POI with nodes_distance which will store all final data
        nodes_distance = nodes_distance.merge(df_min, left_index=True, right_index=True)

        #Final data gets converted to time, assuming a walking speed of 4km/hr
        nodes_time = nodes_distance.copy()
        nodes_time['time'] = (nodes_time['dist_'+s]*60)/4000

    #Format nodes_time
    nodes_time['source'] = s
    nodes_time.reset_index(inplace=True)
    nodes_time = nodes_time.set_crs("EPSG:4326")
    nodes_time = nodes_time[['osmid','time','source','x','y','geometry']]

    return nodes_time

In [None]:
prueba = find_nodes_proximity(aoi_dir,pois_dir,s)
prueba

Extracted min and max coordinates from the municipality. Polygon N:22.10033, S:21.62227, E-102.06451, W-102.59887.
Created OSMnx graph from bounding box.
Converted OSMnx graph to 60233 nodes and 142263 edges GeoDataFrame.
Filtered columns.
Column: osmid in nodes gdf, has a list in it, the column data was converted to string.
Column: lanes in nodes gdf, has a list in it, the column data was converted to string.
Column: name in nodes gdf, has a list in it, the column data was converted to string.
Column: highway in nodes gdf, has a list in it, the column data was converted to string.
Column: maxspeed in nodes gdf, has a list in it, the column data was converted to string.
Column: ref in nodes gdf, has a list in it, the column data was converted to string.
Calculated distances from pois to nearest node.
Starting range k = 1 of 104 for source random_pois.
Starting range k = 2 of 104 for source random_pois.
Starting range k = 3 of 104 for source random_pois.
Starting range k = 4 of 104 for 