In [1]:
import os
import sys

import pandas as pd
import geopandas as gpd
import osmnx as ox
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Add the directory two levels above the working directory to the import path
# (presumably where the `aup` package lives - confirm against the repo layout).
module_path = os.path.abspath(os.path.join('../../'))
if module_path not in sys.path:
    sys.path.append(module_path)

# Import outside the `if`: when the path is already on sys.path the branch above
# is skipped, and `aup` must still be bound for the cells that follow.
import aup

  # Inverse-distance weights: 1 / dist**power. The 1e-12 term keeps the
  # denominator non-zero when dist is 0.
  weights = 1.0 / (dist + 1e-12)**power


## Required data

In [2]:
# City label written to the `city` column of the final nodes table.
city = "Aguascalientes"

In [3]:
# Input GeoPackages (both required), given as relative paths.
#   pois_dir -> points of interest
#   aoi_dir  -> area of interest
pois_dir = "../../data/external/prox_latam/pois_ags.gpkg"
aoi_dir = "../../data/external/prox_latam/aoi_ags.gpkg"

## Code

In [7]:
# Area of interest (aoi), loaded from the file referenced by aoi_dir.
aoi = gpd.read_file(aoi_dir)

# Build the OSMnx network for the aoi. The call is unpacked into the graph (G)
# and its node and edge tables.
G, nodes, edges = aup.create_osmnx_network(
    aoi,
    how='from_bbox',
    network_type='all_private',
)

Extracted min and max coordinates from the municipality. Polygon N:22.10033, S:21.62227, E-102.06451, W-102.59887.
Created OSMnx graph from bounding box.
Converted OSMnx graph to 60233 nodes and 142263 edges GeoDataFrame.
Filtered columns.
Column: osmid in nodes gdf, has a list in it, the column data was converted to string.
Column: lanes in nodes gdf, has a list in it, the column data was converted to string.
Column: name in nodes gdf, has a list in it, the column data was converted to string.
Column: highway in nodes gdf, has a list in it, the column data was converted to string.
Column: maxspeed in nodes gdf, has a list in it, the column data was converted to string.
Column: ref in nodes gdf, has a list in it, the column data was converted to string.


In [8]:
# Points of interest: read the file referenced by pois_dir and label its
# coordinates as EPSG:4326.
pois = gpd.read_file(pois_dir).set_crs("EPSG:4326")

# Spatial inner join against the aoi: only pois matched to the aoi are kept.
pois_aoi = gpd.sjoin(pois, aoi, how='inner')

# Preview the first two filtered rows.
pois_aoi.head(2)

Unnamed: 0,code,geometry,index_right,city
0,611111,POINT (-102.27464 21.90191),0,Aguascalientes
1,611111,POINT (-102.26601 21.85971),0,Aguascalientes


In [9]:
# Prepare the network tables for the nearest-node search: label both as
# EPSG:4326, index nodes by 'osmid' and edges by ("u", "v", "key").
nodes_gdf = nodes.set_crs("EPSG:4326").set_index('osmid')
edges_gdf = edges.set_crs("EPSG:4326").set_index(["u", "v", "key"])

# Match every poi to a network node, requesting the distance as well.
# NOTE(review): this passes the unfiltered `pois`, not the aoi-filtered
# `pois_aoi` built earlier - confirm that is intended.
nearest = aup.find_nearest(G, nodes_gdf, pois, return_distance=True)
print("Calculated distances from pois to nearest node.")

Calculated distances from pois to nearest node.


In [10]:
# Display the table returned by aup.find_nearest (pandas truncates long frames).
nearest

Unnamed: 0,code,geometry,osmid,distance_node
0,611111,POINT (-102.27464 21.90191),961580633,16.377978
1,611111,POINT (-102.26601 21.85971),2253747737,71.553289
2,611111,POINT (-102.28277 21.92966),1314259696,64.123810
3,611111,POINT (-102.28387 21.86783),8559538139,7.363815
4,611111,POINT (-102.30135 21.86116),4638354829,36.750632
...,...,...,...,...
20787,9321,POINT (-102.26780 22.07600),2168313153,74.797796
20788,9321,POINT (-102.25874 21.97471),8417685286,38.813478
20789,9321,POINT (-102.25767 22.03427),2117400623,44.197349
20790,9321,POINT (-102.24936 21.95630),4120694367,25.284346


In [16]:
# Name given to this set of points of interest; later cells use it to build
# the distance column names and the progress messages.
s = "random_pois"

In [19]:
# Keep only the pois whose `code` equals 611111.
# NOTE(review): 611111 is hard-coded and its meaning is not documented here.
# This also rebinds `nearest`, so the unfiltered table is no longer available
# after this cell runs.
matches_code = nearest["code"] == 611111
nearest = nearest.loc[matches_code]
nearest

Unnamed: 0,code,geometry,osmid,distance_node
0,611111,POINT (-102.27464 21.90191),961580633,16.377978
1,611111,POINT (-102.26601 21.85971),2253747737,71.553289
2,611111,POINT (-102.28277 21.92966),1314259696,64.123810
3,611111,POINT (-102.28387 21.86783),8559538139,7.363815
4,611111,POINT (-102.30135 21.86116),4638354829,36.750632
...,...,...,...,...
108,611111,POINT (-102.32168 21.93889),8427973568,25.878509
109,611111,POINT (-102.31384 21.87490),899212737,38.539637
110,611111,POINT (-102.31346 21.94863),2138447267,20.797934
111,611111,POINT (-102.31779 21.89827),817349965,7.366319


In [37]:
# Purpose: for every network node, compute the distance to the nearest poi in
# `nearest` (processed in batches) and convert it to a walking time.
# Reads:  nearest, nodes_gdf, edges_gdf, s, city
# Writes: edges_gdf['length'], df_temp, nodes_analysis, nodes_distance, df_min, nodes_time

# --- Format -------------------------------------------------------------------
# Edges without a length get the mean edge length.
# The result is assigned back rather than calling fillna(inplace=True) on the
# column selection: under pandas Copy-on-Write that chained in-place call acts
# on a temporary object and leaves edges_gdf unchanged.
edges_gdf['length'] = edges_gdf['length'].fillna(edges_gdf['length'].mean())

# --- Objects shared by all batches ----------------------------------------------
# df_temp: receives one distance column per processed batch of pois.
df_temp = nodes_gdf.copy()
# nodes_analysis: nodes_gdf with its index reset, as passed to
# aup.calculate_distance_nearest_poi.
nodes_analysis = nodes_gdf.reset_index().copy()
# nodes_distance: keeps the node attributes (x, y, geometry, ...) and receives
# the minimum distance found over all batches.
nodes_distance = nodes_gdf.copy()

# --- Batch layout ---------------------------------------------------------------
n_pois = len(nearest)
if n_pois == 0:
    raise ValueError(f"No points of interest to process for source {s}.")

# Same batch-size choice as before: 200 pois per batch unless the number of
# pois is an exact multiple of 250, in which case 250.
pois_per_batch = 200 if n_pois % 250 else 250
# Stepping a range over the poi positions yields exactly the non-empty batches;
# the former range(int(n / size) + 1) appended an empty batch whenever n was an
# exact multiple of the batch size.
batch_starts = range(0, n_pois, pois_per_batch)
last_batch = len(batch_starts) - 1

dist_col = 'dist_' + s
batch_cols = []

for k, start in enumerate(batch_starts):
    print(f"Starting range k = {k} of {last_batch} for source {s}.")
    source_process = nearest.iloc[start:start + pois_per_batch].copy()
    nodes_distance_prep = aup.calculate_distance_nearest_poi(source_process, nodes_analysis, edges_gdf, s, 'osmid', wght='length')

    # One column per batch, named after the batch number and the source.
    batch_col = 'dist_' + str(k) + s
    df_int = nodes_distance_prep[dist_col].to_frame(batch_col)
    batch_cols.append(batch_col)

    # Inner join on the index, so each batch ends up as a column next to the node data.
    df_temp = df_temp.merge(df_int, left_index=True, right_index=True)

# Keep only the per-batch distance columns. Selecting them by name (instead of
# dropping a fixed list of node columns) keeps any other node attribute out of
# the row-wise minimum below.
df_temp = df_temp[batch_cols]

# Shortest distance over all batches for every node.
df_min = df_temp.min(axis=1).to_frame(dist_col)

# Attach the minimum distance to the node attributes.
nodes_distance = nodes_distance.merge(df_min, left_index=True, right_index=True)

# Convert distance to time assuming a walking speed of 4 km/h
# (the result is in minutes if the distances are in metres - TODO confirm units).
nodes_time = nodes_distance.copy()
nodes_time['time'] = (nodes_time[dist_col]*60)/4000

# --- Format nodes_time ------------------------------------------------------------
nodes_time['source'] = s
nodes_time['city'] = city
# The 'osmid' index becomes a regular column again.
nodes_time = nodes_time.reset_index()
nodes_time = nodes_time.set_crs("EPSG:4326")
nodes_time = nodes_time[['osmid','time','source','city','x','y','geometry']]

Starting range k = 0 of 0 for source random_pois.


In [38]:
# Display the final per-node table: osmid, time, source, city, x, y, geometry.
nodes_time

Unnamed: 0,osmid,time,source,city,x,y,geometry
0,301189389,14.914155,random_pois,Aguascalientes,-102.342212,21.848544,POINT (-102.34221 21.84854)
1,301189406,26.070270,random_pois,Aguascalientes,-102.350222,21.850815,POINT (-102.35022 21.85082)
2,301191695,15.169275,random_pois,Aguascalientes,-102.342228,21.848392,POINT (-102.34223 21.84839)
3,301191702,11.940270,random_pois,Aguascalientes,-102.326375,21.843567,POINT (-102.32638 21.84357)
4,395436249,22.370580,random_pois,Aguascalientes,-102.331855,21.843707,POINT (-102.33186 21.84371)
...,...,...,...,...,...,...,...
60228,11216001345,91.468920,random_pois,Aguascalientes,-102.275587,21.817389,POINT (-102.27559 21.81739)
60229,11216001346,90.343740,random_pois,Aguascalientes,-102.275558,21.816713,POINT (-102.27556 21.81671)
60230,11216001347,89.865825,random_pois,Aguascalientes,-102.275250,21.816721,POINT (-102.27525 21.81672)
60231,11216001348,89.426760,random_pois,Aguascalientes,-102.275241,21.816457,POINT (-102.27524 21.81646)
