In [1]:
import os
import sys

import pandas as pd
import geopandas as gpd
import osmnx as ox
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Make the directory three levels above this notebook importable so that the
# `aup` package (presumably the project-local package living there) can be resolved.
module_path = os.path.abspath(os.path.join('../../../'))
if module_path not in sys.path:
    sys.path.append(module_path)

# Import unconditionally. When the path is already registered (for example via
# PYTHONPATH) the guarded branch above is skipped, and an import nested inside
# it would leave `aup` undefined for the later save cell.
import aup

## About the amount (count) of ep_plaza_small POIs and how to divide them

In [16]:
# Location (relative to this notebook) of the GeoPackage with the ep_plaza_small POIs
ep_plaza_small_dir = '../../../data/external/temporal_fromjupyter/santiago/pois/ep_plaza_small.gpkg'
# Load the file with geopandas
ep_plaza_small = gpd.read_file(ep_plaza_small_dir)

In [28]:
# Keep only the area, ID and geometry columns, then preview the first row
keep_cols = ['area_ha', 'ID', 'geometry']
ep_plaza_small = ep_plaza_small[keep_cols].copy()
ep_plaza_small.head(1)

Unnamed: 0,area_ha,ID,geometry
0,0.106,1,POINT (-70.87655 -33.29737)


In [29]:
# Total number of rows versus number of distinct ID values
n_rows = len(ep_plaza_small)
n_ids = len(ep_plaza_small['ID'].unique())
print(n_rows)
print(n_ids)

166287
9442


In [30]:
# Rows whose area_ha is strictly below 0.2
below_threshold = ep_plaza_small['area_ha'] < 0.2
very_small_source_pois = ep_plaza_small.loc[below_threshold].copy()

# Row count and distinct ID count of the subset
print(len(very_small_source_pois))
print(len(very_small_source_pois['ID'].unique()))

112410
7533


In [31]:
# Rows whose area_ha is 0.2 or more (the complement of the "< 0.2" subset)
at_or_above_threshold = ep_plaza_small['area_ha'] >= 0.2
small_source_pois = ep_plaza_small.loc[at_or_above_threshold].copy()

# Row count and distinct ID count of the subset
print(len(small_source_pois))
print(len(small_source_pois['ID'].unique()))

53877
1909


In [32]:
# Sanity check: adding up both subsets should give back the totals of ep_plaza_small,
# both in rows and in distinct IDs
total_rows = len(very_small_source_pois) + len(small_source_pois)
total_ids = len(very_small_source_pois['ID'].unique()) + len(small_source_pois['ID'].unique())
print(total_rows)
print(total_ids)

166287
9442


In [33]:
# Keep a single row (the first one encountered) for every ID
very_small_source_pois_uniqueid = very_small_source_pois.drop_duplicates(subset=['ID'], keep='first')

# After de-duplication the row count and the distinct ID count should match
print(len(very_small_source_pois_uniqueid))
print(len(very_small_source_pois_uniqueid['ID'].unique()))

7533
7533


In [34]:
# Preview the first five de-duplicated rows
very_small_source_pois_uniqueid.head(n=5)

Unnamed: 0,area_ha,ID,geometry
0,0.106,1,POINT (-70.87655 -33.29737)
13,0.096,2,POINT (-70.88429 -33.29738)
25,0.114,3,POINT (-70.87039 -33.29725)
42,0.052,4,POINT (-70.86971 -33.29773)
53,0.053,5,POINT (-70.88295 -33.29735)


In [36]:
# Same selection done in one step: filter by area_ha, then keep one row per ID
very_small_source_pois_2 = (
    ep_plaza_small
    .loc[ep_plaza_small['area_ha'] < 0.2]
    .copy()
    .drop_duplicates(subset='ID')
)

# Row count and distinct ID count of the one-step result
print(len(very_small_source_pois_2))
print(len(very_small_source_pois_2['ID'].unique()))

7533
7533


## About how to merge resulting source_pois

In [8]:
# Load a nodes proximity file to use as example input
proximity_ciclovias_dir = '../../../data/processed/santiago/santiago_nodesproximity.gpkg'
proximity_ciclovias = gpd.read_file(proximity_ciclovias_dir)

# Format as source_nodes_time
source = 'ciclovias'
count_pois = (True, 15)

# Drop the 'source' label column and rename the generic columns to source-specific
# names. The count column key is built from count_pois[1] (instead of a literal
# 'source_15min') so it always matches the window used for the new column name.
# drop()/rename() return new frames, so proximity_ciclovias is left untouched.
source_nodes_time = (
    proximity_ciclovias
    .drop(columns=['source'])
    .rename(columns={'source_time': source,
                     f'source_{count_pois[1]}min': f'{source}_{count_pois[1]}min'})
)

# Show
print(source_nodes_time.shape)
source_nodes_time.head(1)

(257999, 7)


Unnamed: 0,osmid,ciclovias,ciclovias_15min,x,y,city,geometry
0,386138,6.325455,16,-70.646254,-33.442901,AM_Santiago,POINT (-70.64625 -33.44290)


In [14]:
# Duplicate the frame so there is a second input to try the merge with
source_nodes_time_2 = source_nodes_time.copy(deep=True)

# Show
print(source_nodes_time_2.shape)
source_nodes_time_2.head(1)

(257999, 7)


Unnamed: 0,osmid,ciclovias,ciclovias_15min,x,y,city,geometry
0,386138,6.325455,16,-70.646254,-33.442901,AM_Santiago,POINT (-70.64625 -33.44290)


In [19]:
# Names of the time column and of the count column for the current source
time_col = source
count_col = f'{source}_{count_pois[1]}min'

# Join both frames on osmid; the overlapping time/count columns get the _x/_y suffixes
right_side = source_nodes_time_2[['osmid', time_col, count_col]]
source_nodes_time_all = source_nodes_time.merge(right_side, on='osmid', suffixes=('_x', '_y'))

# Time: keep the smaller of both values for each node
time_cols = [f'{time_col}_x', f'{time_col}_y']
source_nodes_time_all[time_col] = source_nodes_time_all[time_cols].min(axis=1)
source_nodes_time_all = source_nodes_time_all.drop(columns=time_cols)

# Count: add up both values for each node
count_cols = [f'{count_col}_x', f'{count_col}_y']
source_nodes_time_all[count_col] = source_nodes_time_all[count_cols].sum(axis=1)
source_nodes_time_all = source_nodes_time_all.drop(columns=count_cols)

# Show
print(source_nodes_time_all.shape)
source_nodes_time_all.head(5)

(257999, 7)


Unnamed: 0,osmid,x,y,city,geometry,ciclovias,ciclovias_15min
0,386138,-70.646254,-33.442901,AM_Santiago,POINT (-70.64625 -33.44290),6.325455,32
1,386139,-70.647249,-33.443129,AM_Santiago,POINT (-70.64725 -33.44313),4.889325,28
2,386140,-70.64767,-33.443035,AM_Santiago,POINT (-70.64767 -33.44304),4.59237,32
3,386145,-70.64897,-33.443343,AM_Santiago,POINT (-70.64897 -33.44334),2.75397,34
4,386147,-70.650375,-33.443669,AM_Santiago,POINT (-70.65038 -33.44367),0.81261,22


## About the error in the script (partial results were saved locally in order to find the error)

In [8]:
#Traceback (most recent call last):
#  File "/home/jovyan/accesibilidad-urbana/scripts/23b-proximity-analysis-progressive_smallep_plazas.py", line 237, in <module>
#    main(source_list, aoi, nodes, edges, G, walking_speed, local_save, save)
#  File "/home/jovyan/accesibilidad-urbana/scripts/23b-proximity-analysis-progressive_smallep_plazas.py", line 108, in main
#--> nodes_analysis = nodes_analysis[column_order] <--
#  File "/opt/conda/envs/gds/lib/python3.9/site-packages/geopandas/geodataframe.py", line 1456, in __getitem__
#    result = super().__getitem__(key)
#  File "/opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/frame.py", line 3766, in __getitem__
#    indexer = self.columns._get_indexer_strict(key, "columns")[1]
#  File "/opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/indexes/base.py", line 5876, in _get_indexer_strict
#    self._raise_if_missing(keyarr, indexer, axis_name)
#  File "/opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/indexes/base.py", line 5938, in _raise_if_missing
#    raise KeyError(f"{not_found} not in index")
#KeyError: "['ep_plaza_small'] not in index"

In [5]:
# First of the two source_nodes_time files read in this section
# (presumably the partial results saved locally from the script - confirm)
source_nodes_time_1_dir = "../../../data/processed/santiago/source_nodes_time_1.gpkg"
source_nodes_time_1 = gpd.read_file(source_nodes_time_1_dir)

# Show the shape and the first row
print(source_nodes_time_1.shape)
source_nodes_time_1.head(1)

(257999, 6)


Unnamed: 0,osmid,time_ep_plaza_small,ep_plaza_small_15min,x,y,geometry
0,386138,5.122773,15.0,-70.646254,-33.442901,POINT (-70.64625 -33.44290)


In [15]:
# Specific test: look at one particular node in the first partial result
target_osmid = 4362364454
source_nodes_time_1.loc[source_nodes_time_1['osmid'] == target_osmid]

Unnamed: 0,osmid,time_ep_plaza_small,ep_plaza_small_15min,x,y,geometry
127569,4362364454,0.291627,1.0,-70.871831,-33.401831,POINT (-70.87183 -33.40183)


In [17]:
# Second of the two source_nodes_time files read in this section.
# NOTE: this rebinds source_nodes_time_2, which the merge demo above defined as a copy.
source_nodes_time_2_dir = "../../../data/processed/santiago/source_nodes_time_2.gpkg"
source_nodes_time_2 = gpd.read_file(source_nodes_time_2_dir)

# Show the shape and the first row
print(source_nodes_time_2.shape)
source_nodes_time_2.head(1)

(257999, 6)


Unnamed: 0,osmid,time_ep_plaza_small,ep_plaza_small_15min,x,y,geometry
0,386138,9.526093,9,-70.646254,-33.442901,POINT (-70.64625 -33.44290)


In [18]:
# Specific test: look at the same node in the second partial result
target_osmid = 4362364454
source_nodes_time_2.loc[source_nodes_time_2['osmid'] == target_osmid]

Unnamed: 0,osmid,time_ep_plaza_small,ep_plaza_small_15min,x,y,geometry
127569,4362364454,86.03664,0,-70.871831,-33.401831,POINT (-70.87183 -33.40183)


In [23]:
source = 'ep_plaza_small'
count_pois = (True, 15)

# Names of the time column and of the count column for the current source
time_col = f'time_{source}'
count_col = f'{source}_{count_pois[1]}min'

# Merge source_nodes_time_1 results with source_nodes_time_2 results
# (overlapping columns receive the _x/_y suffixes)
right_side = source_nodes_time_2[['osmid', time_col, count_col]]
source_nodes_time_all = source_nodes_time_1.merge(right_side, on='osmid', suffixes=('_x', '_y'))

# Find min time between both source_nodes_time
time_cols = [f'{time_col}_x', f'{time_col}_y']
source_nodes_time_all[time_col] = source_nodes_time_all[time_cols].min(axis=1)
source_nodes_time_all = source_nodes_time_all.drop(columns=time_cols)

# Find sum of counted pois at {count_pois[1]} distance (minutes) for both source_nodes_time
count_cols = [f'{count_col}_x', f'{count_col}_y']
source_nodes_time_all[count_col] = source_nodes_time_all[count_cols].sum(axis=1)
source_nodes_time_all = source_nodes_time_all.drop(columns=count_cols)

# Show
print(source_nodes_time_all.shape)
source_nodes_time_all.head(1)

(257999, 6)


Unnamed: 0,osmid,x,y,geometry,time_ep_plaza_small,ep_plaza_small_15min
0,386138,-70.646254,-33.442901,POINT (-70.64625 -33.44290),5.122773,24.0


In [24]:
# Specific test: look at the same node after merging both partial results
target_osmid = 4362364454
source_nodes_time_all.loc[source_nodes_time_all['osmid'] == target_osmid]

Unnamed: 0,osmid,x,y,geometry,time_ep_plaza_small,ep_plaza_small_15min
127569,4362364454,-70.871831,-33.401831,POINT (-70.87183 -33.40183),0.291627,1.0


#### __The error was here__

In [25]:
# 1.1d) Nodes_analysis format
# Build nodes_analysis from the merged result, renaming 'time_<source>' to the bare
# source name. rename() returns a new frame, so source_nodes_time_all keeps the column
# names shown by the cells above and those cells give the same output when re-run.
nodes_analysis = source_nodes_time_all.rename(columns={'time_'+source: source})

# ERROR FOUND: On Script I wrote:
# nodes_analysis = source_nodes_time_2.copy()

# Show
print(nodes_analysis.shape)
nodes_analysis.head(1)

(257999, 6)


Unnamed: 0,osmid,x,y,geometry,ep_plaza_small,ep_plaza_small_15min
0,386138,-70.646254,-33.442901,POINT (-70.64625 -33.44290),5.122773,24.0


In [26]:
# Source-specific columns: always the time column, plus the count column when count_pois[0] is set
source_cols = [source, f'{source}_{count_pois[1]}min'] if count_pois[0] else [source]

# Final order: node id, source columns, then coordinates and geometry
column_order = ['osmid', *source_cols, 'x', 'y', 'geometry']
nodes_analysis = nodes_analysis[column_order]

# Show
print(nodes_analysis.shape)
nodes_analysis.head(1)

(257999, 6)


Unnamed: 0,osmid,ep_plaza_small,ep_plaza_small_15min,x,y,geometry
0,386138,5.122773,24.0,-70.646254,-33.442901,POINT (-70.64625 -33.44290)


In [27]:
city = 'AM_Santiago'

# 1.1e) Tidy data format (Allows loop-upload)
print(f"--- Reordering datased as tidy data format.")

# Map source-specific column names to names that apply to all sources
# (source_time, source_{N}min) and build the matching column order in one place,
# so the count column is handled by a single count_pois[0] check.
generic_names = {source: 'source_time'}
tidy_columns = ['osmid', 'source', 'source_time']
if count_pois[0]:
    generic_names[f'{source}_{count_pois[1]}min'] = f'source_{count_pois[1]}min'
    tidy_columns.append(f'source_{count_pois[1]}min')
tidy_columns += ['x', 'y', 'geometry']

# Every step returns a new frame instead of assigning columns onto a frame that was
# produced by a column selection (the pattern behind SettingWithCopyWarning).
nodes_analysis = (
    nodes_analysis
    # Add source column to be able to extract source proximity data. Fill with current source.
    .assign(source=source)
    # Rename source-specific column names
    .rename(columns=generic_names)
    # Set column order
    [tidy_columns]
    # Add city data
    .assign(city=city)
)

# Show
print(nodes_analysis.shape)
nodes_analysis.head(1)

--- Reordering datased as tidy data format.
(257999, 8)


Unnamed: 0,osmid,source,source_time,source_15min,x,y,geometry,city
0,386138,ep_plaza_small,5.122773,24.0,-70.646254,-33.442901,POINT (-70.64625 -33.44290),AM_Santiago


In [34]:
# Save switches and destinations
save = False
save_schema = 'projects_research'
walking_speed = 4.5
# Table name embeds the walking speed with '.' replaced by '_' (4.5 -> '4_5')
str_walk_speed = str(walking_speed).replace('.','_')
nodes_save_table = f'santiago_nodesproximity_{str_walk_speed}_kmh'
print(nodes_save_table)

local_save = False
# NOTE(review): the file name hard-codes 'ep_plaza_small' although the literal is an
# f-string; presumably {source} was intended - confirm before reusing with another source.
nodes_local_save_dir = f"../../../data/processed/santiago/santiago_nodesproximity_ep_plaza_small.gpkg"

# 1.1f) Save output
print(f"--- Saving nodes proximity to {source}.")
if save:
    # The count column only exists when count_pois[0] is set, and its name depends on
    # count_pois[1]; derive it instead of assuming 'source_15min' so this cast cannot
    # raise KeyError when counting is disabled or a different window is used.
    if count_pois[0]:
        count_col = f'source_{count_pois[1]}min'
        nodes_analysis[count_col] = nodes_analysis[count_col].astype(int)
    aup.gdf_to_db_slow(nodes_analysis, nodes_save_table, save_schema, if_exists='append')
    print(f"--- Saved nodes proximity to {source} in database.")

if local_save:
    nodes_analysis.to_file(nodes_local_save_dir, driver='GPKG')
    print(f"--- Saved nodes proximity to {source} locally.")

santiago_nodesproximity_4_5_kmh
--- Saving nodes proximity to ep_plaza_small.
