# Test nodes formatting and hexagon translation

In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import os
import sys
module_path = os.path.abspath(os.path.join('../../../'))
if module_path not in sys.path:
    sys.path.append(module_path)
    import aup


  ox.config(


## Script variables

In [2]:
source_list = ['carniceria','hogar','local_mini_market']

# Pois proximity methodology - Count pois at a given time proximity?
count_pois = (True,15)

# walking_speed (float): Decimal number containing walking speed (in km/hr) to be used if prox_measure="length",
#						 or if prox_measure="time_min" but needing to fill time_min NaNs.
walking_speed = [4.5]
# WARNING: Make sure to change nodes_save_table to name {santiago_nodesproximity_n_n_kmh}, where n_n is walking_speed.
# e.g. 3.5km/hr --> 'santiago_nodesproximity_3_5_kmh'

# Area of interest (Run 'AM_Santiago', it represents Santiago's metropolitan area. We can clip data as soon as we know inputs extent.)
city = 'AM_Santiago'

# Save space in disk by deleting data that won't be used again?
save_space = True

##### WARNING ##### WARNING ##### WARNING #####
save = False # save output to database?
local_save = True # save output to local? (Make sure directory exists)
local_save_dir = f"../../../data/processed/santiago/"
##### WARNING ##### WARNING ##### WARNING #####

# ------------------------------ SCRIPT CONFIGURATION - DATABASE SCHEMAS AND TABLES ------------------------------

# Area of interest (aoi)
aoi_schema = 'projects_research'
aoi_table = 'santiago_aoi'
# OSMnx Network
network_schema = 'projects_research'
edges_table = 'santiago_edges'
nodes_table = 'santiago_nodes'
projected_crs = 'EPSG:32719'
# Save output to db
save_schema = 'projects_research'

## Download AOI and create hexagons

In [3]:
# Area of interest (aoi)
query = f"SELECT * FROM {aoi_schema}.{aoi_table} WHERE \"city\" LIKE \'{city}\'"
aoi = aup.gdf_from_query(query, geometry_col='geometry')
aoi = aoi.set_crs("EPSG:4326")

# Create hexgrid
hex_gdf = gpd.GeoDataFrame()

for r in range(8,10):
    hex_tmp = aup.create_hexgrid(aoi, r)
    hex_tmp.rename(columns={f'hex_id_{r}':'hex_id'}, inplace=True)
    hex_tmp['res'] = r

    # aup.log(f"--- Created {len(hex_tmp)} hexagons at resolution {r}.")

    hex_gdf = pd.concat([hex_gdf, hex_tmp], 
            ignore_index = True, axis = 0)
    
    del hex_tmp

## Download node data and format proximity node table

In [4]:
_, nodes, _ = aup.graph_from_hippo(aoi, network_schema, edges_table, nodes_table, projected_crs)

# add length data to edges
# edges['length'] = edges.to_crs(projected_crs).length

for walk_speed in walking_speed:
    str_walk_speed = str(walk_speed).replace('.','_')
    nodes_save_table = f'santiago_nodesproximity_{str_walk_speed}_kmh'
    source_speed_list = source_list.copy()

## Format node data

In [6]:
k = len(source_list)
i = 1

source_cols = []

# Create nodes analysis gdf
nodes_analysis = nodes.reset_index().copy()
del nodes
nodes_analysis = nodes_analysis[['osmid','geometry']]

for source in source_list:

    # aup.log(f"--- Starting nodes proximity to pois for source {i}/{k}: {source}. ")
    # Read pois from source
    query = f"SELECT * FROM {save_schema}.{nodes_save_table} WHERE \"source\" = \'{source}\'"
    nodes_source = aup.gdf_from_query(query, geometry_col='geometry')

    # aup.log(f"--- Loaded {len(nodes_source)} nodes from source {source}.")

    # Translate source to column name
    nodes_source.rename(columns={'source_time':f'{source}_time'}, inplace=True) 
    nodes_source.rename(columns={'source_15min':f'{source}_count_15min'}, inplace=True) 
    
    source_cols.append(f'{source}_time')
    source_cols.append(f'{source}_count_15min')

    # Filter nodes gdf
    nodes_source = nodes_source[['osmid', f'{source}_time', f'{source}_count_15min']]

    # Merge to nodes analysis
    nodes_analysis = nodes_analysis.merge(nodes_source, on='osmid', how='left')
    
    # aup.log(f"--- Appended {len(nodes_source)} nodes to nodes analysis.")
    del nodes_source

    i += 1

In [7]:
print(nodes_analysis.shape)
nodes_analysis.head(4)

(257999, 8)


Unnamed: 0,osmid,geometry,carniceria_time,carniceria_count_15min,hogar_time,hogar_count_15min,local_mini_market_time,local_mini_market_count_15min
0,386138,POINT (-70.64625 -33.44290),8.40633,6,1.633461,21,0.834904,150
1,386139,POINT (-70.64725 -33.44313),8.458257,6,0.525741,21,1.300392,158
2,386140,POINT (-70.64767 -33.44304),7.812775,6,1.171223,22,0.654911,154
3,386145,POINT (-70.64897 -33.44334),6.491894,7,2.368676,21,0.947829,165


In [8]:
source_cols

['carniceria_time',
 'carniceria_count_15min',
 'hogar_time',
 'hogar_count_15min',
 'local_mini_market_time',
 'local_mini_market_count_15min']

In [13]:
nodes_analysis['city'] = 'Santiago'

## Save node data

In [8]:
nodes_processed_table = f'santiago_nodesproximity_format_{str_walk_speed}_kmh'
nodes_analysis.to_file(local_save_dir + nodes_processed_table, driver='GPKG')

## Nodes to hexagon

In [9]:
hex_tmp = hex_gdf.loc[hex_gdf.res==8].copy()

In [14]:
hex_tmp = aup.group_by_hex_mean(nodes_analysis, hex_tmp, r, source_cols, 'hex_id')

TypeError: agg function failed [how->mean,dtype->object]

In [11]:
hex_tmp

Unnamed: 0,hex_id,geometry,res_x,carniceria_time,carniceria_count_15min,hogar_time,hogar_count_15min,local_mini_market_time,local_mini_market_count_15min,res_y
388,88b2c0b001fffff,"POLYGON ((-71.00896 -33.70843, -71.01339 -33.7...",8,64.508497,0.0,110.872759,0.000000,110.764741,0.0,8.0
3013,88b2c0b003fffff,"POLYGON ((-71.00326 -33.71401, -71.00770 -33.7...",8,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0
2051,88b2c0b005fffff,"POLYGON ((-71.01846 -33.70998, -71.02289 -33.7...",8,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0
3330,88b2c0b009fffff,"POLYGON ((-71.00515 -33.70129, -71.00958 -33.7...",8,49.371907,0.0,101.827495,0.000000,97.491151,0.0,8.0
2755,88b2c0b00bfffff,"POLYGON ((-70.99946 -33.70687, -71.00389 -33.7...",8,68.842102,0.0,115.206365,0.000000,115.098346,0.0,8.0
...,...,...,...,...,...,...,...,...,...,...
1200,88b2c5cf65fffff,"POLYGON ((-70.73375 -33.23822, -70.73816 -33.2...",8,75.728256,0.0,14.811368,0.692308,54.760208,0.0,8.0
861,88b2c5cf67fffff,"POLYGON ((-70.72807 -33.24383, -70.73249 -33.2...",8,74.720942,0.0,28.187659,0.000000,42.537184,0.0,8.0
2181,88b2c5cf69fffff,"POLYGON ((-70.72050 -33.22946, -70.72492 -33.2...",8,82.972627,0.0,48.316325,0.000000,53.943871,0.0,8.0
3978,88b2c5cf6bfffff,"POLYGON ((-70.71482 -33.23507, -70.71924 -33.2...",8,78.359106,0.0,53.935782,0.000000,46.299233,0.0,8.0


### Function analysis

In [30]:

hex_column_id = 'hex_id'
nodes_tmp = nodes_analysis.copy()
hex_bins = hex_tmp.copy()
osmid = True

nodes_in_hex = gpd.sjoin(nodes_tmp, hex_bins)
# Group data by hex_id
nodes_in_hex = nodes_in_hex.drop(columns=['geometry']) #Added this because it tried to calculate mean of geom
nodes_hex = nodes_in_hex.groupby([hex_column_id]).mean()
# Merge back to geometry
hex_new = pd.merge(hex_bins,nodes_hex,right_index=True,left_on=hex_column_id,how = 'outer')
if osmid:
    hex_new = hex_new.drop(['index_right','osmid'],axis=1)
else:
    hex_new = hex_new.drop(['index_right'],axis=1)
if type(dist_col) == list:
    for dc in dist_col:
        hex_new[dc].apply(lambda x: x+1 if x==0 else x )
else:
    hex_new[dist_col].apply(lambda x: x+1 if x==0 else x )

hex_new.fillna(0, inplace=True)

In [32]:
hex_processed_table = f'santiago_hexproximity_format_{str_walk_speed}_kmh'
hex_new.to_file(local_save_dir + hex_processed_table, driver='GPKG')

In [33]:
source = gpd.read_file('../../../data/processed/00_pois_formated/Sociability/local_mini_market.gpkg')