In [3]:
import geopandas as gpd
from osmnx.distance import nearest_nodes
import osmnx as ox
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx

WALK_RADIUS = 1609.34
SECTORS = ['comercio', 'servicios', 'salud', 'educacion']

In [4]:
def calculate_batch_walking_times(G, sources, targets):
  walking_speed_m_per_s = 1.4  # Average walking speed in meters per second
  # Calculate shortest path lengths in meters for all source-target pairs
  lengths = dict(nx.all_pairs_dijkstra_path_length(G, cutoff=900, weight='length'))  # 900 seconds = 15 minutes

  # Filter lengths for our sources and targets, convert to time in minutes
  walking_times = {}
  for source in sources:
    for target in targets:
      if source in lengths and target in lengths[source]:
        # Convert length to walking time (in minutes)
        walking_times[(source, target)] = lengths[source][target] / walking_speed_m_per_s / 60

  return walking_times

In [32]:
gdf_lots = gpd.read_file("../data/processed/predios.geojson", crs="EPSG:4326")
gdf_denue = gpd.read_file("../data/processed/denue.geojson", crs="EPSG:4326")
gdf_denue = gdf_denue.loc[gdf_denue['sector'].isin(SECTORS)]

In [33]:
# Generate the pedestrian network graph around the combined area of interest
unified_area = gdf_lots.unary_union.convex_hull | gdf_denue.unary_union.convex_hull
G = ox.graph_from_polygon(unified_area, network_type='walk')

In [34]:
# Find nearest network nodes to lot centroids and amenities
gdf_lots['nearest_node'] = ox.distance.nearest_nodes(G, gdf_lots.geometry.centroid.x, gdf_lots.geometry.centroid.y)
gdf_denue['nearest_node'] = ox.distance.nearest_nodes(G, gdf_denue.geometry.x, gdf_denue.geometry.y)


  gdf_lots['nearest_node'] = ox.distance.nearest_nodes(G, gdf_lots.geometry.centroid.x, gdf_lots.geometry.centroid.y)


In [35]:
# Get unique source and target nodes for batch processing
source_nodes = gdf_lots['nearest_node'].unique()
target_nodes = gdf_denue['nearest_node'].unique()

In [36]:
len(source_nodes), len(target_nodes), len(gdf_lots), len(gdf_denue)

(747, 2589, 5527, 6841)

In [37]:
import pandas as pd

# Calculate batch walking times between sources and targets
walking_times = calculate_batch_walking_times(G, source_nodes, target_nodes)

# Convert walking_times to DataFrame for easier processing
times_df = pd.DataFrame([(s, t, time) for (s, t), time in walking_times.items()],
                        columns=['source', 'target', 'time'])

In [45]:
decay_rate = 0.05

# Merge walking times back to amenities based on 'nearest_node'
gdf_denue_times = pd.merge(gdf_denue, times_df, left_on='nearest_node', right_on='target')
# Filter amenities within walking radius (15 minutes)
gdf_denue_within_radius = gdf_denue_times[gdf_denue_times['time'] <= WALK_RADIUS / 60]
# Calculate proximity scores
gdf_denue_within_radius['proximity_score'] = np.exp(-decay_rate * gdf_denue_within_radius['time'])
gdf_denue_within_radius

Unnamed: 0,sector,num_workers,nom_estab,fecha_alta,codigo_act,geometry,nearest_node,source,target,time,proximity_score
0,comercio,2.5,ABARROTES CHAN,2019-11,431110,POINT (-100.27697 25.65747),4968913494,7239649503,4968913494,7.034476,0.703474
1,comercio,2.5,ABARROTES CHAN,2019-11,431110,POINT (-100.27697 25.65747),4968913494,7979601801,4968913494,9.058714,0.635759
2,comercio,2.5,ABARROTES CHAN,2019-11,431110,POINT (-100.27697 25.65747),4968913494,4968913716,4968913494,8.844595,0.642602
3,comercio,2.5,ABARROTES CHAN,2019-11,431110,POINT (-100.27697 25.65747),4968913494,2485209317,4968913494,8.917143,0.640275
4,comercio,2.5,ABARROTES CHAN,2019-11,431110,POINT (-100.27697 25.65747),4968913494,4968896882,4968913494,7.464405,0.688514
...,...,...,...,...,...,...,...,...,...,...,...
264487,servicios,2.5,VULCANIZADORA SANCHEZ,2010-07,811191,POINT (-100.29839 25.65945),6990870395,7267438917,6990870395,4.089774,0.815064
264488,servicios,2.5,VULCANIZADORA SANCHEZ,2010-07,811191,POINT (-100.29839 25.65945),6990870395,1433046436,6990870395,8.768655,0.645047
264489,servicios,2.5,VULCANIZADORA SANCHEZ,2010-07,811191,POINT (-100.29839 25.65945),6990870395,2378528850,6990870395,10.240214,0.599289
264490,servicios,2.5,VULCANIZADORA SANCHEZ,2010-07,811191,POINT (-100.29839 25.65945),6990870395,7511829014,6990870395,10.262429,0.598624


In [46]:
sector_weight_mapping = {
    'comercio': 0.5,
    'servicios': 0.5,
    'salud': 0.9,
    'educacion': 1
}
# Apply sector weights
gdf_denue_within_radius['sector_weight'] = gdf_denue_within_radius['sector'].map(sector_weight_mapping)
gdf_denue_within_radius['weighted_score'] = gdf_denue_within_radius['proximity_score'] * \
    gdf_denue_within_radius['sector_weight']
# Merge back to get lot identifiers for each amenity
gdf_denue_within_radius = gdf_denue_within_radius.drop(columns=['geometry']).merge(
    gdf_lots[['nearest_node', 'geometry']], left_on='source', right_on='nearest_node', how='left')
# Use spatial join to associate amenities back to lots based on location
# This step assumes each amenity now has a 'lot_id' to identify which lot it's associated with
gdf_denue_within_radius = gpd.GeoDataFrame(gdf_denue_within_radius, geometry=gdf_denue_within_radius['geometry'])
gdf_denue_within_radius.head()

Unnamed: 0,sector,num_workers,nom_estab,fecha_alta,codigo_act,nearest_node_x,source,target,time,proximity_score,sector_weight,weighted_score,nearest_node_y,geometry
0,comercio,2.5,ABARROTES CHAN,2019-11,431110,4968913494,7239649503,4968913494,7.034476,0.703474,0.5,0.351737,7239649503,"MULTIPOLYGON (((-100.27983 25.65692, -100.2801..."
1,comercio,2.5,ABARROTES CHAN,2019-11,431110,4968913494,7979601801,4968913494,9.058714,0.635759,0.5,0.31788,7979601801,"POLYGON ((-100.28154 25.65853, -100.28217 25.6..."
2,comercio,2.5,ABARROTES CHAN,2019-11,431110,4968913494,7979601801,4968913494,9.058714,0.635759,0.5,0.31788,7979601801,"POLYGON ((-100.28187 25.65881, -100.28198 25.6..."
3,comercio,2.5,ABARROTES CHAN,2019-11,431110,4968913494,4968913716,4968913494,8.844595,0.642602,0.5,0.321301,4968913716,"POLYGON ((-100.28062 25.65732, -100.28068 25.6..."
4,comercio,2.5,ABARROTES CHAN,2019-11,431110,4968913494,2485209317,4968913494,8.917143,0.640275,0.5,0.320138,2485209317,"POLYGON ((-100.28221 25.65961, -100.28221 25.6..."


In [47]:
aggregated_scores = gdf_denue_within_radius.groupby(['source']).agg({
    'proximity_score': 'sum',
    'weighted_score': 'sum',
    'sector': lambda x: dict(x.value_counts())
}).reset_index().set_index('source')

for sector in SECTORS:
aggregated_scores[f"adj_{sector}"] = aggregated_scores['sector'].apply(
    lambda x: x.get(sector, 0) if isinstance(x, dict) else 0)
aggregated_scores['services_nearby'] = aggregated_scores[[f'adj_{x}' for x in SECTORS]].sum(axis=1)

IndentationError: expected an indented block after 'for' statement on line 7 (402764204.py, line 8)

In [5]:
# Join with gdf_lots to add new columns
gdf_lots = gdf_lots.drop(
    columns=['sector']).merge(
    aggregated_scores,
    left_on='nearest_node',
    right_index=True,
    how='left')
gdf_lots['total_score'] = gdf_lots['weighted_score']
print(gdf_lots)
print(gdf_lots.columns)