-
Notifications
You must be signed in to change notification settings - Fork 26
/
geom.py
434 lines (348 loc) · 14.7 KB
/
geom.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
import pandas as pd
import geopandas as gpd
import osmnx as ox
from h3 import h3
from rich.progress import track
from urbanpy.utils import geo_boundary_to_polygon
from typing import Sequence, Union
__all__ = [
"merge_geom_downloads",
"filter_population",
"remove_features",
"gen_hexagons",
"merge_shape_hex",
"overlay_polygons_hexs",
"resolution_downsampling",
"osmnx_coefficient_computation",
]
def merge_geom_downloads(
gdfs: Sequence[gpd.GeoDataFrame], crs: str = "EPSG:4326"
) -> gpd.GeoDataFrame:
"""
Merge several GeoDataFrames from OSM download_osm
Parameters
----------
dfs: array_like
Array of GeoDataFrames to merge. Assumes equal CRS.
crs: str
Valid string to pass to crs param of the geopandas.GeoDataFrame constructor function.
Returns
-------
concat: GeoDataFrame
Output from concatenation and unary union of geometries, providing a single geometry database for the city
Examples
--------
>>> lima = urbanpy.download.nominatim_osm("Lima, Peru", 2)
>>> callao = urbanpy.download.nominatim_osm("Callao, Peru", 1)
>>> lima = urbanpy.geom.merge_geom_downloads([lima, callao])
>>> lima.head()
geometry
MULTIPOLYGON (((-76.80277 -12.47562, -76.80261...)))
"""
concat = gpd.GeoDataFrame(geometry=[pd.concat(gdfs).unary_union], crs=crs)
return concat
def filter_population(
pop_df: pd.DataFrame, polygon_gdf: gpd.GeoDataFrame
) -> gpd.GeoDataFrame:
"""
Filter an HDX database download to the polygon bounds
Parameters
----------
pop_df: DataFrame
Result from download_hdx
polygon_gdf: GeoDataFrame
Result from download_osm or merge_geom_downloads
Returns
-------
filtered_points_gdf: GeoDataFrame
Population DataFrame filtered to polygon bounds
Examples
--------
>>> lima = urbanpy.download.nominatim_osm("Lima, Peru", 2)
>>> callao = urbanpy.download.nominatim_osm("Callao, Peru", 1)
>>> lima = urbanpy.geom.merge_geom_downloads([lima, callao])
>>> pop = urbanpy.download.hdx_fb_population('peru', 'full')
>>> urbanpy.geom.filter_population(pop, lima)
latitude | longitude | population_2015 | population_2020 | geometry
-12.519861 | -76.774583 | 2.633668 | 2.644757 | POINT (-76.77458 -12.51986)
-12.519861 | -76.745972 | 2.633668 | 2.644757 | POINT (-76.74597 -12.51986)
-12.519861 | -76.745694 | 2.633668 | 2.644757 | POINT (-76.74569 -12.51986)
-12.519861 | -76.742639 | 2.633668 | 2.644757 | POINT (-76.74264 -12.51986)
-12.519861 | -76.741250 | 2.633668 | 2.644757 | POINT (-76.74125 -12.51986)
"""
minx, miny, maxx, maxy = polygon_gdf.geometry.total_bounds
limits_filter = pop_df["longitude"].between(minx, maxx) & pop_df[
"latitude"
].between(miny, maxy)
filtered_points = pop_df[limits_filter]
geometry_ = gpd.points_from_xy(
filtered_points["longitude"], filtered_points["latitude"]
)
filtered_points_gdf = gpd.GeoDataFrame(
filtered_points, geometry=geometry_, crs="EPSG:4326"
)
return filtered_points_gdf
def remove_features(gdf: gpd.GeoDataFrame, bounds: Sequence[float]) -> gpd.GeoDataFrame:
"""
Remove a set of features based on bounds
Parameters
----------
gdf: GeoDataFrame
Input GeoDataFrame containing the point features filtered with filter_population
bounds: array_like
Array input following [minx, miny, maxx, maxy] for filtering (GeoPandas total_bounds method output)
Returns
-------
gdf: GeoDataFrame
Input DataFrame but without the desired features
Examples
--------
>>> lima = urbanpy.geom.filter_population(pop_lima, poly_lima)
>>> removed = urbanpy.geom.remove_features(lima, [-12.2,-12, -77.2,-77.17]) #Remove San Lorenzo Island
>>> print(lima.shape, removed.shape)
(348434, 4) (348427, 4)
"""
minx, miny, maxx, maxy = bounds
drop_ix = gdf.cx[minx:maxx, miny:maxy].index
return gdf.drop(drop_ix)
def gen_hexagons(resolution: int, city: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
"""
Converts an input multipolygon layer to H3 hexagons given a resolution.
Parameters
----------
resolution: int, 0:15
Hexagon resolution, higher values create smaller hexagons.
city: GeoDataFrame
Input city polygons to transform into hexagons.
Returns
-------
city_hexagons: GeoDataFrame
Hexagon geometry GeoDataFrame (hex_id, geom).
Examples
--------
>>> lima = urbanpy.geom.filter_population(pop_lima, poly_lima)
>>> lima_hex = urbanpy.geom.gen_hexagons(8, lima)
hex | geometry
888e620e41fffff | POLYGON ((-76.80007 -12.46917, -76.80439 -12.4...))
888e62c809fffff | POLYGON ((-77.22539 -12.08663, -77.22971 -12.0...))
888e62c851fffff | POLYGON ((-77.20708 -12.08484, -77.21140 -12.0...))
888e62c841fffff | POLYGON ((-77.22689 -12.07104, -77.23122 -12.0...))
888e62c847fffff | POLYGON ((-77.23072 -12.07929, -77.23504 -12.0...))
"""
# Polyfill the city boundaries
h3_polygons = list()
h3_indexes = list()
# Get every polygon in Multipolygon shape
city_poly = city.explode(index_parts=True).reset_index(drop=True)
for _, geo in city_poly.iterrows():
hexagons = h3.polyfill(
geo["geometry"].__geo_interface__, res=resolution, geo_json_conformant=True
)
for hexagon in hexagons:
h3_polygons.append(geo_boundary_to_polygon(hexagon))
h3_indexes.append(hexagon)
# Create hexagon dataframe
city_hexagons = gpd.GeoDataFrame(h3_indexes, geometry=h3_polygons).drop_duplicates()
city_hexagons.crs = "EPSG:4326"
city_hexagons = city_hexagons.rename(
columns={0: "hex"}
) # Format column name for readability
return city_hexagons
def merge_shape_hex(
hexs: gpd.GeoDataFrame,
shape: gpd.GeoDataFrame,
agg: dict,
how="inner",
predicate="intersects",
) -> gpd.GeoDataFrame:
"""
Merges a H3 hexagon GeoDataFrame with a Point GeoDataFrame and aggregates the
point gdf data.
Parameters
----------
hexs: GeoDataFrame
Input GeoDataFrame containing hexagon geometries
shape: GeoDataFrame
Input GeoDataFrame containing points and features to be aggregated
agg: dict
A dictionary with column names as keys and values as aggregation
operations. The aggregation must be one of {'sum', 'min', 'max'}.
how: str. One of {'inner', 'left', 'right'}. Default 'inner'.
Determines how to merge data:
'left' uses keys from left and only retains geometry from left
'right' uses keys from right and only retains geometry from right
'inner': use intersection of keys from both dfs; retain only left geometry column
op: str. One of {'intersects', 'contains', 'within'}. Default 'intersects'
Determines how geometries are queried for merging.
Returns
-------
hexs: GeoDataFrame
Result of a spatial join within hex and points. All features are aggregated
based on the input parameters
Examples
--------
>>> lima = urbanpy.download.nominatim_osm('Lima, Peru', 2)
>>> pop_lima = urbanpy.download.hdx_fb_population('peru', 'full')
>>> pop_df = urbanpy.filter_population(pop_lima, lima)
>>> hexs = urbanpy.geom.gen_hexagons(8, lima)
>>> urbanpy.geom.merge_point_hex(hexs, pop_df, 'inner', 'within', {'population_2020':'sum'})
0 | geometry | population_2020
888e628d8bfffff | POLYGON ((-76.66002 -12.20371, -76.66433 -12.2... | NaN
888e62c5ddfffff | POLYGON ((-76.94564 -12.16138, -76.94996 -12.1... | 14528.039097
888e62132bfffff | POLYGON ((-76.84736 -12.17523, -76.85167 -12.1... | 608.312696
888e628debfffff | POLYGON ((-76.67982 -12.18998, -76.68413 -12.1... | NaN
888e6299b3fffff | POLYGON ((-76.78876 -11.97286, -76.79307 -11.9... | 3225.658803
"""
joined = gpd.sjoin(shape, hexs, how=how, predicate=predicate)
# Uses index right based on the order of points and hex. Right takes hex index
hex_merge = joined.groupby("index_right").agg(agg)
# Avoid SpecificationError by copying the DataFrame
ret_hex = hexs.copy()
for key in agg:
ret_hex.loc[hex_merge.index, key] = hex_merge[key].values
return ret_hex
def overlay_polygons_hexs(
polygons: gpd.GeoDataFrame,
hexs: gpd.GeoDataFrame,
hex_col: str,
columns: Sequence[str],
) -> gpd.GeoDataFrame:
"""
Overlays a Polygon GeoDataFrame with a H3 hexagon GeoDataFrame and divide the 'columns' the values proportionally to the overlayed area.
Parameters
----------
polygons: GeoDataFrame
Input GeoDataFrame containing polygons and columns to be processed
hexs: GeoDataFrame
Input GeoDataFrame containing desired output hexagon resolution geometries
hex_col: str
Determines the column with the hex id.
columns: list
A list with column names of the columns that are going to be proportionally adjusted
Returns
-------
hexs: GeoDataFrame
Result of a spatial join within hex and points. All columns are adjusted
based on the overlayed area.
Examples
--------
>>> urbanpy.geom.overlay_polygons_hexs(zonas_pob, hex_lima, 'hex', pob_vulnerable)
hex | POB_TOTAL | geometry
898e6200493ffff | 193.705376 | POLYGON ((-76.80695 -12.35199, -76.80812 -12.3...
898e6200497ffff | 175.749780 | POLYGON ((-76.80412 -12.35395, -76.80528 -12.3...
898e620049bffff | 32.231078 | POLYGON ((-76.81011 -12.35342, -76.81127 -12.3...
898e62004a7ffff | 74.154973 | POLYGON ((-76.79911 -12.36468, -76.80027 -12.3...
898e62004b7ffff | 46.989828 | POLYGON ((-76.79879 -12.36128, -76.79995 -12.3...
"""
polygons_ = polygons.copy() # Preserve data state
polygons_["poly_area"] = polygons_.geometry.area # Calc polygon area
# Overlay intersection
overlayed = gpd.overlay(polygons_, hexs, how="intersection")
# Downsample indicators using proporional overlayed area w.r.t polygon area
area_prop = overlayed.geometry.area / overlayed["poly_area"]
overlayed[columns] = overlayed[columns].apply(lambda col: col * area_prop)
# Aggregate over Hex ID
per_hexagon_data = overlayed.groupby(hex_col)[columns].sum()
# Preserve data as GeoDataFrame
hex_df = pd.merge(
left=per_hexagon_data, right=hexs[[hex_col, "geometry"]], on=hex_col
)
hex_gdf = gpd.GeoDataFrame(
hex_df[[hex_col] + columns], geometry=hex_df["geometry"], crs=hexs.crs
)
return hex_gdf
def resolution_downsampling(
gdf: gpd.GeoDataFrame, hex_col: str, coarse_resolution: int, agg: dict
) -> gpd.GeoDataFrame:
"""
Downsample hexagon resolution aggregating indicated metrics (e.g. Transform hexagon resolution from 9 to 6).
Parameters
----------
gdf: GeoDataFrame
GeoDataFrame with hexagon geometries (output from gen_hexagons).
hex_col: str
Determines the column with the hex id.
coarse_resolution: int, 0:15
Hexagon resolution lower than gdf actual resolution (higher values create smaller hexagons).
Returns
-------
gdfc: GeoDataFrame
GeoDataFrame with lower resolution hexagons geometry and metrics aggregated as indicated.
"""
gdf_coarse = gdf.copy()
coarse_hex_col = "hex_{}".format(coarse_resolution)
gdf_coarse[coarse_hex_col] = gdf_coarse[hex_col].apply(
lambda x: h3.h3_to_parent(x, coarse_resolution)
)
dfc = gdf_coarse.groupby([coarse_hex_col]).agg(agg).reset_index()
gdfc_geometry = dfc[coarse_hex_col].apply(geo_boundary_to_polygon)
return gpd.GeoDataFrame(dfc, geometry=gdfc_geometry, crs=gdf.crs)
def osmnx_coefficient_computation(
gdf,
net_type,
basic_stats,
extended_stats,
connectivity=False,
anc=False,
ecc=False,
bc=False,
cc=False,
):
"""
Apply osmnx's graph from polygon to query a city's street network within a geometry.
This may be a long procedure given the hexagon layer resolution.
Parameters
----------
gdf: GeoDataFrame
GeoDataFrame with geometries to download graphs contained within them.
net_type: str
Network type to download. One of {'drive', 'drive_service', 'walk', 'bike', 'all', 'all_private'}
basic_stats: list
List of basic stats to compute from downloaded graph
extended_stats: list
List of extended stats to compute from graph
connectivity: bool. Default False.
Compute node and edge connectivity
anc: bool. Default False.
Compute avg node connectivity
ecc: bool. Default False.
Compute shortest paths, eccentricity and topological metric
bc: bool. Default False.
Compute node betweeness centrality
cc: bool. Default False.
Compute node closeness centrality
For more detail about these parameters, see https://osmnx.readthedocs.io/en/stable/osmnx.html#module-osmnx.stats
Returns
-------
gdf: GeoDataFrame
Input GeoDataFrame with updated columns containing the selected metrics
Examples
--------
>>> hexagons = urbanpy.geom.gen_hexagons(8, lima)
>>> urbanpy.geom.osmnx_coefficient_computation(hexagons.head(), 'walk', ['circuity_avg'], [])
On record 1: There are no nodes within the requested geometry
On record 3: There are no nodes within the requested geometry
hex | geometry | circuity_avg
888e62c64bfffff | POLYGON ((-76.89763 -12.03869, -76.90194 -12.0... | 1.021441
888e6212e1fffff | POLYGON ((-76.75291 -12.19727, -76.75722 -12.2... | NaN
888e62d333fffff | POLYGON ((-77.09253 -11.83762, -77.09685 -11.8... | 1.025313
888e666c2dfffff | POLYGON ((-76.93109 -11.79031, -76.93540 -11.7... | NaN
888e62d4b3fffff | POLYGON ((-76.87935 -12.03688, -76.88366 -12.0... | 1.044654
"""
# May be a lengthy download depending on the amount of features
for index, row in track(
gdf.iterrows(),
total=gdf.shape[0],
description="Computing road network coefficients...",
):
try:
graph = ox.graph_from_polygon(row["geometry"], net_type)
b_stats = ox.basic_stats(graph)
ext_stats = ox.extended_stats(graph, connectivity, anc, ecc, bc, cc)
for stat in basic_stats:
gdf.loc[index, stat] = b_stats.get(stat)
for stat in extended_stats:
gdf.loc[index, stat] = ext_stats.get(stat)
except Exception as err:
print(f"On record {index}: ", err)
return gdf