Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

initial implementation of linetrace #24

Merged
merged 5 commits into from
Nov 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions h3pandas/const.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
# Output column name for polyfill results (its use is not visible in this view).
COLUMN_H3_POLYFILL = "h3_polyfill"
# Output column name under which the accessor's linetrace method stores its cells.
COLUMN_H3_LINETRACE = "h3_linetrace"
51 changes: 49 additions & 2 deletions h3pandas/h3pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@
from pandas.core.frame import DataFrame
from geopandas.geodataframe import GeoDataFrame

from .const import COLUMN_H3_POLYFILL
from .const import COLUMN_H3_POLYFILL, COLUMN_H3_LINETRACE
from .util.decorator import catch_invalid_h3_address, doc_standard
from .util.functools import wrapped_partial
from .util.shapely import polyfill
from .util.shapely import polyfill, linetrace

# Either a plain pandas DataFrame or a geopandas GeoDataFrame.
AnyDataFrame = Union[DataFrame, GeoDataFrame]

Expand Down Expand Up @@ -758,6 +758,53 @@ def polyfill_resample(

return result.h3.h3_to_geo_boundary() if return_geometry else result

def linetrace(
    self, resolution: int, explode: bool = False
) -> AnyDataFrame:
    """Experimental. An H3 cell representation of a (Multi)LineString,
    which permits repeated cells, but not if they are repeated in
    immediate sequence.

    Parameters
    ----------
    resolution : int
        H3 resolution
    explode : bool
        If True, will explode the resulting list vertically.
        All other columns' values are copied.
        Default: False

    Returns
    -------
    (Geo)DataFrame with an additional ``h3_linetrace`` column. Without
    ``explode`` each row holds the list of H3 cells traced along that row's
    geometry, in order. With ``explode`` each cell gets its own row and the
    original index label is repeated.

    Examples
    --------
    >>> from shapely.geometry import LineString
    >>> gdf = gpd.GeoDataFrame(geometry=[LineString([[0, 0], [1, 0], [1, 1]])])
    >>> gdf.h3.linetrace(4)
    geometry h3_linetrace
    0 LINESTRING (0.00000 0.00000, 1.00000 0.00000, ... [83754efffffffff, 83754cfffffffff, 837541fffff... # noqa E501
    >>> gdf.h3.linetrace(4, explode=True)
    geometry h3_linetrace
    0 LINESTRING (0.00000 0.00000, 1.00000 0.00000, ... 83754efffffffff
    0 LINESTRING (0.00000 0.00000, 1.00000 0.00000, ... 83754cfffffffff
    0 LINESTRING (0.00000 0.00000, 1.00000 0.00000, ... 837541fffffffff

    """
    def trace_row(row):
        # Materialise the lazy cell iterator so each row stores a plain list
        return list(linetrace(row.geometry, resolution))

    df = self._df
    traced = df.assign(**{COLUMN_H3_LINETRACE: df.apply(trace_row, axis=1)})
    if not explode:
        return traced

    # Explode row-wise instead of joining an exploded Series back on the
    # index: an index join cross-multiplies rows whenever the input index
    # contains duplicate labels (e.g. after explode(index_parts=False)).
    return traced.explode(COLUMN_H3_LINETRACE)

# Private methods

def _apply_index_assign(
Expand Down
25 changes: 24 additions & 1 deletion h3pandas/util/decorator.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from functools import wraps
from typing import Callable
from typing import Callable, Iterator
from h3 import H3CellError


Expand Down Expand Up @@ -34,6 +34,29 @@ def safe_f(*args, **kwargs):
return safe_f


def sequential_deduplication(
    func: Callable[..., Iterator[str]]
) -> Callable[..., Iterator[str]]:
    """
    Decorator that doesn't permit two consecutive items of an iterator
    to be the same.

    Parameters
    ----------
    func : Callable
        Function returning an iterator (any iterable is accepted).
        Positional and keyword arguments are forwarded to it unchanged.

    Returns
    -------
    Generator function that yields from func, but won't yield two items
    in a row that are the same. Items that repeat non-consecutively are kept.
    """
    @wraps(func)
    def inner(*args, **kwargs):
        # A private sentinel marks "nothing yielded yet", so that None is an
        # ordinary item: it neither ends iteration nor matches the first item.
        last = object()
        for cell in func(*args, **kwargs):
            if cell != last:
                yield cell
            last = cell
    return inner


# TODO: Test
def doc_standard(column_name: str, description: str) -> Callable:
"""Wrapper to provide a standard apply-to-H3-index docstring"""
Expand Down
45 changes: 43 additions & 2 deletions h3pandas/util/shapely.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from typing import Union, Set, Tuple, List
from shapely.geometry import Polygon, MultiPolygon
from typing import Union, Set, Tuple, List, Iterator
from shapely.geometry import Polygon, MultiPolygon, LineString, MultiLineString
from h3 import h3
from .decorator import sequential_deduplication

# A shapely Polygon or MultiPolygon.
MultiPolyOrPoly = Union[Polygon, MultiPolygon]
# A shapely LineString or MultiLineString; the geometry types linetrace accepts.
MultiLineOrLine = Union[LineString, MultiLineString]


def _extract_coords(polygon: Polygon) -> Tuple[List, List[List]]:
Expand Down Expand Up @@ -46,3 +48,42 @@ def polyfill(
return set(h3_addresses)
else:
raise TypeError(f"Unknown type {type(geometry)}")


@sequential_deduplication
def linetrace(
    geometry: MultiLineOrLine, resolution: int
) -> Iterator[str]:
    """h3.polyfill equivalent for shapely (Multi)LineString
    Does not represent lines with duplicate sequential cells,
    but cells may repeat non-sequentially to represent
    self-intersections

    Parameters
    ----------
    geometry : LineString or MultiLineString
        Line to trace with H3 cells. Only the first two ordinates of each
        vertex (x = longitude, y = latitude) are used; a Z value is ignored.
    resolution : int
        H3 resolution of the tracing cells

    Returns
    -------
    Iterator of H3 addresses in the order the line visits them. An empty
    LineString yields nothing.

    Raises
    ------
    TypeError if geometry is not a LineString or a MultiLineString.
    This function is a generator, so the error is raised when the returned
    iterator is first advanced, not at call time.
    """
    if isinstance(geometry, MultiLineString):
        # Recurse into the component linestrings, keeping their order
        for part in geometry.geoms:
            yield from linetrace(part, resolution)
    elif isinstance(geometry, LineString):
        # Index each vertex once. Shapely coordinates are (x, y[, z]), while
        # geo_to_h3 takes (lat, lng), hence the swap; *_ drops an optional Z.
        cells = [
            h3.geo_to_h3(lat, lng, resolution)
            for lng, lat, *_ in geometry.coords
        ]
        for start, end in zip(cells, cells[1:]):
            yield from h3.h3_line(start, end)  # inclusive of start and end
    else:
        raise TypeError(f"Unknown type {type(geometry)}")
160 changes: 159 additions & 1 deletion tests/test_h3pandas.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from h3pandas import h3pandas # noqa: F401
from h3 import h3
import pytest
from shapely.geometry import Polygon, box, Point
from shapely.geometry import Polygon, LineString, MultiLineString, box, Point
import pandas as pd
import geopandas as gpd
from geopandas.testing import assert_geodataframe_equal
Expand Down Expand Up @@ -33,6 +33,33 @@ def basic_geodataframe_polygon(basic_geodataframe):
return gpd.GeoDataFrame(geometry=[geom], crs="epsg:4326")


@pytest.fixture
def basic_geodataframe_linestring():
    """GeoDataFrame (EPSG:4326) holding one two-vertex LineString"""
    start = (174.793092, -37.005372)
    end = (175.621138, -40.323142)
    line = LineString([start, end])
    return gpd.GeoDataFrame(geometry=[line], crs="epsg:4326")


@pytest.fixture
def basic_geodataframe_multilinestring(basic_geodataframe):
    """GeoDataFrame (EPSG:4326) holding one two-part MultiLineString"""
    first_part = [[174.793092, -37.005372], [175.621138, -40.323142]]
    # NB this part traverses the antimeridian (its last longitude exceeds 180)
    second_part = [
        [168.222656, -45.79817],
        [171.914063, -34.307144],
        [178.769531, -37.926868],
        [183.515625, -43.992815],
    ]
    multiline = MultiLineString([first_part, second_part])
    return gpd.GeoDataFrame(geometry=[multiline], crs="epsg:4326")


@pytest.fixture
def basic_geodataframe_empty_linestring():
    """GeoDataFrame (EPSG:4326) whose only geometry is an empty LineString"""
    empty_line = LineString()
    return gpd.GeoDataFrame(geometry=[empty_line], crs="epsg:4326")


@pytest.fixture
def basic_geodataframe_polygons(basic_geodataframe):
geoms = [box(0, 0, 1, 1), box(0, 0, 2, 2)]
Expand Down Expand Up @@ -77,6 +104,11 @@ def h3_geodataframe_with_values(h3_dataframe_with_values):
)


@pytest.fixture
def h3_geodataframe_with_polyline_values(basic_geodataframe_linestring):
    """The single-LineString GeoDataFrame plus a constant ``val`` column"""
    with_values = basic_geodataframe_linestring.assign(**{"val": 10})
    return with_values


# Tests: H3 API
class TestGeoToH3:
def test_geo_to_h3(self, basic_dataframe):
Expand Down Expand Up @@ -271,6 +303,132 @@ def test_polyfill_explode_unequal_lengths(self, basic_geodataframe_polygons):
assert set(result["h3_polyfill"]) == expected_indices


class TestLineTrace:
    """Tests for the ``h3.linetrace`` accessor method"""

    # Cells expected, in order, for the single-LineString fixture at
    # resolution 3
    LINE_CELLS = [
        "83bb50fffffffff",
        "83bb54fffffffff",
        "83bb72fffffffff",
        "83bb0dfffffffff",
        "83bb2bfffffffff",
    ]

    # Cells expected, in order, for each part of the MultiLineString fixture
    # at resolution 2
    MULTILINE_PART_CELLS = [
        [
            "82bb57fffffffff", "82bb0ffffffffff",
        ],
        [
            "82da87fffffffff", "82da97fffffffff",
            "82bb67fffffffff", "82bb47fffffffff",
            "82bb5ffffffffff", "82bb57fffffffff",
            "82ba27fffffffff", "82bb1ffffffffff",
            "82bb07fffffffff", "82bb37fffffffff",
        ],
    ]

    def test_empty_linetrace(self, basic_geodataframe_empty_linestring):
        traced = basic_geodataframe_empty_linestring.h3.linetrace(2)
        first_row_cells = traced.iloc[0]["h3_linetrace"]
        assert len(first_row_cells) == 0

    def test_linetrace(self, basic_geodataframe_linestring):
        traced = basic_geodataframe_linestring.h3.linetrace(3)
        first_row_cells = traced.iloc[0]["h3_linetrace"]
        assert len(first_row_cells) == 5
        assert list(first_row_cells) == self.LINE_CELLS

    def test_linetrace_explode(self, basic_geodataframe_linestring):
        traced = basic_geodataframe_linestring.h3.linetrace(3, explode=True)
        assert traced.shape == (5, 2)
        assert traced.iloc[0]['h3_linetrace'] == self.LINE_CELLS[0]
        assert traced.iloc[-1]['h3_linetrace'] == self.LINE_CELLS[-1]

    def test_linetrace_with_values(self, h3_geodataframe_with_polyline_values):
        traced = h3_geodataframe_with_polyline_values.h3.linetrace(3)
        first_row = traced.iloc[0]
        assert traced.shape == (1, 3)
        assert 'val' in traced.columns
        assert first_row['val'] == 10
        assert len(first_row["h3_linetrace"]) == 5
        assert list(first_row["h3_linetrace"]) == self.LINE_CELLS

    def test_linetrace_with_values_explode(
        self, h3_geodataframe_with_polyline_values
    ):
        traced = h3_geodataframe_with_polyline_values.h3.linetrace(
            3, explode=True
        )
        assert traced.shape == (5, 3)
        assert 'val' in traced.columns
        assert traced.iloc[0]['val'] == 10
        assert traced.iloc[0]["h3_linetrace"] == self.LINE_CELLS[0]
        assert traced.iloc[-1]['h3_linetrace'] == self.LINE_CELLS[-1]
        assert not traced["val"].isna().any()

    def test_linetrace_multiline(self, basic_geodataframe_multilinestring):
        traced = basic_geodataframe_multilinestring.h3.linetrace(2)
        # The unexploded MultiLineString traces its parts back to back
        all_cells = self.MULTILINE_PART_CELLS[0] + self.MULTILINE_PART_CELLS[1]
        first_row_cells = traced.iloc[0]["h3_linetrace"]
        assert len(first_row_cells) == 12  # 12 cells total
        assert list(first_row_cells) == all_cells

    def test_linetrace_multiline_explode_index_parts(
        self, basic_geodataframe_multilinestring
    ):
        parts = basic_geodataframe_multilinestring.explode(index_parts=True)
        traced = parts.h3.linetrace(2, explode=True)
        expected = self.MULTILINE_PART_CELLS
        assert len(traced["h3_linetrace"]) == 12  # 12 cells in total
        assert traced.iloc[0]["h3_linetrace"] == expected[0][0]
        assert traced.iloc[-1]["h3_linetrace"] == expected[-1][-1]

    def test_linetrace_multiline_index_parts_no_explode(
        self, basic_geodataframe_multilinestring
    ):
        parts = basic_geodataframe_multilinestring.explode(index_parts=True)
        traced = parts.h3.linetrace(2, explode=False)
        expected = self.MULTILINE_PART_CELLS
        assert len(traced["h3_linetrace"]) == 2  # 2 parts
        assert len(traced.iloc[0]["h3_linetrace"]) == 2  # 2 cells
        assert traced.iloc[0]["h3_linetrace"] == expected[0]
        assert len(traced.iloc[-1]["h3_linetrace"]) == 10  # 10 cells
        assert traced.iloc[-1]["h3_linetrace"] == expected[-1]


class TestCellArea:
def test_cell_area(self, indexed_dataframe):
expected = indexed_dataframe.assign(
Expand Down
13 changes: 12 additions & 1 deletion tests/util/test_decorator.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from h3 import h3
import pytest

from h3pandas.util.decorator import catch_invalid_h3_address
from h3pandas.util.decorator import catch_invalid_h3_address, sequential_deduplication


class TestCatchInvalidH3Address:
Expand All @@ -18,3 +18,14 @@ def safe_h3_to_parent(h3_address):

with pytest.raises(ValueError):
safe_h3_to_parent("891f1d48177fff1") # Originally H3CellError


class TestSequentialDeduplication:
    """Tests for the ``sequential_deduplication`` decorator"""

    def test_catch_sequential_duplicate_h3_addresses(self):
        """Consecutive repeats collapse; non-consecutive repeats survive"""
        @sequential_deduplication
        def passthrough(items):
            yield from items

        values = [1, 1, 2, 3, 3, 4, 5, 4, 3, 3, 2, 1, 1]
        deduplicated = list(passthrough(values))
        assert deduplicated == [1, 2, 3, 4, 5, 4, 3, 2, 1]
Loading