From 2d207e2743bc85494d6248c5caf941e56e62922b Mon Sep 17 00:00:00 2001 From: Karim Bahgat Date: Wed, 26 Aug 2020 16:02:26 +0200 Subject: [PATCH] Switch to much faster hole in exterior tests during geo interface Previously did point in exterior test for every hole vertex, but got very slow for complex real-world multi polygons. Instead, efficiently calculate a point guaranteed to be inside the hole, and then test only that one hole sample point. Additionally, limit exterior candidates even further as only those whose bbox fully contains the hole bbox (ie no point in testing nested exteriors inside the hole itself) Add tests for exterior-hole edge cases --- shapefile.py | 59 +++++++++++++++++++++++++++--- shapefiles/test/balancing.dbf | Bin 804 -> 804 bytes shapefiles/test/contextwriter.dbf | Bin 65 -> 65 bytes shapefiles/test/dtype.dbf | Bin 259 -> 259 bytes shapefiles/test/line.dbf | Bin 116 -> 116 bytes shapefiles/test/linem.dbf | Bin 116 -> 116 bytes shapefiles/test/linez.dbf | Bin 116 -> 116 bytes shapefiles/test/multipatch.dbf | Bin 116 -> 116 bytes shapefiles/test/multipoint.dbf | Bin 116 -> 116 bytes shapefiles/test/onlydbf.dbf | Bin 65 -> 65 bytes shapefiles/test/point.dbf | Bin 116 -> 116 bytes shapefiles/test/polygon.dbf | Bin 116 -> 116 bytes shapefiles/test/shapetype.dbf | Bin 65 -> 65 bytes shapefiles/test/testfile.dbf | Bin 65 -> 65 bytes test_shapefile.py | 24 +++++++++++- 15 files changed, 76 insertions(+), 7 deletions(-) diff --git a/shapefile.py b/shapefile.py index bce0350..f9f5639 100644 --- a/shapefile.py +++ b/shapefile.py @@ -182,6 +182,14 @@ def bbox_overlap(bbox1, bbox2): overlap = (xmin1 <= xmax2 and xmax1 >= xmin2 and ymin1 <= ymax2 and ymax1 >= ymin2) return overlap +def bbox_contains(bbox1, bbox2): + """Tests whether bbox1 fully contains bbox2, returning a boolean + """ + xmin1,ymin1,xmax1,ymax1 = bbox1 + xmin2,ymin2,xmax2,ymax2 = bbox2 + contains = (xmin1 < xmin2 and xmax1 > xmax2 and ymin1 < ymin2 and ymax1 > ymax2) + return contains + def ring_contains_point(coords, p): """Fast point-in-polygon crossings algorithm, MacMartin optimization. @@ -224,6 +232,44 @@ def ring_contains_point(coords, p): return inside_flag +def ring_sample(coords, ccw=False): + """Return a sample point guaranteed to be within a ring, by efficiently + finding the first centroid of a coordinate triplet whose orientation + matches the orientation of the ring and passes the point-in-ring test. + The orientation of the ring is assumed to be clockwise, unless ccw + (counter-clockwise) is set to True. + """ + coords = tuple(coords) + (coords[1],) # add the second coordinate to the end to allow checking the last triplet + triplet = [] + for p in coords: + # add point to triplet (but not if duplicate) + if p not in triplet: + triplet.append(p) + + # new triplet, try to get sample + if len(triplet) == 3: + # check that triplet does not form a straight line (not a triangle) + is_straight_line = (triplet[0][1] - triplet[1][1]) * (triplet[0][0] - triplet[2][0]) == (triplet[0][1] - triplet[2][1]) * (triplet[0][0] - triplet[1][0]) + if not is_straight_line: + # get triplet orientation + closed_triplet = triplet + [triplet[0]] + triplet_ccw = signed_area(closed_triplet) >= 0 + # check that triplet has the same orientation as the ring (means triangle is inside the ring) + if ccw == triplet_ccw: + # get triplet centroid + xs,ys = zip(*triplet) + xmean,ymean = sum(xs) / 3.0, sum(ys) / 3.0 + # check that triplet centroid is truly inside the ring + if ring_contains_point(coords, (xmean,ymean)): + return xmean,ymean + + # failed to get sample point from this triplet + # remove oldest triplet coord to allow iterating to next triplet + triplet.pop(0) + + else: + raise Exception('Unexpected error: Unable to find a ring sample point.') + def ring_contains_ring(coords1, coords2): '''Returns True if all vertexes in coords2 are fully inside coords1. ''' @@ -272,25 +318,26 @@ def organize_polygon_rings(rings): polys.append(poly) return polys - # first determine each hole's candidate exteriors based on simple bbox overlap test + # first determine each hole's candidate exteriors based on simple bbox contains test hole_exteriors = dict([(hole_i,[]) for hole_i in xrange(len(holes))]) exterior_bboxes = [ring_bbox(ring) for ring in exteriors] for hole_i in hole_exteriors.keys(): hole_bbox = ring_bbox(holes[hole_i]) for ext_i,ext_bbox in enumerate(exterior_bboxes): - if bbox_overlap(hole_bbox, ext_bbox): + if bbox_contains(ext_bbox, hole_bbox): hole_exteriors[hole_i].append( ext_i ) # then, for holes with still more than one possible exterior, do more detailed hole-in-ring test for hole_i,exterior_candidates in hole_exteriors.items(): if len(exterior_candidates) > 1: - # get new exterior candidates - hole = holes[hole_i] + # get hole sample point + hole_sample = ring_sample(holes[hole_i], ccw=True) + # collect new exterior candidates new_exterior_candidates = [] for ext_i in exterior_candidates: - ext = exteriors[ext_i] - hole_in_exterior = ring_contains_ring(ext, hole) + # check that hole sample point is inside exterior + hole_in_exterior = ring_contains_point(exteriors[ext_i], hole_sample) if hole_in_exterior: new_exterior_candidates.append(ext_i) diff --git a/shapefiles/test/balancing.dbf b/shapefiles/test/balancing.dbf index 4c7e992712282cc488ca77fac663109d48414cfc..db4fab4dc785d8b02cffefe65b39a0c5d23a9df7 100644 GIT binary patch delta 13 UcmZ3&wuFs^xq?G#BZ~qv02ev}&Hw-a delta 13 UcmZ3&wuFs^xq?GtBZ~qv02eR<%m4rY diff --git a/shapefiles/test/contextwriter.dbf b/shapefiles/test/contextwriter.dbf index 25aa008de35cf680023fecf9501004211120c6bd..15229fadec9b8fdded4f427e4e90141f3027fde1 100644 GIT binary patch delta 10 RcmZ>CWMQu0kebM13jhjg0rmg@ delta 10 RcmZ>CWMQu0keJ9~3jhjY0rUU> diff --git a/shapefiles/test/dtype.dbf b/shapefiles/test/dtype.dbf index d65e8e6cb3623591f436dc86c87c84821ff646b2..e753d24794414d1c6fc512794184f6a862c0fe00 100644 GIT binary patch delta 12 TcmZo>YGz_#uHcZG$nqZm5yb;Q delta 12 TcmZo>YGz_#uHcZE$nqZm5xoOG diff --git a/shapefiles/test/line.dbf b/shapefiles/test/line.dbf index 7a9fb5c8b8d6d7688f2901cd9c903ec2f38b4d39..49a0ad0211bc5db9c8d4f595a8482eded44a1693 100644 GIT binary patch delta 10 RcmXRZVPUS|kebL+000hV0*?Ry delta 10 RcmXRZVPUS|keJ9)000hN0*wFw diff --git a/shapefiles/test/linem.dbf b/shapefiles/test/linem.dbf index ddd59def48f5adc7c9096adcfcb5cb3f3f752b0f..38ca81eca0b5e91327feb0b20500e38f7b5eb4dc 100644 GIT binary patch delta 10 RcmXRZVPUS|kebL+000hV0*?Ry delta 10 RcmXRZVPUS|keJ9)000hN0*wFw diff --git a/shapefiles/test/linez.dbf b/shapefiles/test/linez.dbf index 46a339e4fb130f948726d5e97948487111a25b29..dcc50fde0c974e454af00d69ea621107ad6103d4 100644 GIT binary patch delta 10 RcmXRZVPUS|kebL+000hV0*?Ry delta 10 RcmXRZVPUS|keJ9)000hN0*wFw diff --git a/shapefiles/test/multipatch.dbf b/shapefiles/test/multipatch.dbf index 822cd313288e784de45e8f80c92324ab24d50c8d..e506fd7bdba575c74bf8e9afab26f2cead8ae8c2 100644 GIT binary patch delta 10 RcmXRZVPUS|kebL+000hV0*?Ry delta 10 RcmXRZVPUS|keJ9)000hN0*wFw diff --git a/shapefiles/test/multipoint.dbf b/shapefiles/test/multipoint.dbf index 8b7d2e1850061f2c073fdf441c6b265cff7baee7..9ddef881743ea795eb6a40b7d308495b619fc9bc 100644 GIT binary patch delta 10 RcmXRZVPUS|kebL+000hV0*?Ry delta 10 RcmXRZVPUS|keJ9)000hN0*wFw diff --git a/shapefiles/test/onlydbf.dbf b/shapefiles/test/onlydbf.dbf index 25aa008de35cf680023fecf9501004211120c6bd..15229fadec9b8fdded4f427e4e90141f3027fde1 100644 GIT binary patch delta 10 RcmZ>CWMQu0kebM13jhjg0rmg@ delta 10 RcmZ>CWMQu0keJ9~3jhjY0rUU> diff --git a/shapefiles/test/point.dbf b/shapefiles/test/point.dbf index 0bc56c52cbf46f6d24d0c79722df4368e91fcdc8..fa058bc0aa44b1607f070bc6151e1b602f931f66 100644 GIT binary patch delta 10 RcmXRZVPUS|kebL+000hV0*?Ry delta 10 RcmXRZVPUS|keJ9)000hN0*wFw diff --git a/shapefiles/test/polygon.dbf b/shapefiles/test/polygon.dbf index 0427d4fc893974c401dd52aa4fbfcb7d740946b5..20dc6aeb55bfc6e27b7bdc6cba1ec5c9baf117cf 100644 GIT binary patch delta 10 RcmXRZVPUS|kebL+000hV0*?Ry delta 10 RcmXRZVPUS|keJ9)000hN0*wFw diff --git a/shapefiles/test/shapetype.dbf b/shapefiles/test/shapetype.dbf index 25aa008de35cf680023fecf9501004211120c6bd..15229fadec9b8fdded4f427e4e90141f3027fde1 100644 GIT binary patch delta 10 RcmZ>CWMQu0kebM13jhjg0rmg@ delta 10 RcmZ>CWMQu0keJ9~3jhjY0rUU> diff --git a/shapefiles/test/testfile.dbf b/shapefiles/test/testfile.dbf index 25aa008de35cf680023fecf9501004211120c6bd..15229fadec9b8fdded4f427e4e90141f3027fde1 100644 GIT binary patch delta 10 RcmZ>CWMQu0kebM13jhjg0rmg@ delta 10 RcmZ>CWMQu0keJ9~3jhjY0rUU> diff --git a/test_shapefile.py b/test_shapefile.py index 81de1a6..21a4217 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -126,6 +126,28 @@ ], ]} ), + (shapefile.POLYGON, # multi polygon, nested exteriors with holes (unordered and tricky holes designed to throw off ring_sample() test) + [(1,1),(1,9),(9,9),(9,1),(1,1), # exterior 1 + (3,3),(3,7),(7,7),(7,3),(3,3), # exterior 2 + (4.5,4.5),(4.5,5.5),(5.5,5.5),(5.5,4.5),(4.5,4.5), # exterior 3 + (4,4),(4,4),(6,4),(6,4),(6,4),(6,6),(4,6),(4,4), # hole 2.1 (hole has duplicate coords) + (2,2),(3,3),(4,2),(8,2),(8,8),(4,8),(2,8),(2,4),(2,2), # hole 1.1 (hole coords form straight line and starts in concave orientation) + ], + [0,5,10,15,20+3], + {'type':'MultiPolygon','coordinates':[ + [ # poly 1 + [(1,1),(1,9),(9,9),(9,1),(1,1)], # exterior 1 + [(2,2),(3,3),(4,2),(8,2),(8,8),(4,8),(2,8),(2,4),(2,2)], # hole 1.1 + ], + [ # poly 2 + [(3,3),(3,7),(7,7),(7,3),(3,3)], # exterior 2 + [(4,4),(4,4),(6,4),(6,4),(6,4),(6,6),(4,6),(4,4)], # hole 2.1 + ], + [ # poly 3 + [(4.5,4.5),(4.5,5.5),(5.5,5.5),(5.5,4.5),(4.5,4.5)], # exterior 3 + ], + ]} + ), (shapefile.POLYGON, # multi polygon, holes incl orphaned holes (unordered), should raise warning [(1,1),(1,9),(9,9),(9,1),(1,1), # exterior 1 (11,11),(11,19),(19,19),(19,11),(11,11), # exterior 2 @@ -152,7 +174,7 @@ ], ]} ), - (shapefile.POLYGON, # multi polygon, exteriors with wrong orientation (be nice and interpret as such) + (shapefile.POLYGON, # multi polygon, exteriors with wrong orientation (be nice and interpret as such), should raise warning [(1,1),(9,1),(9,9),(1,9),(1,1), # exterior with hole-orientation (11,11),(19,11),(19,19),(11,19),(11,11), # exterior with hole-orientation ],