In [1]:
import os
import numpy as np
import pandas as pd

import fiona
import geopandas as gpd
from shapely.validation import explain_validity, make_valid

In [2]:
# User name that is interpolated into the local working-directory path below.
user = "jcroff"

# Local folder used by this notebook; the file geodatabase read later is
# resolved relative to it.
work_dir = "/".join(
    [
        "/Users",
        user,
        "Library/CloudStorage/Box-Box",
        "DataViz Projects/Adhoc-Spatial-Analysis",
        "Check Geometry Validity",
    ]
)

## Pull data with invalid geometries

In [3]:
# Query URL for layer 0 of the region_jurisdiction_clp feature service,
# requesting every field (outFields=*) of every record (where=1=1) as GeoJSON.
# URL segments are always separated by "/", so they are joined explicitly
# instead of with os.path.join, which uses the operating system's path
# separator and would produce backslashes on Windows.
jurs_url = "/".join(
    [
        "https://services3.arcgis.com",
        "i2dkYWmb4wHvYPda",
        "arcgis",
        "rest",
        "services",
        "region_jurisdiction_clp",
        "FeatureServer",
        "0",
        "query?outFields=*&where=1%3D1&f=geojson",
    ]
)

# Read the GeoJSON query response into a GeoDataFrame, then reproject it
# to EPSG:26910 before any validity checks are run.
jurs_gdf = (
    gpd.read_file(jurs_url, driver="GeoJSON")
    .to_crs(crs="EPSG:26910")
)

In [4]:
# Report how many source geometries fail the validity test, then tally the
# explain_validity() message returned for every row.

print("Before: Invalid geoms", int((~jurs_gdf.is_valid).sum()))
print(jurs_gdf.geometry.apply(explain_validity).value_counts())


Before: Invalid geoms 20
Valid Geometry                                               89
Nested shells[560126.479699979 4240704.2614818]               1
Nested shells[569161.321667562 4217876.18877213]              1
Nested shells[591294.506168401 4250607.64132171]              1
Nested shells[596396.726551017 4243802.13433094]              1
Nested shells[599933.961133772 4139079.75898333]              1
Nested shells[582036.560659035 4139691.79373596]              1
Nested shells[565188.391601871 4146479.14318992]              1
Nested shells[565897.407518945 4148111.50553457]              1
Nested shells[560086.720554918 4240654.36368286]              1
Ring Self-intersection[542159.986077219 4193076.25494241]     1
Nested shells[589161.493942158 4152895.62400875]              1
Nested shells[540341.932192703 4205254.96913255]              1
Nested shells[516376.340698606 4251214.39383315]              1
Nested shells[547542.642964931 4236669.53058565]              1
Nested shells[5

## Read ESRI repaired features

The geometries were repaired with ESRI's [Repair Geometry Tool](https://pro.arcgis.com/en/pro-app/latest/tool-reference/data-management/repair-geometry.htm), once with each of its two [validation methods](https://pro.arcgis.com/en/pro-app/latest/tool-reference/data-management/repair-geometry.htm#:~:text=Boolean-,validation_method,-(Optional)):
1. ESRI - The Esri geometry validation method will be used. This is the default.
2. Open Geospatial Consortium (OGC) - The OGC geometry validation method will be used.

In [5]:
# File geodatabase (inside work_dir) that holds the two ESRI-repaired layers.
fgdb = os.path.join(work_dir, "check_repair_geometry.gdb")

# No `driver` argument is passed: "GeoDatabase" is not a GDAL/OGR driver short
# name (file geodatabases are read by "OpenFileGDB" / "FileGDB"), so the format
# is left to be auto-detected from the .gdb path.
# NOTE(review): unlike jurs_gdf, these layers are not reprojected here - confirm
# that is intended if they are later compared spatially with jurs_gdf.
jurs_repair_ogc = gpd.read_file(fgdb, layer="region_jurisdi_ogc_repair")
jurs_repair_esri = gpd.read_file(fgdb, layer="region_jurisdi_esri_repair")

In [6]:
# Report how many geometries in the OGC-method repair layer fail the validity
# test, then tally the explain_validity() message returned for every row.

print("After ESRI OGC Repair: Invalid geoms", int((~jurs_repair_ogc.is_valid).sum()))
print(jurs_repair_ogc.geometry.apply(explain_validity).value_counts())

After ESRI OGC Repair: Invalid geoms 0
Valid Geometry    109
Name: geometry, dtype: int64


In [7]:
# Report how many geometries in the ESRI-method repair layer fail the validity
# test, then tally the explain_validity() message returned for every row.

print("After ESRI Repair: Invalid geoms", int((~jurs_repair_esri.is_valid).sum()))
print(jurs_repair_esri.geometry.apply(explain_validity).value_counts())

After ESRI Repair: Invalid geoms 1
Valid Geometry                                             108
Ring Self-intersection[-122.520558243 37.8842109460001]      1
Name: geometry, dtype: int64


## Repair geometry open-source

In [8]:
def repair_geometry(gdf):
    """
    Test the validity of GeoDataFrame geometries and repair the invalid ones.

    Validity is checked with the ``is_valid`` property of the active geometry
    column. When invalid geometries are found, the reason for each row is
    reported with shapely's ``explain_validity()`` and every geometry is passed
    through shapely's ``make_valid()``. Progress messages and the before/after
    counts of invalid geometries are printed. For more information about how
    these methods and functions work, please refer to the shapely documentation:
    https://shapely.readthedocs.io/en/stable/manual.html#diagnostics

    Author: Joshua Croff

    Args:
        gdf (GeoDataFrame): A Geopandas GeoDataFrame object. It is not modified;
            repairs are applied to a copy.

    Returns:
        GeoDataFrame: ``gdf`` itself when all geometries are already valid,
        otherwise a repaired copy. The copy is returned even when some
        geometries are still invalid after repair (a message is printed in that
        case), so the caller never receives ``None``.
    """
    # Function-local import keeps the function self-contained.
    from shapely.validation import explain_validity, make_valid

    valid_mask = gdf.geometry.is_valid
    if valid_mask.all():
        print("Geodataframe contains valid geometry. No repair necessary.")
        return gdf

    print(
        "Geodataframe contains invalid geometry, starting geometry repair process...\n"
    )
    print(gdf.geometry.apply(explain_validity).value_counts())
    invalid_before_ct = int((~valid_mask).sum())

    # Make valid. The result is written back under the active geometry column's
    # own name: assigning to a hard-coded "geometry" key would only add a new
    # column (and leave the invalid geometries active) when the geometry column
    # is named something else.
    repaired_gdf = gdf.copy()
    geom_col = repaired_gdf.geometry.name
    repaired_gdf[geom_col] = repaired_gdf.geometry.apply(make_valid)
    invalid_after_ct = int((~repaired_gdf.geometry.is_valid).sum())

    if invalid_after_ct == 0:
        print(
            f"\nGeometry repair complete.\nInvalid geometries before repair: {invalid_before_ct}\nInvalid Geometries after repair: {invalid_after_ct}"
        )
    else:
        print(
            "\nGeodataframe still contains invalid geometries. Consider manual fix or revisiting geoprocess for issues that may create invalid geometries."
        )
        print(
            f"Invalid geometries before repair: {invalid_before_ct}\nInvalid Geometries after repair: {invalid_after_ct}"
        )

    # Always hand back the (possibly partially) repaired copy so downstream
    # cells can keep working with, or inspect, the result.
    return repaired_gdf

In [9]:
# Run the open-source (shapely) repair on the jurisdictions pulled from the
# feature service.
jurs_repair_gdf = repair_geometry(jurs_gdf)

Geodataframe contains invalid geometry, starting geometry repair process...

Valid Geometry                                               89
Nested shells[560126.479699979 4240704.2614818]               1
Nested shells[569161.321667562 4217876.18877213]              1
Nested shells[591294.506168401 4250607.64132171]              1
Nested shells[596396.726551017 4243802.13433094]              1
Nested shells[599933.961133772 4139079.75898333]              1
Nested shells[582036.560659035 4139691.79373596]              1
Nested shells[565188.391601871 4146479.14318992]              1
Nested shells[565897.407518945 4148111.50553457]              1
Nested shells[560086.720554918 4240654.36368286]              1
Ring Self-intersection[542159.986077219 4193076.25494241]     1
Nested shells[589161.493942158 4152895.62400875]              1
Nested shells[540341.932192703 4205254.96913255]              1
Nested shells[516376.340698606 4251214.39383315]              1
Nested shells[547542.642964

In [10]:
# Split multi-part geometries into one row per part. index_parts=True is passed
# explicitly so the (original row, part number) MultiIndex is kept regardless of
# the geopandas version's default, and no FutureWarning about the changing
# default is emitted.
jurs_repair_gdf.explode(index_parts=True)

  jurs_repair_gdf.explode()


Unnamed: 0,Unnamed: 1,objectid,fipst,fipco,coname,jurname,Shape__Area,Shape__Length,geometry
0,0,1,06,001,Alameda,Alameda,0.002786,0.499257,"POLYGON ((563164.283 4179845.720, 562832.651 4..."
0,1,1,06,001,Alameda,Alameda,0.002786,0.499257,"POLYGON ((566319.030 4181545.291, 566312.846 4..."
0,2,1,06,001,Alameda,Alameda,0.002786,0.499257,"POLYGON ((567235.990 4178319.316, 567227.429 4..."
0,3,1,06,001,Alameda,Alameda,0.002786,0.499257,"POLYGON ((567314.997 4175007.578, 567085.406 4..."
1,0,2,06,001,Alameda,Albany,0.000475,0.147444,"POLYGON ((562582.738 4194718.897, 562588.752 4..."
...,...,...,...,...,...,...,...,...,...
107,2,108,06,097,Sonoma,Petaluma,0.003871,0.544108,"POLYGON ((529015.826 4236819.120, 529088.102 4..."
107,3,108,06,097,Sonoma,Petaluma,0.003871,0.544108,"POLYGON ((534419.959 4235852.529, 534457.940 4..."
107,4,108,06,097,Sonoma,Petaluma,0.003871,0.544108,"POLYGON ((533779.053 4238694.625, 533745.188 4..."
107,5,108,06,097,Sonoma,Petaluma,0.003871,0.544108,"POLYGON ((536854.820 4238935.301, 536830.810 4..."
