In [12]:
import geopandas as gpd

# Read every feature of the England GeoJSON into a GeoDataFrame.
gdf = gpd.read_file("../../dataset/England.geojson")

# List the attribute columns that came with the file.
print(gdf.columns)

# For each object-dtype column, print its distinct values so the
# categorical fields (e.g. the type columns) can be told apart from free text.
object_columns = [name for name in gdf.columns if gdf[name].dtype == object]
for name in object_columns:
    print(f"\nUnique values in {name}:")
    print(gdf[name].unique())


Index(['id', 'name', 'uri', 'water-body-type', 'geometry-type', 'geometry'], dtype='object')

Unique values in id:
['GB40702G503900' 'GB104027063930' 'GB107042016440' ... 'GB31229197'
 'GB30846165' 'GB30432621']

Unique values in name:
['Kent Romney Marsh' 'Dorts Dike  Catchment (trib of Wharfe)'
 'Whiteparish Trib' ... 'Rydal Water' 'Stannon Lake' 'Middle Redmires']

Unique values in uri:
['http://environment.data.gov.uk/catchment-planning/so/WaterBody/GB40702G503900'
 'http://environment.data.gov.uk/catchment-planning/so/WaterBody/GB104027063930'
 'http://environment.data.gov.uk/catchment-planning/so/WaterBody/GB107042016440'
 ...
 'http://environment.data.gov.uk/catchment-planning/so/WaterBody/GB31229197'
 'http://environment.data.gov.uk/catchment-planning/so/WaterBody/GB30846165'
 'http://environment.data.gov.uk/catchment-planning/so/WaterBody/GB30432621']

Unique values in water-body-type:
['{ "string": "Groundwater Body", "lang": "en" }'
 '{ "string": "River", "lang": "en" }'
 '{

In [20]:
# Build a simplified, catchment-only GeoJSON from the full England dataset.
#
# The source file is re-read here so the cell gives the same result every
# time it is run, regardless of what earlier cells did to `gdf`.
SOURCE_PATH = "../../dataset/England.geojson"
OUTPUT_PATH = "england_catchments_only.geojson"

# Simplification tolerance, expressed in the units of the layer's CRS
# (presumably degrees for this GeoJSON -- confirm with `gdf.crs`).
# Values tried in earlier drafts of this cell: 0.1, 0.01, 0.001.
SIMPLIFY_TOLERANCE = 0.01

# Set to True to keep surface-water catchments only.
EXCLUDE_GROUNDWATER = False

water_bodies = gpd.read_file(SOURCE_PATH)

# Keep only features whose geometry-type ends in "/Catchment"
# (i.e. polygon boundaries); rows with a missing geometry-type are dropped.
keep = water_bodies["geometry-type"].str.endswith("/Catchment", na=False)
if EXCLUDE_GROUNDWATER:
    keep = keep & ~water_bodies["water-body-type"].str.contains("Groundwater", na=False)
catchments = water_bodies[keep]

# Drop the columns that are not needed downstream. The explicit copy makes
# the result independent of `water_bodies`, so the geometry assignment below
# does not raise pandas' SettingWithCopyWarning.
gdf = catchments[["id", "name", "geometry"]].copy()

# Simplify each geometry on its own. preserve_topology=True keeps every
# individual geometry valid; it does not coordinate edges shared between
# neighbouring catchments.
gdf["geometry"] = gdf["geometry"].simplify(
    tolerance=SIMPLIFY_TOLERANCE, preserve_topology=True
)

# Write ALL simplified catchments to GeoJSON.
# For a small sample or another format, slice and change the driver, e.g.
#   gdf.iloc[:10].to_file("england_catchments_only_10.shp", driver="ESRI Shapefile")
#   gdf.iloc[:10].to_file("england_catchments_only_10.gpkg", driver="GPKG")
gdf.to_file(OUTPUT_PATH, driver="GeoJSON")
