In [None]:
import geopandas as gpd

# Load the full GeoJSON
gdf = gpd.read_file("../../dataset/England.geojson")

# Check what columns exist
print(gdf.columns)

# Peek at the unique values of every attribute column.
# The previous check compared each column's dtype against the string
# "geometry-type", which is a column *name*, so it never matched and the loop
# printed nothing. The active geometry column is skipped because listing every
# distinct shape is not informative.
for col in gdf.columns:
    if col == gdf.geometry.name:
        continue
    print(f"\nUnique values in {col}:")
    print(gdf[col].unique())


Index(['id', 'name', 'uri', 'water-body-type', 'geometry-type', 'geometry'], dtype='object')

Unique values in id:
['GB40702G503900' 'GB104027063930' 'GB107042016440' ... 'GB31229197'
 'GB30846165' 'GB30432621']

Unique values in name:
['Kent Romney Marsh' 'Dorts Dike  Catchment (trib of Wharfe)'
 'Whiteparish Trib' ... 'Rydal Water' 'Stannon Lake' 'Middle Redmires']

Unique values in uri:
['http://environment.data.gov.uk/catchment-planning/so/WaterBody/GB40702G503900'
 'http://environment.data.gov.uk/catchment-planning/so/WaterBody/GB104027063930'
 'http://environment.data.gov.uk/catchment-planning/so/WaterBody/GB107042016440'
 ...
 'http://environment.data.gov.uk/catchment-planning/so/WaterBody/GB31229197'
 'http://environment.data.gov.uk/catchment-planning/so/WaterBody/GB30846165'
 'http://environment.data.gov.uk/catchment-planning/so/WaterBody/GB30432621']

Unique values in water-body-type:
['{ "string": "Groundwater Body", "lang": "en" }'
 '{ "string": "River", "lang": "en" }'
 '{

In [15]:
# Write one GeoJSON file per distinct value of the "geometry-type" attribute
# column. Files are named england_1.geojson, england_2.geojson, ... following
# the order in which unique() yields the values.
split_columns = [name for name in gdf.columns if name == "geometry-type"]
for split_col in split_columns:
    print(f"Column {split_col} is a geometry-type")
    for file_no, type_value in enumerate(gdf[split_col].unique(), start=1):
        print(f"Processing {type_value}")
        # Rows whose attribute equals the current value
        subset = gdf[gdf[split_col] == type_value]
        subset.to_file(f"england_{file_no}.geojson", driver="GeoJSON")
    

Column geometry-type is a geometry-type
Processing http://environment.data.gov.uk/catchment-planning/def/geometry/Catchment
Processing http://environment.data.gov.uk/catchment-planning/def/geometry/RiverLine


In [None]:
# Write one GeoJSON file per geometry class reported by `geom_type`
# (e.g. Polygon, MultiPolygon, MultiLineString). This is the shape class of
# each feature, not the "geometry-type" attribute column.
shape_kinds = gdf.geometry.geom_type
for kind in shape_kinds.unique():
    # Features whose geometry is of the current class
    subset = gdf[shape_kinds == kind]

    target = f"England_{kind}.geojson"
    subset.to_file(target, driver="GeoJSON")
    print(f"Saved {len(subset)} features of type {kind} to {target}")

Saved 446 features of type MultiPolygon to England_MultiPolygon.geojson
Saved 4327 features of type Polygon to England_Polygon.geojson
Saved 3928 features of type MultiLineString to England_MultiLineString.geojson


In [20]:
# Build a simplified, catchment-only layer and save it as GeoJSON.
# The dataset is reloaded from disk so this cell gives the same result no
# matter what earlier cells did to `gdf`.
gdf = gpd.read_file("../../dataset/England.geojson")

# Tolerance handed to simplify(); it is expressed in the units of the layer's
# coordinate reference system. Larger values remove more vertices.
# NOTE(review): if this file is in lon/lat degrees, 0.01 is a coarse
# tolerance - confirm it is acceptable for the intended map scale.
SIMPLIFY_TOLERANCE = 0.01

# Set to True to also drop rows whose water-body-type mentions "Groundwater"
# (i.e. keep surface catchments only). Off by default.
EXCLUDE_GROUNDWATER = False

# Keep only rows whose geometry-type URI ends in "/Catchment";
# rows with a missing geometry-type are treated as non-matching.
is_catchment = gdf["geometry-type"].str.endswith("/Catchment", na=False)
if EXCLUDE_GROUNDWATER:
    is_catchment &= ~gdf["water-body-type"].str.contains("Groundwater", na=False)

# Keep only the columns needed downstream. The explicit copy makes the result
# an independent frame, so the geometry assignment below does not write into
# a slice of the loaded data (avoids SettingWithCopyWarning).
catchment_rows = gdf[is_catchment]
gdf = catchment_rows[["id", "name", "geometry"]].copy()

# Simplify each geometry independently. preserve_topology=True keeps every
# individual shape valid, but neighbouring catchments are not simplified
# jointly, so their shared borders may no longer line up exactly.
gdf["geometry"] = gdf["geometry"].simplify(
    tolerance=SIMPLIFY_TOLERANCE, preserve_topology=True
)

# Save ALL simplified catchments (not a sample) to GeoJSON
gdf.to_file("england_catchments_only.geojson", driver="GeoJSON")
