In [None]:
import geopandas as gpd
from pathlib import Path

# ----------------------------
# Input and output locations
# ----------------------------
# Inputs: the reaches shapefile to be clipped and the basin file used as the clip mask.
reaches_path = r"C:\UNESCO\Code\data\shp\EU\eu_sword_reaches_hb22_v17b.shp"
basin_path = r"C:\UNESCO\Code\data\Dnipro_basin_shapefiles\mrb_basins.json"

# Derived products live in their own folder; create it (and any missing
# parents) up front, tolerating the case where it already exists.
out_dir = Path(r"C:\UNESCO\Code\data\Dnipro_basin_shapefiles\derived")
out_dir.mkdir(exist_ok=True, parents=True)

# Shapefile output. The GeoPackage variant is kept for reference as the more
# robust alternative format.
out_shp = out_dir.joinpath("dnipro_sword_reaches_clip.shp")
# out_gpkg = out_dir.joinpath("dnipro_sword_reaches_clip.gpkg")

# ----------------------------
# Read data
# ----------------------------
reaches = gpd.read_file(reaches_path)
basin = gpd.read_file(basin_path)

print("Reaches CRS:", reaches.crs)
print("Basin CRS:", basin.crs)

# ----------------------------
# Validate / assign coordinate reference systems
# ----------------------------
# Fail fast: without a CRS on the reaches layer the basin cannot be
# reprojected to match it, so check before doing any geometry work.
if reaches.crs is None:
    raise ValueError("Reaches layer has no CRS. Cannot safely clip without CRS.")

# Assumption: a basin file that carries no CRS metadata is in EPSG:4326
# (WGS 84 lon/lat) -- confirm against the data source if this branch is hit.
if basin.crs is None:
    basin = basin.set_crs(epsg=4326)

# ----------------------------
# Make basin a single, valid geometry (repair, then dissolve)
# ----------------------------
# Repair invalid geometries *before* dissolving: dissolve() unions the
# geometries, and invalid inputs can make that union fail or come out wrong,
# so a repair applied afterwards is too late. A zero-distance buffer is the
# repair the script already relied on. Work on a copy so the
# `basin` layer as read from disk stays untouched.
basin_valid = basin.copy()
basin_valid["geometry"] = basin_valid.geometry.buffer(0)
basin_union = basin_valid.dissolve()  # 1-row GeoDataFrame

# ----------------------------
# Reproject basin to reaches CRS (clip requires same CRS)
# ----------------------------
basin_union = basin_union.to_crs(reaches.crs)

# ----------------------------
# Clip
# ----------------------------
# keep_geom_type=True discards intersection by-products whose geometry type
# differs from the input (e.g. a point left where a reach only touches the
# basin boundary). A Shapefile layer holds a single geometry type, so mixed
# results would otherwise break the write below.
reaches_clip = gpd.clip(reaches, basin_union, keep_geom_type=True)

print("Input reaches:", len(reaches))
print("Clipped reaches:", len(reaches_clip))

# An empty result almost always signals a wrong basin file or a bad CRS
# definition; stop with a clear message instead of writing an empty layer.
if reaches_clip.empty:
    raise ValueError(
        "Clip produced no reaches. Check that the basin overlaps the reaches "
        "layer and that both CRS definitions are correct."
    )

# ----------------------------
# Save
# ----------------------------
reaches_clip.to_file(out_shp)
print("Wrote:", out_shp)

# If you prefer GeoPackage (often safer than Shapefile):
# reaches_clip.to_file(out_gpkg, layer="dnipro_reaches", driver="GPKG")
# print("Wrote:", out_gpkg)
