In [3]:
# 1) Setup
import numpy as np
import geopandas as gpd
from shapely.geometry import shape
import rasterio
from rasterio import features

# --- input / output locations (adjust to your environment) ---
raster_path = r"C:\temp\timor_leste\impact_observatory\east-timor_20240101-20241231_data.tif"
out_shp = r"C:\temp\timor_leste\cropland_mask.shp"  # output shapefile
# Alternative output format: a GeoPackage (handles big layers better)
# out_gpkg = r"C:\temp\timor_leste\cropland_mask.gpkg"

# 2) Load band 1 and flag the pixels whose class value is 4 or 5.
# NOTE(review): in the Impact Observatory 9-class land-cover legend, 5 is
# "Crops" while 4 appears to be "Flooded vegetation" -- confirm that including
# class 4 in a mask named "cropland" is intended.
with rasterio.open(raster_path) as src:
    # Georeferencing needed later to place the polygons in map coordinates.
    transform = src.transform
    crs = src.crs
    # masked=True returns a masked array, so nodata pixels can be excluded below.
    arr = src.read(1, masked=True)

# The dataset is closed at this point; the arrays are already in memory.
valid_pixels = ~arr.mask                  # True where the pixel is not nodata
target_pixels = np.isin(arr, [4, 5])      # True where the class is 4 or 5
cropland_mask = target_pixels & valid_pixels

# 3) Vectorize the mask into polygons.
# The boolean mask is converted to a 0/1 uint8 image, which is what gets
# polygonized. Passing the same boolean array as `mask` restricts the
# operation to the True pixels.
binary_image = cropland_mask.astype(np.uint8)
shapes_iter = features.shapes(
    binary_image,
    mask=cropland_mask,
    transform=transform,
)

# features.shapes yields (geometry, value) pairs; keep the value == 1 regions
# and convert each geometry with shapely's shape().
geoms = [shape(geometry) for geometry, value in shapes_iter if value == 1]

# Stop early when the raster produced no matching polygons, instead of
# writing an empty layer.
if not geoms:
    raise ValueError("No polygons found where raster value == 4 or 5.")

# 4) Assemble the GeoDataFrame.
# The only attribute column is a running 'id' (0 .. n-1); the raster class
# value is intentionally not stored. The CRS is the one read from the raster.
polygon_ids = np.arange(len(geoms), dtype="int64")
gdf = gpd.GeoDataFrame({"id": polygon_ids}, geometry=geoms, crs=crs)

# (Optional) remove tiny sliver polygons smaller than N pixels' worth of area.
# The area of one pixel comes from the affine transform: `a` is the pixel width
# and `e` the pixel height (usually negative for north-up rasters, hence abs()).
# Areas are expressed in the units of the layer's CRS.
# pixel_area = abs(transform.a * transform.e)
# min_pixels = 4
# min_area = min_pixels * pixel_area
# gdf = gdf[gdf.area >= min_area]

# (Optional) fix invalid geometries (occasionally helps with complex rasters)
# gdf["geometry"] = gdf.buffer(0)

# 5) Write the polygons to the shapefile path configured above.
gdf.to_file(out_shp)

# Alternative: write a single-layer GeoPackage instead.
# gdf.to_file(out_gpkg, layer="cropland_mask", driver="GPKG")

# Report how many polygons were written and where.
summary = f"Done. Wrote {len(gdf)} polygons to:\n{out_shp}"
print(summary)


Done. Wrote 8688 polygons to:
C:\temp\timor_leste\cropland_mask.shp
