geopandas → read and manipulate polygon data

rasterio → read/write raster (GeoTIFF) data

rasterize → convert polygons to raster masks

Path → handle file paths cleanly

tqdm → progress bars

numpy → array operations

In [None]:
import geopandas as gpd
import rasterio
from rasterio.features import rasterize
from pathlib import Path
import numpy as np
from tqdm import tqdm


Path Configuration
Organizes all paths in one place.

Creates the mask output directory if it doesn’t exist.

In [None]:
# Root of the project tree; change this to the location on your machine/server
BASE_PATH = Path("/content/cafo_project")

# Poultry barn polygons (GeoJSON).
# NOTE(review): this is an absolute path that does not live under BASE_PATH,
# so it has to be updated separately when the project is moved.
LABELS_PATH = Path("/home/grad20/drodriguez/ML/poultry-cafos/data/Delmarva_PL_House_Final2_epsg4326.geojson")

# NAIP tiles are read from here ...
TILE_DIR = BASE_PATH.joinpath("data/updated_naip_tiles_2018")

# ... and the rasterized masks are written here (created on first run,
# including any missing parent directories)
OUTPUT_MASK_DIR = BASE_PATH.joinpath("results/updated_chesapeake_predictions_2018/masks")
OUTPUT_MASK_DIR.mkdir(parents=True, exist_ok=True)


Reads the GeoJSON polygons.

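Reprojects the polygons to EPSG:4326 as a common starting coordinate reference system (CRS); the exact match to each tile's CRS is done later, per tile, inside the rasterization function.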

In [None]:
# Read the poultry barn polygons and normalize them to EPSG:4326.
# This is only a common starting CRS: rasterize_tile_mask reprojects the
# polygons again to the CRS of each individual tile.
print("📦 Loading poultry barn polygons...")
barns = gpd.read_file(LABELS_PATH).to_crs("EPSG:4326")


Converts polygons into binary masks.

Ensures masks match tile size, CRS, and alignment.

Handles tiles with no overlapping polygons.

In [None]:
def rasterize_tile_mask(tile_path, barns_gdf, out_path):
    """
    Convert polygons to a binary mask aligned with a raster tile.

    The mask is written as a single-band uint8 raster that reuses the tile's
    driver, size, CRS and affine transform. Pixels covered by a polygon are 1,
    every other pixel is 0. A tile without overlapping polygons produces an
    all-zero mask.

    Args:
        tile_path: Path of the raster tile that defines the output grid.
        barns_gdf: GeoDataFrame with the polygons to burn. It is reprojected
            to the tile's CRS, so it must have a CRS assigned.
        out_path: Destination path of the mask raster.
    """
    from shapely.geometry import box

    # Only the tile's metadata is needed, so release the file handle before
    # doing the geometry work and writing the output.
    with rasterio.open(tile_path) as src:
        transform = src.transform
        out_shape = (src.height, src.width)
        tile_crs = src.crs
        bounds = src.bounds
        meta = src.meta.copy()

    # Reproject barns to the tile CRS and clip to the tile footprint.
    # keep_geom_type=True drops the points/lines that clipping produces for
    # polygons that merely touch the tile border; those would otherwise be
    # burned as stray pixels.
    barns_projected = barns_gdf.to_crs(tile_crs)
    barns_clipped = barns_projected.clip(box(*bounds), keep_geom_type=True)

    # Skip missing/empty geometries so rasterize only ever sees valid shapes.
    shapes = [
        (geom, 1)
        for geom in barns_clipped.geometry
        if geom is not None and not geom.is_empty
    ]

    # Create mask: 1 where polygons exist, 0 elsewhere
    if shapes:
        mask = rasterize(
            shapes,
            out_shape=out_shape,
            transform=transform,
            fill=0,
            dtype=np.uint8,
        )
    else:
        mask = np.zeros(out_shape, dtype=np.uint8)

    # Save mask. The tile's nodata value must not be inherited: a nodata of 0
    # would flag the whole background as "no data", and a value outside the
    # uint8 range would make the write fail.
    meta.update({"count": 1, "dtype": "uint8", "nodata": None})
    with rasterio.open(out_path, "w", **meta) as dst:
        dst.write(mask, 1)


Loops over all tiles.

Calls the rasterization function for each tile.

Saves masks in the output directory with the same name as the original tile.

In [None]:
# Collect every GeoTIFF tile in the input directory, in sorted order
tile_paths = sorted(TILE_DIR.glob("*.tif"))
print(f"🗺️ Found {len(tile_paths)} tiles to process...")

# Write one mask per tile; the mask reuses the tile's file name inside
# the mask output directory
for tile_path in tqdm(tile_paths, desc="Rasterizing masks"):
    mask_path = OUTPUT_MASK_DIR.joinpath(tile_path.name)
    rasterize_tile_mask(tile_path, barns, mask_path)

print("\n✅ Rasterization complete. Binary masks saved to:", OUTPUT_MASK_DIR)
