# Generating Flood Percent Rasters from Resampled Binary Flood Extent Maps

This notebook demonstrates downloading a flood extent from the FIM-C Benchmark STAC catalog and generating a resampled raster in which each pixel holds the percentage of the original raster's pixels that are flooded. The output raster is aligned to the model CONUS grid.

A CLI tool is shown first. Next, the steps of the CLI script are demonstrated cell by cell. Finally, alternative CLI invocations that can serve as test cases are shown at the bottom.

In [23]:
import codecs
import os
import pathlib
import subprocess

import numpy as np
import rasterio
import rioxarray
from dotenv import load_dotenv
from rasterio import shutil as rio_shutil
from rasterio.enums import Resampling
from rasterio.vrt import WarpedVRT

from trainer.datasets.utils_stac import STACReader

load_dotenv()

True

In [None]:
# All data paths below are resolved relative to the directory the notebook runs in
cwd = pathlib.Path(os.getcwd())

# STAC identifiers of the flood extent to download
stac_collection = "gfm-collection"
stac_item_id = "DFO-5074_tile-S1A_IW_GRDH_1SDV_20210518T001113_20210518T001138_037935_047A1E_FB14"
stac_asset_id = "E087N027T3_Observed_Flood_Extent"


# CONUS model grid: remote source and the local copy that is handed to the CLI script
conus_grid_path = "s3://fim-services-data/f1/data/conus.tif"
conus_grid_local = "./data/conus_grid_temp.tif"
# Target cell size used for the percent-flooded raster
output_resolution = 250

download_path = cwd / "data/flood/download"
processing_path = cwd / "data/flood/processed"

# Intermediate rasters (removed by the cleanup cell) and the final output
temp_ras = processing_path / "temp_percent.tif"
temp_ras_aligned = processing_path / "temp_aligned.tif"
final = processing_path / f"flood_percent_{stac_asset_id}.tif"

# parents=True also creates ./data; exist_ok=True makes the cell safe to re-run
for directory in (download_path, processing_path):
    directory.mkdir(parents=True, exist_ok=True)

# check that local CONUS grid exists, if not download it for CLI script
try:
    # Opening is only a probe that the local copy is readable; close it straight away
    rioxarray.open_rasterio(conus_grid_local).close()
except Exception:  # noqa: BLE001
    # Any failure to open the local copy (missing, unreadable) triggers a fresh download
    with rioxarray.open_rasterio(conus_grid_path) as remote_grid:
        remote_grid.rio.to_raster(conus_grid_local)
    print("Downloaded and saved local CONUS grid")

In [3]:
# init a STAC reader
# Points the project's STACReader at the benchmark catalog JSON stored in the "fimc-data" bucket.
# `reader` is used by the following cell to fetch the flood extent asset.
reader = STACReader(bucket="fimc-data", catalog="s3://fimc-data/benchmark/stac-bench-cat/catalog.json")

In [5]:
# save a flood extent
# can take some time: ~1 min
# Requests the asset identified by collection / item id / asset id, with `download_path` as output directory.
# The returned value is used further down as the input raster path (CLI argument and
# rioxarray.open_rasterio) -- presumably the local path of the downloaded file; verify against STACReader.
stac_item = reader.read_stac(
    collection=stac_collection, item_id=stac_item_id, asset_id=stac_asset_id, output_dir=download_path
)

Downloaded fimc-data/benchmark/rs/gfm/5074/S1A_IW_GRDH_1SDV_20210518T001113_20210518T001138_037935_047A1E_FB14/NA_E087N027T3_ENSEMBLE_FLOOD_20210518T001113_VV_NA020M_E087N027T3_20210518.tif to /home/quercus.hamlin/Documents/code/f1_trainer/examples/data/flood/download/NA_E087N027T3_ENSEMBLE_FLOOD_20210518T001113_VV_NA020M_E087N027T3_20210518.tif


## Run from CLI
Run the full process directly from CLI

Location: `f1_trainer/scripts/flood_percent_raster.py`

In [None]:
# A raster aligned to grid at the configured output resolution (250 m)
# The argument vector is built as a list rather than by splitting a formatted string,
# so input/output paths that contain whitespace stay single arguments.
cmd = [
    "python",
    "../scripts/flood_percent_raster.py",
    str(stac_item),
    str(final),
    "--grid",
    str(conus_grid_local),
    "--resolution",
    str(output_resolution),
    "--overwrite",
]

output = subprocess.run(cmd, shell=False, capture_output=True)
print(output)
print(codecs.decode(output.stdout, "utf8"))

CompletedProcess(args=['python', '../scripts/flood_percent_raster.py', '/home/quercus.hamlin/Documents/code/f1_trainer/examples/data/flood/download/NA_E087N027T3_ENSEMBLE_FLOOD_20210518T001113_VV_NA020M_E087N027T3_20210518.tif', '/home/quercus.hamlin/Documents/code/f1_trainer/examples/data/flood/processed/flood_percent_E087N027T3_Observed_Flood_Extent.tif', '--grid', './data/conus_grid_temp2.tif', '--resolution', '250', '-o'], returncode=0, stdout=b'Reprojected raster to grid CRS\nClipped raster to final extent\nRemoved temporary /home/quercus.hamlin/Documents/code/f1_trainer/examples/data/flood/processed/temp_raster.tif\nRemoved temporary /home/quercus.hamlin/Documents/code/f1_trainer/examples/data/flood/processed/temp_raster_aligned.tif\n', stderr=b'')
Reprojected raster to grid CRS
Clipped raster to final extent
Removed temporary /home/quercus.hamlin/Documents/code/f1_trainer/examples/data/flood/processed/temp_raster.tif
Removed temporary /home/quercus.hamlin/Documents/code/f1_train

## Demonstrate steps of CLI
Breaks the CLI script into segments to walk through the process step by step.

In [None]:
# Read the CONUS grid's CRS, affine transform and pixel dimensions from the raster header
with rasterio.open(conus_grid_path) as grid:
    conus_crs = grid.crs
    conus_transform = grid.transform
    conus_height = grid.height
    conus_width = grid.width

In [None]:
# Load the downloaded flood extent and warp it into the CONUS grid CRS.
# The un-projected raster is never bound to a name, so it can be released as soon as the warp is done.
ras_working = rioxarray.open_rasterio(stac_item).rio.reproject(conus_crs)

In [None]:
def calculate_flood_percentage(raster, target_resolution=250):
    """Resample a binary flood-extent raster to the percentage of flooded pixels per coarse cell.

    The input is divided into blocks of roughly ``scale_y x scale_x`` pixels, where
    ``scale = |target_resolution / input resolution|``. Each output cell holds
    ``100 * (pixels equal to 1) / (pixels in the block)``. Every pixel of a block counts
    towards the denominator; any value other than 1 is treated as not flooded.
    Blocks that contain no pixels are set to NaN. Percentages are computed in float64.

    Args:
        raster (xr.DataArray): Raster of flood extent (binary). Indexed as
            ``raster.values[band]``, i.e. assumed to be laid out (band, y, x).
        target_resolution (int | float, optional): Output raster cell resolution to resample to,
            in the units of the raster's CRS. Defaults to 250.

    Returns:
        xr.DataArray: Resampled raster representing percent flooded. Its georeferencing comes
        from reprojecting ``raster`` onto a ``(new_height, new_width)`` grid in the same CRS.

    Raises:
        ValueError: If ``target_resolution`` is zero, or if it is so coarse relative to the
            raster that the output would have no rows or no columns (typical when a resolution
            in metres is applied to a raster in a geographic CRS).
    """
    if target_resolution == 0:
        raise ValueError("target_resolution must be non-zero.")

    resolution_error = "There is a problem with the target resolution. Are you using a geographic CRS with resolutions < 1 degree? Try a different resolution."

    # Get the current resolution and dimensions
    current_res_x, current_res_y = raster.rio.resolution()
    current_width = raster.rio.width
    current_height = raster.rio.height

    # Number of input pixels per output pixel along each axis (y resolution is usually negative)
    scale_x = abs(target_resolution / current_res_x)
    scale_y = abs(target_resolution / current_res_y)

    # Output dimensions; partial blocks at the far edges are dropped by the truncation
    new_width = int(current_width / scale_x)
    new_height = int(current_height / scale_y)

    # Fail before doing any work instead of relying on a ZeroDivisionError inside reproject
    if new_width < 1 or new_height < 1:
        raise ValueError(resolution_error)

    # Block boundaries are identical for every band, so compute them once
    row_start, row_end = _block_edges(new_height, scale_y, current_height)
    col_start, col_end = _block_edges(new_width, scale_x, current_width)
    block_sizes = np.outer(row_end - row_start, col_end - col_start)
    has_pixels = block_sizes > 0

    band_count = raster.rio.count
    percentages = np.zeros((band_count, new_height, new_width))

    # Get the data as a numpy array
    data = raster.values

    for b in range(band_count):
        # Pixels with value 1 are flooded
        flooded = _block_counts(data[b] == 1, row_start, row_end, col_start, col_end)
        # np.maximum avoids a 0/0 warning; empty blocks are overwritten with NaN by np.where
        percentages[b] = np.where(has_pixels, flooded / np.maximum(block_sizes, 1) * 100, np.nan)

    # Reproject only to obtain a correctly georeferenced template of the output shape;
    # its pixel values are discarded below
    try:
        percentage_raster = raster.rio.reproject(
            raster.rio.crs, shape=(new_height, new_width), resampling=Resampling.nearest
        )
    except ZeroDivisionError as e:
        raise ValueError(resolution_error) from e

    # Replace the values with our percentages
    percentage_raster.values = percentages

    return percentage_raster


def _block_edges(block_count, scale, axis_length):
    """Return (starts, ends) integer pixel indices of ``block_count`` consecutive blocks along one axis."""
    index = np.arange(block_count)
    starts = (index * scale).astype(np.int64)
    ends = np.minimum((index + 1) * scale, axis_length).astype(np.int64)
    # Guard against an end falling before its start so block sizes are never negative
    return starts, np.maximum(ends, starts)


def _block_counts(mask, row_start, row_end, col_start, col_end):
    """Count True pixels of a 2-D boolean ``mask`` inside every (row block, column block) rectangle."""
    # Running totals down each column, with a leading zero row so that
    # totals[end] - totals[start] is the count over rows start..end-1
    row_totals = np.zeros((mask.shape[0] + 1, mask.shape[1]), dtype=np.int32)
    np.cumsum(mask, axis=0, dtype=np.int32, out=row_totals[1:])
    per_row_block = row_totals[row_end] - row_totals[row_start]

    # Same trick along the columns of the (already much smaller) per-row-block array
    col_totals = np.zeros((per_row_block.shape[0], per_row_block.shape[1] + 1), dtype=np.int64)
    np.cumsum(per_row_block, axis=1, dtype=np.int64, out=col_totals[:, 1:])
    return col_totals[:, col_end] - col_totals[:, col_start]

In [None]:
target_resolution = output_resolution

# Resample the reprojected flood extent to percent-flooded cells and write it out as the temporary raster
flood_percent_250m = calculate_flood_percentage(ras_working, target_resolution=target_resolution)
flood_percent_250m.rio.to_raster(temp_ras)

# Sanity check: the flooded area implied by the percent raster should be close to the original count
total_flooded_pixels_original = (ras_working.values == 1).sum()

# Nominal number of original pixels covered by one target pixel (scale_x * scale_y)
current_res_x, current_res_y = ras_working.rio.resolution()
scale_x = abs(target_resolution / current_res_x)
scale_y = abs(target_resolution / current_res_y)
pixels_per_target = scale_x * scale_y

# percentage / 100 * pixels_per_target is the flooded-pixel equivalent of one target pixel; NaNs are skipped
equivalent_flooded_pixels = np.nansum(flood_percent_250m.values / 100 * pixels_per_target)

pixel_difference = equivalent_flooded_pixels - total_flooded_pixels_original
percent_difference = (pixel_difference / total_flooded_pixels_original) * 100

summary_lines = [
    f"Total flooded pixels in original: {total_flooded_pixels_original}",
    f"Equivalent flooded pixels from percentage raster: {equivalent_flooded_pixels:.1f}",
    f"Difference: {pixel_difference:.1f} pixels",
    f"Percentage difference: {percent_difference:.2f}%",
]
print("\n".join(summary_lines))

Total flooded pixels in original: 268862
Equivalent flooded pixels from percentage raster: 268562.5
Difference: -299.5 pixels
Percentage difference: -0.11%


In [None]:
# Snap the percent raster onto the CONUS grid: same CRS, transform and full CONUS dimensions.
# Nearest-neighbour resampling keeps the percent values unchanged.
vrt_options = dict(
    resampling=Resampling.nearest,
    crs=conus_crs,
    transform=conus_transform,
    height=conus_height,
    width=conus_width,
)

# The warped VRT is a lazy view of the source; copying it writes the aligned GeoTIFF to disk
with rasterio.open(temp_ras) as src, WarpedVRT(src, dtype="float32", **vrt_options) as vrt:
    rio_shutil.copy(vrt, temp_ras_aligned, driver="GTiff", tiled="YES", compress="LZW")

In [None]:
# clip flood back to original extent. Keeps conus grid alignment
# Arguments are passed as a list so paths containing whitespace are not split apart.
cmd = ["rio", "clip", str(temp_ras_aligned), str(final), "--like", str(stac_item), "--overwrite"]

# check=True raises CalledProcessError if the clip fails, instead of silently carrying on without a final raster
subprocess.run(cmd, shell=False, capture_output=True, check=True)

0

In [None]:
# Remove the intermediate rasters if they are still on disk
temp_files = [temp_ras, temp_ras_aligned]
for temp_path in temp_files:
    if not os.path.exists(temp_path):
        continue
    os.remove(temp_path)

## Other cases for CLI
Demonstrates other uses of the CLI and the outputs they generate. These can be used to generate test cases.

In [33]:
# A raster re-projected but not aligned to grid - should succeed, but result will not match grid alignment
final = processing_path / f"flood_percent_{stac_asset_id}_no_grid.tif"
# Arguments are passed as a list so paths containing whitespace are not split apart
cmd = [
    "python",
    "../scripts/flood_percent_raster.py",
    str(stac_item),
    str(final),
    "--crs",
    "6350",
    "--resolution",
    "250",
    "--overwrite",
]

output = subprocess.run(cmd, shell=False, capture_output=True)
print(output)
print(codecs.decode(output.stdout, "utf8"))

CompletedProcess(args=['python', '../scripts/flood_percent_raster.py', '/home/quercus.hamlin/Documents/code/f1_trainer/examples/data/flood/download/NA_E087N027T3_ENSEMBLE_FLOOD_20210518T001113_VV_NA020M_E087N027T3_20210518.tif', '/home/quercus.hamlin/Documents/code/f1_trainer/examples/data/flood/processed/flood_percent_E087N027T3_Observed_Flood_Extent_no_grid.tif', '--crs', '6350', '--resolution', '250', '--overwrite'], returncode=0, stdout=b'Reprojected raster to new CRS\nRemoved temporary /home/quercus.hamlin/Documents/code/f1_trainer/examples/data/flood/processed/temp_raster.tif\n', stderr=b'')
Reprojected raster to new CRS
Removed temporary /home/quercus.hamlin/Documents/code/f1_trainer/examples/data/flood/processed/temp_raster.tif



In [35]:
# A raster not reprojected that has a resolution too big for its input resolution (0.00003) and causes a reproject error
# Should fail
final = processing_path / f"flood_percent_{stac_asset_id}_bad_resolution.tif"
# Arguments are passed as a list so paths containing whitespace are not split apart
cmd = [
    "python",
    "../scripts/flood_percent_raster.py",
    str(stac_item),
    str(final),
    "--resolution",
    "250",
    "--overwrite",
]

output = subprocess.run(cmd, shell=False, capture_output=True)
print(output)
# The failure is reported on stderr, so that is the stream shown here
print(codecs.decode(output.stderr, "utf8"))

CompletedProcess(args=['python', '../scripts/flood_percent_raster.py', '/home/quercus.hamlin/Documents/code/f1_trainer/examples/data/flood/download/NA_E087N027T3_ENSEMBLE_FLOOD_20210518T001113_VV_NA020M_E087N027T3_20210518.tif', '/home/quercus.hamlin/Documents/code/f1_trainer/examples/data/flood/processed/flood_percent_E087N027T3_Observed_Flood_Extent_bad_resolution.tif', '--resolution', '250', '--overwrite'], returncode=1, stdout=b'', stderr=b'Traceback (most recent call last):\n  File "/home/quercus.hamlin/Documents/code/f1_trainer/examples/../scripts/flood_percent_raster.py", line 81, in calculate_flood_percentage\n    percentage_raster = raster.rio.reproject(\n                        ^^^^^^^^^^^^^^^^^^^^^\n  File "/home/quercus.hamlin/Documents/code/f1_trainer/.venv/lib/python3.11/site-packages/rioxarray/raster_array.py", line 470, in reproject\n    dst_affine, dst_width, dst_height = _make_dst_affine(\n                                        ^^^^^^^^^^^^^^^^^\n  File "/home/quercu

In [38]:
# Trying to overwrite an existing raster without passing --overwrite
# Should fail
final = processing_path / f"flood_percent_{stac_asset_id}_no_overwrite.tif"
# Create (or truncate) a placeholder file at the output path; the with-block closes the handle right away
with open(final, "w"):
    pass

# Arguments are passed as a list so paths containing whitespace are not split apart
cmd = [
    "python",
    "../scripts/flood_percent_raster.py",
    str(stac_item),
    str(final),
    "--grid",
    str(conus_grid_local),
    "--resolution",
    "250",
]

output = subprocess.run(cmd, shell=False, capture_output=True)
print(output)
# The failure is reported on stderr, so that is the stream shown here
print(codecs.decode(output.stderr, "utf8"))

CompletedProcess(args=['python', '../scripts/flood_percent_raster.py', '/home/quercus.hamlin/Documents/code/f1_trainer/examples/data/flood/download/NA_E087N027T3_ENSEMBLE_FLOOD_20210518T001113_VV_NA020M_E087N027T3_20210518.tif', '/home/quercus.hamlin/Documents/code/f1_trainer/examples/data/flood/processed/flood_percent_E087N027T3_Observed_Flood_Extent_no_overwrite.tif', '--grid', './data/conus_grid_temp2.tif', '--resolution', '250'], returncode=1, stdout=b'', stderr=b'Traceback (most recent call last):\n  File "/home/quercus.hamlin/Documents/code/f1_trainer/examples/../scripts/flood_percent_raster.py", line 183, in <module>\n    generate_flood_percent()\n  File "/home/quercus.hamlin/Documents/code/f1_trainer/.venv/lib/python3.11/site-packages/click/core.py", line 1161, in __call__\n    return self.main(*args, **kwargs)\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File "/home/quercus.hamlin/Documents/code/f1_trainer/.venv/lib/python3.11/site-packages/click/core.py", line 1082, in main\n   