# Pre-processing Static Inputs - CONUS

In [None]:
import math
import os

import geopandas as gpd
import numpy as np
import rasterio
import requests
from dotenv import load_dotenv
from rasterio import shutil as rio_shutil
from rasterio.enums import Resampling
from rasterio.merge import merge
from rasterio.vrt import WarpedVRT
from rasterio.warp import calculate_default_transform

load_dotenv()

# Static Input: Surface Water Extent

From https://global-surface-water.appspot.com/download

Surface water extent is served as 10x10 degree tiles. Here, we generate the list of tiles that intersect the geometry of our extent. Tiles are downloaded, reprojected, resampled, merged, and aligned to the model grid. Rasterio's WarpedVRT is used to convert between the initial raster and the CONUS grid.

In [None]:
# url sample: https://storage.googleapis.com/global-surface-water/downloads2021/extent/extent_80W_50Nv1_4_2021.tif
url_base = (
    "https://storage.googleapis.com/global-surface-water/downloads2021/extent/extent_lon_latv1_4_2021.tif"
)

# CONUS geometry (used for the download extent) and the model grid to align to
conus_geom = "s3://fim-services-data/f1/data/conus_geom.gpkg"
conus_grid_path = "s3://fim-services-data/f1/data/conus.tif"
conus_grid_local = "./data/conus_grid_temp.tif"

# EPSG codes of the source tiles and of the output grid; output cell size
surface_extent_crs = 4326
output_crs = 6350
output_resolution = 250

# intermediate (merged) and final (grid-aligned) surface water extent rasters
temp_extent_path = "./data/extent/surface_extent_merged.tif"
output_extent_path = "./data/extent/surface_extent.tif"

# working directories for the raw tiles and the per-tile reprojected outputs
extent_download_path = "./data/extent/download/"
extent_processing_path = "./data/extent/processed/"

# Create the working directories. makedirs also builds the intermediate
# "./data" and "./data/extent" levels; a plain mkdir of the leaf directories
# fails with FileNotFoundError when "./data/extent" does not exist yet.
for directory in (extent_download_path, extent_processing_path):
    os.makedirs(directory, exist_ok=True)

In [None]:
# helper functions to download surface water extent
# tiles are named by upper left corner in latlon


def build_extent_url(lat: float, lon: float) -> str:
    """Build the download URL of one surface water extent tile.

    Args:
        lat: Latitude of the tile corner in degrees; negative values are
            labelled south ("S"), everything else north ("N").
        lon: Longitude of the tile corner in degrees; negative values are
            labelled west ("W"), everything else east ("E").

    Returns:
        URL of the form ``.../extent_<lon><E|W>_<lat><N|S>v1_4_2021.tif``.
    """
    # Tile names use whole degrees ("50N", not "50.0N"), so coerce to int
    # before formatting; float inputs would otherwise produce a bad URL.
    lat_label = f"{abs(int(lat))}{'N' if lat >= 0 else 'S'}"
    lon_label = f"{abs(int(lon))}{'E' if lon >= 0 else 'W'}"
    return (
        "https://storage.googleapis.com/global-surface-water/downloads2021/extent/"
        f"extent_{lon_label}_{lat_label}v1_4_2021.tif"
    )


# we need to round up for positives and round down for negatives to get full extents
def roundup(i, to):
    """Return ``i`` raised to the nearest multiple of ``to`` at or above it, as an int."""
    multiples = math.ceil(i / to)
    return int(multiples * to)


def rounddown(i, to):
    """Return ``i`` lowered to the nearest multiple of ``to`` at or below it, as an int."""
    multiples = math.floor(i / to)
    return int(multiples * to)


def get_rounded_values(values):
    """Snap every value to a multiple of 10, moving away from zero.

    Values greater than zero go through ``roundup``; zero and negative values
    go through ``rounddown``. Returns a new list in the same order.
    """
    rounded = []
    for value in values:
        snap = roundup if value > 0 else rounddown
        rounded.append(snap(value, 10))
    return rounded


def get_steps(minv, maxv, step):
    """Return ``np.arange`` steps from ``minv`` that can include ``maxv``.

    ``np.arange`` excludes its stop value, so when ``maxv`` is a whole
    multiple of ``step`` the stop is pushed one past ``maxv``; otherwise
    ``maxv`` is used as the (exclusive) stop unchanged.
    """
    stop = maxv + 1 if (maxv / step).is_integer() else maxv
    return np.arange(minv, stop, step)


def build_full_urls(miny, minx, maxy, maxx) -> list[str]:
    """Return the tile URLs for every 10-degree step spanning the given bounds.

    The bounds are rounded with ``get_rounded_values``, turned into latitude
    and longitude steps with ``get_steps``, and each (lat, lon) pair is passed
    to ``build_extent_url``.
    """
    lat_lo, lon_lo, lat_hi, lon_hi = get_rounded_values([miny, minx, maxy, maxx])
    steps_lat = get_steps(lat_lo, lat_hi, 10)
    steps_lon = get_steps(lon_lo, lon_hi, 10)

    # longitude is the outer loop, so URLs come out grouped by longitude step
    return [build_extent_url(lat, lon) for lon in steps_lon for lat in steps_lat]

In [None]:
# load the CONUS geometry and express it in the CRS of the surface water tiles
gdf = gpd.read_file(conus_geom)
gdf = gdf.to_crs(surface_extent_crs)

# overall bounding box: smallest minimum and largest maximum over all features
extent = gdf.bounds
minx, miny = (extent[column].min() for column in ("minx", "miny"))
maxx, maxy = (extent[column].max() for column in ("maxx", "maxy"))

In [None]:
# download tiles
# download flag - set to false if re-running and don't need to download
download: bool = False

# one URL per tile covering the extent, and the local file each one is saved to
urls = build_full_urls(miny, minx, maxy, maxx)
fnames = [f"{extent_download_path}{url.split('/')[-1]}" for url in urls]

if download:
    for url, fname in zip(urls, fnames, strict=False):
        # stream=True keeps the tile out of memory; timeout stops a stalled
        # connection from hanging the notebook indefinitely
        with requests.get(url, stream=True, timeout=60) as response:
            # fail loudly on 4xx/5xx instead of saving the error body as a .tif
            response.raise_for_status()
            with open(fname, "wb") as f:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    f.write(chunk)
else:
    print("No download performed")

In [None]:
# reproject and resample
# TODO: would prefer to merge in native CRS and do all resampling as warped VRT, but having memory issues because native is ~30 m

# one processed output per downloaded tile: "<tile name without extension>_250.tif"
output_names = [
    f"{extent_processing_path}{tile_path.split('/')[-1].split('.')[0]}_250.tif" for tile_path in fnames
]

for tile_path, output_name in zip(fnames, output_names, strict=False):
    # tiles that already have a processed output are left untouched
    if os.path.isfile(output_name):
        continue

    # read the georeferencing of the source tile
    with rasterio.open(tile_path, "r") as src:
        crs, bounds = src.crs, src.bounds
        width, height = src.width, src.height

    # transform and size the tile would have in the output CRS at the output resolution
    transform, width, height = calculate_default_transform(
        src_crs=crs,
        dst_crs=output_crs,
        width=width,
        height=height,
        left=bounds.left,
        bottom=bounds.bottom,
        right=bounds.right,
        top=bounds.top,
        resolution=output_resolution,
    )

    print(f"Starting warped VRT resampling for {output_name}")
    # the warped VRT reprojects and resamples as float32 to preserve the 0 - 1 flood extent
    warp_kwargs = {
        "crs": output_crs,
        "resampling": Resampling.bilinear,
        "dtype": "float32",
        "height": height,
        "width": width,
        "transform": transform,
    }
    with rasterio.open(tile_path) as src, WarpedVRT(src, **warp_kwargs) as vrt:
        rio_shutil.copy(vrt, output_name, driver="GTiff", tiled="YES", compress="LZW")

In [None]:
# mosaic the per-tile rasters into one float32 array plus its affine transform
dest, out_affine = merge(output_names, dtype="float32", resampling=Resampling.bilinear)

# describe the output GeoTIFF; dest is indexed as (band, row, column)
merged_profile = {
    "driver": "GTiff",
    "height": int(dest.shape[1]),
    "width": int(dest.shape[2]),
    "count": 1,
    "dtype": dest.dtype,
    "crs": output_crs,
    "transform": out_affine,
    "tiled": "YES",
    "compress": "LZW",
}

# write the mosaic to the temporary merged raster
with rasterio.open(temp_extent_path, "w", **merged_profile) as dst:
    dst.write(dest)

In [None]:
# read the CONUS grid's georeferencing so outputs can be aligned to it
with rasterio.open(conus_grid_path) as src:
    transform = src.transform
    height, width = src.height, src.width
    bounds = src.bounds

# WarpedVRT settings that place a raster on the CONUS grid
vrt_options = dict(
    resampling=Resampling.bilinear,
    crs=output_crs,
    transform=transform,
    height=height,
    width=width,
)

In [None]:
# put the merged extent on the CONUS grid (CRS, resolution, alignment) and save it
gtiff_options = {"driver": "GTiff", "tiled": "YES", "compress": "LZW"}
with rasterio.open(temp_extent_path) as src, WarpedVRT(src, dtype="float32", **vrt_options) as vrt:
    rio_shutil.copy(vrt, output_extent_path, **gtiff_options)

# Static Input: Flow Direction / Flow Accumulation

Flow direction / accumulation from hydrosheds https://www.hydrosheds.org/hydrosheds-core-downloads

NOTE: For this MVP, we are resampling (changing resolutions) and aligning to the CONUS grid. This may not be an accurate representation of flow direction and flow accumulation because they are landscape-connected variables. Resampling assigns each pixel a value based on surrounding values and may break up the connectivity of a landscape.

A future enhancement is to research developing our own FA/FD grid at our model's resolution.

In [None]:
# target grid: CONUS raster to align to, its EPSG code, and the output cell size
conus_grid_path = "s3://fim-services-data/f1/data/conus.tif"
output_crs = 6350
output_resolution = 250

# raw flow accumulation / flow direction rasters
flow_acc_path = "s3://fim-services-data/f1/data/inputs/flow-accumulation-direction/raw/hyd_na_acc_15s.tif"
flow_dir_path = "s3://fim-services-data/f1/data/inputs/flow-accumulation-direction/raw/hyd_na_dir_15s.tif"

# output
fa_output = "./data/flow/flow_accumulation.tif"
fd_output = "./data/flow/flow_direction.tif"

# create the output directories, parent first, skipping any that already exist
for directory in ("./data", "./data/flow"):
    if not os.path.exists(directory):
        os.mkdir(directory)

In [None]:
# pull the transform and dimensions of the CONUS grid to use as the warp target
with rasterio.open(conus_grid_path) as src:
    bounds, transform = src.bounds, src.transform
    width = src.width
    height = src.height

# WarpedVRT keyword arguments describing the CONUS grid
vrt_options = dict(
    resampling=Resampling.bilinear,
    crs=output_crs,
    transform=transform,
    height=height,
    width=width,
)

In [None]:
# put flow accumulation on the CONUS grid (CRS, resolution, alignment) as float32
gtiff_options = {"driver": "GTiff", "tiled": "YES", "compress": "LZW"}
with rasterio.open(flow_acc_path) as src, WarpedVRT(src, dtype="float32", **vrt_options) as vrt:
    rio_shutil.copy(vrt, fa_output, **gtiff_options)

In [None]:
# warp to CONUS grid CRS, resolution, and alignment
# Flow direction cells hold discrete direction codes (see the HydroSHEDS
# technical documentation), so interpolating between neighbours would produce
# codes that are not valid directions. Override the shared bilinear setting
# with nearest-neighbour so every output cell keeps an original code.
fd_vrt_options = {**vrt_options, "resampling": Resampling.nearest}
with rasterio.open(flow_dir_path) as src:
    with WarpedVRT(src, dtype="uint8", **fd_vrt_options) as vrt:
        rio_shutil.copy(vrt, fd_output, driver="GTiff", tiled="YES", compress="LZW")