# Filter GIRI Flood Maps for Brazil

Simple clip to Brazil boundaries - no aggregation, maintains original resolution.

In [1]:
import os
import glob
import geopandas as gpd
import rasterio
from rasterio.mask import mask
from pathlib import Path
import numpy as np

# Define paths
project_root = Path().resolve().parent
giri_raw_dir = project_root / 'workspace' / 'GIRI_raw'
# Use ADM2 boundaries to ensure all municipalities (including Fernando de Noronha) are included
# ADM1 boundaries only extend to -34.66°E, but Fernando de Noronha extends to -32.38°E
brazil_boundaries_file = project_root / 'workspace' / "Brazil Borders" / "geoBoundaries-BRA-ADM0-all" / "geoBoundaries-BRA-ADM0.shp"
output_dir = giri_raw_dir / 'brazil'
output_dir.mkdir(exist_ok=True)

# Load Brazil boundaries (ADM2 includes all municipalities including Fernando de Noronha)
brazil_gdf = gpd.read_file(brazil_boundaries_file)
brazil_union = brazil_gdf.geometry.union_all()

buffer_km = 50
buffer_m = buffer_km * 1000

print(f"\nApplying {buffer_km}km buffer...")
# Project to metric CRS (EPSG:3857) for accurate buffering in meters
brazil_metric = gpd.GeoSeries([brazil_union], crs=brazil_gdf.crs).to_crs("EPSG:3857")
brazil_buffered_metric = brazil_metric.buffer(buffer_m)
# Project back to original CRS
brazil_buffered = brazil_buffered_metric.to_crs(brazil_gdf.crs)
brazil_union = brazil_buffered.iloc[0]

print(f"Buffered bounds: {brazil_buffered.total_bounds}")

# Verify the bounds include Fernando de Noronha
print(f"Brazil boundaries bounds: {brazil_gdf.total_bounds}")
print(f"Easternmost longitude: {brazil_gdf.total_bounds[2]}")
print(f"(Fernando de Noronha is at ~-32.38°E, so this should be >= -32.38)")

# Find all TIFF files
tif_files = sorted(glob.glob(str(giri_raw_dir / '*.tif')))

print(f'Found {len(tif_files)} TIFF files to process')

# Process each file
for tif_file in tif_files:
    tif_name = Path(tif_file).name
    output_file = output_dir / tif_name
    
    if output_file.exists():
        print(f'Skipping {tif_name} (already exists)')
        continue
    
    print(f'Processing: {tif_name}')
    
    # Get original file size
    original_size_gb = os.path.getsize(tif_file) / (1024**3)
    print(f'   Original size: {original_size_gb:.2f} GB')
    
    with rasterio.open(tif_file) as src:
        print(f'   Original dimensions: {src.width} x {src.height}')
        print(f'   Original bounds: {src.bounds}')
        print(f'   Original dtype: {src.dtypes[0]}')
        print(f'   Original nodata: {src.nodata}')
        
        # Reproject Brazil geometry to match raster CRS
        if brazil_gdf.crs != src.crs:
            brazil_reprojected = gpd.GeoSeries([brazil_union], crs=brazil_gdf.crs).to_crs(src.crs)
            brazil_geom = brazil_reprojected.iloc[0]
        else:
            brazil_geom = brazil_union
        
        # Clip the raster - crop=True reduces extent, filled=False preserves nodata
        # This keeps EXACT same values, just crops spatial extent
        out_image, out_transform = mask(src, [brazil_geom], crop=True, filled=False)
        
        print(f'   Clipped dimensions: {out_image.shape[2]} x {out_image.shape[1]}')
        
        # Preserve ALL original metadata for exact value reproduction
        out_meta = src.meta.copy()
        out_meta.update({
            'driver': 'GTiff',
            'height': out_image.shape[1],
            'width': out_image.shape[2],
            'transform': out_transform,
            # Compression settings - these don't change values, just file size
            'compress': 'DEFLATE',
            'predictor': 2,  # Works with DEFLATE for numeric data
            'tiled': True,
            'blockxsize': 256,
            'blockysize': 256,
            'ZLEVEL': 9
        })
        
        # Write with exact same data type and nodata value
        with rasterio.open(output_file, 'w', **out_meta) as dst:
            dst.write(out_image)
    
    # Verify the output
    print(f'   Verifying output integrity...')
    with rasterio.open(output_file) as verify:
        print(f'   Output dtype: {verify.dtypes[0]} (should match original)')
        print(f'   Output nodata: {verify.nodata} (should match original)')
        
        # Read a sample to check value range
        sample = verify.read(1)
        valid_data = sample[~np.isnan(sample)] if np.isnan(verify.nodata or np.nan) else sample[sample != verify.nodata]
        if len(valid_data) > 0:
            print(f'   Data range: [{valid_data.min():.6f}, {valid_data.max():.6f}]')
    
    # Get output file size
    output_size_gb = os.path.getsize(output_file) / (1024**3)
    print(f'   Clipped size: {output_size_gb:.2f} GB')
    
    if output_size_gb < original_size_gb:
        reduction_pct = (1 - output_size_gb / original_size_gb) * 100
        print(f'   Size reduction: {reduction_pct:.1f}%')
    else:
        increase_pct = (output_size_gb / original_size_gb - 1) * 100
        print(f'   WARNING: Size increased by {increase_pct:.1f}%!')
    
    print(f'   ✓ Saved to: {output_file}')
    print()

print('Processing complete')
print(f'Output files saved in: {output_dir}')



Applying 50km buffer...
Buffered bounds: [-74.43960587 -34.12331173 -28.40041271   5.71770308]
Brazil boundaries bounds: [-73.99046792 -33.75076853 -28.84916792   5.27113147]
Easternmost longitude: -28.849167916992187
(Fernando de Noronha is at ~-32.38°E, so this should be >= -32.38)
Found 24 TIFF files to process
Skipping flood_pc_100_glob.tif (already exists)
Processing: flood_pc_10_glob.tif
   Original size: 3.18 GB
   Original dimensions: 432000 x 216000
   Original bounds: BoundingBox(left=-180.0, bottom=-90.0, right=180.0, top=90.0)
   Original dtype: uint32
   Original nodata: 0.0
   Clipped dimensions: 55248 x 47810
   Verifying output integrity...
   Output dtype: uint32 (should match original)
   Output nodata: 0.0 (should match original)
   Data range: [0.000000, 10428.000000]
   Clipped size: 0.18 GB
   Size reduction: 94.5%
   ✓ Saved to: /Users/bertrandgallice/code/Theia-Finance-Labs/climate.risk.tool/workspace/GIRI_raw/brazil/flood_pc_10_glob.tif

Skipping flood_pc_200_

In [5]:
import os
import numpy as np
import xarray as xr
import rioxarray as rxr  # make sure: pip/conda install rioxarray

folderr_brazil = "../workspace/GIRI_raw/brazil"

# ---------------- config ----------------
scenarios = ["pc", "rcp85", "rcp26"]
return_periods = [500, 200, 100, 50, 25, 5, 2]


out_nc  = "../workspace/Flood/depth/ensemble_return_period.nc"

# map to human-readable names (as a coord)
dict_scenarios = {
    "pc": "Present Climate",
    "rcp85": "SSP5-8.5",
    "rcp26": "SSP1-2.6",
}

# crop extent (lon_min, lon_max, lat_max, lat_min)
LON_MIN, LON_MAX = -75.0, -30.0
LAT_MAX, LAT_MIN = 10.0, -35.0

# chunk sizes for dask/xarray and NetCDF
CHUNK_LAT, CHUNK_LON = 512, 512

# ----------------------------------------
list_da = []

for scenario in scenarios:
    for rp in return_periods:
        fpath = os.path.join(folderr_brazil, f"flood_{scenario}_{rp}_glob.tif")
        if not os.path.exists(fpath):
            print(f"[WARN] Missing: {fpath} (skipping)")
            continue

        print(f"[INFO] Opening: {fpath}")
        try:
            da = rxr.open_rasterio(
                fpath,
                chunks={"x": CHUNK_LON, "y": CHUNK_LAT},
                masked=True,
            )
        except Exception as e:
            print(f"[ERROR] Failed to open {fpath}: {e}")
            continue

        # squeeze band -> 2D (y,x)
        if "band" in da.dims and da.sizes["band"] == 1:
            da = da.squeeze("band", drop=True)

        # ensure CRS; many global hazard tifs are EPSG:4326
        if da.rio.crs is None:
            da = da.rio.write_crs("EPSG:4326")

        # crop (note: y usually descends; slice handles either order)
        da = da.sel(x=slice(LON_MIN, LON_MAX), y=slice(LAT_MAX, LAT_MIN))

        # rename dims/var
        da = da.rename({"x": "lon", "y": "lat"})
        da.name = "flood_depth"

        # set dtype (float32 is compact and typical for depths)
        da = da.astype("float32")

        # expand coords with scenario + return_period
        da = da.expand_dims(
            {
                "GWL": [dict_scenarios.get(scenario, scenario)],
                "return_period": [rp],
                "ensemble": ["mean"],
            }
        )

        list_da.append(da)

# guard: anything loaded?
if not list_da:
    raise RuntimeError("No rasters were found/loaded. Check file names and folder.")

# combine by coords into one cube
print("[INFO] Combining arrays…")
da_all = xr.combine_by_coords(list_da, combine_attrs="override")

# order dims
da_all = da_all.transpose("ensemble","GWL", "return_period", "lat", "lon")

# set some attrs
da_all.attrs.update(
    {
        "long_name": "Flood depth",
        "units": "m",
        "source": "UNEP GRID hazards data",
        "note": "Cropped to South America; scenarios labelled by human-readable names.",
    }
)
da_all["GWL"].attrs["description"] = "Scenario label"
da_all["return_period"].attrs["units"] = "years"

# rechunk uniformly for writing
da_all = da_all.chunk({"ensemble": 1, "GWL": 1, "return_period": 1, "lat": CHUNK_LAT, "lon": CHUNK_LON})



[INFO] Opening: ../workspace/GIRI_raw/brazil/flood_pc_500_glob.tif
[INFO] Opening: ../workspace/GIRI_raw/brazil/flood_pc_200_glob.tif
[INFO] Opening: ../workspace/GIRI_raw/brazil/flood_pc_100_glob.tif
[INFO] Opening: ../workspace/GIRI_raw/brazil/flood_pc_50_glob.tif
[INFO] Opening: ../workspace/GIRI_raw/brazil/flood_pc_25_glob.tif
[INFO] Opening: ../workspace/GIRI_raw/brazil/flood_pc_5_glob.tif
[INFO] Opening: ../workspace/GIRI_raw/brazil/flood_pc_2_glob.tif
[INFO] Opening: ../workspace/GIRI_raw/brazil/flood_rcp85_500_glob.tif
[INFO] Opening: ../workspace/GIRI_raw/brazil/flood_rcp85_200_glob.tif
[INFO] Opening: ../workspace/GIRI_raw/brazil/flood_rcp85_100_glob.tif
[INFO] Opening: ../workspace/GIRI_raw/brazil/flood_rcp85_50_glob.tif
[INFO] Opening: ../workspace/GIRI_raw/brazil/flood_rcp85_25_glob.tif
[INFO] Opening: ../workspace/GIRI_raw/brazil/flood_rcp85_5_glob.tif
[INFO] Opening: ../workspace/GIRI_raw/brazil/flood_rcp85_2_glob.tif
[INFO] Opening: ../workspace/GIRI_raw/brazil/flood_rc

In [None]:
# encode & save
encoding = {
    "flood_depth": {
        "zlib": True,
        "complevel": 4,
        "dtype": "float32",
        "chunksizes": (1, 1, 1, CHUNK_LAT, CHUNK_LON),
        "_FillValue": np.float32(np.nan),  # keep NaN for no-data
    }
}


out_nc  = "../workspace/hazards/Flood/depth(cm)/ensemble_return_period.nc"
print(f"[INFO] Writing NetCDF -> {out_nc}")
# Create output directory if it doesn't exist
os.makedirs(os.path.dirname(out_nc), exist_ok=True)
# da_all is already a Dataset, so just call to_netcdf directly
da_all.to_netcdf(out_nc, encoding=encoding)
print("[OK] Done.")


[INFO] Writing NetCDF -> ../workspace/hazards/Flood/depth(cm)/ensemble_return_period.nc
