In [1]:
# batch_resample_to_reference_paths.py
# -----------------------------------------------------------
# Edit the PATHS section below (already filled with your paths), then run:
#   python batch_resample_to_reference_paths.py
# -----------------------------------------------------------

# ============== PATHS (EDIT THESE IF NEEDED) ==============
IN_DIR   = r"D:/Earthquake_Project/GIS processing"  # folder with rasters to resample
REF_PATH = r"D:/Earthquake_Project/GEM-GSHM_PGA-475y-rock_v2023/v2023_1_pga_475_rock_3min.tif"  # reference raster
OUT_DIR  = r"D:/Earthquake_Project/Resampled"       # output folder
METHOD   = "bilinear"   # choose: "nearest" (categorical) | "bilinear" | "cubic" | "cubic_spline" | "lanczos"
OVERWRITE = True        # set False to skip files that already exist
RECURSIVE = False       # set True to include subfolders
# ==========================================================

from pathlib import Path
import numpy as np
import rasterio
from rasterio.warp import reproject, Resampling

# Only interpolation methods (no min/max/average)
RESAMPLING_MAP = {
    "nearest": Resampling.nearest,       # categorical (classes)
    "bilinear": Resampling.bilinear,     # continuous (default)
    "cubic": Resampling.cubic,           # continuous
    "cubic_spline": Resampling.cubic_spline,
    "lanczos": Resampling.lanczos
}

def resample_to_match(src_path, ref_path, out_path, resampling="bilinear", compress="LZW"):
    """
    Reproject + resample src raster to EXACTLY match the reference raster's grid.
    Interpolation only (nearest/bilinear/cubic/cubic_spline/lanczos).
    """
    src_path, ref_path, out_path = map(Path, (src_path, ref_path, out_path))
    if resampling not in RESAMPLING_MAP:
        raise ValueError(f"Invalid resampling '{resampling}'. Choose from {list(RESAMPLING_MAP)}")
    resampling_enum = RESAMPLING_MAP[resampling]

    # Reference grid
    with rasterio.open(ref_path) as ref_ds:
        dst_crs = ref_ds.crs
        dst_transform = ref_ds.transform
        dst_width = ref_ds.width
        dst_height = ref_ds.height

    with rasterio.open(src_path) as src_ds:
        src_nodata = src_ds.nodata
        src_dtype = src_ds.dtypes[0]
        count = src_ds.count

        # If using interpolating resampling on integer data, promote to float32
        out_dtype = src_dtype
        if resampling_enum in {Resampling.bilinear, Resampling.cubic,
                               Resampling.cubic_spline, Resampling.lanczos} and \
           np.issubdtype(np.dtype(src_dtype), np.integer):
            out_dtype = "float32"

        # Allocate destination array
        dst = np.zeros((count, dst_height, dst_width), dtype=out_dtype)

        # Reproject/resample band by band
        for i in range(1, count + 1):
            reproject(
                source=rasterio.band(src_ds, i),
                destination=dst[i - 1],
                src_transform=src_ds.transform,
                src_crs=src_ds.crs,
                src_nodata=src_nodata,
                dst_transform=dst_transform,
                dst_crs=dst_crs,
                dst_nodata=src_nodata if np.issubdtype(np.dtype(out_dtype), np.number) else None,
                resampling=resampling_enum,
            )

        # Output profile based on reference grid + source metadata
        profile = src_ds.profile.copy()
        profile.update(
            driver="GTiff",
            height=dst_height,
            width=dst_width,
            transform=dst_transform,
            crs=dst_crs,
            dtype=out_dtype,
            count=count,
            tiled=True,
            # block sizes must be multiples of 16 to avoid RasterBlockError
            blockxsize=256,
            blockysize=256,
            compress=compress if compress else None,
            BIGTIFF="IF_SAFER",
        )
        # Set nodata sensibly
        if np.dtype(out_dtype).kind == "f":
            profile["nodata"] = np.nan if src_nodata is None else src_nodata
        else:
            profile["nodata"] = src_nodata

        out_path.parent.mkdir(parents=True, exist_ok=True)
        with rasterio.open(out_path, "w", **profile) as dst_ds:
            dst_ds.write(dst)

def batch_resample_dir(in_dir, ref_path, out_dir, resampling="bilinear", overwrite=False, recursive=False):
    """
    Resample all .tif/.tiff in in_dir to match ref_path, writing to out_dir.
    Skips the reference file itself if it lives in the input tree.
    """
    in_dir = Path(in_dir)
    out_dir = Path(out_dir)
    ref_path = Path(ref_path).resolve()
    out_dir.mkdir(parents=True, exist_ok=True)

    patterns = ["*.tif", "*.tiff", "*.TIF", "*.TIFF"]  # handle mixed-case extensions
    files = []
    for pat in patterns:
        files += list(in_dir.rglob(pat) if recursive else in_dir.glob(pat))
    files = sorted(set(files))  # de-dup if multiple patterns matched same file

    if not files:
        print(f"No GeoTIFFs found in {in_dir} (recursive={recursive})")
        return

    for src in files:
        if src.resolve() == ref_path:
            print(f"Skip reference: {src.name}")
            continue
        # Preserve subfolder structure when recursive
        rel = src.relative_to(in_dir) if recursive else src.name
        out = out_dir / rel
        out.parent.mkdir(parents=True, exist_ok=True)

        if out.exists() and not overwrite:
            print(f"Skip (exists): {out}")
            continue

        print(f"Resampling {src} -> {out}")
        try:
            resample_to_match(src, ref_path, out, resampling=resampling)
        except Exception as e:
            print(f"FAILED: {src} | {e}")

if __name__ == "__main__":
    batch_resample_dir(
        IN_DIR,
        REF_PATH,
        OUT_DIR,
        resampling=METHOD,
        overwrite=OVERWRITE,
        recursive=RECURSIVE
    )


Resampling D:\Earthquake_Project\GIS processing\Sea level rise 2020 ssp119_hist.tif -> D:\Earthquake_Project\Resampled\Sea level rise 2020 ssp119_hist.tif
Resampling D:\Earthquake_Project\GIS processing\Sea level rise 2020 ssp126.tif -> D:\Earthquake_Project\Resampled\Sea level rise 2020 ssp126.tif
Resampling D:\Earthquake_Project\GIS processing\Sea level rise 2020 ssp245.tif -> D:\Earthquake_Project\Resampled\Sea level rise 2020 ssp245.tif
Resampling D:\Earthquake_Project\GIS processing\Sea level rise 2020 ssp585.tif -> D:\Earthquake_Project\Resampled\Sea level rise 2020 ssp585.tif
Resampling D:\Earthquake_Project\GIS processing\Sea level rise 2030 ssp126.tif -> D:\Earthquake_Project\Resampled\Sea level rise 2030 ssp126.tif
Resampling D:\Earthquake_Project\GIS processing\Sea level rise 2030 ssp245.tif -> D:\Earthquake_Project\Resampled\Sea level rise 2030 ssp245.tif
Resampling D:\Earthquake_Project\GIS processing\Sea level rise 2030 ssp585.tif -> D:\Earthquake_Project\Resampled\Sea le

In [2]:
# batch_divide_by_reference.py
# --------------------------------------------
# For every resampled TIFF in IN_DIR, compute:
#   (target_band1) / (reference_band1)
# Reference file: "Sea level rise 2020 ssp119_hist.tif"
# Outputs go to OUT_DIR with suffix "_ratio_to_ssp119_hist.tif"
# --------------------------------------------

import os
from pathlib import Path
import numpy as np
import rasterio
from rasterio.enums import Resampling
from rasterio.warp import reproject
from rasterio.windows import Window
from tqdm import tqdm

# ========== EDIT THESE PATHS ==========
IN_DIR   = Path(r"D:/Earthquake_Project/Resampled")  # folder with all resampled TIFFs
OUT_DIR  = Path(r"D:/Earthquake_Project/ratios_out")       # where to save ratio tiffs
REF_TIF  = Path(r"D:/Earthquake_Project/Resampled/Sea level rise 2020 ssp119_hist.tif")
# =====================================

OUT_DIR.mkdir(parents=True, exist_ok=True)

def open_ref(ref_path: Path):
    ref = rasterio.open(ref_path)
    # Read reference band 1 fully
    ref_arr = ref.read(1, masked=True)  # masked array using nodata
    ref_meta = ref.meta.copy()
    ref_nodata = ref.nodata
    return ref, ref_arr, ref_meta, ref_nodata

def align_to_ref(src_path: Path, ref_ds, ref_meta):
    """Read src band1 and align (reproject/resample) to reference grid."""
    with rasterio.open(src_path) as src:
        # Prepare destination array aligned to reference shape/dtype
        dst_arr = np.zeros((ref_meta['height'], ref_meta['width']), dtype=np.float32)
        dst_arr[:] = np.nan

        # Build reproject call
        reproject(
            source=rasterio.band(src, 1),
            destination=dst_arr,
            src_transform=src.transform,
            src_crs=src.crs,
            src_nodata=src.nodata,
            dst_transform=ref_ds.transform,
            dst_crs=ref_ds.crs,
            dst_nodata=np.nan,
            resampling=Resampling.nearest  # input is already resampled, so nearest is fine & fast
        )
        # Return as a masked array where NaN becomes masked
        dst_mask = np.isnan(dst_arr)
        return np.ma.array(dst_arr, mask=dst_mask)

def divide_safe(num_ma, den_ma, den_nodata=None):
    """Compute num/den with masking where denominator is nodata or zero."""
    # Start with combined mask of any nodata
    mask = np.ma.getmaskarray(num_ma) | np.ma.getmaskarray(den_ma)
    den = den_ma.filled(np.nan)

    # Also mask where denominator is 0 (avoid divide-by-zero)
    zero_mask = (den == 0)
    mask = mask | zero_mask

    # Perform division
    num = num_ma.filled(np.nan)
    out = np.divide(num, den, where=~mask, out=np.full_like(num, np.nan, dtype=np.float32)).astype(np.float32)

    # Return masked array
    return np.ma.array(out, mask=mask)

def main():
    # --- Load reference once ---
    if not REF_TIF.exists():
        raise FileNotFoundError(f"Reference not found: {REF_TIF}")

    ref_ds, ref_arr, ref_meta, ref_nodata = open_ref(REF_TIF)

    # Normalize reference to float32 masked array
    ref_arr = ref_arr.astype(np.float32)

    # Collect candidate tiffs
    tiffs = sorted([p for p in IN_DIR.glob("*.tif") if p.is_file()])

    # Skip the reference file itself
    tiffs = [p for p in tiffs if p.resolve() != REF_TIF.resolve()]

    if not tiffs:
        print("No TIFFs found to process in:", IN_DIR)
        return

    print(f"Reference: {REF_TIF.name}")
    print(f"Found {len(tiffs)} tiffs to divide.\n")

    for tif in tqdm(tiffs, desc="Dividing by reference", unit="file"):
        try:
            # Read/align numerator (current tif) to reference grid
            num_ma = align_to_ref(tif, ref_ds, ref_meta)

            # Ensure both are masked arrays float32
            num_ma = num_ma.astype(np.float32)
            den_ma = ref_arr  # already masked float32

            # Compute safe ratio
            ratio_ma = divide_safe(num_ma, den_ma, ref_nodata)

            # Build output path
            out_name = tif.stem + "_ratio_to_ssp119_hist.tif"
            out_path = OUT_DIR / out_name

            # Prepare output profile
            profile = ref_meta.copy()
            profile.update(
                dtype="float32",
                count=1,
                compress="lzw",
                tiled=True,
                blockxsize=min(256, profile['width']),
                blockysize=min(256, profile['height']),
                BIGTIFF="IF_SAFER",
                nodata=-9999.0
            )

            # Write
            with rasterio.open(out_path, "w", **profile) as dst:
                # Fill masked with nodata value
                data_to_write = ratio_ma.filled(profile["nodata"])
                dst.write(data_to_write, 1)

            # Optional: copy over overviews can be added later if needed
            # rasterio.shutil.copyfiles() is not required here.

        except Exception as e:
            print(f"\n[WARN] Failed on {tif.name}: {e}")

    ref_ds.close()
    print("\nDone! Outputs written to:", OUT_DIR)

if __name__ == "__main__":
    main()


Reference: Sea level rise 2020 ssp119_hist.tif
Found 15 tiffs to divide.



Dividing by reference: 100%|█████████████████████████████████████████████████████████| 15/15 [00:31<00:00,  2.09s/file]


Done! Outputs written to: D:\Earthquake_Project\ratios_out





In [4]:
# pga_times_divided_batch_fixed.py
import os, glob, warnings
import numpy as np
import rasterio
from rasterio.warp import reproject, Resampling
from tqdm import tqdm

warnings.filterwarnings("ignore")

# ---------------- EDIT THESE PATHS ----------------
PGA_TIF      = r"D:/Earthquake_Project/GEM-GSHM_PGA-475y-rock_v2023/v2023_1_pga_475_rock_3min.tif"
DIVIDED_DIR  = r"D:/Earthquake_Project/ratios_out"
OUT_DIR      = r"D:/Earthquake_Project/PGA_x_SeaLevel"
GLOB_PATTERN = "*.tif"
# -------------------------------------------------

os.makedirs(OUT_DIR, exist_ok=True)

F32_MAX = np.finfo(np.float32).max

def safe_nodata_for_float32(nodata_value):
    """
    Return a nodata suitable for float32 outputs.
    If nodata is None, NaN is fine for float32.
    If nodata is +/-inf, NaN, or beyond float32 range, use NaN.
    Otherwise return the value.
    """
    if nodata_value is None:
        return np.float32(np.nan)
    if not np.isfinite(nodata_value):
        return np.float32(np.nan)
    if abs(nodata_value) > F32_MAX:
        return np.float32(np.nan)
    return np.float32(nodata_value)

def reproject_to_ref(src_path, ref_profile):
    """Reproject src raster (band 1) to reference grid defined by ref_profile."""
    with rasterio.open(src_path) as src:
        src_nodata = src.nodata
        dst = np.full((ref_profile["height"], ref_profile["width"]), np.nan, dtype=np.float32)

        reproject(
            source=rasterio.band(src, 1),
            destination=dst,
            src_transform=src.transform,
            src_crs=src.crs,
            src_nodata=src_nodata,
            dst_transform=ref_profile["transform"],
            dst_crs=ref_profile["crs"],
            dst_nodata=np.nan,  # standardize to NaN in working grid
            resampling=Resampling.bilinear
        )
    return dst  # contains NaNs where source was nodata

def process_one(divided_path, pga_arr, pga_valid_mask, ref_profile, out_nodata):
    """Compute output = PGA * DIVIDED where DIVIDED valid else PGA."""
    div_arr = reproject_to_ref(divided_path, ref_profile)
    div_valid = np.isfinite(div_arr)

    out = pga_arr.astype(np.float32).copy()

    multiply_mask = pga_valid_mask & div_valid
    out[multiply_mask] = (pga_arr[multiply_mask] * div_arr[multiply_mask]).astype(np.float32)

    # Keep PGA NoData wherever PGA was invalid
    invalid_pga = ~pga_valid_mask
    if np.isnan(out_nodata):
        out[invalid_pga] = np.nan
    else:
        out[invalid_pga] = out_nodata

    return out

def main():
    with rasterio.open(PGA_TIF) as psrc:
        pga_profile = psrc.profile.copy()
        pga_arr = psrc.read(1)  # keep original dtype for mask creation
        pga_nodata_src = psrc.nodata

        # Valid where finite and not equal to declared nodata (if any)
        if pga_nodata_src is None:
            pga_valid_mask = np.isfinite(pga_arr)
        else:
            pga_valid_mask = np.isfinite(pga_arr) & (pga_arr != pga_nodata_src)

        # Prepare output profile: float32 + safe nodata (NaN if needed)
        out_nodata = safe_nodata_for_float32(pga_nodata_src)
        pga_profile.update(
            dtype="float32",
            count=1,
            compress="LZW",
            predictor=2,
            BIGTIFF="IF_SAFER",
            nodata=float(out_nodata)  # OK to be NaN for float32
        )

    divided_paths = sorted(glob.glob(os.path.join(DIVIDED_DIR, GLOB_PATTERN)))
    if not divided_paths:
        print("No divided rasters found. Check DIVIDED_DIR and GLOB_PATTERN.")
        return

    for dpath in tqdm(divided_paths, desc="Processing"):
        try:
            out_arr = process_one(dpath, pga_arr, pga_valid_mask, pga_profile, out_nodata)

            base = os.path.splitext(os.path.basename(dpath))[0]
            out_name = f"PGAx_{base}.tif"
            out_path = os.path.join(OUT_DIR, out_name)

            with rasterio.open(out_path, "w", **pga_profile) as dst:
                dst.write(out_arr.astype(np.float32), 1)

        except Exception as e:
            print(f"Failed on {dpath}: {e}")

    print(f"Done. Outputs in: {OUT_DIR}")

if __name__ == "__main__":
    main()


Processing: 100%|██████████████████████████████████████████████████████████████████████| 15/15 [00:56<00:00,  3.78s/it]

Done. Outputs in: D:/Earthquake_Project/PGA_x_SeaLevel





In [6]:
# pga_times_divided_batch_blended.py
# ----------------------------------
# Multiplies PGA by a set of ratio rasters, but blends the ratio edges
# using a distance-based smoothstep ramp so transitions are soft.

import os, glob, warnings
import numpy as np
import rasterio
from rasterio.warp import reproject, Resampling
from tqdm import tqdm
from scipy.ndimage import distance_transform_edt, gaussian_filter

warnings.filterwarnings("ignore")

# ---------------- EDIT THESE PATHS ----------------
PGA_TIF      = r"D:/Earthquake_Project/GEM-GSHM_PGA-475y-rock_v2023/v2023_1_pga_475_rock_3min.tif"
DIVIDED_DIR  = r"D:/Earthquake_Project/ratios_out"   # folder of ratio *.tif files
OUT_DIR      = r"D:/Earthquake_Project/PGA_x_SeaLevel3"
GLOB_PATTERN = "*.tif"
# ------------- BLENDING PARAMETERS ----------------
# Width of the feather band around the ratio edge, in PIXELS.
# Increase to make edges softer and wider (e.g., 24–48).
BLEND_PX = 32
# Small polish blur after blending (0 to disable).
POST_GAUSS_SIGMA = 1.0
# --------------------------------------------------

os.makedirs(OUT_DIR, exist_ok=True)
F32_MAX = np.finfo(np.float32).max

def safe_nodata_for_float32(nodata_value):
    """
    Ensure a nodata suitable for float32 outputs.
    Prefer NaN if nodata is None, inf, or outside float32 range.
    """
    if nodata_value is None:
        return np.float32(np.nan)
    if not np.isfinite(nodata_value):
        return np.float32(np.nan)
    if abs(nodata_value) > F32_MAX:
        return np.float32(np.nan)
    return np.float32(nodata_value)

def reproject_to_ref(src_path, ref_profile):
    """Reproject src raster (band 1) to reference grid defined by ref_profile."""
    with rasterio.open(src_path) as src:
        src_nodata = src.nodata
        dst = np.full((ref_profile["height"], ref_profile["width"]), np.nan, dtype=np.float32)

        reproject(
            source=rasterio.band(src, 1),
            destination=dst,
            src_transform=src.transform,
            src_crs=src.crs,
            src_nodata=src_nodata,
            dst_transform=ref_profile["transform"],
            dst_crs=ref_profile["crs"],
            dst_nodata=np.nan,  # standardize to NaN in working grid
            resampling=Resampling.bilinear
        )
    return dst  # contains NaNs where source was nodata

def smoothstep01(t):
    """Cubic Hermite smoothstep mapping t in [0,1] -> [0,1]."""
    return t * t * (3.0 - 2.0 * t)

def process_one(divided_path, pga_arr, pga_valid_mask, ref_profile, out_nodata):
    """Compute blended: PGA * blended_multiplier, preserving PGA nodata."""
    # 1) Read ratio/divided layer on the PGA grid
    div_arr = reproject_to_ref(divided_path, ref_profile)

    # 2) Valid mask for the ratio
    valid = np.isfinite(div_arr)

    # 3) Fill NaNs with 1.0 so "outside" leaves PGA unchanged
    div_filled = div_arr.copy()
    div_filled[~valid] = 1.0

    # 4) Distance-based feather around the edge
    #    dist_in  > 0 inside valid area; dist_out > 0 outside area
    dist_in  = distance_transform_edt(valid)
    dist_out = distance_transform_edt(~valid)

    # Signed distance: positive inside valid, negative outside
    signed = dist_in - dist_out

    # Map signed distance to a smooth blend weight in [0,1] over 2*BLEND_PX band
    # signed = -BLEND_PX -> w≈0 (use 1.0); signed = +BLEND_PX -> w≈1 (use div_filled)
    t = np.clip((signed + BLEND_PX) / (2.0 * BLEND_PX), 0.0, 1.0)
    w = smoothstep01(t).astype(np.float32)

    # 5) Blend multiplier between div_filled and 1.0 using weight w
    multiplier = w * div_filled.astype(np.float32) + (1.0 - w) * 1.0

    # 6) Optional tiny Gaussian to polish any banding from the ramp
    if POST_GAUSS_SIGMA and POST_GAUSS_SIGMA > 0:
        multiplier = gaussian_filter(multiplier, sigma=POST_GAUSS_SIGMA, mode="nearest")

    # 7) Multiply PGA only where PGA is valid
    out = pga_arr.astype(np.float32).copy()
    out[pga_valid_mask] = (pga_arr[pga_valid_mask] * multiplier[pga_valid_mask]).astype(np.float32)

    # 8) Keep PGA NoData wherever PGA was invalid
    invalid_pga = ~pga_valid_mask
    if np.isnan(out_nodata):
        out[invalid_pga] = np.nan
    else:
        out[invalid_pga] = out_nodata

    return out

def main():
    with rasterio.open(PGA_TIF) as psrc:
        pga_profile = psrc.profile.copy()
        pga_arr = psrc.read(1)  # keep original dtype for mask creation
        pga_nodata_src = psrc.nodata

        # Valid where finite and not equal to declared nodata (if any)
        if pga_nodata_src is None:
            pga_valid_mask = np.isfinite(pga_arr)
        else:
            pga_valid_mask = np.isfinite(pga_arr) & (pga_arr != pga_nodata_src)

        # Prepare output profile: float32 + safe nodata (NaN if needed)
        out_nodata = safe_nodata_for_float32(pga_nodata_src)
        pga_profile.update(
            dtype="float32",
            count=1,
            compress="LZW",
            predictor=2,
            BIGTIFF="IF_SAFER",
            nodata=float(out_nodata)  # NaN is fine for float32
        )

    divided_paths = sorted(glob.glob(os.path.join(DIVIDED_DIR, GLOB_PATTERN)))
    if not divided_paths:
        print("No divided rasters found. Check DIVIDED_DIR and GLOB_PATTERN.")
        return

    for dpath in tqdm(divided_paths, desc="Processing"):
        try:
            out_arr = process_one(dpath, pga_arr, pga_valid_mask, pga_profile, out_nodata)

            base = os.path.splitext(os.path.basename(dpath))[0]
            out_name = f"PGAx_{base}.tif"
            out_path = os.path.join(OUT_DIR, out_name)

            with rasterio.open(out_path, "w", **pga_profile) as dst:
                dst.write(out_arr.astype(np.float32), 1)

        except Exception as e:
            print(f"Failed on {dpath}: {e}")

    print(f"Done. Outputs in: {OUT_DIR}")

if __name__ == "__main__":
    main()


Processing: 100%|██████████████████████████████████████████████████████████████████████| 15/15 [01:50<00:00,  7.40s/it]

Done. Outputs in: D:/Earthquake_Project/PGA_x_SeaLevel3





In [18]:
# batch_classify_zones_colored.py

import os
import glob
import numpy as np
import rasterio
from rasterio.enums import ColorInterp

# ============== PATHS (EDIT THESE) ==============
IN_DIR  = r"D:/Earthquake_Project/PGA_x_SeaLevel_2"   # folder with input .tif files
OUT_DIR = r"D:/Earthquake_Project/classified_out"      # output folder
# =================================================

os.makedirs(OUT_DIR, exist_ok=True)

def classify_array(arr, nodata_in=None):
    """
    Zones:
      0 = <0.05
      1 = 0.05–<0.08
      2 = 0.08–<0.16
      3 = 0.16–<0.24
      4 = 0.24–≤0.32
      5 = >0.32
    Output NoData will be 255 (separate from classes).
    """
    out = np.full(arr.shape, 255, dtype=np.uint8)  # start as NoData=255
    valid = np.isfinite(arr)
    if nodata_in is not None:
        valid &= (arr != nodata_in)

    a = arr[valid]
    c = np.zeros(a.shape, dtype=np.uint8)  # class codes 0..5 for valid pixels

    c[(a >= 0.00) & (a < 0.05)] = 0
    c[(a >= 0.05) & (a < 0.08)] = 1
    c[(a >= 0.08) & (a < 0.16)] = 2
    c[(a >= 0.16) & (a < 0.24)] = 3
    c[(a >= 0.24) & (a <= 0.32)] = 4
    c[(a >  0.32)]              = 5

    out[valid] = c
    return out

# ======= COLOR MAP (RGBA) =======
# 0 sky blue, 1 blue, 2 green, 3 yellow, 4 orange, 5 red
# 255 is NoData (transparent) – not added to palette.
COLORMAP = {
    0:  (135, 206, 235, 255),   # sky blue
    1:  (0,   0,   255, 255),   # blue
    2:  (0,   128, 0,   255),   # green
    3:  (255, 255, 0,   255),   # yellow
    4:  (255, 165, 0,   255),   # orange
    5:  (255, 0,   0,   255),   # red
}

def process_file(in_path, out_path):
    with rasterio.open(in_path) as src:
        profile = src.profile.copy()
        nodata_in = src.nodata

        data = src.read(1)
        classified = classify_array(data, nodata_in)

        profile.update(
            dtype=rasterio.uint8,
            count=1,
            nodata=255  # keep 255 as transparent NoData
        )

        with rasterio.open(out_path, 'w', **profile) as dst:
            dst.write(classified, 1)
            try:
                dst.write_colormap(1, COLORMAP)
                dst.colorinterp = (ColorInterp.palette,)
            except Exception:
                pass

def main():
    tifs = sorted(glob.glob(os.path.join(IN_DIR, "*.tif")))
    if not tifs:
        print(f"No .tif files found in: {IN_DIR}")
        return

    for i, tif in enumerate(tifs, 1):
        base = os.path.splitext(os.path.basename(tif))[0]
        out_path = os.path.join(OUT_DIR, f"{base}_zones.tif")
        print(f"[{i}/{len(tifs)}] Classifying: {base}.tif -> {os.path.basename(out_path)}")
        process_file(tif, out_path)

    print("✅ Done. Saved to:", OUT_DIR)
    print("Classes: 0=SkyBlue, 1=Blue, 2=Green, 3=Yellow, 4=Orange, 5=Red; NoData=255 transparent.")

if __name__ == "__main__":
    main()


[1/16] Classifying: PGAx_Sea level rise 2020 ssp126_ratio_to_ssp119_hist.tif -> PGAx_Sea level rise 2020 ssp126_ratio_to_ssp119_hist_zones.tif
[2/16] Classifying: PGAx_Sea level rise 2020 ssp245_ratio_to_ssp119_hist.tif -> PGAx_Sea level rise 2020 ssp245_ratio_to_ssp119_hist_zones.tif
[3/16] Classifying: PGAx_Sea level rise 2020 ssp585_ratio_to_ssp119_hist.tif -> PGAx_Sea level rise 2020 ssp585_ratio_to_ssp119_hist_zones.tif
[4/16] Classifying: PGAx_Sea level rise 2030 ssp126_ratio_to_ssp119_hist.tif -> PGAx_Sea level rise 2030 ssp126_ratio_to_ssp119_hist_zones.tif
[5/16] Classifying: PGAx_Sea level rise 2030 ssp245_ratio_to_ssp119_hist.tif -> PGAx_Sea level rise 2030 ssp245_ratio_to_ssp119_hist_zones.tif
[6/16] Classifying: PGAx_Sea level rise 2030 ssp585_ratio_to_ssp119_hist.tif -> PGAx_Sea level rise 2030 ssp585_ratio_to_ssp119_hist_zones.tif
[7/16] Classifying: PGAx_Sea level rise 2050 ssp126_ratio_to_ssp119_hist.tif -> PGAx_Sea level rise 2050 ssp126_ratio_to_ssp119_hist_zones.tif

In [19]:
# batch_clip_to_india.py
# -----------------------------------------------------------
# Edit the PATHS below, then run:
#   python batch_clip_to_india.py
# -----------------------------------------------------------

import os
import glob
import rasterio
from rasterio.mask import mask
from rasterio.enums import ColorInterp
import geopandas as gpd
from shapely.geometry import mapping

# ============== PATHS (EDIT THESE) ==============
IN_DIR   = r"D:/Earthquake_Project/classified_out"      # folder with input .tif files
INDIA_SHP = r"D:/Thesis & Internship/Dissertation/West Bengal/India Shapefile With Kashmir/India Shape/india_st.shp"   # path to your India shapefile
OUT_DIR  = r"D:/Earthquake_Project/clipped_to_india"    # output folder
# =================================================

os.makedirs(OUT_DIR, exist_ok=True)

def load_india_geom():
    # Read India boundary and dissolve to one multipart polygon
    gdf = gpd.read_file(INDIA_SHP)
    # if multiple features, dissolve all into one
    india = gdf.dissolve()  # dissolves by all rows -> single feature
    return india

def reproject_geom_to_raster(india_gdf, raster_crs):
    # Reproject to raster CRS
    if india_gdf.crs is None:
        raise ValueError("India shapefile has no CRS set. Please define it.")
    india_r = india_gdf.to_crs(raster_crs)
    # get geometry as GeoJSON-like mapping
    geoms = [mapping(india_r.geometry.iloc[0])]
    return geoms

def clip_one_tif(in_path, india_gdf):
    with rasterio.open(in_path) as src:
        # Reproject India geometry to this raster's CRS
        geoms = reproject_geom_to_raster(india_gdf, src.crs)

        # Preserve input nodata if present; if none and data are classes, you can force 255
        out_nodata = src.nodata if src.nodata is not None else None

        # Do the mask (crop=True to trim bounds)
        clipped, out_transform = mask(
            src,
            geoms,
            crop=True,
            filled=True,          # fill outside with nodata
            nodata=out_nodata     # keep same nodata; set to 255 manually if you prefer for classes
        )

        profile = src.profile.copy()
        profile.update(
            height=clipped.shape[1],
            width=clipped.shape[2],
            transform=out_transform
        )

        # If you want to force class nodata to 255 (common for paletted class rasters), uncomment:
        # profile.update(nodata=255)
        # clipped[clipped == (src.nodata if src.nodata is not None else -999999)] = 255

        # Write output, preserving colormap if present
        base = os.path.splitext(os.path.basename(in_path))[0]
        out_path = os.path.join(OUT_DIR, f"{base}_IND.tif")
        with rasterio.open(out_path, "w", **profile) as dst:
            dst.write(clipped)

            # Preserve color table if the source had one
            try:
                cm = src.colormap(1)
                if cm:
                    dst.write_colormap(1, cm)
                    dst.colorinterp = (ColorInterp.palette,)
            except Exception:
                pass

        return out_path

def main():
    india_gdf = load_india_geom()
    tifs = sorted(glob.glob(os.path.join(IN_DIR, "*.tif")))
    if not tifs:
        print(f"No .tif files found in: {IN_DIR}")
        return

    for i, tif in enumerate(tifs, 1):
        out_path = clip_one_tif(tif, india_gdf)
        print(f"[{i}/{len(tifs)}] Clipped: {os.path.basename(tif)} -> {os.path.basename(out_path)}")

    print("✅ Done. Outputs saved to:", OUT_DIR)

if __name__ == "__main__":
    main()


[1/16] Clipped: PGAx_Sea level rise 2020 ssp126_ratio_to_ssp119_hist_zones.tif -> PGAx_Sea level rise 2020 ssp126_ratio_to_ssp119_hist_zones_IND.tif
[2/16] Clipped: PGAx_Sea level rise 2020 ssp245_ratio_to_ssp119_hist_zones.tif -> PGAx_Sea level rise 2020 ssp245_ratio_to_ssp119_hist_zones_IND.tif
[3/16] Clipped: PGAx_Sea level rise 2020 ssp585_ratio_to_ssp119_hist_zones.tif -> PGAx_Sea level rise 2020 ssp585_ratio_to_ssp119_hist_zones_IND.tif
[4/16] Clipped: PGAx_Sea level rise 2030 ssp126_ratio_to_ssp119_hist_zones.tif -> PGAx_Sea level rise 2030 ssp126_ratio_to_ssp119_hist_zones_IND.tif
[5/16] Clipped: PGAx_Sea level rise 2030 ssp245_ratio_to_ssp119_hist_zones.tif -> PGAx_Sea level rise 2030 ssp245_ratio_to_ssp119_hist_zones_IND.tif
[6/16] Clipped: PGAx_Sea level rise 2030 ssp585_ratio_to_ssp119_hist_zones.tif -> PGAx_Sea level rise 2030 ssp585_ratio_to_ssp119_hist_zones_IND.tif
[7/16] Clipped: PGAx_Sea level rise 2050 ssp126_ratio_to_ssp119_hist_zones.tif -> PGAx_Sea level rise 2050

In [13]:
import os
import rasterio
from rasterio.features import shapes, rasterize
from shapely.geometry import shape
import geopandas as gpd

# ========== CONFIG ==========
IN_DIR  = r"D:/Earthquake_Project/clipped_to_india"        
OUT_DIR = r"D:/Earthquake_Project/clipped_to_india_smooth122" 
SMOOTH  = 1.5   # increase for more smoothing (try 2, 3, 5)

os.makedirs(OUT_DIR, exist_ok=True)

def smooth_edges(data, transform, nodata=None):
    # Create India mask (all non-nodata area)
    india_mask = (data != nodata) if nodata is not None else (data > 0)

    # Vectorize zones
    results = (
        {"properties": {"val": v}, "geometry": s}
        for s, v in shapes(data, mask=india_mask, transform=transform)
    )
    gdf = gpd.GeoDataFrame.from_features(results)

    # Save original India outline (outer boundary)
    india_outline = gdf.unary_union.buffer(0)

    # Smooth polygons using buffer in/out
    gdf["geometry"] = gdf.buffer(SMOOTH).buffer(-SMOOTH)

    # Clip smoothed polygons back to India outline (keep outer boundary fixed)
    gdf["geometry"] = gdf["geometry"].intersection(india_outline)

    # Rasterize back (zones preserved)
    out_data = rasterize(
        [(geom, val) for geom, val in zip(gdf.geometry, gdf["val"]) if geom is not None],
        out_shape=data.shape,
        transform=transform,
        fill=nodata if nodata is not None else 0,
        dtype=data.dtype,
    )
    return out_data

# ========== LOOP OVER ALL TIFFS ==========
for fname in os.listdir(IN_DIR):
    if not fname.lower().endswith(".tif"):
        continue

    in_path  = os.path.join(IN_DIR, fname)
    out_path = os.path.join(OUT_DIR, fname.replace(".tif", "_smooth.tif"))

    with rasterio.open(in_path) as src:
        data = src.read(1)
        profile = src.profile
        transform = src.transform
        nodata = profile.get("nodata", None)

    smoothed = smooth_edges(data, transform, nodata=nodata)

    profile.update(dtype=data.dtype)
    with rasterio.open(out_path, "w", **profile) as dst:
        dst.write(smoothed, 1)

    print(f"✅ Smoothed saved: {out_path}")

print("🎯 All TIFFs processed — smooth internal edges, India boundary preserved.")


  india_outline = gdf.unary_union.buffer(0)


✅ Smoothed saved: D:/Earthquake_Project/clipped_to_india_smooth122\PGAx_Sea level rise 2020 ssp126_ratio_to_ssp119_hist_zones_IND_smooth.tif


  india_outline = gdf.unary_union.buffer(0)


✅ Smoothed saved: D:/Earthquake_Project/clipped_to_india_smooth122\PGAx_Sea level rise 2020 ssp245_ratio_to_ssp119_hist_zones_IND_smooth.tif


  india_outline = gdf.unary_union.buffer(0)


✅ Smoothed saved: D:/Earthquake_Project/clipped_to_india_smooth122\PGAx_Sea level rise 2020 ssp585_ratio_to_ssp119_hist_zones_IND_smooth.tif


  india_outline = gdf.unary_union.buffer(0)


✅ Smoothed saved: D:/Earthquake_Project/clipped_to_india_smooth122\PGAx_Sea level rise 2030 ssp126_ratio_to_ssp119_hist_zones_IND_smooth.tif


  india_outline = gdf.unary_union.buffer(0)


✅ Smoothed saved: D:/Earthquake_Project/clipped_to_india_smooth122\PGAx_Sea level rise 2030 ssp245_ratio_to_ssp119_hist_zones_IND_smooth.tif


  india_outline = gdf.unary_union.buffer(0)


✅ Smoothed saved: D:/Earthquake_Project/clipped_to_india_smooth122\PGAx_Sea level rise 2030 ssp585_ratio_to_ssp119_hist_zones_IND_smooth.tif


  india_outline = gdf.unary_union.buffer(0)


✅ Smoothed saved: D:/Earthquake_Project/clipped_to_india_smooth122\PGAx_Sea level rise 2050 ssp126_ratio_to_ssp119_hist_zones_IND_smooth.tif


  india_outline = gdf.unary_union.buffer(0)


✅ Smoothed saved: D:/Earthquake_Project/clipped_to_india_smooth122\PGAx_Sea level rise 2050 ssp245_ratio_to_ssp119_hist_zones_IND_smooth.tif


  india_outline = gdf.unary_union.buffer(0)


✅ Smoothed saved: D:/Earthquake_Project/clipped_to_india_smooth122\PGAx_Sea level rise 2050 ssp585_ratio_to_ssp119_hist_zones_IND_smooth.tif


  india_outline = gdf.unary_union.buffer(0)


✅ Smoothed saved: D:/Earthquake_Project/clipped_to_india_smooth122\PGAx_Sea level rise 2100 ssp126_ratio_to_ssp119_hist_zones_IND_smooth.tif


  india_outline = gdf.unary_union.buffer(0)


✅ Smoothed saved: D:/Earthquake_Project/clipped_to_india_smooth122\PGAx_Sea level rise 2100 ssp245_ratio_to_ssp119_hist_zones_IND_smooth.tif


  india_outline = gdf.unary_union.buffer(0)


✅ Smoothed saved: D:/Earthquake_Project/clipped_to_india_smooth122\PGAx_Sea level rise 2100 ssp585_ratio_to_ssp119_hist_zones_IND_smooth.tif


  india_outline = gdf.unary_union.buffer(0)


✅ Smoothed saved: D:/Earthquake_Project/clipped_to_india_smooth122\PGAx_Sea level rise 2150 ssp126_ratio_to_ssp119_hist_zones_IND_smooth.tif


  india_outline = gdf.unary_union.buffer(0)


✅ Smoothed saved: D:/Earthquake_Project/clipped_to_india_smooth122\PGAx_Sea level rise 2150 ssp456_ratio_to_ssp119_hist_zones_IND_smooth.tif


  india_outline = gdf.unary_union.buffer(0)


✅ Smoothed saved: D:/Earthquake_Project/clipped_to_india_smooth122\PGAx_Sea level rise 2150 ssp585_ratio_to_ssp119_hist_zones_IND_smooth.tif


  india_outline = gdf.unary_union.buffer(0)


✅ Smoothed saved: D:/Earthquake_Project/clipped_to_india_smooth122\v2023_1_pga_475_rock_3min_zones_IND_smooth.tif
🎯 All TIFFs processed — smooth internal edges, India boundary preserved.
