## Runoff TIFF preparation

In [None]:
import os
import shutil
from pathlib import Path

In [None]:
# Folder containing the daily runoff TIFFs, named NRSC_NHP_VIC_RO_<yyyymmdd>.tif
source_folder = Path(r"D:\CivicDataLab_IDS-DRR\IDS-DRR_Github\flood-data-ecosystem-Odisha\Sources\NRSC\data\tiffs\Daily-tiffs")

# Snapshot the listing before moving anything: glob() is lazy, and the loop
# below creates sub-folders in, and removes files from, the folder being scanned.
daily_tiffs = sorted(source_folder.glob("NRSC_NHP_VIC_RO_*.tif"))

# Move every daily TIFF into a yyyy_mm sub-folder of source_folder
for tiff_file in daily_tiffs:
    # The date is the last underscore-separated token of the file stem
    date_str = tiff_file.stem.split('_')[-1]

    # Only accept an 8-digit yyyymmdd token with a real month; anything else
    # would otherwise be sliced blindly into a meaningless folder name.
    looks_like_date = (
        len(date_str) == 8
        and date_str.isascii()
        and date_str.isdigit()
        and 1 <= int(date_str[4:6]) <= 12
    )
    if not looks_like_date:
        print(f"Skipping file with unexpected date suffix: {tiff_file.name}")
        continue

    year = date_str[:4]
    month = date_str[4:6]

    # Create the target directory in the format yyyy_mm
    target_folder = source_folder / f"{year}_{month}"
    target_folder.mkdir(parents=True, exist_ok=True)

    # Move the TIFF file to the target directory
    shutil.move(str(tiff_file), str(target_folder / tiff_file.name))

print("TIFF files have been organized by month.")


TIFF files have been organized by month.


### Create composite file for the month

In [2]:
pip install rasterio

Note: you may need to restart the kernel to use updated packages.


In [3]:
import geopandas as gpd
import rasterio
from rasterstats import zonal_stats
from pathlib import Path
import pandas as pd

# Inputs: block boundary polygons and the folder holding one yyyy_mm sub-folder of daily TIFFs per month
shapefile_path = Path(r"D:\CivicDataLab_IDS-DRR\IDS-DRR_Github\flood-data-ecosystem-Odisha\Maps\od_ids-drr_shapefiles\odisha_block_final.geojson")
base_tiff_folder = Path(r"D:\CivicDataLab_IDS-DRR\IDS-DRR_Github\flood-data-ecosystem-Odisha\Sources\NRSC\data\tiffs\Daily-tiffs")

# Output: folder for the monthly CSV summaries, created up front if it is missing
output_csv_folder = Path(r"D:\CivicDataLab_IDS-DRR\IDS-DRR_Github\flood-data-ecosystem-Odisha\Sources\NRSC\data\variables")
output_csv_folder.mkdir(parents=True, exist_ok=True)

# Processing window: first month ...
start_year = 2021
start_month = 4

# ... and last month
end_year = 2024
end_month = 10

# Function to generate month-year combinations
def generate_months_range(start_year, start_month, end_year, end_month):
    """Return the "YYYY_MM" label of every month from start to end, inclusive.

    Args:
        start_year, start_month: first month of the range.
        end_year, end_month: last month of the range.

    Returns:
        list[str]: labels formatted with strftime("%Y_%m"), in chronological
        order; empty when the start month is later than the end month.

    Raises:
        ValueError: if a year/month pair is rejected by datetime.date.
    """
    from datetime import date

    cursor = date(start_year, start_month, 1)
    last = date(end_year, end_month, 1)

    labels = []
    while not cursor > last:
        labels.append(cursor.strftime("%Y_%m"))
        # divmod(month, 12) gives (1, 0) for December, i.e. January of the
        # following year, and (0, month) otherwise, i.e. the next month.
        year_carry, next_month_index = divmod(cursor.month, 12)
        cursor = date(cursor.year + year_carry, next_month_index + 1, 1)
    return labels

# Get the range of months to process
months = generate_months_range(start_year, start_month, end_year, end_month)

# Load the block polygons used as zones
block_gdf = gpd.read_file(shapefile_path)

# Reprojected copy of the blocks, cached per raster CRS. This keeps the
# notebook-level block_gdf untouched (re-running the cell starts from the same
# geometries) and reprojects once per distinct CRS instead of once per TIFF.
zones_gdf = None
zones_crs = None

# Loop through each month and calculate zonal statistics
for month in months:
    month_folder = base_tiff_folder / month

    if not month_folder.exists():
        print(f"Skipping non-existent folder: {month_folder}")
        continue

    # Daily rasters only: a cumulative composite named cum-sr_<yyyy_mm>.tif may
    # sit in the same folder and must not be counted as one more day.
    # Sorted so the processing order is deterministic.
    tiff_files = sorted(
        path for path in month_folder.glob("*.tif")
        if not path.name.startswith("cum-sr_")
    )
    if not tiff_files:
        print(f"No TIFF files found in: {month_folder}")
        continue

    # One record per (block, day) for this month
    monthly_stats = []

    # Process each daily TIFF file
    for tiff_file in tiff_files:

        with rasterio.open(tiff_file) as src:
            # Always reproject from the original block_gdf, never from an
            # already reprojected copy.
            if zones_gdf is None or src.crs != zones_crs:
                zones_gdf = block_gdf.to_crs(src.crs)
                zones_crs = src.crs

            # Calculate zonal statistics for the current TIFF (band 1)
            stats = zonal_stats(
                zones_gdf,
                src.read(1),
                affine=src.transform,
                stats=["mean", "sum", "max"],
                nodata=src.nodata,
                geojson_out=True
            )

            # zonal_stats returns one feature per input row, in row order,
            # so it can be paired positionally with the block attributes.
            for stat, block in zip(stats, zones_gdf.itertuples()):
                monthly_stats.append({
                    "block_name": block.block_name,  # Update to the correct column in your shapefile
                    "object_id": block.object_id,
                    "Date": tiff_file.stem,  # Use the TIFF file name as date (modify as needed)
                    "Mean Daily Runoff": stat["properties"]["mean"],
                    "Sum Runoff": stat["properties"]["sum"],
                    "Peak Runoff": stat["properties"]["max"],
                })

    # Convert monthly stats to a DataFrame
    stats_df = pd.DataFrame(monthly_stats)

    # Collapse the daily records to one row per block:
    # mean of daily means, sum of daily sums, max of daily maxima
    monthly_summary = stats_df.groupby(["block_name","object_id"]).agg(
        Mean_Daily_Runoff=("Mean Daily Runoff", "mean"),
        Sum_Runoff=("Sum Runoff", "sum"),
        Peak_Runoff=("Peak Runoff", "max")
    ).reset_index()

    # Save the summary to a CSV file
    output_csv_file = output_csv_folder / f"runoff_{month}.csv"
    monthly_summary.to_csv(output_csv_file, index=False)
    print(f"Zonal statistics saved for {month}: {output_csv_file}")


Zonal statistics saved for 2021_04: D:\CivicDataLab_IDS-DRR\IDS-DRR_Github\flood-data-ecosystem-Odisha\Sources\NRSC\data\variables\runoff_2021_04.csv
Zonal statistics saved for 2021_05: D:\CivicDataLab_IDS-DRR\IDS-DRR_Github\flood-data-ecosystem-Odisha\Sources\NRSC\data\variables\runoff_2021_05.csv
Zonal statistics saved for 2021_06: D:\CivicDataLab_IDS-DRR\IDS-DRR_Github\flood-data-ecosystem-Odisha\Sources\NRSC\data\variables\runoff_2021_06.csv
Zonal statistics saved for 2021_07: D:\CivicDataLab_IDS-DRR\IDS-DRR_Github\flood-data-ecosystem-Odisha\Sources\NRSC\data\variables\runoff_2021_07.csv
Zonal statistics saved for 2021_08: D:\CivicDataLab_IDS-DRR\IDS-DRR_Github\flood-data-ecosystem-Odisha\Sources\NRSC\data\variables\runoff_2021_08.csv
Zonal statistics saved for 2021_09: D:\CivicDataLab_IDS-DRR\IDS-DRR_Github\flood-data-ecosystem-Odisha\Sources\NRSC\data\variables\runoff_2021_09.csv
Zonal statistics saved for 2021_10: D:\CivicDataLab_IDS-DRR\IDS-DRR_Github\flood-data-ecosystem-Odis

In [10]:
import os
import numpy as np
import rasterio
from rasterio.merge import merge
from pathlib import Path


tiff_folder = Path(r"D:\CivicDataLab_IDS-DRR\IDS-DRR_Github\HP\flood-data-ecosystem-Himachal-Pradesh\Sources\NRSC\data\tiffs\Daily-tiffs")

# Specify the folder containing the TIFF files for a specific month (e.g., "2021_04")
month_folder = Path(r"D:\CivicDataLab_IDS-DRR\IDS-DRR_Github\HP\flood-data-ecosystem-Himachal-Pradesh\Sources\NRSC\data\tiffs\Daily-tiffs\2021_04")

# The cumulative raster is written into the month folder itself
output_file = month_folder / f"cum-sr_{month_folder.name}.tif"

# Input rasters: every TIFF in the folder except a previously written output,
# which would otherwise be added to the sum again when this cell is re-run.
# Sorted so the processing order is deterministic.
tiff_files = sorted(
    path for path in month_folder.glob("*.tif")
    if path.name != output_file.name
)
if not tiff_files:
    raise FileNotFoundError(f"No daily TIFF files found in: {month_folder}")

sum_array = None       # running per-pixel sum (float64)
has_valid_data = None  # True where at least one input had a valid value
meta = None            # metadata of the first input, reused for the output

# Iterate through each TIFF file
for tiff_file in tiff_files:
    with rasterio.open(tiff_file) as src:
        # Read the first band as a masked array so nodata pixels can be left
        # out of the sum instead of being added as if they were measurements
        data = src.read(1, masked=True)

        if sum_array is None:
            sum_array = np.zeros(data.shape, dtype=np.float64)
            has_valid_data = np.zeros(data.shape, dtype=bool)
            meta = src.meta.copy()
        elif data.shape != sum_array.shape:
            raise ValueError(
                f"{tiff_file.name} has shape {data.shape}, expected {sum_array.shape}"
            )

        # Masked pixels contribute nothing to the total
        sum_array += data.filled(0)
        has_valid_data |= ~np.ma.getmaskarray(data)

# Update the metadata to reflect the new data type and count
meta.update(dtype=rasterio.float64, count=1)

# Pixels that were never valid in any input keep the nodata marker (when the
# inputs declare one) rather than a misleading total of 0
nodata_value = meta.get("nodata")
if nodata_value is not None:
    sum_array[~has_valid_data] = nodata_value

# Write the summed array to the new TIFF file
with rasterio.open(output_file, 'w', **meta) as dst:
    dst.write(sum_array, 1)

print(f"Cumulative TIFF file created: {output_file}")

Cumulative TIFF file created: D:\CivicDataLab_IDS-DRR\IDS-DRR_Github\HP\flood-data-ecosystem-Himachal-Pradesh\Sources\NRSC\data\tiffs\Daily-tiffs\2021_04\cum-sr_2021_04.tif
