In [4]:
import os
import rasterio
from rasterio.features import rasterize
import numpy as np
from scipy.ndimage import distance_transform_edt
import geopandas as gpd

def calculate_distance_raster(vector_path, reference_raster_path, output_raster_path):
    """
    Calculates the Euclidean distance from each pixel to the nearest vector feature.

    Args:
        vector_path (str): Path to the input vector file (e.g., river shapefile).
        reference_raster_path (str): Path to a reference raster (e.g., DEM) to define
                                     the output grid's extent, CRS, and resolution.
        output_raster_path (str): Path to save the output distance raster.
    """
    # --- Step 1: Check if input files exist ---
    if not os.path.exists(vector_path):
        print(f"!!! Error: Input vector file not found at '{vector_path}'.")
        return
    if not os.path.exists(reference_raster_path):
        print(f"!!! Error: Reference raster file not found at '{reference_raster_path}'.")
        return

    print(f"\n--- Starting Distance Calculation ---\n")
    print(f"Input Features: {os.path.basename(vector_path)}")
    print(f"Reference Grid: {os.path.basename(reference_raster_path)}")
    print(f"Output File: {os.path.basename(output_raster_path)}\n")

    try:
        # --- Step 2: Open reference raster to get grid properties ---
        with rasterio.open(reference_raster_path) as ref_src:
            meta = ref_src.meta.copy()
            transform = ref_src.transform
            out_shape = ref_src.shape
            # Get pixel size (resolution) - assuming square pixels
            pixel_size = ref_src.res[0] 
            print(f"Reference raster pixel size: {pixel_size:.2f} meters/pixel")
        
        # --- Step 3: Read the vector data using geopandas ---
        gdf = gpd.read_file(vector_path)
        
        # CRITICAL ADDITION: Explicitly set the CRS of the GeoDataFrame if it's not set
        # This ensures geopandas recognizes the CRS before attempting transformation.
        # We use EPSG:4326 as confirmed for your DEM and river data.
        if gdf.crs is None:
            print(f"Warning: Input vector CRS is not set. Assuming EPSG:4326 and assigning.")
            gdf.set_crs(epsg=4326, inplace=True) # Set the CRS directly
        
        # Ensure vector data is in the same CRS as the reference raster
        # Now, gdf.crs should either be already set or explicitly set by the line above.
        if gdf.crs != meta['crs']:
            print(f"Reprojecting vector data from {gdf.crs} to match reference raster CRS {meta['crs']}...")
            gdf = gdf.to_crs(meta['crs'])
        else:
            print(f"Vector data CRS ({gdf.crs}) already matches reference raster CRS. No reprojection needed.")

        # --- Step 4: Rasterize the vector features ---\n",
        # "Burn" the river lines into a NumPy array. Where rivers exist, pixels will be 1, otherwise 0.
        print("Rasterizing river features...")
        river_pixels = rasterize(
            [(geom, 1) for geom in gdf.geometry],
            out_shape=out_shape,
            transform=transform,
            fill=0,
            dtype='uint8'
        )

        # --- Step 5: Calculate Euclidean Distance ---
        # The distance_transform_edt function calculates the distance from each 0 pixel
        # to the nearest 1 pixel. The result is in pixel units.
        print("Calculating Euclidean distance...")
        distance_in_pixels = distance_transform_edt(river_pixels == 0)

        # Convert distance from pixel units to map units (meters)
        distance_in_meters = distance_in_pixels * pixel_size

        # --- Step 6: Prepare metadata and save the output raster ---
        meta.update(
            dtype='float32',
            nodata=-9999.0,
            compress='lzw'
        )
        
        print("Saving output distance raster...")
        with rasterio.open(output_raster_path, 'w', **meta) as dst:
            dst.write(distance_in_meters.astype(rasterio.float32), 1)

        print(f"\nSuccessfully created distance raster: {output_raster_path}")

    except Exception as e:
        print(f"An error occurred: {e}")

# --- MAIN SCRIPT EXECUTION ---
if __name__ == "__main__":
    # --- USER: VERIFY YOUR INPUTS AND SETTINGS HERE ---

    # 1. Path to your River Network shapefile
    # This should be the final shapefile you saved from QGIS, e.g., River_Data_Final_for_Python.shp
    river_shapefile_path = "C:/Users/Lenovo/Documents/Dam_Suitability_Analysis/DATA/Processed_Rasters/River_Data/River_Data_Final_for_Python.shp" 

    # 2. Path to your merged DEM file (used as a grid reference)
    dem_reference_path = "C:/Users/Lenovo/Documents/Dam_Suitability_Analysis/DATA/Processed_Rasters/DEM_Merged_Final.tif"

    # 3. Path to save the new distance raster file
    distance_output_path = "C:/Users/Lenovo/Documents/Dam_Suitability_Analysis/DATA/Processed_Rasters/Distance_to_Rivers.tif"

    # Create the output directory if it doesn't exist
    output_dir = os.path.dirname(distance_output_path)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # --- END OF USER SETTINGS ---\n",
    # --- Run the Distance Calculation ---
    calculate_distance_raster(river_shapefile_path, dem_reference_path, distance_output_path)

    print("\nDistance calculation process finished.")


--- Starting Distance Calculation ---

Input Features: River_Data_Final_for_Python.shp
Reference Grid: DEM_Merged_Final.tif
Output File: Distance_to_Rivers.tif

Reference raster pixel size: 0.00 meters/pixel
Vector data CRS (EPSG:4326) already matches reference raster CRS. No reprojection needed.
Rasterizing river features...
Calculating Euclidean distance...
Saving output distance raster...

Successfully created distance raster: C:/Users/Lenovo/Documents/Dam_Suitability_Analysis/DATA/Processed_Rasters/Distance_to_Rivers.tif

Distance calculation process finished.
