Input data 

In [None]:
# Define the paths
# Root folder of the study area. Every input/output path below is built from
# it, so relocating the data only requires editing this one line.
data_root = r"D:\River connectivity\Whole Danube Reach in Hungary"

ndwi_folder = data_root + r"\NDWI input\clear_NDWI"  # water masks folder
polygon_shapefile = data_root + r"\Shp_Danube_in_Hungary.shp"  # polygon of the study area (e.g. river corridor)
output_excel = data_root + r"\xlsx\nan pixels inside the corridor.xlsx"  # path of the output .xlsx file

In [None]:
import os

import geopandas as gpd
import pandas as pd
import rasterio
import rasterio.mask  # submodule is not loaded by a bare `import rasterio`
from shapely.geometry import box

# Count, for every NDWI raster in `ndwi_folder`, the NaN pixels that fall
# inside the study-area polygons, and export one row per raster to Excel.

# Load the polygon shapefile
polygons = gpd.read_file(polygon_shapefile)

# Initialize a list to store the results: one {'Date', 'NaN Count'} dict per raster
results = []

# Reprojected polygons keyed by the raster CRS (as WKT), so the reprojection
# runs once per distinct CRS instead of once per file.
polygons_by_crs = {}

# Loop through each NDWI file in the directory. os.listdir() returns entries
# in arbitrary order, so sort them to get a reproducible row order.
for filename in sorted(os.listdir(ndwi_folder)):
    if not filename.endswith('.tif'):
        continue
    ndwi_path = os.path.join(ndwi_folder, filename)

    # Open the NDWI raster
    with rasterio.open(ndwi_path) as src:
        ndwi_crs = src.crs
        if ndwi_crs is None:
            # Without a CRS the polygons cannot be reprojected onto the raster grid.
            print(f'No CRS defined for {filename}, skipped')
            continue

        # Reproject polygons to match the NDWI raster CRS (cached per CRS)
        crs_key = ndwi_crs.to_wkt()
        if crs_key not in polygons_by_crs:
            polygons_by_crs[crs_key] = polygons.to_crs(ndwi_crs)
        polygons_reprojected = polygons_by_crs[crs_key]

        # Rectangle covering the raster extent
        raster_bounds = box(*src.bounds)

        # Test every polygon against the single bounds geometry. Passing a
        # GeoSeries here instead would align the two series on their index,
        # so only the polygon at index 0 would actually be compared.
        if not polygons_reprojected.intersects(raster_bounds).any():
            print(f'No intersection for {filename}')
            continue

        # Boolean mask that is True OUTSIDE the polygons, plus the window
        # (polygon bounding box clipped to the raster) it refers to.
        shape_mask, _, window = rasterio.mask.raster_geometry_mask(
            src, polygons_reprojected.geometry, crop=True
        )

        # Read the raw pixel values of that window, all bands.
        data = src.read(window=window, out_shape=(src.count,) + shape_mask.shape)

        # Count NaN only among pixels inside the polygons. Counting over the
        # filled output of rasterio.mask.mask() would also count every pixel
        # outside the polygons whenever the raster's nodata value is NaN.
        nan_count = int(pd.isna(data[:, ~shape_mask]).sum())

    # Extract date from filename (adjust this based on your filename format).
    # Assumes a name like "ndwi_2024-10-25.tif"; if there is no underscore,
    # fall back to the file stem instead of aborting the whole run.
    name_parts = filename.split('_')
    if len(name_parts) > 1:
        date = name_parts[1].split('.')[0]
    else:
        date = os.path.splitext(filename)[0]

    # Append results
    results.append({'Date': date, 'NaN Count': nan_count})

# Create a DataFrame (columns fixed so an empty run still yields a header)
df = pd.DataFrame(results, columns=['Date', 'NaN Count'])

# Make sure the destination folder exists before writing
output_dir = os.path.dirname(output_excel)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# Export to Excel
df.to_excel(output_excel, index=False)

print(f'Results exported to {output_excel}')
