In [1]:
import os
import rasterio
import numpy as np
import pandas as pd
import xarray as xr

def _year_from_filename(file_path):
    """Return the year stored in the second-to-last '_'-separated token of the file name."""
    file_name = os.path.basename(file_path)
    # Work on the base name without its extension so that underscores in the
    # folder path can never be mistaken for part of the file-name pattern.
    tokens = os.path.splitext(file_name)[0].split('_')
    try:
        return int(tokens[-2])
    except (IndexError, ValueError):
        raise ValueError(
            f"Cannot extract a year from {file_name!r}: expected the "
            "second-to-last '_'-separated token of the file name to be an integer year"
        ) from None


def build_xarray_dataset(folder_path, file_prefix, start_month=1):
    """Stack the bands of every matching GeoTIFF in a folder into one xarray.Dataset.

    Files are selected by name (``file_prefix`` ... ``.tif``), processed in
    sorted order, and concatenated along a ``time`` dimension. Every band of
    a file receives one month-end timestamp, counting forward from
    ``start_month`` of the year encoded in the file name
    (e.g. ``aet_1km_2000_<suffix>.tif`` -> 2000).

    Parameters
    ----------
    folder_path : str
        Directory that contains the TIFF files.
    file_prefix : str
        Only files whose name starts with this prefix and ends with
        ``.tif`` are read.
    start_month : int, default 1
        Month (1-12) assigned to the first band of every file.

    Returns
    -------
    xarray.Dataset
        A single variable ``"value"`` with dimensions
        ``(time, latitude, longitude)``.

    Raises
    ------
    FileNotFoundError
        If ``folder_path`` does not exist (raised by ``os.listdir``).
    ValueError
        If no file matches, a file name does not carry an integer year, or
        a file's grid size differs from the first file's.

    Notes
    -----
    * Coordinates are pixel centres derived from the first file's affine
      transform. Only the scale and offset terms are used, so a north-up,
      unrotated grid is assumed.
    * The axes are named latitude/longitude; this presumes a geographic CRS
      -- TODO confirm against the data.
    * Each band is presumed to be one consecutive month -- TODO confirm.
    """
    # Sorting the paths keeps the files (and therefore the time axis) in order.
    tiff_files = sorted(
        os.path.join(folder_path, name)
        for name in os.listdir(folder_path)
        if name.startswith(file_prefix) and name.endswith('.tif')
    )
    if not tiff_files:
        # Fail early with a readable message instead of the opaque
        # "need at least one array to concatenate" from np.concatenate.
        raise ValueError(
            f"No '.tif' files starting with {file_prefix!r} found in {folder_path!r}"
        )

    all_data = []
    all_time_indices = []
    latitudes = None
    longitudes = None

    for file_path in tiff_files:
        # Parse the year before opening the raster so naming problems surface first.
        file_year = _year_from_filename(file_path)
        start_date = pd.Timestamp(year=file_year, month=start_month, day=1)

        with rasterio.open(file_path) as src:
            # A single read() returns every band as a (band, row, col) array.
            data = src.read()

            if latitudes is None:
                # Spatial coordinates come from the first file only.
                transform = src.transform
                # The transform maps (col, row) to the pixel's upper-left
                # corner; the 0.5 offset moves each coordinate to the centre.
                latitudes = (np.arange(src.height) + 0.5) * transform[4] + transform[5]
                longitudes = (np.arange(src.width) + 0.5) * transform[0] + transform[2]
            elif data.shape[1:] != (latitudes.size, longitudes.size):
                raise ValueError(
                    f"{os.path.basename(file_path)!r} has grid size {data.shape[1:]}, "
                    f"expected {(latitudes.size, longitudes.size)} as in the first file"
                )

        # One month-end timestamp per band, starting in the file's start month.
        time_index = pd.date_range(start=start_date, periods=data.shape[0], freq="ME")
        all_data.append(data)
        all_time_indices.extend(time_index)

    # Combine all data along the time dimension
    all_data_combined = np.concatenate(all_data, axis=0)

    # Create xarray.Dataset
    dataset_ETA = xr.Dataset(
        {
            "value": (["time", "latitude", "longitude"], all_data_combined)
        },
        coords={
            "time": pd.to_datetime(all_time_indices),
            "latitude": latitudes,
            "longitude": longitudes
        }
    )
    return dataset_ETA

# Example usage: the folder path is relative, so it is resolved against the
# kernel's current working directory.
folder_path = 'ETA_Khuzestan_2000_2023/test'
file_prefix = 'aet_1km_'

dataset_ETA = build_xarray_dataset(
    folder_path=folder_path,
    file_prefix=file_prefix,
)

# Plain-text summary of the assembled dataset
print(dataset_ETA)


FileNotFoundError: [WinError 3] The system cannot find the path specified: 'ETA_Khuzestan_2000_2023/test'

In [None]:
# Show the dataset as the cell's output; `dataset_ETA` is assigned by the
# example-usage cell, which must have run successfully first.
dataset_ETA