In [None]:
# For the CDI calculation
# Water balance
import xarray as xr
import numpy as np

# Load the precipitation data
precipitation_ds = xr.open_dataset("/PR_Monthly_2000_23.nc")
precipitation = precipitation_ds['precipitation']

# Load the evapotranspiration data
evapotranspiration_ds = xr.open_dataset("2000_2023_monthly_E.nc")
evapotranspiration = evapotranspiration_ds['E']

# Give both series the same month-start time axis beginning in January 2000.
# The start date must itself be a month start: with freq='MS' a start of
# '2000-01-31' is rolled forward to 2000-02-01, which labels every month one
# month late and makes the 2000-2022 selection below drop the last real month.
# The length is taken from the 'time' dimension by name rather than assuming
# that time is the first axis.
precipitation = precipitation.assign_coords(
    time=xr.cftime_range(start='2000-01-01', periods=precipitation.sizes['time'], freq='MS')
)
evapotranspiration = evapotranspiration.assign_coords(
    time=xr.cftime_range(start='2000-01-01', periods=evapotranspiration.sizes['time'], freq='MS')
)

# Select the common time period (2000-2022)
precipitation = precipitation.sel(time=slice('2000-01-01', '2022-12-31'))
evapotranspiration = evapotranspiration.sel(time=slice('2000-01-01', '2022-12-31'))

# Put precipitation on the evapotranspiration grid
# (precipitation: lat=721, lon=1441; evapotranspiration: lat=720, lon=1440).
# Missing values are deliberately NOT replaced by 0: a gap is not "zero
# precipitation" / "zero evapotranspiration", and filling it would fabricate
# water-balance values (e.g. over masked cells) and hide them from the
# all-NaN check of the index calculation. NaN therefore propagates.
precipitation = precipitation.interp(lat=evapotranspiration.lat, lon=evapotranspiration.lon, method='linear')

# Calculate the water balance (precipitation - evapotranspiration).
# NOTE(review): assumes both inputs use the same units (e.g. mm/month) - confirm.
# The result is put in (time, lat, lon) order explicitly so that the dimension
# labels used below always match the data layout.
water_balance = (precipitation - evapotranspiration).transpose('time', 'lat', 'lon')

# Create a new dataset for water balance
water_balance_ds = xr.Dataset(
    {
        'water_balance': (['time', 'lat', 'lon'], water_balance.data)
    },
    coords={
        'time': water_balance['time'].data,
        'lat': water_balance['lat'].data,
        'lon': water_balance['lon'].data
    }
)

# Save to a new NetCDF file
output_path = "/Water_Balance_2000_2022.nc"
water_balance_ds.to_netcdf(output_path)
print(f"Water balance data saved to {output_path}")


In [None]:
# mRAI and mWBAI (the calculation is the same for both).
# See the inline comments for the changes needed to calculate the mWBAI.

import pandas as pd
import numpy as np
import xarray as xr

# Parameters
time_scale = 1                   # for mWBAI change to 3 (three-month scale); in our case, we use a three-month scale for CDI
xfactor = 1.7                    # We use a fixed scale factor as per the original methodology, see the method section


def modified_anomaly_index(series, time_scale, xfactor):
    """Compute the modified anomaly index (mRAI / mWBAI) for monthly series.

    Defined in this cell so that the cell can be run on its own.

    Steps, applied independently to every grid point:
      1. accumulate the series over ``time_scale`` months (trailing window);
      2. for each calendar month, take the median over all years;
      3. for each calendar month, average the lowest and the highest 10 % of
         years (at least one year each);
      4. scale the departure from the median by the distance between the
         median and the low mean (negative departures) or the high mean
         (other departures), multiplied by ``-xfactor`` / ``xfactor``.

    Parameters
    ----------
    series : array_like
        Monthly values with time on axis 0; any remaining axes are treated as
        independent grid points. The first sample is taken to be the first
        calendar month of a year.
    time_scale : int
        Number of months to accumulate (>= 1).
    xfactor : float
        Scale factor of the index.

    Returns
    -------
    numpy.ndarray
        Index values with shape ``(n_full_years * 12,) + series.shape[1:]``.
        Months beyond the last complete year are dropped. Entries are NaN
        where the input is missing or where the denominator is zero.

    Raises
    ------
    ValueError
        If ``time_scale`` is smaller than 1.
    """
    if time_scale < 1:
        raise ValueError("time_scale must be a positive number of months")

    values = np.asarray(series, dtype=float)
    n_months = values.shape[0]
    point_shape = values.shape[1:]
    n_years = n_months // 12
    if n_years == 0:
        # Less than one complete year: nothing to compute.
        return np.full((0,) + point_shape, np.nan)
    flat = values.reshape(n_months, -1)

    # Step 1: accumulated totals over the trailing `time_scale` months.
    # NOTE(review): the series is padded with zeros at the start, so for
    # time_scale > 1 the first (time_scale - 1) totals are partial sums.
    # This is kept unchanged from the original implementation.
    padded = np.concatenate((np.zeros((time_scale - 1, flat.shape[1])), flat), axis=0)
    accumulated = np.zeros_like(flat)
    for shift in range(time_scale):
        accumulated += padded[shift:shift + n_months]

    # Arrange as [year, calendar month, point]; incomplete trailing year dropped.
    by_year = accumulated[:n_years * 12].reshape(n_years, 12, -1)

    # Step 2: median per calendar month.
    monthly_median = np.median(by_year, axis=0)

    # Step 3: mean of the lowest / highest 10 % of years per calendar month.
    # At least one year is used: with a count of 0 the slice [-0:] would
    # silently select *all* years and [:0] would be empty.
    ranked = np.sort(by_year, axis=0)
    n_extreme = max(1, int(n_years * 0.1))
    mean_low = ranked[:n_extreme].mean(axis=0)
    mean_high = ranked[-n_extreme:].mean(axis=0)

    # Step 4: scaled departure from the median.
    departure = by_year - monthly_median
    is_deficit = departure < 0
    denominator = np.where(is_deficit, mean_low - monthly_median, mean_high - monthly_median)
    sign = np.where(is_deficit, -xfactor, xfactor)

    # Avoid division by zero: those entries stay NaN.
    ratio = np.full(by_year.shape, np.nan)
    np.divide(departure, denominator, out=ratio, where=denominator != 0)

    index = sign * ratio
    return index.reshape((n_years * 12,) + point_shape)


# Read the NetCDF data
data = xr.open_dataset('/PR_Monthly_2000_23.nc') # monthly precipitation data (data sources are available in the dataset section)
# For mWBAI, read the climatic water balance (precipitation minus evapotranspiration) instead.
# The dimension order is made explicit because the code below indexes by position.
water_balance = data['precipitation'].transpose('time', 'lat', 'lon')

num_of_stations = water_balance.shape[1] * water_balance.shape[2]  # lat * lon
total_months = water_balance.shape[0]
total_years = total_months // 12

# Only complete years are used
adjusted_total_months = total_years * 12

# Prepare output array
mRAI_output = np.full((adjusted_total_months, water_balance.shape[1], water_balance.shape[2]), np.nan)

# Process one latitude row at a time: every longitude of the row is computed
# in a single vectorised call, which keeps memory bounded while avoiding a
# Python-level loop (and a separate read) for each individual grid point.
for lat in range(water_balance.shape[1]):
    row_data = water_balance[:, lat, :].values
    if np.all(np.isnan(row_data)):
        continue  # Skip if all data is NaN
    mRAI_output[:, lat, :] = modified_anomaly_index(row_data, time_scale, xfactor)

# Save the output to a new NetCDF file
mRAI_da = xr.DataArray(mRAI_output, coords=[data.time[:adjusted_total_months], data.lat, data.lon], dims=["time", "lat", "lon"], name="mRAI")
# for mWBAI, rename to mWBAI as per the index name
mRAI_ds = xr.Dataset({"mRAI": mRAI_da})
mRAI_ds.to_netcdf('/1mRAI_P_2000_2022.nc')


In [None]:
# mWBAI (example based on the code above)
import pandas as pd
import numpy as np
import xarray as xr

# Parameters
time_scale = 3   # three-month accumulation
xfactor = 1.7    # fixed scale factor of the index


def modified_anomaly_index(series, time_scale, xfactor):
    """Compute the modified anomaly index (mRAI / mWBAI) for monthly series.

    Defined in this cell so that the cell can be run on its own.

    Steps, applied independently to every grid point:
      1. accumulate the series over ``time_scale`` months (trailing window);
      2. for each calendar month, take the median over all years;
      3. for each calendar month, average the lowest and the highest 10 % of
         years (at least one year each);
      4. scale the departure from the median by the distance between the
         median and the low mean (negative departures) or the high mean
         (other departures), multiplied by ``-xfactor`` / ``xfactor``.

    Parameters
    ----------
    series : array_like
        Monthly values with time on axis 0; any remaining axes are treated as
        independent grid points. The first sample is taken to be the first
        calendar month of a year.
    time_scale : int
        Number of months to accumulate (>= 1).
    xfactor : float
        Scale factor of the index.

    Returns
    -------
    numpy.ndarray
        Index values with shape ``(n_full_years * 12,) + series.shape[1:]``.
        Months beyond the last complete year are dropped. Entries are NaN
        where the input is missing or where the denominator is zero.

    Raises
    ------
    ValueError
        If ``time_scale`` is smaller than 1.
    """
    if time_scale < 1:
        raise ValueError("time_scale must be a positive number of months")

    values = np.asarray(series, dtype=float)
    n_months = values.shape[0]
    point_shape = values.shape[1:]
    n_years = n_months // 12
    if n_years == 0:
        # Less than one complete year: nothing to compute.
        return np.full((0,) + point_shape, np.nan)
    flat = values.reshape(n_months, -1)

    # Step 1: accumulated totals over the trailing `time_scale` months.
    # NOTE(review): the series is padded with zeros at the start, so for
    # time_scale > 1 the first (time_scale - 1) totals are partial sums.
    # This is kept unchanged from the original implementation.
    padded = np.concatenate((np.zeros((time_scale - 1, flat.shape[1])), flat), axis=0)
    accumulated = np.zeros_like(flat)
    for shift in range(time_scale):
        accumulated += padded[shift:shift + n_months]

    # Arrange as [year, calendar month, point]; incomplete trailing year dropped.
    by_year = accumulated[:n_years * 12].reshape(n_years, 12, -1)

    # Step 2: median per calendar month.
    monthly_median = np.median(by_year, axis=0)

    # Step 3: mean of the lowest / highest 10 % of years per calendar month.
    # At least one year is used: with a count of 0 the slice [-0:] would
    # silently select *all* years and [:0] would be empty.
    ranked = np.sort(by_year, axis=0)
    n_extreme = max(1, int(n_years * 0.1))
    mean_low = ranked[:n_extreme].mean(axis=0)
    mean_high = ranked[-n_extreme:].mean(axis=0)

    # Step 4: scaled departure from the median.
    departure = by_year - monthly_median
    is_deficit = departure < 0
    denominator = np.where(is_deficit, mean_low - monthly_median, mean_high - monthly_median)
    sign = np.where(is_deficit, -xfactor, xfactor)

    # Avoid division by zero: those entries stay NaN.
    ratio = np.full(by_year.shape, np.nan)
    np.divide(departure, denominator, out=ratio, where=denominator != 0)

    index = sign * ratio
    return index.reshape((n_years * 12,) + point_shape)


# Read the NetCDF data.
# The previous path ('/.nc') was a placeholder; this is the water-balance file
# (variable 'water_balance') written by the water-balance cell of this notebook.
# Adjust the path if the file is stored elsewhere.
data = xr.open_dataset('/Water_Balance_2000_2022.nc')
# The dimension order is made explicit because the code below indexes by position.
water_balance = data['water_balance'].transpose('time', 'lat', 'lon')

num_of_stations = water_balance.shape[1] * water_balance.shape[2]  # lat * lon
total_months = water_balance.shape[0]
total_years = total_months // 12

# Only complete years are used
adjusted_total_months = total_years * 12

# Prepare output array
mWBAI_output = np.full((adjusted_total_months, water_balance.shape[1], water_balance.shape[2]), np.nan)

# Process one latitude row at a time: every longitude of the row is computed
# in a single vectorised call, which keeps memory bounded while avoiding a
# Python-level loop (and a separate read) for each individual grid point.
# Zero denominators are handled inside the helper, so no blanket per-pixel
# exception handler is needed; genuine errors are allowed to surface.
for lat in range(water_balance.shape[1]):
    row_data = water_balance[:, lat, :].values
    if np.all(np.isnan(row_data)):
        continue  # Skip if all data is NaN
    mWBAI_output[:, lat, :] = modified_anomaly_index(row_data, time_scale, xfactor)

# Save the output to a new NetCDF file
try:
    mWBAI_da = xr.DataArray(
        mWBAI_output,
        coords=[data.time[:adjusted_total_months], data.lat, data.lon],
        dims=["time", "lat", "lon"],
        name="mWBAI"
    )
    mWBAI_ds = xr.Dataset({"mWBAI": mWBAI_da})
    mWBAI_ds.to_netcdf('/mWBAI.nc')
    print("NetCDF file saved successfully.")
except Exception as e:
    print(f"Error saving NetCDF file: {e}")
    # Re-raise so a failed save is not mistaken for a finished run.
    raise
