In [1]:
# If you haven't installed these yet, run this cell once:
# !pip install xarray netCDF4 pandas numpy spei scipy

import os
import glob
import numpy as np
import pandas as pd
import xarray as xr

import spei as si           # SPEI package
import scipy.stats as sps   # for the log-logistic distribution (fisk)

# === CONFIG: update paths to your actual folders ===
RAINFALL_DIR = r"Data\Climate Data\3. Tasmania_Monthly\Rainfall_tas_monthly"
EVAPOTRANSPIRATION_DIR = r"Data\Climate Data\3. Tasmania_Monthly\ET_tas_monthly"
OUTPUT_DIR = r"Data\Climate Data\3. Tasmania_Monthly\SPEI_tas_monthly"

RAIN_VAR = "monthly_rain"    # change if different in your .nc
PET_VAR  = "et_short_crop"   # change if different in your .nc
SPEI_SCALE = 1               # SPEI-1 (1-month timescale)

os.makedirs(OUTPUT_DIR, exist_ok=True)


In [2]:
def load_monthly_series(data_dir: str, var_name: str) -> xr.DataArray:
    """
    Load all NetCDFs in a folder, combine into one DataArray (time, lat, lon),
    and sort by time.
    """
    print(f"üìÇ Loading {var_name} from: {data_dir}")
    files = sorted(glob.glob(os.path.join(data_dir, "*.nc")))
    if not files:
        raise FileNotFoundError(f"No .nc files found in {data_dir}")

    # If your files all have proper time coordinates, by_coords is safest
    ds = xr.open_mfdataset(
        files,
        combine="by_coords",
        parallel=False  # keep simple; can turn True if needed
    )

    if var_name not in ds:
        raise KeyError(f"Variable '{var_name}' not found in files under {data_dir}")

    da = ds[var_name].sortby("time")

    # Ensure we are working with an in-memory numpy array to avoid dask headaches
    da = da.load()

    print(f"‚úÖ Loaded {var_name}: shape {da.shape}, dims {da.dims}")
    return da


In [3]:
def compute_spei_grid(rain_da: xr.DataArray,
                      pet_da: xr.DataArray,
                      scale: int = 1) -> xr.DataArray:
    """
    Compute SPEI (with given timescale) for every grid cell using the `spei` package.
    - rain_da, pet_da: monthly (time, lat, lon)
    - scale: SPEI timescale in months (1 = SPEI-1)
    """
    print("üåßÔ∏è  Aligning rainfall and PET ...")
    rain_da, pet_da = xr.align(rain_da, pet_da, join="inner")

    # Ensure same dimension order
    rain_da = rain_da.transpose("time", "lat", "lon")
    pet_da  = pet_da.transpose("time", "lat", "lon")

    time_index = pd.to_datetime(rain_da["time"].values)
    ny = rain_da.sizes["lat"]
    nx = rain_da.sizes["lon"]
    nt = rain_da.sizes["time"]

    print(f"‚û°Ô∏è  Dimensions: time={nt}, lat={ny}, lon={nx}")

    # Water balance: P - PET
    surplus = (rain_da - pet_da)
    surplus_vals = surplus.values.reshape(nt, ny * nx)

    # Output array for SPEI
    spei_vals = np.full_like(surplus_vals, np.nan, dtype=float)

    n_cells = ny * nx
    print(f"üîÅ Computing SPEI for {n_cells} grid cells ...")

    for j in range(n_cells):
        ts = surplus_vals[:, j]
        # Skip cells that are completely NaN
        if np.all(np.isnan(ts)):
            continue

        series = pd.Series(ts, index=time_index)

        # Drop leading/trailing NaNs to help fitting
        series = series.dropna()
        if len(series) < 30:   # arbitrary safety check, adjust if needed
            continue

        try:
            # SPEI package: 1D time series ‚Üí SPEI time series
            # fisk = log-logistic distribution (standard choice for SPEI)
            spei_series = si.spei(
                series=series,
                dist=sps.fisk,
                timescale=scale
            )

            # Reindex back to full time_index
            spei_full = spei_series.reindex(time_index)
            spei_vals[:, j] = spei_full.values

        except Exception as e:
            # If it fails for this cell, just leave NaN
            # print(f"Cell {j} failed: {e}")
            continue

        # Progress every ~5%
        if (j + 1) % max(1, (n_cells // 20)) == 0 or j == n_cells - 1:
            print(f"  {((j + 1) / n_cells) * 100:5.1f}% done")

    # Reshape back to (time, lat, lon)
    spei_3d = spei_vals.reshape(nt, ny, nx)

    spei_da = xr.DataArray(
        spei_3d,
        coords={
            "time": time_index,
            "lat": rain_da["lat"],
            "lon": rain_da["lon"],
        },
        dims=("time", "lat", "lon"),
        name=f"spei_{scale}",
        attrs={
            "long_name": f"Standardized Precipitation Evapotranspiration Index (SPEI-{scale})",
            "units": "dimensionless",
            "timescale_months": scale,
            "method": "SPEI package (fisk/log-logistic distribution), surplus = P - PET"
        }
    )

    print("‚úÖ SPEI grid computation finished.")
    return spei_da


In [4]:
# === RUN PIPELINE ===

try:
    # 1. Load rainfall and PET
    rainfall_da = load_monthly_series(RAINFALL_DIR, RAIN_VAR)
    pet_da      = load_monthly_series(EVAPOTRANSPIRATION_DIR, PET_VAR)

    # 2. Compute SPEI-1 over the grid
    spei_da = compute_spei_grid(rainfall_da, pet_da, scale=SPEI_SCALE)

    # 3. Save yearly NetCDF files
    print(f"üíæ Saving yearly SPEI-{SPEI_SCALE} files to: {OUTPUT_DIR}")

    for year, da_year in spei_da.groupby("time.year"):
        year = int(year)
        out_path = os.path.join(OUTPUT_DIR, f"spei_{SPEI_SCALE}_{year}.nc")
        print(f"  ‚ûú {out_path}")
        da_year.to_netcdf(out_path)

    print("üéâ Done! SPEI files written for all years.")

except Exception as e:
    import traceback
    print("‚ùå Something went wrong:")
    print(e)
    traceback.print_exc()


üìÇ Loading monthly_rain from: Data\Climate Data\3. Tasmania_Monthly\Rainfall_tas_monthly
‚úÖ Loaded monthly_rain: shape (396, 84, 92), dims ('time', 'lat', 'lon')
üìÇ Loading et_short_crop from: Data\Climate Data\3. Tasmania_Monthly\ET_tas_monthly
‚úÖ Loaded et_short_crop: shape (396, 84, 92), dims ('time', 'lat', 'lon')
üåßÔ∏è  Aligning rainfall and PET ...
‚û°Ô∏è  Dimensions: time=396, lat=84, lon=92
üîÅ Computing SPEI for 7728 grid cells ...
   15.0% done
   20.0% done
   35.0% done
   40.0% done
   45.0% done
   49.9% done
   59.9% done
‚úÖ SPEI grid computation finished.
üíæ Saving yearly SPEI-1 files to: Data\Climate Data\3. Tasmania_Monthly\SPEI_tas_monthly
  ‚ûú Data\Climate Data\3. Tasmania_Monthly\SPEI_tas_monthly\spei_1_1991.nc
  ‚ûú Data\Climate Data\3. Tasmania_Monthly\SPEI_tas_monthly\spei_1_1992.nc
  ‚ûú Data\Climate Data\3. Tasmania_Monthly\SPEI_tas_monthly\spei_1_1993.nc
  ‚ûú Data\Climate Data\3. Tasmania_Monthly\SPEI_tas_monthly\spei_1_1994.nc
  ‚ûú Data\Climat