In [1]:
import xarray as xr

ds = xr.open_dataset(r"Data\Climate Data\4. Merged_Tas_Climate\Tas_Combined_Data.nc")
print(ds)


<xarray.Dataset> Size: 147MB
Dimensions:       (time: 396, lat: 84, lon: 92)
Coordinates:
  * time          (time) datetime64[ns] 3kB 1991-01-01 1991-02-01 ... 2023-12-01
  * lat           (lat) float64 672B -43.6 -43.55 -43.5 ... -39.55 -39.5 -39.45
  * lon           (lon) float64 736B 143.8 143.9 143.9 ... 148.3 148.3 148.4
Data variables:
    spatial_ref   (time) float64 3kB ...
    monthly_rain  (time, lat, lon) float64 24MB ...
    max_temp      (time, lat, lon) float64 24MB ...
    min_temp      (time, lat, lon) float64 24MB ...
    radiation     (time, lat, lon) float64 24MB ...
    spi_1         (time, lat, lon) float64 24MB ...
    spei_1        (time, lat, lon) float64 24MB ...
Attributes:
    department:               Department of Environment and Science
    department_short:         DES
    copyright:                Copyright - the State of Queensland Department ...
    site_url:                 http://www.longpaddock.qld.gov.au
    institution:              Queensland Gov

In [2]:
import xarray as xr
import numpy as np
import pandas as pd

ds = xr.open_dataset(r"Data\Climate Data\4. Merged_Tas_Climate\Tas_Combined_Data.nc")
print(ds.dims)
print(ds.data_vars)

# Create drought flag from spei_1
ds["drought"] = (ds["spei_1"] < -1.0).astype(np.int8)
ds["no_drought"] = (ds["spei_1"] >= -1.0).astype(np.int8)

# Stack to node dimension
ds_node = ds.stack(node=("lat", "lon")).dropna(dim="node", how="all")
print(ds_node)

# Check valid SPEI per node
spei_mat = ds_node["spei_1"].values
valid_counts = np.sum(np.isfinite(spei_mat), axis=0)
print("Min valid:", valid_counts.min())
print("Median valid:", np.median(valid_counts))
print("Max valid:", valid_counts.max())


Data variables:
    spatial_ref   (time) float64 3kB ...
    monthly_rain  (time, lat, lon) float64 24MB ...
    max_temp      (time, lat, lon) float64 24MB ...
    min_temp      (time, lat, lon) float64 24MB ...
    radiation     (time, lat, lon) float64 24MB ...
    spi_1         (time, lat, lon) float64 24MB ...
    spei_1        (time, lat, lon) float64 24MB ...
<xarray.Dataset> Size: 153MB
Dimensions:       (time: 396, node: 7728)
Coordinates:
  * time          (time) datetime64[ns] 3kB 1991-01-01 1991-02-01 ... 2023-12-01
  * node          (node) object 62kB MultiIndex
  * lat           (node) float64 62kB -43.6 -43.6 -43.6 ... -39.45 -39.45 -39.45
  * lon           (node) float64 62kB 143.8 143.9 143.9 ... 148.3 148.3 148.4
Data variables:
    spatial_ref   (time) float64 3kB ...
    monthly_rain  (time, node) float64 24MB nan nan nan nan ... nan nan nan nan
    max_temp      (time, node) float64 24MB nan nan nan nan ... nan nan nan nan
    min_temp      (time, node) float64 24M

In [3]:
ds.to_netcdf(r"Data\Climate Data\4. Merged_Tas_Climate\tas_monthly_climate_with_drought_flags.nc")

In [13]:
spei_mat = ds_node["spei_1"].values  # shape: (T, N)
T, N = spei_mat.shape
valid_counts = np.sum(np.isfinite(spei_mat), axis=0)

print("Time steps T:", T, "  Nodes N:", N)
print("Min valid per node:", valid_counts.min())
print("Median valid per node:", np.median(valid_counts))
print("Max valid per node:", valid_counts.max())


Time steps T: 396   Nodes N: 7728
Min valid per node: 0
Median valid per node: 0.0
Max valid per node: 396


In [4]:
print(ds)

<xarray.Dataset> Size: 153MB
Dimensions:       (time: 396, lat: 84, lon: 92)
Coordinates:
  * time          (time) datetime64[ns] 3kB 1991-01-01 1991-02-01 ... 2023-12-01
  * lat           (lat) float64 672B -43.6 -43.55 -43.5 ... -39.55 -39.5 -39.45
  * lon           (lon) float64 736B 143.8 143.9 143.9 ... 148.3 148.3 148.4
Data variables:
    spatial_ref   (time) float64 3kB ...
    monthly_rain  (time, lat, lon) float64 24MB nan nan nan nan ... nan nan nan
    max_temp      (time, lat, lon) float64 24MB nan nan nan nan ... nan nan nan
    min_temp      (time, lat, lon) float64 24MB nan nan nan nan ... nan nan nan
    radiation     (time, lat, lon) float64 24MB nan nan nan nan ... nan nan nan
    spi_1         (time, lat, lon) float64 24MB nan nan nan nan ... nan nan nan
    spei_1        (time, lat, lon) float64 24MB nan nan nan nan ... nan nan nan
    drought       (time, lat, lon) int8 3MB 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0
    no_drought    (time, lat, lon) int8 3MB 0 0 0 0 0 0