In [14]:
import xarray as xr
import os

# Sanity check for one year: open each variable's monthly file, show the
# dataset summaries, then report how many cells of each variable are NaN.
year = "1991"

# One folder per climate variable under the monthly Tasmania directory.
rainfall_folder = r"Data\Climate Data\3. Tasmania_Monthly\Rainfall_tas_monthly"
maxtemp_folder  = r"Data\Climate Data\3. Tasmania_Monthly\Temp_max_tas_monthly"
mintemp_folder  = r"Data\Climate Data\3. Tasmania_Monthly\Temp_min_tas_monthly"
radiation_folder= r"Data\Climate Data\3. Tasmania_Monthly\Radiation_tas_monthly"
spi_folder      = r"Data\Climate Data\3. Tasmania_Monthly\SPI_tas_monthly"
spei_folder     = r"Data\Climate Data\3. Tasmania_Monthly\SPEI_tas_monthly"

# Each variable uses its own file-name pattern keyed on the year.
rain_path = os.path.join(rainfall_folder, f"{year}.monthly_rain_cropped_monthly.nc")
maxt_path = os.path.join(maxtemp_folder, f"{year}.max_temp_cropped_monthly.nc")
mint_path = os.path.join(mintemp_folder, f"{year}.min_temp_cropped_monthly.nc")
rad_path = os.path.join(radiation_folder, f"{year}.radiation_cropped_monthly.nc")
spi_path = os.path.join(spi_folder, f"{year}_spi.nc")
spei_path = os.path.join(spei_folder, f"spei_1_{year}.nc")

ds_rain = xr.open_dataset(rain_path)
ds_maxt = xr.open_dataset(maxt_path)
ds_mint = xr.open_dataset(mint_path)
ds_rad = xr.open_dataset(rad_path)
ds_spi = xr.open_dataset(spi_path)
ds_spei = xr.open_dataset(spei_path)

# Dataset summaries, in the same order the files were opened.
for label, dataset in (
    ("RAIN:", ds_rain),
    ("MAXT:", ds_maxt),
    ("MINT:", ds_mint),
    ("RAD :", ds_rad),
    ("SPI :", ds_spi),
    ("SPEI:", ds_spei),
):
    print(label, dataset)

# Short label -> (dataset, name of the data variable to inspect).
nan_checks = {
    "rain": (ds_rain, "monthly_rain"),
    "maxt": (ds_maxt, "max_temp"),
    "mint": (ds_mint, "min_temp"),
    "rad": (ds_rad, "radiation"),
    "spi": (ds_spi, "spi_1"),
    "spei": (ds_spei, "spei_1"),
}
for name, (ds_var, vname) in nan_checks.items():
    arr = ds_var[vname]
    n_missing = int(arr.isnull().sum())
    print(f"{name}: shape={arr.shape}, NaN={n_missing}, total={arr.size}")


RAIN: <xarray.Dataset> Size: 743kB
Dimensions:       (time: 12, lat: 84, lon: 92)
Coordinates:
  * time          (time) datetime64[ns] 96B 1991-01-01 1991-02-01 ... 1991-12-01
  * lat           (lat) float64 672B -43.6 -43.55 -43.5 ... -39.55 -39.5 -39.45
  * lon           (lon) float64 736B 143.8 143.9 143.9 ... 148.3 148.3 148.4
Data variables:
    spatial_ref   (time) float64 96B ...
    monthly_rain  (time, lat, lon) float64 742kB ...
Attributes:
    department:               Department of Environment and Science
    department_short:         DES
    copyright:                Copyright - the State of Queensland Department ...
    site_url:                 http://www.longpaddock.qld.gov.au
    institution:              Queensland Government, Department of Environmen...
    raster_source:            Gridded surface was created by interpolating ob...
    raster_source_additions:  and other suppliers (see the SILO webpage for d...
    metadata_url:             http://qldspatial.informa

In [15]:
import os
import re
import xarray as xr

# === Paths ===
rainfall_folder   = r"Data\Climate Data\3. Tasmania_Monthly\Rainfall_tas_monthly"
maxtemp_folder    = r"Data\Climate Data\3. Tasmania_Monthly\Temp_max_tas_monthly"
mintemp_folder    = r"Data\Climate Data\3. Tasmania_Monthly\Temp_min_tas_monthly"
radiation_folder  = r"Data\Climate Data\3. Tasmania_Monthly\Radiation_tas_monthly"
spi_folder        = r"Data\Climate Data\3. Tasmania_Monthly\SPI_tas_monthly"
spei_folder       = r"Data\Climate Data\3. Tasmania_Monthly\SPEI_tas_monthly"

output_file = r"Data\Climate Data\4. Merged_Tas_Climate\Tas_Combined_Data.nc"

# Data variables every merged year must contain; shared by both NaN reports.
climate_vars = ["monthly_rain", "max_temp", "min_temp", "radiation", "spi_1", "spei_1"]

# === Get all years from rainfall folder ===
# Match the exact rainfall file name that is opened below, so unrelated .nc
# files in the folder cannot contribute a bogus or duplicated year. The set
# guarantees each year is processed (and concatenated) only once.
rain_file_pattern = re.compile(r"^(\d{4})\.monthly_rain_cropped_monthly\.nc$")
years = sorted({
    m.group(1)
    for m in (rain_file_pattern.match(f) for f in os.listdir(rainfall_folder))
    if m
})

print("Years found:", years)

yearly_datasets = []

for year in years:
    print(f"\n📂 Processing year: {year}")

    rain_path = os.path.join(rainfall_folder, f"{year}.monthly_rain_cropped_monthly.nc")
    maxt_path = os.path.join(maxtemp_folder, f"{year}.max_temp_cropped_monthly.nc")
    mint_path = os.path.join(mintemp_folder, f"{year}.min_temp_cropped_monthly.nc")
    rad_path = os.path.join(radiation_folder, f"{year}.radiation_cropped_monthly.nc")
    spi_path = os.path.join(spi_folder, f"{year}_spi.nc")
    spei_path = os.path.join(spei_folder, f"spei_1_{year}.nc")

    # Open the six source files as context managers so their handles are
    # released every iteration instead of accumulating across all years.
    with xr.open_dataset(rain_path) as ds_rain, \
         xr.open_dataset(maxt_path) as ds_maxt, \
         xr.open_dataset(mint_path) as ds_mint, \
         xr.open_dataset(rad_path) as ds_rad, \
         xr.open_dataset(spi_path) as ds_spi, \
         xr.open_dataset(spei_path) as ds_spei:
        # Merge the FULL datasets on their shared coordinates.
        # - join="inner" keeps only coordinate values present in all six.
        # - compat is pinned to the behaviour this notebook was run with, so
        #   the result does not silently change if xarray's default changes.
        # - .load() pulls the values into memory before the files are closed.
        ds_year = xr.merge(
            [ds_rain, ds_maxt, ds_mint, ds_rad, ds_spi, ds_spei],
            join="inner",
            compat="no_conflicts",
        ).load()

    # Quick NaN check per year
    for v in climate_vars:
        arr = ds_year[v]
        print(
            f"{year} {v}: NaN={int(arr.isnull().sum())}, total={arr.size}"
        )

    yearly_datasets.append(ds_year)

# Fail with a clear message instead of an opaque concat error on an empty list.
if not yearly_datasets:
    raise FileNotFoundError(
        f"No yearly rainfall files found in {rainfall_folder!r}"
    )

# === Concatenate all years by time ===
ds_combined = xr.concat(yearly_datasets, dim="time")
print("\n✅ Combined dims:", ds_combined.dims)

# Global NaN check across all years
for v in climate_vars:
    arr = ds_combined[v]
    print(
        f"ALL YEARS {v}: NaN={int(arr.isnull().sum())}, total={arr.size}"
    )

# Make sure the target folder exists; to_netcdf does not create it.
output_dir = os.path.dirname(output_file)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

ds_combined.to_netcdf(output_file)
print(f"\n✅ Saved combined dataset: {output_file}")


Years found: ['1991', '1992', '1993', '1994', '1995', '1996', '1997', '1998', '1999', '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022', '2023']

ðŸ“‚ Processing year: 1991
1991 monthly_rain: NaN=57324, total=92736
1991 max_temp: NaN=57324, total=92736
1991 min_temp: NaN=57324, total=92736
1991 radiation: NaN=57324, total=92736
1991 spi_1: NaN=57324, total=92736
1991 spei_1: NaN=57324, total=92736

ðŸ“‚ Processing year: 1992
1992 monthly_rain: NaN=57324, total=92736
1992 max_temp: NaN=57324, total=92736
1992 min_temp: NaN=57324, total=92736
1992 radiation: NaN=57324, total=92736
1992 spi_1: NaN=57324, total=92736
1992 spei_1: NaN=57324, total=92736

ðŸ“‚ Processing year: 1993
1993 monthly_rain: NaN=57324, total=92736
1993 max_temp: NaN=57324, total=92736
1993 min_temp: NaN=57324, total=92736
1993 radiation: NaN=57324, total=92736
1993 spi_1: NaN=57324, to

  ds_year = xr.merge(
  ds_year = xr.merge(
  ds_year = xr.merge(
  ds_year = xr.merge(
  ds_year = xr.merge(
  ds_year = xr.merge(
  ds_year = xr.merge(
  ds_year = xr.merge(
  ds_year = xr.merge(
  ds_year = xr.merge(


1996 monthly_rain: NaN=57324, total=92736
1996 max_temp: NaN=57324, total=92736
1996 min_temp: NaN=57324, total=92736
1996 radiation: NaN=57324, total=92736
1996 spi_1: NaN=57324, total=92736
1996 spei_1: NaN=57324, total=92736

ðŸ“‚ Processing year: 1997
1997 monthly_rain: NaN=57324, total=92736
1997 max_temp: NaN=57324, total=92736
1997 min_temp: NaN=57324, total=92736
1997 radiation: NaN=57324, total=92736
1997 spi_1: NaN=57324, total=92736
1997 spei_1: NaN=57324, total=92736

ðŸ“‚ Processing year: 1998
1998 monthly_rain: NaN=57324, total=92736
1998 max_temp: NaN=57324, total=92736
1998 min_temp: NaN=57324, total=92736
1998 radiation: NaN=57324, total=92736
1998 spi_1: NaN=57324, total=92736
1998 spei_1: NaN=57324, total=92736

ðŸ“‚ Processing year: 1999
1999 monthly_rain: NaN=57324, total=92736
1999 max_temp: NaN=57324, total=92736
1999 min_temp: NaN=57324, total=92736
1999 radiation: NaN=57324, total=92736
1999 spi_1: NaN=57324, total=92736
1999 spei_1: NaN=57324, total=92736

ðŸ“

  ds_year = xr.merge(
  ds_year = xr.merge(
  ds_year = xr.merge(
  ds_year = xr.merge(


2001 monthly_rain: NaN=57324, total=92736
2001 max_temp: NaN=57324, total=92736
2001 min_temp: NaN=57324, total=92736
2001 radiation: NaN=57324, total=92736
2001 spi_1: NaN=57324, total=92736
2001 spei_1: NaN=57324, total=92736

ðŸ“‚ Processing year: 2002
2002 monthly_rain: NaN=57324, total=92736
2002 max_temp: NaN=57324, total=92736
2002 min_temp: NaN=57324, total=92736
2002 radiation: NaN=57324, total=92736
2002 spi_1: NaN=57324, total=92736
2002 spei_1: NaN=57324, total=92736

ðŸ“‚ Processing year: 2003
2003 monthly_rain: NaN=57324, total=92736
2003 max_temp: NaN=57324, total=92736
2003 min_temp: NaN=57324, total=92736
2003 radiation: NaN=57324, total=92736
2003 spi_1: NaN=57324, total=92736
2003 spei_1: NaN=57324, total=92736

ðŸ“‚ Processing year: 2004
2004 monthly_rain: NaN=57324, total=92736
2004 max_temp: NaN=57324, total=92736
2004 min_temp: NaN=57324, total=92736
2004 radiation: NaN=57324, total=92736
2004 spi_1: NaN=57324, total=92736
2004 spei_1: NaN=57324, total=92736

ðŸ“

  ds_year = xr.merge(
  ds_year = xr.merge(
  ds_year = xr.merge(
  ds_year = xr.merge(
  ds_year = xr.merge(
  ds_year = xr.merge(


2005 monthly_rain: NaN=57324, total=92736
2005 max_temp: NaN=57324, total=92736
2005 min_temp: NaN=57324, total=92736
2005 radiation: NaN=57324, total=92736
2005 spi_1: NaN=57324, total=92736
2005 spei_1: NaN=57324, total=92736

ðŸ“‚ Processing year: 2006
2006 monthly_rain: NaN=57324, total=92736
2006 max_temp: NaN=57324, total=92736
2006 min_temp: NaN=57324, total=92736
2006 radiation: NaN=57324, total=92736
2006 spi_1: NaN=57324, total=92736
2006 spei_1: NaN=57324, total=92736

ðŸ“‚ Processing year: 2007
2007 monthly_rain: NaN=57324, total=92736
2007 max_temp: NaN=57324, total=92736
2007 min_temp: NaN=57324, total=92736
2007 radiation: NaN=57324, total=92736
2007 spi_1: NaN=57324, total=92736
2007 spei_1: NaN=57324, total=92736

ðŸ“‚ Processing year: 2008
2008 monthly_rain: NaN=57324, total=92736
2008 max_temp: NaN=57324, total=92736
2008 min_temp: NaN=57324, total=92736
2008 radiation: NaN=57324, total=92736
2008 spi_1: NaN=57324, total=92736
2008 spei_1: NaN=57324, total=92736

ðŸ“

  ds_year = xr.merge(
  ds_year = xr.merge(
  ds_year = xr.merge(
  ds_year = xr.merge(
  ds_year = xr.merge(


2011 monthly_rain: NaN=57324, total=92736
2011 max_temp: NaN=57324, total=92736
2011 min_temp: NaN=57324, total=92736
2011 radiation: NaN=57324, total=92736
2011 spi_1: NaN=57324, total=92736
2011 spei_1: NaN=57324, total=92736

ðŸ“‚ Processing year: 2012
2012 monthly_rain: NaN=57324, total=92736
2012 max_temp: NaN=57324, total=92736
2012 min_temp: NaN=57324, total=92736
2012 radiation: NaN=57324, total=92736
2012 spi_1: NaN=57324, total=92736
2012 spei_1: NaN=57324, total=92736

ðŸ“‚ Processing year: 2013
2013 monthly_rain: NaN=57324, total=92736
2013 max_temp: NaN=57324, total=92736
2013 min_temp: NaN=57324, total=92736
2013 radiation: NaN=57324, total=92736
2013 spi_1: NaN=57324, total=92736
2013 spei_1: NaN=57324, total=92736

ðŸ“‚ Processing year: 2014
2014 monthly_rain: NaN=57324, total=92736
2014 max_temp: NaN=57324, total=92736
2014 min_temp: NaN=57324, total=92736
2014 radiation: NaN=57324, total=92736
2014 spi_1: NaN=57324, total=92736
2014 spei_1: NaN=57324, total=92736

ðŸ“

  ds_year = xr.merge(
  ds_year = xr.merge(
  ds_year = xr.merge(
  ds_year = xr.merge(
  ds_year = xr.merge(


2016 monthly_rain: NaN=57324, total=92736
2016 max_temp: NaN=57324, total=92736
2016 min_temp: NaN=57324, total=92736
2016 radiation: NaN=57324, total=92736
2016 spi_1: NaN=57324, total=92736
2016 spei_1: NaN=57324, total=92736

ðŸ“‚ Processing year: 2017
2017 monthly_rain: NaN=57324, total=92736
2017 max_temp: NaN=57324, total=92736
2017 min_temp: NaN=57324, total=92736
2017 radiation: NaN=57324, total=92736
2017 spi_1: NaN=57324, total=92736
2017 spei_1: NaN=57324, total=92736

ðŸ“‚ Processing year: 2018
2018 monthly_rain: NaN=57324, total=92736
2018 max_temp: NaN=57324, total=92736
2018 min_temp: NaN=57324, total=92736
2018 radiation: NaN=57324, total=92736
2018 spi_1: NaN=57324, total=92736
2018 spei_1: NaN=57324, total=92736

ðŸ“‚ Processing year: 2019
2019 monthly_rain: NaN=57324, total=92736
2019 max_temp: NaN=57324, total=92736
2019 min_temp: NaN=57324, total=92736
2019 radiation: NaN=57324, total=92736
2019 spi_1: NaN=57324, total=92736
2019 spei_1: NaN=57324, total=92736

ðŸ“

  ds_year = xr.merge(
  ds_year = xr.merge(
  ds_year = xr.merge(


2021 monthly_rain: NaN=57324, total=92736
2021 max_temp: NaN=57324, total=92736
2021 min_temp: NaN=57324, total=92736
2021 radiation: NaN=57324, total=92736
2021 spi_1: NaN=57324, total=92736
2021 spei_1: NaN=57324, total=92736

ðŸ“‚ Processing year: 2022
2022 monthly_rain: NaN=57324, total=92736
2022 max_temp: NaN=57324, total=92736
2022 min_temp: NaN=57324, total=92736
2022 radiation: NaN=57324, total=92736
2022 spi_1: NaN=57324, total=92736
2022 spei_1: NaN=57324, total=92736

ðŸ“‚ Processing year: 2023
2023 monthly_rain: NaN=57324, total=92736
2023 max_temp: NaN=57324, total=92736
2023 min_temp: NaN=57324, total=92736
2023 radiation: NaN=57324, total=92736
2023 spi_1: NaN=57324, total=92736
2023 spei_1: NaN=57324, total=92736

ALL YEARS monthly_rain: NaN=1891692, total=3060288
ALL YEARS max_temp: NaN=1891692, total=3060288
ALL YEARS min_temp: NaN=1891692, total=3060288
ALL YEARS radiation: NaN=1891692, total=3060288
ALL YEARS spi_1: NaN=1891692, total=3060288
ALL YEARS spei_1: NaN=

In [None]:
import xarray as xr
import numpy as np
import pandas as pd

ds = xr.open_dataset(r"Data\Climate Data\4. Merged_Tas_Climate\Tas_Combined_Data.nc")
print(ds.dims)
print(ds.data_vars)

# Remember the gridded variables as read from disk, BEFORE the integer flags
# are added: they are the only ones whose NaNs mark a cell as missing.
gridded_vars = [
    var_name
    for var_name, data_array in ds.data_vars.items()
    if "lat" in data_array.dims and "lon" in data_array.dims
]

# Create drought flag from spei_1
# NOTE: comparisons with NaN are False, so a cell with missing SPEI gets 0 in
# BOTH flags; the int8 flags themselves never contain NaN.
drought_threshold = -1.0
ds["drought"] = (ds["spei_1"] < drought_threshold).astype(np.int8)
ds["no_drought"] = (ds["spei_1"] >= drought_threshold).astype(np.int8)

# Stack to node dimension
# Restrict the all-NaN test to the original gridded variables. Without
# `subset`, the never-NaN int8 flags make every node count as having data,
# so dropna(how="all") would not remove a single node.
ds_node = ds.stack(node=("lat", "lon")).dropna(
    dim="node", how="all", subset=gridded_vars
)
print(ds_node)

# Check valid SPEI per node
# Fix the axis order explicitly so axis=0 is guaranteed to be time.
spei_mat = ds_node["spei_1"].transpose("time", "node").values
valid_counts = np.sum(np.isfinite(spei_mat), axis=0)
print("Min valid:", valid_counts.min())
print("Median valid:", np.median(valid_counts))
print("Max valid:", valid_counts.max())


In [16]:
import xarray as xr

# Re-open the merged NetCDF file and print its summary (dims, coords, variables).
ds = xr.open_dataset(
    r"Data\Climate Data\4. Merged_Tas_Climate\Tas_Combined_Data.nc"
)
print(ds)

<xarray.Dataset> Size: 147MB
Dimensions:       (time: 396, lat: 84, lon: 92)
Coordinates:
  * time          (time) datetime64[ns] 3kB 1991-01-01 1991-02-01 ... 2023-12-01
  * lat           (lat) float64 672B -43.6 -43.55 -43.5 ... -39.55 -39.5 -39.45
  * lon           (lon) float64 736B 143.8 143.9 143.9 ... 148.3 148.3 148.4
Data variables:
    spatial_ref   (time) float64 3kB ...
    monthly_rain  (time, lat, lon) float64 24MB ...
    max_temp      (time, lat, lon) float64 24MB ...
    min_temp      (time, lat, lon) float64 24MB ...
    radiation     (time, lat, lon) float64 24MB ...
    spi_1         (time, lat, lon) float64 24MB ...
    spei_1        (time, lat, lon) float64 24MB ...
Attributes:
    department:               Department of Environment and Science
    department_short:         DES
    copyright:                Copyright - the State of Queensland Department ...
    site_url:                 http://www.longpaddock.qld.gov.au
    institution:              Queensland Gov

In [None]:
# Read the saved file back and report the NaN count of every data variable.
# `output_file` must already be defined by the cell that wrote the file.
ds_combined = xr.open_dataset(output_file)

for v, data_array in ds_combined.data_vars.items():
    nan_count = int(data_array.isnull().sum())
    print(v, "NaN:", nan_count, "of", data_array.size)


spatial_ref NaN: 0 of 396
monthly_rain NaN: 1891692 of 3060288
max_temp NaN: 1891692 of 3060288
min_temp NaN: 1891692 of 3060288
radiation NaN: 1891692 of 3060288
spi_1 NaN: 1891692 of 3060288
spei_1 NaN: 1891692 of 3060288


In [18]:
import xarray as xr

# Count NaNs in one raw SPEI file (1999) for comparison with the merged output.
# NOTE(review): the name `ds_rain` is misleading here, since it holds SPEI data,
# and it overwrites any rainfall dataset bound to the same name by an earlier
# cell. Consider renaming once no other cell depends on it.
ds_rain = xr.open_dataset(
    r"Data\Climate Data\3. Tasmania_Monthly\SPEI_tas_monthly\spei_1_1999.nc"
)

for v, data_array in ds_rain.data_vars.items():
    nan_count = int(data_array.isnull().sum())
    print(v, "NaN:", nan_count, "of", data_array.size)


spei_1 NaN: 57324 of 92736
