In [5]:
import xarray as xr

ds = xr.open_dataset("Data - Modelling\Monthly_DB\Variables\Max Temp\Monthly_QLD_2000.max_temp.nc")
print(ds)


<xarray.Dataset> Size: 12MB
Dimensions:      (time: 12, lat: 382, lon: 311)
Coordinates:
  * lat          (lat) float64 3kB -29.15 -29.1 -29.05 ... -10.2 -10.15 -10.1
  * lon          (lon) float64 2kB 138.0 138.1 138.1 138.2 ... 153.4 153.4 153.5
  * time         (time) datetime64[ns] 96B 2000-01-01 2000-02-01 ... 2000-12-01
Data variables:
    spatial_ref  (time) float64 96B ...
    max_temp     (time, lat, lon) float64 11MB ...
    region_code  (lat, lon) int16 238kB ...
Attributes:
    department:               Department of Environment and Science
    department_short:         DES
    copyright:                Copyright - the State of Queensland Department ...
    site_url:                 http://www.longpaddock.qld.gov.au
    institution:              Queensland Government, Department of Environmen...
    raster_source:            Gridded surface was created by interpolating ob...
    raster_source_additions:  and other suppliers (see the SILO webpage for d...
    metadata_url:  

  ds = xr.open_dataset("Data - Modelling\Monthly_DB\Variables\Max Temp\Monthly_QLD_2000.max_temp.nc")


In [11]:
unique_region_codes = np.unique(sample_file["region_code"].values)
print(f"Unique region codes: {unique_region_codes}")

Unique region codes: [-1  0  1  2  3  4  5  6  7  8  9 10 11 12]


In [1]:
import os
import re
import xarray as xr
import numpy as np

# === Paths ===
rainfall_folder = r"Data - Modelling/Monthly_DB/Variables/Rainfall"
maxtemp_folder = r"Data - Modelling/Monthly_DB/Variables/Max Temp"
mintemp_folder = r"Data - Modelling/Monthly_DB/Variables/Min Temp"
radiation_folder = r"Data - Modelling/Monthly_DB/Variables/Radiation"
spi_folder = r"Data - Modelling/Monthly_DB/Variables/SPI"
output_folder = r"Data - Modelling/Monthly_DB/Combined/Region_Wise"
os.makedirs(output_folder, exist_ok=True)

# === List all years from the rainfall folder ===
years = sorted([
    re.findall(r"\d{4}", f)[0] for f in os.listdir(rainfall_folder) if f.endswith(".nc")
])
print(f"‚úÖ Detected Years: {years}")

# === Extract unique region codes directly from the first file ===
sample_file = xr.open_dataset(os.path.join(rainfall_folder, f"Monthly_QLD_{years[0]}.monthly_rain.nc"))
region_codes = np.unique(sample_file["region_code"].values)
print(f"‚úÖ Found Region Codes: {region_codes}")

# === Combine variables for each region ===
for region in region_codes:
    print(f"\nüìÇ Processing region: {region}")
    combined_datasets = []

    for year in years:
        print(f"  üìÖ Year: {year}")
        
        try:
            # === Load each variable (already containing region_code) ===
            ds_rain = xr.open_dataset(os.path.join(rainfall_folder, f"Monthly_QLD_{year}.monthly_rain.nc"))
            ds_maxtemp = xr.open_dataset(os.path.join(maxtemp_folder, f"Monthly_QLD_{year}.max_temp.nc"))
            ds_mintemp = xr.open_dataset(os.path.join(mintemp_folder, f"Monthly_QLD_{year}.min_temp.nc"))
            ds_radiation = xr.open_dataset(os.path.join(radiation_folder, f"Monthly_QLD_{year}.radiation.nc"))
            ds_spi = xr.open_dataset(os.path.join(spi_folder, f"SPI_Monthly_QLD_{year}.monthly_rain.nc"))

            # === Validate region_code exists ===
            if "region_code" not in ds_rain:
                print(f"‚ùå Missing region_code in {year} file. Skipping.")
                continue

            # === Mask each variable using region code ===
            region_mask = (ds_rain["region_code"] == region)
            if not region_mask.any():
                print(f"‚ùå Region {region} not found in {year}. Skipping.")
                continue

            # === Combine and mask each variable ===
            ds_combined = xr.Dataset({
                "monthly_rain": ds_rain["monthly_rain"].where(region_mask),
                "max_temp": ds_maxtemp["max_temp"].where(region_mask),
                "min_temp": ds_mintemp["min_temp"].where(region_mask),
                "radiation": ds_radiation["radiation"].where(region_mask),
                "spi_1": ds_spi["spi_1"].where(region_mask),
                "region_code": ds_rain["region_code"].where(region_mask)
            })

            combined_datasets.append(ds_combined)

        except Exception as e:
            print(f"‚ùå Error processing {year} for region {region}: {e}")

    # === Concatenate all years for this region ===
    if combined_datasets:
        ds_region_combined = xr.concat(combined_datasets, dim="time")
        
        # === Save the combined region file ===
        region_file = os.path.join(output_folder, f"Combined_{region}.nc")
        ds_region_combined.to_netcdf(region_file)
        print(f"‚úÖ Saved region file: {region_file}")
    else:
        print(f"‚ùå No valid data for region {region}. Skipping.")


‚úÖ Detected Years: ['2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022', '2023', '2024']
‚úÖ Found Region Codes: [-1  0  1  2  3  4  5  6  7  8  9 10 11 12]

üìÇ Processing region: -1
  üìÖ Year: 2000
  üìÖ Year: 2001
  üìÖ Year: 2002
  üìÖ Year: 2003
  üìÖ Year: 2004
  üìÖ Year: 2005
  üìÖ Year: 2006
  üìÖ Year: 2007
  üìÖ Year: 2008
  üìÖ Year: 2009
  üìÖ Year: 2010
  üìÖ Year: 2011
  üìÖ Year: 2012
  üìÖ Year: 2013
  üìÖ Year: 2014
  üìÖ Year: 2015
  üìÖ Year: 2016
  üìÖ Year: 2017
  üìÖ Year: 2018
  üìÖ Year: 2019
  üìÖ Year: 2020
  üìÖ Year: 2021
  üìÖ Year: 2022
  üìÖ Year: 2023
  üìÖ Year: 2024
‚úÖ Saved region file: Data - Modelling/Monthly_DB/Combined/Region_Wise\Combined_-1.nc

üìÇ Processing region: 0
  üìÖ Year: 2000
  üìÖ Year: 2001
  üìÖ Year: 2002
  üìÖ Year: 2003
  üìÖ Year: 2004
  üìÖ Year: 2005
  üìÖ Year

KeyboardInterrupt: 

In [12]:
import os
import re
import xarray as xr
import numpy as np
import rioxarray

# === Paths ===
rainfall_folder = r"Data - Modelling/Monthly_DB/Variables/Rainfall"
maxtemp_folder = r"Data - Modelling/Monthly_DB/Variables/Max Temp"
mintemp_folder = r"Data - Modelling/Monthly_DB/Variables/Min Temp"
radiation_folder = r"Data - Modelling/Monthly_DB/Variables/Radiation"
spi_folder = r"Data - Modelling/Monthly_DB/Variables/SPI"
output_folder = r"Data - Modelling/Monthly_DB/Combined/Region_Wise"
os.makedirs(output_folder, exist_ok=True)

# === List all years from the rainfall folder ===
years = sorted([
    re.findall(r"\d{4}", f)[0] for f in os.listdir(rainfall_folder) if f.endswith(".nc")
])
print(f"‚úÖ Detected Years: {years}")

# === Extract unique region codes directly from the first file ===
sample_file = xr.open_dataset(os.path.join(rainfall_folder, f"Monthly_QLD_{years[0]}.monthly_rain.nc"))
region_codes = np.unique(sample_file["region_code"].values)
print(f"‚úÖ Found Region Codes: {region_codes}")

# === Combine and Crop Variables for Each Region ===
for region in region_codes:
    print(f"\nüìÇ Processing region: {region}")
    combined_datasets = []

    for year in years:
        print(f"  üìÖ Year: {year}")
        
        try:
            # === Load each variable (already containing region_code) ===
            ds_rain = xr.open_dataset(os.path.join(rainfall_folder, f"Monthly_QLD_{year}.monthly_rain.nc"))
            ds_maxtemp = xr.open_dataset(os.path.join(maxtemp_folder, f"Monthly_QLD_{year}.max_temp.nc"))
            ds_mintemp = xr.open_dataset(os.path.join(mintemp_folder, f"Monthly_QLD_{year}.min_temp.nc"))
            ds_radiation = xr.open_dataset(os.path.join(radiation_folder, f"Monthly_QLD_{year}.radiation.nc"))
            ds_spi = xr.open_dataset(os.path.join(spi_folder, f"SPI_Monthly_QLD_{year}.monthly_rain.nc"))

            # === Validate region_code exists ===
            if "region_code" not in ds_rain:
                print(f"‚ùå Missing region_code in {year} file. Skipping.")
                continue

            # === Mask each variable using region code ===
            region_mask = (ds_rain["region_code"] == region)
            if not region_mask.any():
                print(f"‚ùå Region {region} not found in {year}. Skipping.")
                continue

            # === Combine and mask each variable ===
            ds_combined = xr.Dataset({
                "monthly_rain": ds_rain["monthly_rain"].where(region_mask),
                "max_temp": ds_maxtemp["max_temp"].where(region_mask),
                "min_temp": ds_mintemp["min_temp"].where(region_mask),
                "radiation": ds_radiation["radiation"].where(region_mask),
                "spi_1": ds_spi["spi_1"].where(region_mask),
                "region_code": ds_rain["region_code"].where(region_mask)
            })

            # === Crop to the region's bounding box (remove NaNs) ===
            ds_combined = ds_combined.rio.write_crs("EPSG:4326")
            ds_cropped = ds_combined.rio.clip_box(
                minx=ds_combined.lon.min().item(),
                miny=ds_combined.lat.min().item(),
                maxx=ds_combined.lon.max().item(),
                maxy=ds_combined.lat.max().item()
            )

            combined_datasets.append(ds_cropped)

        except Exception as e:
            print(f"‚ùå Error processing {year} for region {region}: {e}")

    # === Concatenate all years for this region ===
    if combined_datasets:
        ds_region_combined = xr.concat(combined_datasets, dim="time")
        
        # === Save the cropped and optimized region file with compression ===
        region_file = os.path.join(output_folder, f"Combined_{region}.nc")
        ds_region_combined.to_netcdf(
            region_file,
            encoding={var: {"zlib": True, "complevel": 4} for var in ds_region_combined.data_vars}
        )
        print(f"‚úÖ Saved optimized region file: {region_file}")
    else:
        print(f"‚ùå No valid data for region {region}. Skipping.")


‚úÖ Detected Years: ['2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022', '2023', '2024']
‚úÖ Found Region Codes: [-1  0  1  2  3  4  5  6  7  8  9 10 11 12]

üìÇ Processing region: -1
  üìÖ Year: 2000
  üìÖ Year: 2001
  üìÖ Year: 2002
  üìÖ Year: 2003
  üìÖ Year: 2004
  üìÖ Year: 2005
  üìÖ Year: 2006
  üìÖ Year: 2007
  üìÖ Year: 2008
  üìÖ Year: 2009
  üìÖ Year: 2010
  üìÖ Year: 2011
  üìÖ Year: 2012
  üìÖ Year: 2013
  üìÖ Year: 2014
  üìÖ Year: 2015
  üìÖ Year: 2016
  üìÖ Year: 2017
  üìÖ Year: 2018
  üìÖ Year: 2019
  üìÖ Year: 2020
  üìÖ Year: 2021
  üìÖ Year: 2022
  üìÖ Year: 2023
  üìÖ Year: 2024
‚úÖ Saved optimized region file: Data - Modelling/Monthly_DB/Combined/Region_Wise\Combined_-1.nc

üìÇ Processing region: 0
  üìÖ Year: 2000
  üìÖ Year: 2001
  üìÖ Year: 2002
  üìÖ Year: 2003
  üìÖ Year: 2004
  üìÖ Year: 2005
 

MemoryError: Unable to allocate 272. MiB for an array with shape (300, 382, 311) and data type float64

In [3]:
import xarray as xr

ds = xr.open_dataset(r"Data - Modelling\Annual_DB\2. Reg_Cropped\Max Temp\BRB_cropped_2000.max_temp.nc")
print(ds)

<xarray.Dataset> Size: 74MB
Dimensions:      (lat: 197, lon: 128, time: 366)
Coordinates:
  * lat          (lat) float64 2kB -28.95 -28.9 -28.85 ... -19.25 -19.2 -19.15
  * lon          (lon) float64 1kB 146.0 146.1 146.1 146.2 ... 152.2 152.3 152.3
  * time         (time) datetime64[ns] 3kB 2000-01-01 2000-01-02 ... 2000-12-31
Data variables:
    spatial_ref  int64 8B ...
    max_temp     (time, lat, lon) float64 74MB ...
    crs          |S1 1B ...
Attributes:
    department:               Department of Environment and Science
    department_short:         DES
    copyright:                Copyright - the State of Queensland Department ...
    site_url:                 http://www.longpaddock.qld.gov.au
    institution:              Queensland Government, Department of Environmen...
    raster_source:            Gridded surface was created by interpolating ob...
    raster_source_additions:  and other suppliers (see the SILO webpage for d...
    metadata_url:             http://qldspa