In [1]:
import xarray as xr
import glob
import numpy as np

# --------------------------------------------------
# Build a binary wheat-presence mask from the yearly ABARES rasters.
#
# Reads every NetCDF file matching the pattern below, stacks them along a
# new "year" dimension, flags each grid cell whose "H_wheat_dot_hat" value
# is > 0 in at least one file, prints a short summary and writes the mask
# to "wheat_presence_mask.nc" in the current working directory.
# --------------------------------------------------

# --------------------------------------------------
# 1. Locate all ABARES wheat files
# --------------------------------------------------
abares_pattern = "Data/raster/rounded_farm/*.nc"

# Sorted so the stacking order along "year" is deterministic.
# NOTE(review): this presumably yields chronological order - confirm the
# file names sort by year.
abres_files = sorted(glob.glob(abares_pattern))

print(f"Found {len(abres_files)} ABARES files")

# Fail early with a message that names the pattern that matched nothing
# (usually a wrong working directory).
if not abres_files:
    raise FileNotFoundError(
        f"No NetCDF files match {abares_pattern!r}; "
        "check the working directory and the data path."
    )

# --------------------------------------------------
# 2. Open all files as a single Dataset
#    (stack along a new 'year' dimension)
# --------------------------------------------------
# NOTE(review): `ds` is left open because code outside this section may
# still use it; call ds.close() once it is no longer needed.
ds = xr.open_mfdataset(
    abres_files,
    combine="nested",
    concat_dim="year",
)

# --------------------------------------------------
# 3. Create wheat presence mask
#    Wheat exists if >0 in ANY year
# --------------------------------------------------
# Missing values are replaced by 0 first, so they can never count as wheat.
wheat_mask = (
    ds["H_wheat_dot_hat"]
    .fillna(0)
    .max(dim="year") > 0
)

# Convert boolean -> int (1 = wheat, 0 = no wheat).
# open_mfdataset yields lazy (dask-backed) arrays, so materialise the mask
# once here; otherwise the sum in step 5 and the NetCDF write in step 6
# would each re-evaluate it from the source files.
wheat_mask = wheat_mask.astype("int8").compute()

# --------------------------------------------------
# 4. Create clean Dataset for mask
# --------------------------------------------------
mask_ds = xr.Dataset(
    {
        "wheat_mask": wheat_mask,
    },
    coords={
        "lat": ds.lat,
        "lon": ds.lon,
    },
)

# --------------------------------------------------
# 5. Quick sanity checks
# --------------------------------------------------
total_grids = wheat_mask.size
# The mask holds only 0/1, so its sum is the number of flagged cells.
wheat_grids = int(wheat_mask.sum())

print(f"Total grid cells      : {total_grids}")
print(f"Wheat-capable grids   : {wheat_grids}")
print(f"Non-wheat grids       : {total_grids - wheat_grids}")

# --------------------------------------------------
# 6. Save mask to NetCDF
# --------------------------------------------------
mask_ds.to_netcdf("wheat_presence_mask.nc")

print("✅ Wheat presence mask saved as wheat_presence_mask.nc")


Found 33 ABARES files
Total grid cells      : 34080
Wheat-capable grids   : 7673
Non-wheat grids       : 26407
✅ Wheat presence mask saved as wheat_presence_mask.nc
