In [3]:
import os
import xarray as xr

# Peek at the variables stored in one raw yearly file before deciding which to keep.
# The path is assembled with os.path.join so it resolves on any OS (on Windows it
# yields the same backslash-separated string as before).
sample_path = os.path.join("Data", "Farm Data", "1_Aus_Farm_Data", "Aus_farm_data_1991.nc")

# The context manager releases the file handle as soon as the listing is printed,
# instead of leaving the file open for the rest of the kernel session.
with xr.open_dataset(sample_path) as ds:
    print(ds.data_vars)

Data variables:
    farmno                 (lat, lon) float64 5MB ...
    R_total_hat_ha         (lat, lon) float64 5MB ...
    C_total_hat_ha         (lat, lon) float64 5MB ...
    FBP_fci_hat_ha         (lat, lon) float64 5MB ...
    FBP_fbp_hat_ha         (lat, lon) float64 5MB ...
    A_wheat_hat_ha         (lat, lon) float64 5MB ...
    H_wheat_dot_hat        (lat, lon) float64 5MB ...
    A_barley_hat_ha        (lat, lon) float64 5MB ...
    H_barley_dot_hat       (lat, lon) float64 5MB ...
    A_sorghum_hat_ha       (lat, lon) float64 5MB ...
    H_sorghum_dot_hat      (lat, lon) float64 5MB ...
    A_oilseeds_hat_ha      (lat, lon) float64 5MB ...
    H_oilseeds_dot_hat     (lat, lon) float64 5MB ...
    R_wheat_hat_ha         (lat, lon) float64 5MB ...
    R_sorghum_hat_ha       (lat, lon) float64 5MB ...
    R_oilseeds_hat_ha      (lat, lon) float64 5MB ...
    R_barley_hat_ha        (lat, lon) float64 5MB ...
    Q_wheat_hat_ha         (lat, lon) float64 5MB ...
    Q_barley

In [None]:
# Reduce every NetCDF file in `input_folder` to the variables listed in
# `vars_to_keep` and write the result, under the same file name, to `output_folder`.
# Files that lack any of the requested variables are reported and skipped.

# ==== 1. SET YOUR FOLDERS HERE ====
# Built with os.path.join so the notebook is not tied to Windows path separators
# (on Windows the resulting strings are identical to the previous literals).
input_folder = os.path.join("Data", "Farm Data", "1_Aus_Farm_Data")           # folder containing original .nc files
output_folder = os.path.join("Data", "Farm Data", "2_Cleaned_Aus_Farm_Data")  # folder to save reduced .nc files

os.makedirs(output_folder, exist_ok=True)

# ==== 2. VARIABLES TO KEEP ====
vars_to_keep = ["farmland_per_cell", "A_wheat_hat_ha", "H_wheat_dot_hat"]

# ==== 3. LOOP THROUGH ALL .nc FILES ====
# os.listdir returns entries in arbitrary order; sorting makes the processing
# order (and therefore the printed log) reproducible between runs.
for fname in sorted(os.listdir(input_folder)):
    if not fname.endswith(".nc"):
        continue  # skip non-netcdf files

    in_path = os.path.join(input_folder, fname)
    out_path = os.path.join(output_folder, fname)

    print(f"Processing: {fname}")

    # The context manager closes the input file on every exit path: normal
    # completion, the `continue` below, and any exception raised while
    # selecting or writing.
    with xr.open_dataset(in_path) as ds:
        # Check that required variables exist
        missing = [v for v in vars_to_keep if v not in ds.data_vars]
        if missing:
            print(f"  ⚠ Skipping {fname} – missing variables: {missing}")
            continue

        # Select only the desired data variables; indexing a Dataset with a
        # list of names returns a new Dataset holding just those variables
        # together with their coordinates.
        ds_small = ds[vars_to_keep]

        # Explicitly carry over the global attributes (usually already preserved)
        ds_small.attrs = ds.attrs

        # Save to new NetCDF while the source file is still open, since the
        # variables are read from it at write time.
        ds_small.to_netcdf(out_path)

    print(f"  ✔ Saved reduced file to: {out_path}")

print("Done!")


Processing: Aus_farm_data_1991.nc
  ✔ Saved reduced file to: Data\Farm Data\2_Cleaned_Aus_Farm_Data\Aus_farm_data_1991.nc
Processing: Aus_farm_data_1992.nc
  ✔ Saved reduced file to: Data\Farm Data\2_Cleaned_Aus_Farm_Data\Aus_farm_data_1992.nc
Processing: Aus_farm_data_1993.nc
  ✔ Saved reduced file to: Data\Farm Data\2_Cleaned_Aus_Farm_Data\Aus_farm_data_1993.nc
Processing: Aus_farm_data_1994.nc
  ✔ Saved reduced file to: Data\Farm Data\2_Cleaned_Aus_Farm_Data\Aus_farm_data_1994.nc
Processing: Aus_farm_data_1995.nc
  ✔ Saved reduced file to: Data\Farm Data\2_Cleaned_Aus_Farm_Data\Aus_farm_data_1995.nc
Processing: Aus_farm_data_1996.nc
  ✔ Saved reduced file to: Data\Farm Data\2_Cleaned_Aus_Farm_Data\Aus_farm_data_1996.nc
Processing: Aus_farm_data_1997.nc
  ✔ Saved reduced file to: Data\Farm Data\2_Cleaned_Aus_Farm_Data\Aus_farm_data_1997.nc
Processing: Aus_farm_data_1998.nc
  ✔ Saved reduced file to: Data\Farm Data\2_Cleaned_Aus_Farm_Data\Aus_farm_data_1998.nc
Processing: Aus_farm_dat

In [3]:
# Sanity check: re-open one of the reduced files and print its summary
# (dimensions, coordinates, data variables, global attributes).
# NOTE(review): the file name here starts with a lowercase "aus_", whereas the
# files processed earlier in the notebook are logged as "Aus_farm_data_<year>.nc";
# this presumably only resolves on a case-insensitive filesystem - confirm.
# NOTE(review): the recorded output lists only farmland_per_cell and
# H_wheat_dot_hat, while the reduction cell keeps A_wheat_hat_ha as well, so the
# stored output looks like it came from an earlier run - re-run to refresh.
cleaned_sample_path = r"Data\Farm Data\2_Cleaned_Aus_Farm_Data\aus_farm_data_2000.nc"
ds1 = xr.open_dataset(cleaned_sample_path)
print(ds1)

<xarray.Dataset> Size: 7MB
Dimensions:            (lat: 691, lon: 886)
Coordinates:
  * lat                (lat) float64 6kB -10.0 -10.05 -10.1 ... -44.45 -44.5
  * lon                (lon) float64 7kB 112.0 112.1 112.1 ... 156.1 156.2 156.2
Data variables:
    farmland_per_cell  (lat, lon) float32 2MB ...
    H_wheat_dot_hat    (lat, lon) float64 5MB ...
Attributes:
    fyear:    2022
    cyear:    2000
    pyear:    2000
    tyear:    2022
