Extract test data from a DAO NetCDF file

In [None]:
import numpy as np
import xarray as xr

In [None]:
# open the DAO NetCDF file
input_path = "dao.80_93.nc"
ds = xr.open_dataset(input_path)

In [None]:
# rebuild the pressure-level coordinate when all of its values are missing
level_is_broken = ds.coords["level"].isnull().all()
if level_is_broken:
    print("Fixing broken pressure level data")
    levels = [1000.0, 950.0, 900.0, 850.0, 700.0, 500.0, 300.0, 200.0]
    level_coord = xr.DataArray(
        levels,
        dims=["level"],
        coords={"level": levels},
        attrs={"units": "hPa"},
    )
    ds.coords["level"] = level_coord

In [None]:
# latitude has to increase along its axis, i.e. run from south to north
lat_index = ds["lat"].to_index()
if not lat_index.is_monotonic_increasing:
    print("flipping lat")
    ds = ds.sortby("lat", ascending=True)

In [None]:
# longitude has to increase along its axis, i.e. run from west to east
lon_index = ds["lon"].to_index()
if not lon_index.is_monotonic_increasing:
    print("flipping lon")
    ds = ds.sortby("lon", ascending=True)

In [None]:
# spatial subset covering 16°S–4°N and 76°W–50°W
lat_bounds = slice(-16, 4)
lon_bounds = slice(-76, -50)
ds = ds.sel(lat=lat_bounds, lon=lon_bounds)

In [None]:
# every variable gets lon and lat as its two leading dimensions;
# the trailing ... keeps the remaining dimensions in their existing order
dim_order = ("lon", "lat", ...)
ds = ds.transpose(*dim_order)

In [None]:
# keep only the first time step, which should be Jan 1980
ds = ds.isel({"time": 0}, drop=True)

In [None]:
def nan_trapz(a: np.ndarray, x: np.ndarray) -> float:
    """
    Apply the trapezium rule to 1D data, dropping NaNs.

    Samples where a is NaN are removed together with their sample points,
    so the rule bridges the gap between the neighbouring valid samples.

    Arguments:
        a: 1D array of values to integrate
        x: 1D array of sample points, same length as a

    Returns:
        The integral of a over x (0.0 if fewer than two valid samples remain)
    """
    a = np.asarray(a)
    x = np.asarray(x)
    mask = ~np.isnan(a)  # real values
    # np.trapezoid was added in NumPy 2.0; older releases only provide np.trapz
    trapezoid = getattr(np, "trapezoid", None)
    if trapezoid is None:
        trapezoid = np.trapz
    return trapezoid(y=a[mask], x=x[mask])

def integrator(a: np.ndarray, axis: int, x: np.ndarray) -> np.ndarray:
    """
    Integrate an ND array along one axis with the NaN-dropping trapezium rule.

    Every 1D slice of a taken along axis is treated as the dependent
    variable y and integrated against the sample points x (integral of y dx)
    by nan_trapz.

    Arguments:
        a: ND array to integrate
        axis: axis of a to integrate over
        x: 1D array of sample points

    Returns:
        N-1 dimensional array
    """
    # extra keyword arguments of apply_along_axis are forwarded to nan_trapz
    return np.apply_along_axis(nan_trapz, axis, a, x=x)

In [None]:
# Integrate 10^-3 Shum Uwnd dp over the pressure levels.
# Pressure levels that correspond to heights below ground level hold NaNs in the input dataset.
# The integration limits run from high pressure to low pressure, so the sign is inverted
# by folding a factor of -1 into the 10^-3 scale.
scale = -1e-3
level_values = ds.coords["level"].values
da = scale * ds["Shum"] * ds["Uwnd"]
Fx = da.reduce(integrator, dim="level", x=level_values)
# Units: mb x m/s

In [None]:
# Integrate 10^-3 Shum Vwnd dp over the pressure levels.
# Pressure levels that correspond to heights below ground level hold NaNs in the input dataset.
# The integration limits run from high pressure to low pressure, so the sign is inverted
# by folding a factor of -1 into the 10^-3 scale.
scale = -1e-3
level_values = ds.coords["level"].values
da = scale * ds["Shum"] * ds["Vwnd"]
Fy = da.reduce(integrator, dim="level", x=level_values)
# Units: mb x m/s

In [None]:
# save to disk: the coordinates, the integrals Fx and Fy, and the Evap and Prec
# variables go into one compressed .npz archive
arrays = {
    "lat": ds["lat"].values,
    "lon": ds["lon"].values,
    "Fx": Fx.values,
    "Fy": Fy.values,
    "E": ds["Evap"].values,
    "P": ds["Prec"].values,
}
np.savez_compressed("unit_test_data.npz", **arrays, allow_pickle=False)