In [1]:
import xarray as xr
from data_config import get_compressed_data_dir, get_scratch_dir

In [2]:
scratch = get_scratch_dir()
data_dir = get_compressed_data_dir(scratch)
data_dir

PosixPath('/pscratch/sd/a/abanihi/dor/compressed')

In [3]:
def get_single_simulation_glob(
    data_dir: str, *, polygon_id: int, injection_month: int, injection_year: int
):
    # Pad the values with zeros
    padded_polygon_id = f"{polygon_id:03d}"
    padded_injection_month = f"{injection_month:02d}"
    padded_injection_year = f"{injection_year:04d}"

    out_filepath = (
        data_dir
        / f"{padded_polygon_id}/{padded_injection_month}"
        / f"smyle.cdr-atlas-v0.glb-dor.{padded_polygon_id}-{padded_injection_year}-{padded_injection_month}.pop.h.*.nc"
    )

    return str(out_filepath)

In [4]:
single_simulation_glob = get_single_simulation_glob(
    data_dir=data_dir, polygon_id=676, injection_month=4, injection_year=1999
)
single_simulation_glob

'/pscratch/sd/a/abanihi/dor/compressed/676/04/smyle.cdr-atlas-v0.glb-dor.676-1999-04.pop.h.*.nc'

In [5]:
%%time
single_simulation = xr.open_mfdataset(
    single_simulation_glob,
    combine="by_coords",  # can we do this with combine='nested'?
    data_vars="minimal",
    coords="minimal",
    compat="override",
    chunks={
        "time": 12,
        "z_t": 60,
        "nlat": 384,
        "nlon": 320,
    },  # open same way files are split (why does this not happen automatically??),
    engine="netcdf4",
)
single_simulation

OSError: no files to open

In [None]:
single_simulation["ALK_ANOM"].isel(z_t=0, elapsed_time=range(0, 180, 15)).plot(
    col_wrap=4, col="elapsed_time"
);

In [None]:
# these are the only variables we actually need to calculate OAE efficiency
REQUIRED_DATA_VARIABLES = [
    "DIC_ANOM",
    "DIC_ALT_CO2",
    "ALK",
    "ALK_ALT_CO2",
    # For vertical integration
    "dz",
    "TAREA",
    # dimension coords
    "polygon_id",
    "elapsed_time",
    "injection_month",
    "ULONG",
    "ULAT",
]

VARS_TO_DROP = [
    var_name
    for var_name in list(single_simulation.variables)
    if var_name not in REQUIRED_DATA_VARIABLES
]

In [None]:
single_simulation[REQUIRED_DATA_VARIABLES]

In [None]:
ds = xr.open_dataset(
    "/pscratch/sd/a/abanihi/dor/compressed/anomalies/676/04/smyle.cdr-atlas-v0.glb-dor.676-1999-04.pop.h.0347-04.nc"
)
ds

In [None]:
ds = xr.open_dataset(
    "/global/cfs/projectdirs/m4746/Projects/Ocean-CDR-Atlas-v0/data/archive/smyle.cdr-atlas-v0.glb-dor_Southern_Ocean_039_1999-10-01_02759.001/ocn/hist/smyle.cdr-atlas-v0.glb-dor_Southern_Ocean_039_1999-10-01_02759.001.pop.h.0347-10.nc"
)
ds