In [2]:
# Import the helper that builds and/or opens a datastore for an experiment
# directory, then print its docstring to see which parameters it accepts.
from access_nri_intake.experiment import use_datastore

help(use_datastore)

Help on function use_datastore in module access_nri_intake.experiment.main:

use_datastore(experiment_dir: pathlib.Path | str, builder: ecgtools.builder.Builder | None = None, catalog_dir: pathlib.Path | str | None = None, builder_kwargs: dict | None = None, open_ds: bool = True, datastore_name: str = 'experiment_datastore', description: str | None = None) -> intake_esm.core.esm_datastore | None
    Specify a builder and an experiment directory in order to build and/or open
    an esm-datastore in place for that experiment. Valid and up to date datastores
    will not be overwritten.
    
    Further configuration can be done by passing additional keyword arguments
    
    Parameters
    ----------
    builder : Builder
        The builder object that will be used to build the datastore.
    experiment_dir : Path | str
        The directory containing the experiment. If a string is passed, it will be
        converted to a Path object.
    catalog_dir : Path | str, optional
        Th

# Exercise 1: Building a datastore

In [None]:
# We will make a directory the intake-training repo to save our datastore in.
# If you cloned the repo from somewhere other than your home directory,
# you may need to alter the `CATALOG_DIR` variable
from access_nri_intake.source import builders
import warnings
import os
from xarray import SerializationWarning
warnings.filterwarnings(action='once',category=UserWarning)
warnings.filterwarnings(action='once',category=SerializationWarning)
os.environ["PYTHONWARNINGS"] = "ignore"

!mkdir ~/intake-training/2025_02/cat_dir
!cd 
# NB: There is currently a bug in use_datastore with homedir expansion (ie. ~/), so please use either relative paths 
# or absolute paths (ie. /home/user/). We run cd above so we don't need to start paths with ~/

CATALOG_DIR = "intake-training/2025_02/cat_dir"
EXPERIMENT_DIR = '/g/data/ik11/outputs/access-om2/1deg_iamip2_CMCC-ESM2ssp126'
BUILDER = builders.AccessOm2Builder

expt_datastore = use_datastore(
    experiment_dir = EXPERIMENT_DIR,
    catalog_dir = CATALOG_DIR,
    builder=BUILDER
)
expt_datastore

In [None]:
# Let's call use_datastore a second time with identical arguments, to verify that
# the datastore is not rebuilt when nothing has changed.
expt_datastore = use_datastore(
    experiment_dir=EXPERIMENT_DIR,
    catalog_dir=CATALOG_DIR,
    builder=BUILDER,
)
expt_datastore

___
# Exercise 2: Searching for a dataset

In [1]:
# In order to open our datasets, we'll need a dask cluster, so let's start one.

from distributed import Client

# Each worker is given a single thread.
client = Client(threads_per_worker=1)
# Display the link to the dask dashboard as the cell output.
client.dashboard_link

'/proxy/8787/status'

In [None]:
# Looking at the first few rows of datastore.df (here, the first 10) can often be
# a good way to get a feel for what the datastore contains.
expt_datastore.df.head(10)

In [None]:
# Before picking a frequency, list every distinct value in the `frequency` column.
expt_datastore.df["frequency"].unique()

In [None]:
# Let's see how many datasets are at monthly frequency by searching on
# frequency="1mon" and inspecting the result.
expt_datastore.search(frequency="1mon")

In [None]:
# The monthly search returned two datasets. Listing the distinct `file_id` values
# of the matching rows shows that file_id can be used to tell them apart.
expt_datastore.search(frequency="1mon").df["file_id"].unique()

In [None]:
# Let's look at the physical variables: restrict the monthly search to the
# ocean_scalar file_id.
expt_datastore.search(
    frequency="1mon",
    file_id="ocean_scalar_1_monthly_ym_XXXX_XX",
)

In [None]:
# The search above matches a single dataset, so the whole thing can be loaded
# in one go with .to_dask().
(
    expt_datastore
    .search(frequency="1mon", file_id="ocean_scalar_1_monthly_ym_XXXX_XX")
    .to_dask()
)

## Now, try exploring the datastore yourself:
- Can you load a single year from a single variable into a dataset?
- What other ways can you extract a subset of data from the datastore?

___
# Exercise 3: Loading multiple datasets at once

In [None]:
# What if we wanted one physical and one biogeochemical variable?
# Say, `temp_global_ave` and `total_co2_flux`: pass both names as a list.
expt_datastore.search(
    frequency="1mon",
    variable=["temp_global_ave", "total_co2_flux"],
)

In [None]:
# The two variables live in different datasets, so .to_dask() can't be used here.
# .to_dataset_dict() loads every matching dataset instead; we keep the result in
# the dict `d`.
d = expt_datastore.search(
    frequency="1mon",
    variable=["temp_global_ave", "total_co2_flux"],
).to_dataset_dict()

# Print the key under which each dataset is stored.
for key in d:
    print(key)

# When we search for variables, intake is smart enough to only load the ones we searched for.
d["ocean_scalar_1_monthly_ym_XXXX_XX.1mon"]

In [None]:
# Now the biogeochemical dataset from the same dict. When we search for variables,
# intake is smart enough to only load the ones we searched for.
d['oceanbgc_scalar_1_monthly_ym_XXXX_XX.1mon']

## Now, try exploring the datastore yourself:
- Can you load the same variable at multiple frequencies into a dataset dict?
- What other ways can you extract a subset of data from the datastore?

___
# Exercise 4: Searching on standard names

We'll use the regular catalog here - the datastore we just built doesn't contain any variables with useful standard names.

In [None]:
import intake
# Look up the '1deg_jra55_ryf9091_gadi' entry in the ACCESS-NRI catalog and
# display it as the cell output.
esm_ds = intake.cat.access_nri['1deg_jra55_ryf9091_gadi']
esm_ds

In [None]:
# Let's look for sea surface height by searching on its standard name
# rather than on a model-specific variable name.
esm_ds.search(variable_standard_name="sea_surface_height_above_geoid")

In [None]:
# The search returns a single dataset, so let's open it with .to_dask().
(
    esm_ds
    .search(variable_standard_name="sea_surface_height_above_geoid")
    .to_dask()
)

# Exercise 5 (Bonus): Using your own datastore for a recipe

The dataset at `/g/data/cj50/access-om2/raw-output/access-om2-01/01deg_jra55_ryf_Control/` contains sea surface height data.

Let's build a catalog for it, and then use it for the recipe at https://github.com/COSIMA/cosima-recipes/blob/main/Recipes/Compare_SSH_model_obs.ipynb

Note: This dataset can be found in the standard catalog - you can compare it against the one you build.

In [None]:
!cd 
!mkdir intake-training/2025_02/cosima_recipe_datastore

CATALOG_DIR = "intake-training/2025_02/cat_dir" # What does this need to change to?
EXPERIMENT_DIR = '/g/data/ik11/outputs/access-om2/1deg_iamip2_CMCC-ESM2ssp126' # What does this need to change to?
BUILDER = builders.AccessOm2Builder # Does this need to change?

expt_datastore = use_datastore(
    experiment_dir = EXPERIMENT_DIR,
    catalog_dir = CATALOG_DIR,
    builder=BUILDER
)
expt_datastore