# ACDtools issue 14 - util demo

#### Date: 5 February, 2025
##### https://github.com/Thomas-Moore-Creative/ACDtools/issues/14

Author = {"name": "Thomas Moore", "affiliation": "CSIRO", "email": "thomas.moore@csiro.au", "orcid": "0000-0003-3930-1946"}

# Install ACDtools locally

In [1]:
# this needs to be set via a custom edit per user at the moment
!pip install --user -e /g/data/es60/users/thomas_moore/code/ACDtools

Obtaining file:///g/data/es60/users/thomas_moore/code/ACDtools
  Installing build dependencies ... [?25ldone
[?25h  Checking if build backend supports build_editable ... [?25ldone
[?25h  Getting requirements to build editable ... [?25ldone
[?25h  Preparing editable metadata (pyproject.toml) ... [?25ldone
[?25hBuilding wheels for collected packages: ACDtools
  Building editable for ACDtools (pyproject.toml) ... [?25ldone
[?25h  Created wheel for ACDtools: filename=ACDtools-0.1-0.editable-py3-none-any.whl size=3512 sha256=776ace72185570254d7468b0a08cef54c631f57ee117fd9d63fbbfcabdf791eb
  Stored in directory: /jobfs/133559658.gadi-pbs/pip-ephem-wheel-cache-pp4df0z6/wheels/b6/a3/f2/6ce45fbdc116ad50e421d6a11cb060cc796e867501807af446
Successfully built ACDtools
Installing collected packages: ACDtools
  Attempting uninstall: ACDtools
    Found existing installation: ACDtools 0.1
    Uninstalling ACDtools-0.1:
      Successfully uninstalled ACDtools-0.1
Successfully installed ACDtool

In [2]:
# Enable autoreload in the notebook
%load_ext autoreload
%autoreload 1 
%aimport ACDtools.util
%aimport ACDtools.ard
%aimport ACDtools.plot
# Importing from your local package util.py
from ACDtools import util
from ACDtools import ard
from ACDtools import plot

# Notebook settings

### filter warnings

In [3]:
import warnings
warnings.filterwarnings("ignore") # Suppress warnings

# Dask cluster from config
`client, cluster = util.start_dask_cluster_from_config('netcdf_work')`
<br>OR<br>
`client, cluster = util.start_dask_cluster_from_config('zarr_work')`

In [5]:
# Start a Dask cluster from the 'netcdf_work' configuration name and keep both
# returned handles (client and cluster) for later use in the notebook.
client, cluster = util.start_dask_cluster_from_config('netcdf_work')

Cluster started with 28 workers.
Dashboard available at: /proxy/8787/status


# Issue: make basic notebook to show some `util` functions
- https://github.com/Thomas-Moore-Creative/ACDtools/issues/14

## utilise CMIP6 data catalogs for NCI holdings

##### Information on climate data catalogs across Australian HPC

**ACCESS-NRI** https://access-nri-intake-catalog.readthedocs.io/en/latest/usage/how.html <br>
**NCI** https://opus.nci.org.au/pages/viewpage.action?pageId=213713098


##### $\bigstar$ Get inspiration from ACCESS-NRI intake catalog docs: ACCESS-ESM1-5 CMIP6 example
https://access-nri-intake-catalog.readthedocs.io/en/latest/usage/quickstart.html

## import packages

In [8]:
import intake
import xarray as xr
import numpy as np
import gc
import json

### import the ACCESS-NRI catalog

In [9]:
# Handle to the ACCESS-NRI catalog exposed through intake's `cat` namespace
catalog = intake.cat.access_nri

### (1) "I know I want Australian CMIP6 data - so that's fs38 and I need access to that NCI project"

In [22]:
# Look up the entry named 'cmip6_fs38' in the ACCESS-NRI catalog and open it
# as a datastore that can be searched further below.
cmip6_fs38_datastore = catalog.search(name='cmip6_fs38').to_source()

### (2) "what are the realms covered by cmip6_fs38?"

In [12]:
# Print a table of unique values, restricted to the 'realm' category
util.report_esm_unique(cmip6_fs38_datastore,keep_list=['realm'])

╒════════════╤═════════════════╕
│ Category   │ Unique values   │
╞════════════╪═════════════════╡
│ realm      │ aerosol         │
│            │ atmos           │
│            │ land            │
│            │ landIce         │
│            │ ocean           │
│            │ ocnBgchem       │
│            │ seaIce          │
╘════════════╧═════════════════╛


### (3) I want to see what variables, over what frequencies, are available in both the 'ocean' & 'ocnBgchem' realms

In [23]:
# Narrow the fs38 datastore to entries whose realm is 'ocean'
cmip6_fs38_ocean_datastore = cmip6_fs38_datastore.search(realm=['ocean'])

In [24]:
# Tabulate the unique variable_id values of the ocean datastore and also keep
# the two returned objects for programmatic use.
sorted_unique_dict, table_data = util.report_esm_unique(
    cmip6_fs38_ocean_datastore,
    keep_list=['variable_id'],
    return_results=True,
)

╒═════════════╤═══════════════════╕
│ Category    │ Unique values     │
╞═════════════╪═══════════════════╡
│ variable_id │ agessc            │
│             │ areacello         │
│             │ basin             │
│             │ bigthetao         │
│             │ bigthetaoga       │
│             │ deptho            │
│             │ difvho            │
│             │ difvso            │
│             │ evs               │
│             │ friver            │
│             │ fsitherm          │
│             │ hfbasin           │
│             │ hfbasinpadv       │
│             │ hfbasinpmadv      │
│             │ hfds              │
│             │ hfevapds          │
│             │ hflso             │
│             │ hfrainds          │
│             │ hfsifrazil        │
│             │ hfsso             │
│             │ htovgyre          │
│             │ htovovrt          │
│             │ masscello         │
│             │ masso             │
│             │ mfo         

## what is the long name of a particular variable?

In [25]:
# Print the descriptive attributes (e.g. standard_name, long_name, comment)
# recorded for the variable 'thetao' in the ocean datastore.
util.var_name_info(cmip6_fs38_ocean_datastore,'thetao')

*** Variable: [1mthetao[0m from catalog: <cmip6-fs38 catalog with 70406 dataset(s) from 292195 asset(s)> ***
╒═══════════════╤══════════════════════════════════════════════════════════════════════════════════════════════════╕
│ Attribute     │ Value                                                                                            │
╞═══════════════╪══════════════════════════════════════════════════════════════════════════════════════════════════╡
│ standard_name │ sea_water_potential_temperature                                                                  │
├───────────────┼──────────────────────────────────────────────────────────────────────────────────────────────────┤
│ long_name     │ Sea Water Potential Temperature                                                                  │
├───────────────┼──────────────────────────────────────────────────────────────────────────────────────────────────┤
│ comment       │ Diagnostic should be contributed even for models usi

## filter catalog for final ACCESS-ESM1.5 dataset

In [26]:
# Narrow the ocean datastore down to a single dataset family: historical
# 'thetao' from ACCESS-ESM1-5.
# NOTE(review): the meaning of file_type code 'l' is defined by the catalog — confirm.
final_search = cmip6_fs38_ocean_datastore.search(
    file_type='l',
    variable_id='thetao',
    source_id='ACCESS-ESM1-5',
    experiment_id='historical',
)

In [27]:
# No keep_list is passed here, so the report is not restricted to one category;
# use it to check that the search resolved to a single value per category.
util.report_esm_unique(final_search)

╒════════════════╤═════════════════╕
│ Category       │ Unique values   │
╞════════════════╪═════════════════╡
│ experiment_id  │ historical      │
├────────────────┼─────────────────┤
│ file_type      │ l               │
├────────────────┼─────────────────┤
│ frequency      │ mon             │
├────────────────┼─────────────────┤
│ grid_label     │ gn              │
├────────────────┼─────────────────┤
│ institution_id │ CSIRO           │
├────────────────┼─────────────────┤
│ project_id     │ CMIP            │
├────────────────┼─────────────────┤
│ realm          │ ocean           │
├────────────────┼─────────────────┤
│ source_id      │ ACCESS-ESM1-5   │
├────────────────┼─────────────────┤
│ table_id       │ Omon            │
├────────────────┼─────────────────┤
│ variable_id    │ thetao          │
╘════════════════╧═════════════════╛


## what is the chunking of the files in this final_search catalog?

In [28]:
# Print the on-disk chunking information of 'thetao' for the files in
# final_search; return_results=False means nothing is kept, only the table shown.
ard.find_chunking_info(final_search,'thetao',return_results=False)

╒══════════════════════════╤═════════════════════════════════════════════════════════════════════╕
│ Variable                 │ thetao                                                              │
├──────────────────────────┼─────────────────────────────────────────────────────────────────────┤
│ Chunk sizes (first file) │ 1, 25, 150, 180 ;                                                   │
├──────────────────────────┼─────────────────────────────────────────────────────────────────────┤
│ File path (first file)   │ /g/data/fs38/publications/CMIP6/CMIP/CSIRO/                         │
│                          │ ACCESS-ESM1-5/historical/r38i1p1f1/Omon/thetao/gn/                  │
│                          │ v20210525/                                                          │
│                          │ thetao_Omon_ACCESS-ESM1-5_historical_r38i1p1f1_gn_201001-201412.nc/ │
├──────────────────────────┼─────────────────────────────────────────────────────────────────────┤
│ Chunk si

# let's use the tools as they exist to try to start the workflow

## I want Australian CMIP6 data

In [21]:
# Load the CMIP6 datastore through the ACDtools helper.
# Note: this rebinds cmip6_fs38_datastore, which was created earlier from the
# ACCESS-NRI catalog.
cmip6_fs38_datastore = util.load_cmip6_CLEX_datastore()

In [None]:
# Report the unique values of the datastore subset selected by the configured
# ACCESS_ESM15 / CSEPTA / CLEX_catalog search query.
# Helpers are called through `util` because this notebook imports the ACDtools
# modules, not their individual functions (bare names raise NameError).
# NOTE(review): assumes load_config is defined in ACDtools.util — confirm.
util.report_esm_unique(cmip6_fs38_datastore.search(**util.load_config()['catalog_search_query_dict']['ACCESS_ESM15']['CSEPTA']['CLEX_catalog']))

In [None]:
# Keep the datastore subset selected by the configured ACCESS_ESM15 / CSEPTA /
# CLEX_catalog search query.
# load_config is called through `util` because only the ACDtools modules are
# imported in this notebook (the bare name raises NameError).
# NOTE(review): assumes load_config is defined in ACDtools.util — confirm.
CSEPTA_catalog = cmip6_fs38_datastore.search(**util.load_config()['catalog_search_query_dict']['ACCESS_ESM15']['CSEPTA']['CLEX_catalog'])

In [None]:
# Display the notebook representation of the CSEPTA search result
CSEPTA_catalog

In [None]:
# List the methods available on the CSEPTA search result.
# The original cell referenced `CSEPTA_intpp_catalog`, which is never defined in
# this notebook; `CSEPTA_catalog` is the search result that actually exists.
# NOTE(review): assumes show_methods is defined in ACDtools.util — confirm.
util.show_methods(CSEPTA_catalog)

In [None]:
# Report the unique values of the CSEPTA search result.
# Called through `util` (only the module is imported), and pointed at
# `CSEPTA_catalog` because `CSEPTA_intpp_catalog` is never defined in this notebook.
util.report_esm_unique(CSEPTA_catalog)

In [None]:
# Show the unique file paths contained in the CSEPTA search result.
# Uses `CSEPTA_catalog` because `CSEPTA_intpp_catalog` is never defined in this notebook.
CSEPTA_catalog.unique()['path']

In [None]:
# Hand-written search query: monthly historical 'intpp' from the ocnBgchem
# realm of ACCESS-ESM1-5, with file_type code 'f'.
search_dict = {
    'experiment_id': 'historical',
    'source_id': 'ACCESS-ESM1-5',
    'variable_id': ['intpp'],
    'realm': ['ocnBgchem'],
    'frequency': 'mon',
    'file_type': 'f',
}

In [None]:
# Apply the hand-written query to the datastore, then display the result
search = cmip6_fs38_datastore.search(**search_dict)
search

In [None]:
# Show the unique file paths matched by the search
search.unique()['path']

# let's repeat workflow with CLEX catalog

In [None]:
# Load the CMIP6 datastore through the ACDtools helper.
# The helper is called as `util.load_cmip6_CLEX_datastore` (as earlier in this
# notebook) because only the `util` module is imported, not the bare function.
cmip6_fs38_datastore = util.load_cmip6_CLEX_datastore()

In [None]:
# Display the configured search query for ACCESS_ESM15 / CSEPTA / CLEX_catalog.
# load_config is called through `util` because only the ACDtools modules are
# imported in this notebook (the bare name raises NameError).
# NOTE(review): assumes load_config is defined in ACDtools.util — confirm.
util.load_config()['catalog_search_query_dict']['ACCESS_ESM15']['CSEPTA']['CLEX_catalog']

In [None]:
# Report the unique values of the datastore subset selected by the configured
# ACCESS_ESM15 / CSEPTA / CLEX_catalog search query.
# Helpers are called through `util` because this notebook imports the ACDtools
# modules, not their individual functions (bare names raise NameError).
# NOTE(review): assumes load_config is defined in ACDtools.util — confirm.
util.report_esm_unique(cmip6_fs38_datastore.search(**util.load_config()['catalog_search_query_dict']['ACCESS_ESM15']['CSEPTA']['CLEX_catalog']))

In [None]:
# Inspect the chunking of 'thetao' for the files selected by the configured
# CSEPTA query; return_results=True also returns the collected information,
# which is displayed as the cell output.
# find_chunking_info is called through `ard` (as earlier in this notebook)
# because only the modules are imported, not the bare functions.
# NOTE(review): assumes load_config is defined in ACDtools.util — confirm.
ard.find_chunking_info(cmip6_fs38_datastore.search(**util.load_config()['catalog_search_query_dict']['ACCESS_ESM15']['CSEPTA']['CLEX_catalog']),'thetao',return_results=True)

In [None]:
# Select the datastore subset described by the configured ACCESS_ESM15 /
# CSEPTA / CLEX_catalog search query.
# load_config is called through `util` because only the ACDtools modules are
# imported in this notebook (the bare name raises NameError).
# NOTE(review): assumes load_config is defined in ACDtools.util — confirm.
search = cmip6_fs38_datastore.search(**util.load_config()['catalog_search_query_dict']['ACCESS_ESM15']['CSEPTA']['CLEX_catalog'])

In [None]:
%%time
ds_ESM15_esorted = load_ACCESS_ESM_ensemble(search,use_cftime=True,chunking_key='ACCESS_ESM15_3D')

In [None]:
# Display the notebook representation of the loaded ensemble object
ds_ESM15_esorted

In [None]:
# Take the first index along both the 'lev' and 'member' dimensions,
# then display the resulting subset.
data = ds_ESM15_esorted.isel(lev=0, member=0)
data

In [None]:
# Average over the 'i' and 'j' dimensions and plot the resulting thetao values
data.mean(dim=['i','j']).thetao.plot()

### piControl

In [None]:
# Select the datastore subset described by the configured ACCESS_ESM15 /
# CSEPTA / CLEX_catalog search query.
# load_config is called through `util` because only the ACDtools modules are
# imported in this notebook (the bare name raises NameError).
# NOTE(review): assumes load_config is defined in ACDtools.util — confirm.
# NOTE(review): this section is titled "piControl" but reuses the same query
# key as the previous section — confirm a piControl-specific query is not intended.
search = cmip6_fs38_datastore.search(**util.load_config()['catalog_search_query_dict']['ACCESS_ESM15']['CSEPTA']['CLEX_catalog'])

In [None]:
%%time
ds_ESM15 = load_ACCESS_ESM(search,use_cftime=True,chunking_key='ACCESS_ESM15_3D')

In [None]:
# Display the notebook representation of the loaded dataset object
ds_ESM15