# Generic playspace

In [1]:
from catalog_manager import esmcat, dfcat, translators

In [2]:
# Directory and base name of the dataframe catalog. Later cells pass these to
# `CatalogManager.add` and open `<dfcat_dir>/<dfcat_name>.csv`.
dfcat_dir, dfcat_name = (
    "/g/data/tm70/ds0092/projects/nri_intake_catalog/catalogs",
    "dfcat",
)

## Add existing CMIP6 esm catalog

In [3]:
import yaml

# Path to the YAML file describing the catalogs to add. It has its own name so
# that `config` only ever refers to the parsed mapping, never to a path string.
config_path = "/g/data/tm70/ds0092/projects/nri_intake_catalog/config/cmip6.yaml"

with open(config_path) as f:
    config = yaml.safe_load(f)

# Top-level settings used by the following cell. A missing key yields None,
# except `metadata`, which falls back to an empty dict so it can be unpacked
# with ** into the load arguments.
builder = config.get("builder")
translator = config.get("translator")
metadata = config.get("metadata") or {}
subcatalog_dir = config.get("subcatalog_dir")
catalogs = config.get("catalogs")

In [4]:
%%time

# Load every existing ESM catalog listed in the config and add each one to the
# dataframe catalog identified by `dfcat_name` / `dfcat_dir`.
for name, kwargs in catalogs.items():
    # Work on a shallow copy: popping straight from the dicts held in
    # `catalogs` would strip "paths"/"description" from the parsed config and
    # make this cell raise KeyError the second time it is run.
    kwargs = dict(kwargs)

    # An existing catalog must be described by exactly one JSON file.
    paths = kwargs.pop("paths")
    assert len(paths) == 1, f"expected exactly one path for {name!r}, got {len(paths)}"
    json_file = paths[0]

    load_args = dict(
        name=name,
        description=kwargs.pop("description"),
        json_file=json_file,
        # Whatever is left in the entry after the pops above is forwarded as-is.
        open_kwargs=kwargs,
        **metadata,
    )
    # Only pass a translator when the config names one; it is looked up by
    # name on the `translators` module.
    if translator:
        load_args["translator"] = getattr(translators, translator)

    manager = dfcat.CatalogManager.load_esm(**load_args).add(
        name=dfcat_name,
        directory=dfcat_dir
    )

CPU times: user 18.5 s, sys: 1.16 s, total: 19.7 s
Wall time: 19.7 s


## Add new esm catalogs from a config file

In [None]:
import yaml

# Path to the YAML file describing the catalogs to build. It has its own name
# so that `config` only ever refers to the parsed mapping, never to a path.
config_path = "/g/data/tm70/ds0092/projects/nri_intake_catalog/config/access-esm1-5.yaml"

with open(config_path) as f:
    config = yaml.safe_load(f)

# Top-level settings used by the following cell. A missing key yields None,
# except `metadata`, which falls back to an empty dict so it can be unpacked
# with ** into the build arguments.
builder = config.get("builder")
translator = config.get("translator")
metadata = config.get("metadata") or {}
subcatalog_dir = config.get("subcatalog_dir")
catalogs = config.get("catalogs")

In [None]:
%%time

# Build a new ESM catalog for every entry in the config and add each one to
# the dataframe catalog identified by `dfcat_name` / `dfcat_dir`.
for name, kwargs in catalogs.items():
    # Work on a shallow copy: popping straight from the dicts held in
    # `catalogs` would strip "paths"/"description" from the parsed config and
    # make this cell raise KeyError the second time it is run.
    kwargs = dict(kwargs)

    build_args = dict(
        name=name,
        description=kwargs.pop("description"),
        # The builder class is looked up by name on the `esmcat` module.
        builder=getattr(esmcat, builder),
        paths=kwargs.pop("paths"),
        # Whatever is left in the entry after the pops above is forwarded as-is.
        builder_kwargs=kwargs,
        directory=subcatalog_dir,
        overwrite=True,
        **metadata,
    )
    # Only pass a translator when the config names one; it is looked up by
    # name on the `translators` module.
    if translator:
        build_args["translator"] = getattr(translators, translator)

    manager = dfcat.CatalogManager.build_esm(**build_args).add(
        name=dfcat_name,
        directory=dfcat_dir
    )

## Use the DF catalog

In [5]:
import os
import ast
import intake
import pandas as pd

In [6]:
# Open the dataframe catalog CSV written by the cells above. Rows are named by
# the "subcatalog" column, and "variable" is declared as a column holding
# iterables.
cat = intake.open_df_catalog(
    os.path.join(dfcat_dir, f"{dfcat_name}.csv"),
    columns_with_iterables=["variable"],
    name_column="subcatalog",
)

# Bare last expression so the notebook renders the catalog.
cat

Unnamed: 0_level_0,model,description,realm,frequency,variable
subcatalog,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
01deg_jra55v13_ryf9091,ACCESS-OM2,0.1 degree ACCESS-OM2 global model configuration with JRA55-do v1.3 RYF9091 repeat year forcing (May 1990 to Apr 1991),ocean,"[1day, 1mon, 3hr, 3mon, fx]","[age_global, area_t, area_u, average_DT, average_T1, average_T2, bih_fric_u, bih_fric_v, buoyfreq2_wt, drag_coeff, dxt, dxu, dyt, dyu, dzt, eta_global, eta_t, evap, evap_heat, fprec, fprec_melt_he..."
01deg_jra55v140_iaf,ACCESS-OM2,Cycle 1/4 of 0.1 degree ACCESS-OM2 global model configuration with JRA55-do v1.4.0 OMIP2 interannual forcing,"[ice, ocean]","[1day, 1mon, fx]","[ANGLE, ANGLET, HTE, HTN, Tsfc_m, age_global, aice_m, aicen_m, alidf_ai_m, alidr_ai_m, alvdf_ai_m, alvdr_ai_m, area_t, area_u, average_DT, average_T1, average_T2, bih_fric_u, bih_fric_v, blkmask, ..."
01deg_jra55v140_iaf_cycle2,ACCESS-OM2,Cycle 2/4 of 0.1 degree ACCESS-OM2 global model configuration with JRA55-do v1.4.0 OMIP2 interannual forcing,"[ice, ocean]","[1day, 1mon, fx]","[ANGLE, ANGLET, HTE, HTN, Sinz_m, Tinz_m, Tsfc_m, age_global, aice_m, aicen_m, alidf_ai_m, alidr_ai_m, alvdf_ai_m, alvdr_ai_m, area_t, area_u, average_DT, average_T1, average_T2, bih_fric_u, bih_f..."
01deg_jra55v140_iaf_cycle3,ACCESS-OM2,Cycle 3/4 of 0.1 degree ACCESS-OM2 global model configuration with JRA55-do v1.4.0 OMIP2 interannual forcing,"[ice, ocean]","[1day, 1mon, fx]","[ANGLE, ANGLET, HTE, HTN, Sinz_m, Tinz_m, Tsfc_m, age_global, aice_m, aicen_m, alidf_ai_m, alidr_ai_m, alvdf_ai_m, alvdr_ai_m, area_t, area_u, average_DT, average_T1, average_T2, blkmask, bmf_u, b..."
01deg_jra55v140_iaf_cycle4,ACCESS-OM2,Cycle 4/4 of 0.1 degree ACCESS-OM2 + WOMBAT BGC global model configuration with JRA55-do v1.4.0 OMIP2 interannual forcing,"[ice, ocean]","[1day, 1mon, 3hr, 6hr, fx]","[ANGLE, ANGLET, HTE, HTN, NCAT, PP_net_m, Sinz_m, Tinz_m, Tsfc_m, VGRDi, adic, adic_int100, adic_intmld, adic_xflux_adv, adic_yflux_adv, adic_zflux_adv, age_global, aice_h, aice_m, aicen_m, albsni..."
01deg_jra55v140_iaf_cycle4_jra55v150_extension,ACCESS-OM2,Extensions of cycle 4/4 of 0.1 degree ACCESS-OM2 + WOMBAT BGC global model configuration with JRA55-do v1.5.0 and v1.5.0.1 OMIP2 interannual forcing,"[ice, ocean]","[0hr, 1day, 1mon, fx]","[ANGLE, ANGLET, HTE, HTN, PP_net_m, Tsfc_m, adic, adic_int100, adic_intmld, adic_xflux_adv, adic_yflux_adv, adic_zflux_adv, age_global, aice_m, aicen_m, albsni_m, algal_N_m, alidf_ai_m, alidr_ai_m..."
01deg_jra55v150_iaf_cycle1,ACCESS-OM2,Cycle 1/1 of 0.1 degree ACCESS-OM2 global model configuration with JRA55-do v1.5.0 OMIP2 interannual forcing,"[ice, ocean]","[1day, 1mon, fx]","[ULAT, ULON, age_global, aice_m, area_t, area_u, average_DT, average_T1, average_T2, blkmask, bmf_u, bmf_v, diff_cbt_t, drag_coeff, dxt, dxu, dyt, dyu, dzt, ekman_we, eta_nonbouss, eta_t, evap, ev..."
025deg_jra55_iaf_omip2_cycle1,ACCESS-OM2,Cycle 1/6 of 0.25 degree ACCESS-OM2 physics-only global configuration with JRA55-do v1.4 OMIP2 interannual forcing (1958-2019),"[ice, ocean]","[1day, 1mon, 1yr, fx]","[ANGLE, ANGLET, HTE, HTN, Tair_m, Tsfc_m, age_global, agm, aice_m, aicen_m, aiso_bih, albice_m, albsni_m, albsno_m, alidf_ai_m, alidr_ai_m, alvdf_ai_m, alvdr_ai_m, area_t, area_u, aredi, average_D..."
025deg_jra55_iaf_omip2_cycle2,ACCESS-OM2,Cycle 2/6 of 0.25 degree ACCESS-OM2 physics-only global configuration with JRA55-do v1.4 OMIP2 interannual forcing (1958-2019),"[ice, ocean]","[1day, 1mon, 1yr, fx]","[ANGLE, ANGLET, HTE, HTN, Tair_m, Tsfc_m, age_global, agm, aice_m, aicen_m, aiso_bih, albice_m, albsni_m, albsno_m, alidf_ai_m, alidr_ai_m, alvdf_ai_m, alvdr_ai_m, area_t, area_u, aredi, average_D..."
025deg_jra55_iaf_omip2_cycle3,ACCESS-OM2,Cycle 3/6 of 0.25 degree ACCESS-OM2 physics-only global configuration with JRA55-do v1.4 OMIP2 interannual forcing (1958-2019),"[ice, ocean]","[1day, 1mon, 1yr, fx]","[ANGLE, ANGLET, HTE, HTN, Tair_m, Tsfc_m, age_global, agm, aice_m, aicen_m, aiso_bih, albice_m, albsni_m, albsno_m, alidf_ai_m, alidr_ai_m, alvdf_ai_m, alvdr_ai_m, area_t, area_u, aredi, average_D..."


In [None]:
# Search the dataframe catalog for entries whose `model` is "ACCESS-ESM1-5".
cat2 = cat.search(model="ACCESS-ESM1-5")

# Display the search result.
cat2

In [None]:
# Convert the search result into a dict of subcatalogs.
# NOTE(review): presumably keyed by subcatalog name — confirm against
# `to_subcatalog_dict`.
subcat_dict = cat2.to_subcatalog_dict()

In [None]:
# Display the subcatalog dict produced by the previous cell.
subcat_dict

# COSIMA

In [None]:
# Scratch test of the ACCESS-OM2 builder: run it over a single experiment
# directory and save the result under the throwaway name "delete".
# `Builder` ends up bound to whatever `.save(...)` returns.
Builder = (
    esmcat.AccessOm2Builder(
        ["/g/data/cj50/access-om2/raw-output/access-om2-01/01deg_jra55v140_iaf"]
    )
    .build()
    .save(
        "delete",
        "delete me",
        "/g/data/tm70/ds0092/projects/nri_intake_catalog/catalogs",
    )
)

In [None]:
import intake

# Open the throwaway "delete" catalog as an ESM datastore, declaring
# "variable" as a column that holds iterables.
# NOTE(review): presumably this is the JSON written by the `.save("delete", ...)`
# call in the previous cell — confirm.
test = intake.open_esm_datastore(
    "/g/data/tm70/ds0092/projects/nri_intake_catalog/catalogs/delete.json",
    columns_with_iterables=["variable"]
)

In [None]:
from distributed import Client

# Create a distributed `Client` with default arguments and display it.
client = Client()
client

In [None]:
# Narrow the test datastore to the "wt" variable at "1mon" frequency and
# display the resulting selection.
subtest = test.search(variable="wt", frequency="1mon")
subtest

In [None]:
# Load the selection via `to_dask`, forwarding `use_cftime=True` through
# `xarray_open_kwargs`.
# NOTE(review): `ds` is presumably an xarray Dataset — confirm.
ds = subtest.to_dask(xarray_open_kwargs=dict(use_cftime=True))

In [None]:
# Quick-look plot of `wt` at the first `time` index and first `sw_ocean` index.
ds["wt"].isel(time=0, sw_ocean=0).plot()

# Parsing `file_id`

In [None]:
# Sample output filenames used to exercise the `file_id` parsing cells below.
# They mix several date placements: six-digit dates glued to a prefix
# (e.g. "pm107912"), "YYYY-MM" and "YYYY_MM" dates, names with no date at all,
# and names where an eight-digit date follows the ".nc" extension.
files = [
    "bz687a.pm107912_mon.nc",
    "bz687a.p7107912_mon.nc",
    "iceh.1917-05.nc",
    "iceh.1917-05-daily.nc",
    "ocean_month.nc",
    "ocean.nc",
    "ocean-3d-v-1-monthly-pow02-ym_1958_04.nc",
    "ocean-2d-sfc_salt_flux_restore-1-monthly-mean-ym_1958_04.nc",
    "PI-GWL-B2035.pe-109904_dai.nc",
    "PI-GWL-B2035.pa-109904_mon.nc",
    "PI-1pct-02.pe-011802_dai.nc_dai.nc",
    "iceh.0101-01.nc",
    "ocean_bgc_ann.nc-01551231",
    "ocean_bgc_daily.nc-02971231",
    "ocean_bgc_mth.nc-03011231",
    "ocean_bgc.nc-02011231",
    "ocean_daily.nc-02531231",
    "ocean_month.nc-01991231",
    "ocean_scalar.nc-01851231",
]

In [None]:
import re
from pathlib import Path

# Derive a Python-identifier-friendly `file_id` from each sample filename.
for file in files:
    # `stem` drops the final suffix, which for names like
    # "ocean_bgc_ann.nc-01551231" also drops the trailing date.
    filename = Path(file).stem
    # Remove the first (leftmost) date of the form dddd-dd, dddd_dd, dddddd or
    # dddddddd that is followed by a non-digit or the end of the name. The
    # trailing `(.*)$` consumes the rest of the string, so at most one date is
    # removed per name; whatever followed the date is kept via \3\4.
    file_id = re.sub(r"(\d{4}[-_]\d{2}|\d{6}|\d{8})(([^0-9]|$))(.*)$", r"\3\4", filename)
    # Enforce Python characters
    file_id = re.sub(r"[-.]", "_", file_id)
    # Collapse any run of underscores to a single one and strip a dangling
    # trailing underscore. Matching `_{2,}` rather than `__` ensures a run of
    # three or more does not leave a double underscore behind.
    file_id = re.sub(r"_{2,}", "_", file_id).rstrip("_")
    print(file_id)

In [None]:
import re
from pathlib import Path

# Alternative `file_id` parsing: strip separator-prefixed dates, then map the
# remaining "-" and "." characters to "_".
for file in files:
    filename = Path(file).stem

    # Remove every date (dddd-dd, dddd_dd, dddddddd or dddddd) together with the
    # "-", "_" or "." immediately before it. The pattern has a single capture
    # group, so the previous replacement template referencing group 2 raised
    # `re.error`; the match is now simply dropped. The eight-digit alternative
    # is listed before the six-digit one because otherwise it can never match.
    without_date = re.sub(r"[-_.](\d{4}[-_]\d{2}|\d{8}|\d{6})", "", filename)

    # Enforce Python characters.
    file_id = re.sub(r"[-.]", "_", without_date)
    print(file, file_id)