# Generic playspace

In [1]:
from catalog_manager import esm, translators
from catalog_manager.build import DFCatUpdater

ModuleNotFoundError: No module named 'catalog_manager'

In [1]:
dfcat_dir = "/g/data/tm70/ds0092/projects/nri_intake_catalog/catalogs"
dfcat_name = "dfcat"

## Add existing CMIP6 esm catalog

In [3]:
from catalog_manager.translators import Cmip6MetadataTranslator

In [4]:
%%time

manager = DFCatUpdater.load_esm(
    "/g/data/hh5/public/apps/nci-intake-catalogue/esgf/cmip6/catalogue_latest.json",
    Cmip6MetadataTranslator
)
manager.add(
    name=dfcat_name,
    directory=dfcat_dir
)

CPU times: user 1min 28s, sys: 1.35 s, total: 1min 30s
Wall time: 1min 31s


## Add new esm catalogs from a config file

In [5]:
import yaml

config = "/g/data/tm70/ds0092/projects/intake_catalog/config/cosima.yaml"

with open(config) as f:
    config = yaml.safe_load(f)
    
catalogs = config.get("catalogs")
builder = getattr(esm, config.get("builder"))
translator = getattr(translators, config.get("translator"))
esmcat_dir = config.get("catalog_dir")

In [6]:
%%time

for name, kwargs in catalogs.items():
    description = kwargs.pop("description")
    paths = kwargs.pop("paths")
    
    manager = DFCatUpdater.build_esm(
        name=name,
        description=description,
        builder=builder,
        translator=translator,
        paths=paths,
        builder_kwargs=kwargs,
        directory=esmcat_dir,
        overwrite=True,
    )
    manager.add(
        name=dfcat_name,
        directory=dfcat_dir
    )

Successfully wrote ESM catalog json file to: file:///g/data/tm70/ds0092/projects/intake_catalog/catalogs/cosima/1deg_jra55_ryf9091_gadi.json
Successfully wrote ESM catalog json file to: file:///g/data/tm70/ds0092/projects/intake_catalog/catalogs/cosima/1deg_jra55_iaf_omip2_cycle1.json
Successfully wrote ESM catalog json file to: file:///g/data/tm70/ds0092/projects/intake_catalog/catalogs/cosima/1deg_jra55_iaf_omip2_cycle2.json
Successfully wrote ESM catalog json file to: file:///g/data/tm70/ds0092/projects/intake_catalog/catalogs/cosima/1deg_jra55_iaf_omip2_cycle3.json
Successfully wrote ESM catalog json file to: file:///g/data/tm70/ds0092/projects/intake_catalog/catalogs/cosima/1deg_jra55_iaf_omip2_cycle4.json
Successfully wrote ESM catalog json file to: file:///g/data/tm70/ds0092/projects/intake_catalog/catalogs/cosima/1deg_jra55_iaf_omip2_cycle5.json
Successfully wrote ESM catalog json file to: file:///g/data/tm70/ds0092/projects/intake_catalog/catalogs/cosima/1deg_jra55_iaf_omip2_c

## Use the DF catalog

In [2]:
import os
import ast
import intake
import pandas as pd

In [5]:
cat = intake.open_df_catalog(
    os.path.join(dfcat_dir, f"{dfcat_name}.csv"),
    name_column="experiment",
    read_kwargs={"converters": {"variable": ast.literal_eval}},
)

In [7]:
subcat = cat.search(experiment="CMIP6_CMS", variable="psl")

subcat

Unnamed: 0_level_0,realm,model,description,variable,frequency
experiment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
CMIP6_CMS,"[atmos, unknown]",CMIP6,CLEX CMS indexed replicas of CMIP6 available o...,"[abs550aer, albisccp, bldep, ccb, cct, ch4, ci...","[1day, 1mon, 3hr, 6hr, subhr]"


In [126]:
pd.set_option('display.max_colwidth', 300)

def _list_unique(series):
    uniques = sorted(
        set(
            series.drop_duplicates().apply(
                lambda x: x if series.name in cat.dfcat.columns_with_iterables else [x]
            ).sum()
        )
    )
    return uniques[0] if len(uniques) == 1 else uniques

df_summary = subcat.df.groupby("experiment").agg({col: _list_unique for col in cat.df.columns.drop("yaml")})

df_summary

Unnamed: 0_level_0,experiment,frequency,variable,realm,model,description
experiment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
CMIP6_CMS,CMIP6_CMS,"[1day, 1mon, 3hr, 6hr, subhr]","[abs550aer, albisccp, bldep, ccb, cct, ch4, ci, cl, cli, clivi, clt, cltisccp, clw, clwvi, co2, dryss, evspsbl, fco2antt, fco2fos, fco2nat, gpp, hfls, hfss, hur, hurs, hursmax, hursmin, hus, huss, lai, lossch4, mc, mmraerh2o, mmrbc, mmrdust, mmroa, mmrso4, mmrss, mrro, mrsfl, mrsll, mrso, mrsol,...","[atmos, unknown]",CMIP6,CLEX CMS indexed replicas of CMIP6 available on Gadi.


In [119]:
subcat.df

Unnamed: 0,experiment,frequency,variable,realm,model,description,yaml
0,CMIP6_CMS,1mon,"[o3, ch4, lossch4, oh, fco2nat, pr, psl, rlut, rsdt, rsut, ta, tas, tasmax, tasmin, ts, uas, vas, zg, toz, fco2antt, fco2fos, co2, rlds, cl, cli, clivi, clt, clw, evspsbl, hfls, hfss, hur, hurs, hus, huss, pfull, phalf, prc, prsn, prw, ps, rldscs, rlus, rlutcs, rsds, rsdscs, rsus, rsuscs, rsutcs...",atmos,CMIP6,CLEX CMS indexed replicas of CMIP6 available on Gadi.,sources:\n CMIP6_CMS:\n args:\n obj: /g/data/hh5/public/apps/nci-intake-catalogue/esgf/cmip6/catalogue_latest.json\n description: ''\n driver: intake_esm.core.esm_datastore\n metadata: {}\n
1,CMIP6_CMS,3hr,"[hfls, hfss, huss, mrro, pr, prc, ps, rlds, rlus, rsds, rsus, tas, uas, vas, tos, sfcWind, clt, mrsos, prsn, rldscs, rsdscs, rsuscs, gpp, rsdsdiff, tslsi, ts, rsutcs, ta, psl, rlut, rsut, hus, ua, va]",atmos,CMIP6,CLEX CMS indexed replicas of CMIP6 available on Gadi.,sources:\n CMIP6_CMS:\n args:\n obj: /g/data/hh5/public/apps/nci-intake-catalogue/esgf/cmip6/catalogue_latest.json\n description: ''\n driver: intake_esm.core.esm_datastore\n metadata: {}\n
2,CMIP6_CMS,6hr,"[psl, tas, uas, vas, hus, ps, ta, ua, va, pr, sfcWind, zg, zg500, pfull, ts, tsl, rv850, hurs, ua100m, va100m, wap, wsgmax10m, huss, mrsol, zg1000, bldep]",atmos,CMIP6,CLEX CMS indexed replicas of CMIP6 available on Gadi.,sources:\n CMIP6_CMS:\n args:\n obj: /g/data/hh5/public/apps/nci-intake-catalogue/esgf/cmip6/catalogue_latest.json\n description: ''\n driver: intake_esm.core.esm_datastore\n metadata: {}\n
3,CMIP6_CMS,subhr,"[ps, ts, psl]",atmos,CMIP6,CLEX CMS indexed replicas of CMIP6 available on Gadi.,sources:\n CMIP6_CMS:\n args:\n obj: /g/data/hh5/public/apps/nci-intake-catalogue/esgf/cmip6/catalogue_latest.json\n description: ''\n driver: intake_esm.core.esm_datastore\n metadata: {}\n
4,CMIP6_CMS,1day,"[pr, sfcWind, tas, tasmax, tasmin, mrsfl, mrsll, mrsol, lai, hur, mrro, mrsos, rlut, rsus, snw, wap, hfls, hfss, psl, hurs, hus, rsds, ta, ua, va, zg, sfcWindmax, uas, vas, prw, clt, rlds, rlus, ts, mrso, hursmin, huss, hursmax, prc, prsn, ta850, tauu, tauv, snc, tslsi, tsl]",unknown,CMIP6,CLEX CMS indexed replicas of CMIP6 available on Gadi.,sources:\n CMIP6_CMS:\n args:\n obj: /g/data/hh5/public/apps/nci-intake-catalogue/esgf/cmip6/catalogue_latest.json\n description: ''\n driver: intake_esm.core.esm_datastore\n metadata: {}\n


In [23]:
groups.groups

{'025deg_jra55_iaf_omip2_cycle1': [12, 13], '025deg_jra55_iaf_omip2_cycle2': [14, 15], '025deg_jra55_iaf_omip2_cycle3': [16, 17], '025deg_jra55_iaf_omip2_cycle4': [18, 19], '025deg_jra55_iaf_omip2_cycle5': [20, 21], '025deg_jra55_iaf_omip2_cycle6': [22, 23], '1deg_jra55_iaf_omip2_cycle1': [0, 1], '1deg_jra55_iaf_omip2_cycle2': [2, 3], '1deg_jra55_iaf_omip2_cycle3': [4, 5], '1deg_jra55_iaf_omip2_cycle4': [6, 7], '1deg_jra55_iaf_omip2_cycle5': [8, 9], '1deg_jra55_iaf_omip2_cycle6': [10, 11]}

In [8]:
import numpy as np

pd.set_option('display.max_colwidth', 200)

tab = pd.pivot_table(subcat.df, index=["model", "experiment", "description"], aggfunc=lambda x: np.unique(x)).reset_index()
tab.set_index("experiment")[["model", "description", "frequency", "realm", "variable"]]

Unnamed: 0_level_0,model,description,frequency,realm,variable
experiment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
025deg_jra55_iaf_omip2_cycle1,ACCESS-OM2,0.25 degree ACCESS-OM2 physics only global configuration with JRA55-do v1.4 IAF Interannual Forcing. Cycle 1 of 6 61-year cycles of 1 Jan 1958 to 1 Jan 2019 following the OMIP-2 protocol. Run for ...,"[1day, 1mon]",[ocean],"[[pbot_t, patm_t, rho_dzt, dht, sea_level, sea_level_sq, pot_temp, temp, sst, sst_sq, bottom_temp, salt, sss, sss_sq, bottom_salt, age_global, mld, mld_max, mld_min, mld_sq, psiu, psiv, bv_freq, b..."
025deg_jra55_iaf_omip2_cycle2,ACCESS-OM2,0.25 degree ACCESS-OM2 physics only global configuration with JRA55-do v1.4 IAF Interannual Forcing. Cycle 2 of 6 61-year cycles of 1 Jan 1958 to 1 Jan 2019 following the OMIP-2 protocol. Run for ...,"[1day, 1mon]",[ocean],"[[pbot_t, patm_t, rho_dzt, dht, sea_level, sea_level_sq, pot_temp, temp, sst, sst_sq, bottom_temp, salt, sss, sss_sq, bottom_salt, age_global, mld, mld_max, mld_min, mld_sq, psiu, psiv, bv_freq, b..."
025deg_jra55_iaf_omip2_cycle3,ACCESS-OM2,0.25 degree ACCESS-OM2 physics only global configuration with JRA55-do v1.4 IAF Interannual Forcing. Cycle 3 of 6 61-year cycles of 1 Jan 1958 to 1 Jan 2019 following the OMIP-2 protocol. Run for ...,"[1day, 1mon]",[ocean],"[[pbot_t, patm_t, rho_dzt, dht, sea_level, sea_level_sq, pot_temp, temp, sst, sst_sq, bottom_temp, salt, sss, sss_sq, bottom_salt, age_global, mld, mld_max, mld_min, mld_sq, psiu, psiv, bv_freq, b..."
025deg_jra55_iaf_omip2_cycle4,ACCESS-OM2,0.25 degree ACCESS-OM2 physics only global configuration with JRA55-do v1.4 IAF Interannual Forcing. Cycle 4 of 6 61-year cycles of 1 Jan 1958 to 1 Jan 2019 following the OMIP-2 protocol. Run for ...,"[1day, 1mon]",[ocean],"[[pbot_t, patm_t, rho_dzt, dht, sea_level, sea_level_sq, pot_temp, temp, sst, sst_sq, bottom_temp, salt, sss, sss_sq, bottom_salt, age_global, mld, mld_max, mld_min, mld_sq, psiu, psiv, bv_freq, b..."
025deg_jra55_iaf_omip2_cycle5,ACCESS-OM2,0.25 degree ACCESS-OM2 physics only global configuration with JRA55-do v1.4 IAF Interannual Forcing. Cycle 5 of 6 61-year cycles of 1 Jan 1958 to 1 Jan 2019 following the OMIP-2 protocol. Run for ...,"[1day, 1mon]",[ocean],"[[pbot_t, patm_t, rho_dzt, dht, sea_level, sea_level_sq, pot_temp, temp, sst, sst_sq, bottom_temp, salt, sss, sss_sq, bottom_salt, age_global, mld, mld_max, mld_min, mld_sq, psiu, psiv, bv_freq, b..."
025deg_jra55_iaf_omip2_cycle6,ACCESS-OM2,0.25 degree ACCESS-OM2 physics only global configuration with JRA55-do v1.4 IAF Interannual Forcing. Cycle 6 of 6 61-year cycles of 1 Jan 1958 to 1 Jan 2019 following the OMIP-2 protocol. Run for ...,"[1day, 1mon]",[ocean],"[[salt_int_rhodz, temp_int_rhodz, temp_tendency, temp_advection, temp_submeso, temp_vdiffuse_diff_cbt, temp_nonlocal_KPP, temp_vdiffuse_sbc, frazil_3d, temp_eta_smooth, neutral_diffusion_temp, neu..."
1deg_jra55_iaf_omip2_cycle1,ACCESS-OM2,1 degree ACCESS-OM2-BGC global configuration with JRA55-do v1.4 IAF Interannual Forcing. Cycle 1 of 6 61-year cycles of 1 Jan 1958 to 1 Jan 2019 following the OMIP-2 protocol. Run for and included...,"[1day, 1mon]",[ocean],"[[dissicos_raw, surface_dic, talkos_raw, o2os_raw, no3os_raw, phyos_raw, surface_zoo, surface_det, surface_fe, adic, alk, no3, det, caco3, intpp_raw, paco2, pco2, fgco2_raw, fgco2nat_raw, fgo2_raw..."
1deg_jra55_iaf_omip2_cycle2,ACCESS-OM2,1 degree ACCESS-OM2-BGC global configuration with JRA55-do v1.4 IAF Interannual Forcing. Cycle 2 of 6 61-year cycles of 1 Jan 1958 to 1 Jan 2019 following the OMIP-2 protocol. Run for and included...,"[1day, 1mon]",[ocean],"[[dissicos_raw, surface_dic, talkos_raw, o2os_raw, no3os_raw, phyos_raw, surface_zoo, surface_det, surface_fe, adic, alk, no3, det, caco3, intpp_raw, paco2, pco2, fgco2_raw, fgco2nat_raw, fgo2_raw..."
1deg_jra55_iaf_omip2_cycle3,ACCESS-OM2,1 degree ACCESS-OM2-BGC global configuration with JRA55-do v1.4 IAF Interannual Forcing. Cycle 3 of 6 61-year cycles of 1 Jan 1958 to 1 Jan 2019 following the OMIP-2 protocol. Run for and included...,"[1day, 1mon]",[ocean],"[[dissicos_raw, surface_dic, talkos_raw, o2os_raw, no3os_raw, phyos_raw, surface_zoo, surface_det, surface_fe, adic, alk, no3, det, caco3, intpp_raw, paco2, pco2, fgco2_raw, fgco2nat_raw, fgo2_raw..."
1deg_jra55_iaf_omip2_cycle4,ACCESS-OM2,1 degree ACCESS-OM2-BGC global configuration with JRA55-do v1.4 IAF Interannual Forcing. Cycle 4 of 6 61-year cycles of 1 Jan 1958 to 1 Jan 2019 following the OMIP-2 protocol. Run for and included...,"[1day, 1mon]",[ocean],"[[dissicos_raw, surface_dic, talkos_raw, o2os_raw, no3os_raw, phyos_raw, surface_zoo, surface_det, surface_fe, adic, alk, no3, det, caco3, intpp_raw, paco2, pco2, fgco2_raw, fgco2nat_raw, fgo2_raw..."


In [205]:
subcat.to_subcatalog()

KeyError: 'name'