# make ACS intake dataframe catalogue

Date: 19 May 2023

Author = {"name": "Thomas Moore", "affiliation": "CSIRO", "email": "thomas.moore@csiro.au", "orcid": "0000-0003-3930-1946"}

### Reference documents: https://intake-dataframe-catalog.readthedocs.io/en/latest/getting_started/quickstart.html

In [1]:
import intake

### start a local Dask client

In [2]:
# Start a Dask client for this session.
from dask.distributed import Client
# Called with no arguments; the rendered summary below reports a
# distributed.LocalCluster with 2 workers on this machine.
client = Client()
# Bare last expression so the notebook displays the client/cluster summary.
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: /proxy/8787/status,

0,1
Dashboard: /proxy/8787/status,Workers: 2
Total threads: 2,Total memory: 9.00 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:42401,Workers: 2
Dashboard: /proxy/8787/status,Total threads: 2
Started: Just now,Total memory: 9.00 GiB

0,1
Comm: tcp://127.0.0.1:46679,Total threads: 1
Dashboard: /proxy/45709/status,Memory: 4.50 GiB
Nanny: tcp://127.0.0.1:39177,
Local directory: /jobfs/84612829.gadi-pbs/dask-worker-space/worker-wbvk9p9d,Local directory: /jobfs/84612829.gadi-pbs/dask-worker-space/worker-wbvk9p9d

0,1
Comm: tcp://127.0.0.1:37323,Total threads: 1
Dashboard: /proxy/41555/status,Memory: 4.50 GiB
Nanny: tcp://127.0.0.1:44533,
Local directory: /jobfs/84612829.gadi-pbs/dask-worker-space/worker-0om9qzek,Local directory: /jobfs/84612829.gadi-pbs/dask-worker-space/worker-0om9qzek


### get catalogue path from config file

In [3]:
import configparser

#########
#### You will need to specify the correct path to your `data-catalogue/config.ini` file.
#### The path is resolved relative to the notebook's current working directory.
#########
config_file = './code/ACS/data-catalogue/config.ini'

# Create a ConfigParser object
config = configparser.ConfigParser()

# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. An empty list therefore means the path above is
# wrong; fail here with a clear message instead of letting config.get() raise a
# confusing NoSectionError further down.
if not config.read(config_file):
    raise FileNotFoundError(
        f"Could not read config file {config_file!r}; "
        "edit `config_file` to point at your data-catalogue/config.ini"
    )

# Prefix under which the catalogue files live.
# NOTE: later cells build file paths with `catalogue_path + '<file name>'`,
# so the value in config.ini must end with a path separator.
catalogue_path = config.get('paths', 'catalogue_path')

### load `intake-esm` sub-catalogues

In [4]:
# Open the intake-esm datastore descriptions (JSON) for the two experiments.
# File names are appended directly to `catalogue_path`.
data_catalog_hist = intake.open_esm_datastore(
    f"{catalogue_path}ccam_noresm2-mm_historical_aus-10i_12km.json"
)
data_catalog_ssp126 = intake.open_esm_datastore(
    f"{catalogue_path}ccam_noresm2-mm_ssp126_aus-10i_12km.json"
)

### initialise our dataframe catalogue

In [5]:
# Open the root dataframe catalogue that the sub-catalogues are added to below.
# NOTE(review): mode="w" presumably starts a fresh catalogue and replaces any
# existing root_df_catalog.csv — confirm against the intake-dataframe-catalog docs
# before re-running against a catalogue you want to keep.
df_cat = intake.open_df_catalog(path=catalogue_path+'root_df_catalog.csv', mode="w")

# add sources to df_cat

In [6]:
# Register the historical sub-catalogue in the dataframe catalogue.
# The same identifier is used as the source name and as its "model" metadata.
data_catalog_hist.name = model = "ccam_noresm2-mm_historical_aus-10i_12km"

# Distinct variable names (as strings) found in the historical datastore.
hist_variable_names = data_catalog_hist.df["variable"].unique().astype(str)
variables = list(set(hist_variable_names))

df_cat.add(data_catalog_hist, metadata={"model": model, "variable": variables})

# Display the catalogue so the new row can be checked.
df_cat

Unnamed: 0_level_0,model,variable
name,Unnamed: 1_level_1,Unnamed: 2_level_1
ccam_noresm2-mm_historical_aus-10i_12km,{ccam_noresm2-mm_historical_aus-10i_12km},"{wa10, zg10, clt, tas, hus400, cll, prhmax, ua50m, rsuscs, ua500, hus10, ta925, clwvi, ua600, evspsblpot, wa600, ta10, hus300, hus600, mrsos, wa925, va20, hfss, zg20, va200m, ta400, va925, va400, ..."


In [7]:
# Register the ssp126 sub-catalogue in the dataframe catalogue.
# The same identifier is used as the source name and as its "model" metadata.
data_catalog_ssp126.name = "ccam_noresm2-mm_ssp126_aus-10i_12km"
model = "ccam_noresm2-mm_ssp126_aus-10i_12km"

# Distinct variable names (as strings) found in the ssp126 datastore.
# This must be read from data_catalog_ssp126 itself: the previous version read
# data_catalog_hist here (copy-paste slip), so the ssp126 entry advertised the
# historical catalogue's variables rather than its own.
variables = list(
    set(
        data_catalog_ssp126.df.variable.unique().astype(str)
    )
)

df_cat.add(
    data_catalog_ssp126,
    metadata={"model": model, "variable": variables}
)

# Display the catalogue so both rows can be checked.
df_cat

Unnamed: 0_level_0,model,variable
name,Unnamed: 1_level_1,Unnamed: 2_level_1
ccam_noresm2-mm_historical_aus-10i_12km,{ccam_noresm2-mm_historical_aus-10i_12km},"{wa10, zg10, clt, tas, hus400, cll, prhmax, ua50m, rsuscs, ua500, hus10, ta925, clwvi, ua600, evspsblpot, wa600, ta10, hus300, hus600, mrsos, wa925, va20, hfss, zg20, va200m, ta400, va925, va400, ..."
ccam_noresm2-mm_ssp126_aus-10i_12km,{ccam_noresm2-mm_ssp126_aus-10i_12km},"{wa10, zg10, clt, tas, hus400, cll, prhmax, ua50m, rsuscs, ua500, hus10, ta925, clwvi, ua600, evspsblpot, wa600, ta10, hus300, hus600, mrsos, wa925, va20, hfss, zg20, va200m, ta400, va925, va400, ..."


# save the df_cat

In [8]:
# Persist the catalogue built above.
# NOTE(review): presumably written to the `path` given to open_df_catalog — confirm.
df_cat.save()

# THE END

In [9]:
# Tear down the Dask client started at the top of the notebook so the local
# workers do not linger after the catalogue has been saved.
client.shutdown()