In [None]:
# This notebook builds a catalogue of the ACS bias-adjusted data stored under
# /g/data/ia39/australian-climate-service/test-data/CORDEX-CMIP6/bias-adjustment-output/AGCD-05i
# A catalogue records every file available within a directory tree so the data can be easily searched and selected.

In [1]:
cd ~/bom-toolbox/

/home/563/gt3409/bom-toolbox


In [2]:
from CCVE_toolbox.catalog_builder import build_catalog

In [3]:
import dask
from dask.distributed import Client, LocalCluster
# Set Dask's `distributed.worker.daemon` option to False before any workers are created.
# NOTE(review): presumably this lets worker processes spawn child processes of their own — confirm it is still needed.
dask.config.set({'distributed.worker.daemon': False})
# Alternative (not used): size the cluster explicitly and pass it to Client(cluster), e.g.
# cluster = LocalCluster(n_workers=40, processes=True, threads_per_worker=1)
client = Client()  # with no arguments, starts a local cluster and connects to it
client  # last expression: display the cluster summary (dashboard link, workers, memory)

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: /proxy/8787/status,

0,1
Dashboard: /proxy/8787/status,Workers: 4
Total threads: 4,Total memory: 18.00 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:41225,Workers: 4
Dashboard: /proxy/8787/status,Total threads: 4
Started: Just now,Total memory: 18.00 GiB

0,1
Comm: tcp://127.0.0.1:41539,Total threads: 1
Dashboard: /proxy/36965/status,Memory: 4.50 GiB
Nanny: tcp://127.0.0.1:35709,
Local directory: /jobfs/121455931.gadi-pbs/dask-scratch-space/worker-8m5_cwt7,Local directory: /jobfs/121455931.gadi-pbs/dask-scratch-space/worker-8m5_cwt7

0,1
Comm: tcp://127.0.0.1:36523,Total threads: 1
Dashboard: /proxy/37953/status,Memory: 4.50 GiB
Nanny: tcp://127.0.0.1:38149,
Local directory: /jobfs/121455931.gadi-pbs/dask-scratch-space/worker-knxnh3tp,Local directory: /jobfs/121455931.gadi-pbs/dask-scratch-space/worker-knxnh3tp

0,1
Comm: tcp://127.0.0.1:37465,Total threads: 1
Dashboard: /proxy/43191/status,Memory: 4.50 GiB
Nanny: tcp://127.0.0.1:45095,
Local directory: /jobfs/121455931.gadi-pbs/dask-scratch-space/worker-glni2nrb,Local directory: /jobfs/121455931.gadi-pbs/dask-scratch-space/worker-glni2nrb

0,1
Comm: tcp://127.0.0.1:33607,Total threads: 1
Dashboard: /proxy/36275/status,Memory: 4.50 GiB
Nanny: tcp://127.0.0.1:43263,
Local directory: /jobfs/121455931.gadi-pbs/dask-scratch-space/worker-tjknxejg,Local directory: /jobfs/121455931.gadi-pbs/dask-scratch-space/worker-tjknxejg


In [4]:
# import modules
import pathlib
import traceback
import re
import glob
import os
import time

from ecgtools import Builder, RootDirectory, glob_to_regex
from ecgtools.builder import INVALID_ASSET, TRACEBACK


In [5]:
def acs_parser(file):
    """
    Extract catalogue attributes for one ACS bias-adjusted file from its path.

    The attributes are read from the last eleven directory levels above the file:

        <mip_era>/<product>/<domain>/<institution_id>/<parent_model>/
        <experiment_id>/<member_id>/<downscaling_model>/<bias_adjustment>/
        <table_id>/<variable_id>/

    e.g.
        /g/data/ia39/australian-climate-service/test-data/
        CORDEX-CMIP6/bias-adjustment-output/AGCD-05i/BOM/ACCESS-CM2/ssp370/
        r4i1p1f1/BARPA-R/v1-r1-ACS-QME-BARRA-R2-1980-2022/day/tasmaxAdjust/

    The file name is only used for the optional trailing time range:

        <variable_id>_<domain>_<parent_model>_<experiment_id>_<member_id>_
        <institution_id>_<downscaling_model>_<bias_adjustment>_<table_id>[_<time_range>].nc

    e.g.
        tasmaxAdjust_AGCD-05i_ACCESS-CM2_ssp370_r4i1p1f1_BOM_BARPA-R_v1-r1-ACS-QME-BARRA-R2-1980-2022_day_20240101-20241231.nc

    Parameters
    ----------
    file : str or path-like
        Path to a data file. It may be absolute or relative, as long as at
        least eleven directory levels precede the file name.

    Returns
    -------
    dict
        On success, the keys 'institution_id', 'variable_id', 'table_id',
        'source_id' (the parent model), 'downscaling_model', 'experiment_id',
        'member_id', 'grid_label' (the domain), 'bias_adjustment',
        'time_range' and 'path'. 'time_range' is the last '_'-separated field
        of the file stem when the stem has more than nine fields, otherwise 0.
        'path' is `file` exactly as passed in.
        On any failure, ``{INVALID_ASSET: file, TRACEBACK: <formatted traceback>}``
        is returned instead of raising.
    """
    # Number of '_'-separated fields in a file stem that has no time range.
    n_fixed_stem_fields = 9

    try:
        filep = pathlib.Path(file)
        stem_split = filep.stem.split('_')

        # Path.parts splits on the real path separators, so this works for
        # relative paths too. Splitting the string on `filep.anchor` fails
        # there because the anchor is '' and str.split('') raises ValueError.
        # The unpacking itself raises if fewer than eleven levels are present.
        (mip_era, product, domain, institution_id, parent_model,
         experiment_id, member_id, downscaling_model, bias_adjustment,
         table_id, variable_id) = filep.parent.parts[-11:]

        info = {'institution_id': institution_id,
                'variable_id': variable_id,
                'table_id': table_id,
                'source_id': parent_model,
                'downscaling_model': downscaling_model,
                'experiment_id': experiment_id,
                'member_id': member_id,
                'grid_label': domain,
                'bias_adjustment': bias_adjustment,
                'time_range': 0,
                'path': file
        }
        if len(stem_split) > n_fixed_stem_fields:
            info["time_range"] = stem_split[-1]

        return info

    except Exception:
        # Report the file as invalid rather than raising on a single bad path.
        return {INVALID_ASSET: file, TRACEBACK: traceback.format_exc()}



In [7]:
%%time
# Build the catalogue for the AGCD-05i bias-adjustment output tree, using acs_parser to
# describe the files, and save it under `directory` as `catalog_name`; the cell output
# reports <directory>/<catalog_name>.json being written.
# NOTE(review): build_catalog comes from CCVE_toolbox and is not shown in this notebook;
# how it walks paths_list and applies the parser is assumed — confirm against that module.
# The result is kept in `b`, which a later cell reads to inspect the catalogue table.
b= build_catalog(paths_list=["/g/data/ia39/australian-climate-service/test-data/CORDEX-CMIP6/bias-adjustment-output/AGCD-05i"], 
              parser=acs_parser,
              catalog_name="nci-acs_bias_corrected",
              directory="/g/data/mn51/users/gt3409")

building catalog : nci-acs_bias_corrected
saving catalog : nci-acs_bias_corrected
Successfully wrote ESM catalog json file to: file:///g/data/mn51/users/gt3409/nci-acs_bias_corrected.json
CPU times: user 8.29 s, sys: 5.98 s, total: 14.3 s
Wall time: 1min 24s


In [8]:
# Display the catalogue table; its columns match the keys returned by acs_parser.
b.df

Unnamed: 0,institution_id,variable_id,table_id,source_id,downscaling_model,experiment_id,member_id,grid_label,bias_adjustment,time_range,path
0,BOM,prAdjust,day,ACCESS-CM2,BARPA-R,historical,r4i1p1f1,AGCD-05i,v1-r1-ACS-MRNBC-AGCD-1960-2022,19600101-19601231,/g/data/ia39/australian-climate-service/test-d...
1,BOM,prAdjust,day,ACCESS-CM2,BARPA-R,historical,r4i1p1f1,AGCD-05i,v1-r1-ACS-MRNBC-AGCD-1960-2022,19610101-19611231,/g/data/ia39/australian-climate-service/test-d...
2,BOM,prAdjust,day,ACCESS-CM2,BARPA-R,historical,r4i1p1f1,AGCD-05i,v1-r1-ACS-MRNBC-AGCD-1960-2022,19620101-19621231,/g/data/ia39/australian-climate-service/test-d...
3,BOM,prAdjust,day,ACCESS-CM2,BARPA-R,historical,r4i1p1f1,AGCD-05i,v1-r1-ACS-MRNBC-AGCD-1960-2022,19630101-19631231,/g/data/ia39/australian-climate-service/test-d...
4,BOM,prAdjust,day,ACCESS-CM2,BARPA-R,historical,r4i1p1f1,AGCD-05i,v1-r1-ACS-MRNBC-AGCD-1960-2022,19640101-19641231,/g/data/ia39/australian-climate-service/test-d...
...,...,...,...,...,...,...,...,...,...,...,...
71997,UQ-DES,tasminAdjust,day,NorESM2-MM,CCAMoc-v2112,ssp370,r1i1p1f1,AGCD-05i,v1-r1-ACS-QME-BARRA-R2-1980-2022,20960101-20961231,/g/data/ia39/australian-climate-service/test-d...
71998,UQ-DES,tasminAdjust,day,NorESM2-MM,CCAMoc-v2112,ssp370,r1i1p1f1,AGCD-05i,v1-r1-ACS-QME-BARRA-R2-1980-2022,20970101-20971231,/g/data/ia39/australian-climate-service/test-d...
71999,UQ-DES,tasminAdjust,day,NorESM2-MM,CCAMoc-v2112,ssp370,r1i1p1f1,AGCD-05i,v1-r1-ACS-QME-BARRA-R2-1980-2022,20980101-20981231,/g/data/ia39/australian-climate-service/test-d...
72000,UQ-DES,tasminAdjust,day,NorESM2-MM,CCAMoc-v2112,ssp370,r1i1p1f1,AGCD-05i,v1-r1-ACS-QME-BARRA-R2-1980-2022,20990101-20991231,/g/data/ia39/australian-climate-service/test-d...
