In [19]:
import pathlib
import intake
import pandas as pd
from ecgtools import Builder
import itertools

In [9]:
root_path = pathlib.Path('/data/keeling/a/cristi/a/downscaled_data/LOCA2/').absolute()

sorted(root_path.rglob('*'))

[PosixPath('/data/keeling/a/cristi/a/downscaled_data/LOCA2/ACCESS-CM2'),
 PosixPath('/data/keeling/a/cristi/a/downscaled_data/LOCA2/ACCESS-CM2/historical'),
 PosixPath('/data/keeling/a/cristi/a/downscaled_data/LOCA2/ACCESS-CM2/historical/pr.ACCESS-CM2.historical.r1i1p1f1.1950-2014.LOCA_16thdeg_v20240915.cent.nc'),
 PosixPath('/data/keeling/a/cristi/a/downscaled_data/LOCA2/ACCESS-CM2/historical/pr.ACCESS-CM2.historical.r2i1p1f1.1950-2014.LOCA_16thdeg_v20240915.cent.nc'),
 PosixPath('/data/keeling/a/cristi/a/downscaled_data/LOCA2/ACCESS-CM2/historical/pr.ACCESS-CM2.historical.r3i1p1f1.1950-2014.LOCA_16thdeg_v20240915.cent.nc'),
 PosixPath('/data/keeling/a/cristi/a/downscaled_data/LOCA2/ACCESS-CM2/historical/tasmax.ACCESS-CM2.historical.r1i1p1f1.1950-2014.LOCA_16thdeg_v20220413.cent.nc'),
 PosixPath('/data/keeling/a/cristi/a/downscaled_data/LOCA2/ACCESS-CM2/historical/tasmax.ACCESS-CM2.historical.r2i1p1f1.1950-2014.LOCA_16thdeg_v20220413.cent.nc'),
 PosixPath('/data/keeling/a/cristi/a/dow

In [None]:
description = "Sample NetCDF catalog"
catalog = {'metadata': {'version': 1,'description': description},'sources': {}}
with open('LOCA2_catalog.yml', 'w') as f:
    yaml.dump(catalog, f)

# Create a catalog object
catalog = intake.open_catalog('LOCA2_catalog.yml')

root_path = '/data/keeling/a/cristi/a/downscaled_data/LOCA2/'
# Define your CSV sources
for path in root_path:
    source1 = intake.open_nc()
    source1.name = 'states1'

# Add the sources to the catalog
catalog = catalog.add(source1)

catalog.save('catalog.yml')

In [5]:
filepathlist = pd.read_csv('/data/keeling/a/cristi/a/downscaled_data/LOCA2/subdir_list.csv', 
                           header=None, names=['dirpaths']).values.tolist()

# Transform list of lists to list of strings
filepath=list(itertools.chain.from_iterable(filepathlist))
filepath

['/data/cristi/a/cristi/downscaled_data/LOCA2/ACCESS-CM2/historical',
 '/data/cristi/a/cristi/downscaled_data/LOCA2/ACCESS-CM2/ssp245',
 '/data/cristi/a/cristi/downscaled_data/LOCA2/ACCESS-CM2/ssp370',
 '/data/cristi/a/cristi/downscaled_data/LOCA2/ACCESS-CM2/ssp585',
 '/data/cristi/a/cristi/downscaled_data/LOCA2/ACCESS-ESM1-5/historical',
 '/data/cristi/a/cristi/downscaled_data/LOCA2/ACCESS-ESM1-5/ssp245',
 '/data/cristi/a/cristi/downscaled_data/LOCA2/ACCESS-ESM1-5/ssp370',
 '/data/cristi/a/cristi/downscaled_data/LOCA2/ACCESS-ESM1-5/ssp585',
 '/data/cristi/a/cristi/downscaled_data/LOCA2/AWI-CM-1-1-MR/historical',
 '/data/cristi/a/cristi/downscaled_data/LOCA2/AWI-CM-1-1-MR/ssp245',
 '/data/cristi/a/cristi/downscaled_data/LOCA2/AWI-CM-1-1-MR/ssp370',
 '/data/cristi/a/cristi/downscaled_data/LOCA2/AWI-CM-1-1-MR/ssp585',
 '/data/cristi/a/cristi/downscaled_data/LOCA2/BCC-CSM2-MR/historical',
 '/data/cristi/a/cristi/downscaled_data/LOCA2/BCC-CSM2-MR/ssp245',
 '/data/cristi/a/cristi/downscaled

In [6]:
test_file = '/data/keeling/a/cristi/a/downscaled_data/LOCA2/NorESM2-LM/historical/pr.NorESM2-LM.historical.r1i1p1f1.1950-2014.LOCA_16thdeg_v20240915.cent.nc'

In [17]:
def LOCA2_parser(file):
    """Atmospheric observational data stored in LOCA2"""
    file = pathlib.Path(file)
    info = {}

    try:
        stem = file.stem
        split = stem.split('.')
        print(split)
        variable = split[0]
        model = split[1]
        scheme = split[2]
        experiment_id = split[3]
        time_period = split[4]
        version = split[5]
        
        info = {
                'variable': variable,
                'model': model,
                'scheme': scheme,
                'experiment_id': experiment_id,
                'time_period': time_period,
                'path': str(file),
            }

        return info

    except Exception:
        return {INVALID_ASSET: file, TRACEBACK: traceback.format_exc()}

In [16]:
parse_amwg_obs(test_file)

['pr', 'NorESM2-LM', 'historical', 'r1i1p1f1', '1950-2014', 'LOCA_16thdeg_v20240915', 'cent']


{'variable': 'pr',
 'model': 'NorESM2-LM',
 'scheme': 'historical',
 'experiment_id': 'r1i1p1f1',
 'time_period': '1950-2014',
 'path': '/data/keeling/a/cristi/a/downscaled_data/LOCA2/NorESM2-LM/historical/pr.NorESM2-LM.historical.r1i1p1f1.1950-2014.LOCA_16thdeg_v20240915.cent.nc'}

In [20]:
# Create the Builder
cat_builder = Builder(
    # Directory of Organized CMIP6 files 
    paths=filepath)

# Build the Catalog
catalog = cat_builder.build(parsing_func=LOCA2_parser)

['pr', 'ACCESS-CM2', 'historical', 'r1i1p1f1', '1950-2014', 'LOCA_16thdeg_v20240915', 'cent']
['pr', 'ACCESS-CM2', 'historical', 'r2i1p1f1', '1950-2014', 'LOCA_16thdeg_v20240915', 'cent']
['pr', 'ACCESS-CM2', 'historical', 'r3i1p1f1', '1950-2014', 'LOCA_16thdeg_v20240915', 'cent']
['tasmax', 'ACCESS-CM2', 'historical', 'r1i1p1f1', '1950-2014', 'LOCA_16thdeg_v20220413', 'cent']
['tasmax', 'ACCESS-CM2', 'historical', 'r2i1p1f1', '1950-2014', 'LOCA_16thdeg_v20220413', 'cent']
['tasmax', 'ACCESS-CM2', 'historical', 'r3i1p1f1', '1950-2014', 'LOCA_16thdeg_v20220413', 'cent']
['tasmin', 'ACCESS-CM2', 'historical', 'r1i1p1f1', '1950-2014', 'LOCA_16thdeg_v20220413', 'cent']
['tasmin', 'ACCESS-CM2', 'historical', 'r2i1p1f1', '1950-2014', 'LOCA_16thdeg_v20220413', 'cent']
['tasmin', 'ACCESS-CM2', 'historical', 'r3i1p1f1', '1950-2014', 'LOCA_16thdeg_v20220413', 'cent']
['pr', 'ACCESS-CM2', 'ssp245', 'r1i1p1f1', '2015-2044', 'LOCA_16thdeg_v20240915', 'cent']
['pr', 'ACCESS-CM2', 'ssp245', 'r1i1p1f1

['pr', 'EC-Earth3-Veg', 'ssp585', 'r1i1p1f1', '2075-2100', 'LOCA_16thdeg_v20240915', 'cent']
['pr', 'EC-Earth3-Veg', 'ssp585', 'r2i1p1f1', '2015-2044', 'LOCA_16thdeg_v20240915', 'cent']
['pr', 'EC-Earth3-Veg', 'ssp585', 'r2i1p1f1', '2045-2074', 'LOCA_16thdeg_v20240915', 'cent']
['pr', 'EC-Earth3-Veg', 'ssp585', 'r2i1p1f1', '2075-2100', 'LOCA_16thdeg_v20240915', 'cent']
['pr', 'EC-Earth3-Veg', 'ssp585', 'r3i1p1f1', '2015-2044', 'LOCA_16thdeg_v20240915', 'cent']
['pr', 'EC-Earth3-Veg', 'ssp585', 'r3i1p1f1', '2045-2074', 'LOCA_16thdeg_v20240915', 'cent']
['pr', 'EC-Earth3-Veg', 'ssp585', 'r3i1p1f1', '2075-2100', 'LOCA_16thdeg_v20240915', 'cent']
['pr', 'EC-Earth3-Veg', 'ssp585', 'r4i1p1f1', '2015-2044', 'LOCA_16thdeg_v20240915', 'cent']
['pr', 'EC-Earth3-Veg', 'ssp585', 'r4i1p1f1', '2045-2074', 'LOCA_16thdeg_v20240915', 'cent']
['pr', 'EC-Earth3-Veg', 'ssp585', 'r4i1p1f1', '2075-2100', 'LOCA_16thdeg_v20240915', 'cent']
['tasmax', 'EC-Earth3-Veg', 'ssp585', 'r1i1p1f1', '2015-2044', 'LOCA_1

['pr', 'IPSL-CM6A-LR', 'historical', 'r9i1p1f1', '1950-2014', 'LOCA_16thdeg_v20240915', 'cent']
['tasmax', 'IPSL-CM6A-LR', 'historical', 'r10i1p1f1', '1950-2014', 'LOCA_16thdeg_v20220413', 'cent']
['tasmax', 'IPSL-CM6A-LR', 'historical', 'r1i1p1f1', '1950-2014', 'LOCA_16thdeg_v20220413', 'cent']
['tasmax', 'IPSL-CM6A-LR', 'historical', 'r2i1p1f1', '1950-2014', 'LOCA_16thdeg_v20220413', 'cent']
['tasmax', 'IPSL-CM6A-LR', 'historical', 'r3i1p1f1', '1950-2014', 'LOCA_16thdeg_v20220413', 'cent']
['tasmax', 'IPSL-CM6A-LR', 'historical', 'r4i1p1f1', '1950-2014', 'LOCA_16thdeg_v20220413', 'cent']
['tasmax', 'IPSL-CM6A-LR', 'historical', 'r5i1p1f1', '1950-2014', 'LOCA_16thdeg_v20220413', 'cent']
['tasmax', 'IPSL-CM6A-LR', 'historical', 'r6i1p1f1', '1950-2014', 'LOCA_16thdeg_v20220413', 'cent']
['tasmax', 'IPSL-CM6A-LR', 'historical', 'r7i1p1f1', '1950-2014', 'LOCA_16thdeg_v20220413', 'cent']
['tasmax', 'IPSL-CM6A-LR', 'historical', 'r8i1p1f1', '1950-2014', 'LOCA_16thdeg_v20220413', 'cent']
['t

In [21]:
catalog.df

Unnamed: 0,variable,model,scheme,experiment_id,time_period,path
0,pr,ACCESS-CM2,historical,r1i1p1f1,1950-2014,/data/cristi/a/cristi/downscaled_data/LOCA2/AC...
1,pr,ACCESS-CM2,historical,r2i1p1f1,1950-2014,/data/cristi/a/cristi/downscaled_data/LOCA2/AC...
2,pr,ACCESS-CM2,historical,r3i1p1f1,1950-2014,/data/cristi/a/cristi/downscaled_data/LOCA2/AC...
3,tasmax,ACCESS-CM2,historical,r1i1p1f1,1950-2014,/data/cristi/a/cristi/downscaled_data/LOCA2/AC...
4,tasmax,ACCESS-CM2,historical,r2i1p1f1,1950-2014,/data/cristi/a/cristi/downscaled_data/LOCA2/AC...
...,...,...,...,...,...,...
2308,tasmax,TaiESM1,ssp370,r1i1p1f1,2045-2074,/data/cristi/a/cristi/downscaled_data/LOCA2/Ta...
2309,tasmax,TaiESM1,ssp370,r1i1p1f1,2075-2100,/data/cristi/a/cristi/downscaled_data/LOCA2/Ta...
2310,tasmin,TaiESM1,ssp370,r1i1p1f1,2015-2044,/data/cristi/a/cristi/downscaled_data/LOCA2/Ta...
2311,tasmin,TaiESM1,ssp370,r1i1p1f1,2045-2074,/data/cristi/a/cristi/downscaled_data/LOCA2/Ta...


In [22]:
catalog.df.to_csv('/data/keeling/a/cristi/a/downscaled_data/LOCA2/LOCA2_catalog.csv', index=False)