# Create Catalog for CESM output

In [1]:
import itertools
import os
import pathlib
import shutil
from os import listdir
from os.path import isfile, join

import intake
import numpy as np
import pandas as pd
import xarray as xr
from ecgtools import Builder
from ecgtools.parsers.cesm import parse_cesm_timeseries

## Step 0: Move Files from `data_tmp` to `data` and Create New Folders if needed

In [None]:
# Staging directory holding the newly downloaded CESM data.
path = '/data/keeling/a/cristi/a/data_tmp/'
# Root of the target directory tree that the files are moved into.
rootpath = '/data/keeling/a/cristi/a/esm_data/cesm/CESM_LME'

def target_location_cmip(fname, rootpath, fields=(2, 3, 4)):
    """Build the target directory for a file from pieces of its name.

    The file name is split on underscores and the pieces selected by
    ``fields`` become nested sub-directories below ``rootpath``.

    Parameters
    ----------
    fname : str
        File name (not a full path) to derive the folder structure from.
    rootpath : str
        Root of the target directory tree; a trailing '/' is optional.
    fields : sequence of int, optional
        Indices of the underscore-separated pieces of ``fname`` to use, in
        nesting order. Defaults to ``(2, 3, 4)``, the previously hard-coded
        choice. Adjust it to match how the CESM files are named.

    Returns
    -------
    str
        The target directory path, always ending with '/'.

    Raises
    ------
    ValueError
        If ``rootpath`` is empty.
    IndexError
        If ``fname`` has too few underscore-separated pieces for ``fields``.
    """
    if not rootpath:
        # An empty root would silently resolve to the filesystem root.
        raise ValueError('rootpath must be a non-empty string')
    if not rootpath.endswith('/'):
        rootpath = rootpath + '/'

    fname_set = fname.split('_')
    try:
        selected = [fname_set[i] for i in fields]
    except IndexError:
        raise IndexError(
            'file name ' + repr(fname) + ' has only ' + str(len(fname_set))
            + ' underscore-separated fields; cannot select fields '
            + repr(tuple(fields))
        ) from None

    # Each selected piece becomes one directory level, each followed by '/'.
    return rootpath + ''.join(piece + '/' for piece in selected)

# Move each downloaded file from `path` into its target folder under `rootpath`.
k_moved = 0
for j in listdir(path):  # every entry currently in the download folder
    current_dir = join(path, j)  # full path of the entry (a file, despite the name)
    if not isfile(current_dir):
        # Only regular files are sorted; sub-directories are left where they are.
        continue
    target_dir = target_location_cmip(j, rootpath)
    if join(target_dir, j) == current_dir:
        # The file already sits in its target folder; nothing to move.
        continue
    # exist_ok=True creates missing parent folders and tolerates existing ones.
    os.makedirs(target_dir, exist_ok=True)
    shutil.move(current_dir, target_dir)
    k_moved += 1

print('moved '+str(k_moved)+' files')

## Step 1: Get updated list of Subdirectories

1. Log onto Keeling and go to `/data/keeling/a/cristi/a/data/cesm`

2. Run the following command to get the most up-to-date list of all subdirectories:

`readlink -f $(find . -type d -path '*/*' -prune) > subdir_list.csv`

## Load the list of all subdirectories from `/data/keeling/a/cristi/a/esm_data/cesm/subdir_list.csv`

In [11]:
# Read the single-column listing of subdirectories (the file has no header row).
subdir_table = pd.read_csv(
    '/data/keeling/a/cristi/a/esm_data/cesm/subdir_list.csv',
    header=None,
    names=['dirpaths'],
)
filepathlist = subdir_table.values.tolist()

# Flatten the list of one-element rows into a single flat list.
filepath = [entry for row in filepathlist for entry in row]
filepath

['/data/cristi/a/cristi/esm_data/cesm/CESM_LME']

## Create Catalog Builder and Build the Catalog

In [7]:
# Create the Builder from the directories listed in `filepath`.
cat_builder = Builder(paths=filepath)

# Build the catalog, parsing files with the CESM timeseries parser.
catalog = cat_builder.build(parsing_func=parse_cesm_timeseries)

  self.get_assets().parse(


## Check and Save the Catalog

In [8]:
# Browse the catalog: show its DataFrame as the cell output
catalog.df

Unnamed: 0,component,stream,case,member_id,variable,start_time,end_time,time_range,long_name,units,vertical_levels,frequency,path
1,ocn,pop.h,b.ie12.B1850C5CN.f19_g16.LME.002,2,R18O,0850-01,0899-12,085001-089912,R18O,unitless,1.0,month_1,/data/cristi/a/cristi/data/CESM_LME/b.ie12.B18...
2,ocn,pop.h,b.ie12.B1850C5CN.f19_g16.LME.002,2,R18O,0900-01,0999-12,090001-099912,R18O,unitless,1.0,month_1,/data/cristi/a/cristi/data/CESM_LME/b.ie12.B18...
3,ocn,pop.h,b.ie12.B1850C5CN.f19_g16.LME.002,2,R18O,1000-01,1099-12,100001-109912,R18O,unitless,1.0,month_1,/data/cristi/a/cristi/data/CESM_LME/b.ie12.B18...
4,ocn,pop.h,b.ie12.B1850C5CN.f19_g16.LME.002,2,R18O,1100-01,1199-12,110001-119912,R18O,unitless,1.0,month_1,/data/cristi/a/cristi/data/CESM_LME/b.ie12.B18...
5,ocn,pop.h,b.ie12.B1850C5CN.f19_g16.LME.002,2,R18O,1200-01,1299-12,120001-129912,R18O,unitless,1.0,month_1,/data/cristi/a/cristi/data/CESM_LME/b.ie12.B18...
6,ocn,pop.h,b.ie12.B1850C5CN.f19_g16.LME.002,2,R18O,1300-01,1399-12,130001-139912,R18O,unitless,1.0,month_1,/data/cristi/a/cristi/data/CESM_LME/b.ie12.B18...
7,ocn,pop.h,b.ie12.B1850C5CN.f19_g16.LME.002,2,R18O,1400-01,1499-12,140001-149912,R18O,unitless,1.0,month_1,/data/cristi/a/cristi/data/CESM_LME/b.ie12.B18...
8,ocn,pop.h,b.ie12.B1850C5CN.f19_g16.LME.002,2,R18O,1500-01,1599-12,150001-159912,R18O,unitless,1.0,month_1,/data/cristi/a/cristi/data/CESM_LME/b.ie12.B18...
9,ocn,pop.h,b.ie12.B1850C5CN.f19_g16.LME.002,2,R18O,1600-01,1699-12,160001-169912,R18O,unitless,1.0,month_1,/data/cristi/a/cristi/data/CESM_LME/b.ie12.B18...
10,ocn,pop.h,b.ie12.B1850C5CN.f19_g16.LME.002,2,R18O,1700-01,1799-12,170001-179912,R18O,unitless,1.0,month_1,/data/cristi/a/cristi/data/CESM_LME/b.ie12.B18...


In [9]:
# Write the catalog table to a .csv file, leaving out the DataFrame index.
catalog_csv_path = '/data/keeling/a/cristi/a/data/cesm_lme_catalog.csv'
catalog.df.to_csv(catalog_csv_path, index=False)

### Note to Catalog Admin (Last updated: 1/28/2023)

1. Step 0: `path` needs to be updated once the CESM data in `data_tmp` is confirmed.
2. Step 1: the `'*/*'` pattern in the `readlink`/`find` command needs to be updated to match the sorted `cesm` directory structure.