# Create Catalog for CMIP6 output

In [1]:
# Load packages
import itertools
import os
import pathlib
import shutil
from os import listdir
from os.path import isfile, join

import intake
import numpy as np
import pandas as pd
import xarray as xr
from ecgtools import Builder
from ecgtools.parsers import parse_cmip6

## Step 0: Move Files from `data_tmp` to `esm_data` and Create New Folders if Needed

In [2]:
# Staging folder that holds the newly downloaded CMIP6 files.
path = "/data/keeling/a/cristi/a/data_tmp/cmip6"
# Root of the target directory tree the files are sorted into.
rootpath = "/data/keeling/a/cristi/a/esm_data/cmip6"

def target_location_cmip(fname, rootpath):
    """Return the folder a downloaded file should be moved into.

    The folder is built from the 3rd, 4th and 5th '_'-separated fields of
    the file name: ``<rootpath>/<field 3>/<field 4>/<field 5>/``.
    NOTE(review): for standard CMIP6 file names these fields are presumably
    the model, experiment and ensemble member -- confirm against the data.

    Parameters
    ----------
    fname : str
        File name (no directory part), e.g.
        ``tas_Amon_CESM2_historical_r1i1p1f1_gn_185001-201412.nc``.
    rootpath : str
        Root of the target directory tree, with or without a trailing '/'.
        An empty string yields a relative location.

    Returns
    -------
    str
        Target directory, always ending in '/'.

    Raises
    ------
    IndexError
        If ``fname`` has fewer than five '_'-separated fields.
    """
    fields = fname.split('_')
    if len(fields) < 5:
        # Same exception type the bare indexing used to raise, but with a
        # message that names the offending file.
        raise IndexError(
            'cannot derive a target folder from ' + repr(fname)
            + ': expected at least 5 "_"-separated fields, got '
            + str(len(fields))
        )
    # `endswith` is safe on an empty string (indexing [-1] is not); an empty
    # root is left empty so the result stays relative instead of becoming '/'.
    if rootpath and not rootpath.endswith('/'):
        rootpath = rootpath + '/'
    return rootpath + '/'.join(fields[2:5]) + '/'

# Move every downloaded file from `path` into its target folder under
# `rootpath`, creating the folder first when it does not exist yet.
k_moved = 0
for fname in listdir(path):  # every entry currently in data_tmp
    current_file = os.path.join(path, fname)
    # Sub-directories, and files whose names lack the five '_'-separated
    # fields needed to build a target folder, are left where they are so a
    # single stray entry does not abort the whole run.
    if not os.path.isfile(current_file) or len(fname.split('_')) < 5:
        print('skipped (not a CMIP6 data file): ' + fname)
        continue
    target_dir = target_location_cmip(fname, rootpath)
    # shutil.move raises shutil.Error when the target folder already holds
    # an entry with this name; skip it instead of stopping midway.
    if os.path.exists(os.path.join(target_dir, fname)):
        print('skipped (already present in ' + target_dir + '): ' + fname)
        continue
    os.makedirs(target_dir, exist_ok=True)  # no-op when the folder exists
    shutil.move(current_file, target_dir)
    k_moved += 1

print('moved '+str(k_moved)+' files')


moved 0 files


## Step 1: Get updated list of Subdirectories

1. Log onto Keeling and go to `/data/keeling/a/cristi/a/esm_data/cmip6`

2. Run the following command to get the most up-to-date list of all subdirectories:

`readlink -f $(find . -type d -path '*/r*' -prune) > subdir_list.csv`

## Step 2: Load the list of all subdirectories from `/data/keeling/a/cristi/a/esm_data/cmip6` into the Notebook

In [3]:
# Read the subdirectory list written in Step 1. The file has no header row,
# so its single column is named explicitly.
filepathlist = pd.read_csv(
    '/data/keeling/a/cristi/a/esm_data/cmip6/subdir_list.csv',
    header=None,
    names=['dirpaths'],
).values.tolist()

# Flatten the list of one-element rows into a flat list of strings
filepath = [dirpath for row in filepathlist for dirpath in row]

## Step 3: Create Catalog Builder and Build the Catalog

In [None]:
# Create the Builder, pointing it at the directories of organized CMIP6
# files collected in `filepath`.
cat_builder = Builder(paths=filepath)

# Build the Catalog, using ecgtools' `parse_cmip6` as the parsing function.
catalog = cat_builder.build(parsing_func=parse_cmip6)

## Step 4: Check and Save the Catalog

In [None]:
# Browse the catalog: as the last expression in the cell, `catalog.df` is
# rendered as the cell's output (presumably a pandas DataFrame, since
# `to_csv` is called on it in the save step below -- confirm).
catalog.df

In [None]:
# Write the catalog table to a .csv file, leaving out the index column.
catalog_csv = '/data/keeling/a/cristi/a/esm_data/cmip6_catalog.csv'
catalog.df.to_csv(catalog_csv, index=False)