# Create Catalog for CMIP6 output

In [1]:
# Load packages
import pathlib
import intake
from ecgtools import Builder
from ecgtools.parsers import parse_cmip6
import pandas as pd 
import xarray as xr
import numpy as np
import itertools
from os import listdir
from os.path import isfile, join
import shutil

## Step 0: Move Files from `data_tmp` to `esm_data` and Create New Folders if Needed

In [2]:
# Staging directory holding the newly downloaded CMIP6 files
path = '/data/keeling/a/cristi/a/data_tmp/cmip6'
# Root of the target directory tree that the files are sorted into
rootpath = '/data/keeling/a/cristi/a/esm_data/cmip6'

def target_location_cmip(fname, rootpath):
    """Build the target directory for a file from the fields of its name.

    The file name is split on '_' and fields 2, 3 and 4 (0-based) become three
    nested sub-directories below ``rootpath``.
    NOTE(review): presumably these fields are model / experiment / member as in
    CMIP6 file names - confirm against the downloaded files.

    Parameters
    ----------
    fname : str
        File name (without directory part) containing at least five
        '_'-separated fields.
    rootpath : str
        Root of the target tree, with or without a trailing '/'.

    Returns
    -------
    str
        ``<rootpath>/<field 2>/<field 3>/<field 4>/``; always ends with '/'.

    Raises
    ------
    ValueError
        If ``rootpath`` is empty or ``fname`` has fewer than five fields.
    """
    if not rootpath:
        # An empty root would silently resolve to the filesystem root.
        raise ValueError('rootpath must not be empty')
    fname_set = fname.split('_')
    if len(fname_set) < 5:
        raise ValueError(
            'cannot derive target location from ' + repr(fname)
            + ': expected at least 5 "_"-separated fields, got '
            + str(len(fname_set))
        )
    if not rootpath.endswith('/'):
        rootpath = rootpath + '/'
    return rootpath + '/'.join(fname_set[2:5]) + '/'

# Move every downloaded file from `path` into its target folder under `rootpath`.
# Only names already imported at the top of the notebook are used here
# (listdir, join, isfile, pathlib, shutil).
k_moved=0
skipped=[]  # entries left in place; reported after the loop
for j in listdir(path): # every entry currently in the staging directory
    current_dir=join(path, j)
    # target_location_cmip reads fields 2-4 of the '_'-separated name, so
    # sub-directories and names with fewer than five fields cannot be placed.
    if not isfile(current_dir) or len(j.split('_'))<5:
        skipped.append(j)
        continue
    target_dir=target_location_cmip(j,rootpath)
    # Never overwrite: shutil.move raises when the name already exists in target_dir.
    if pathlib.Path(target_dir, j).exists():
        skipped.append(j)
        continue
    # Create the folder hierarchy if needed; a no-op when it is already there.
    pathlib.Path(target_dir).mkdir(parents=True, exist_ok=True)
    shutil.move(current_dir,target_dir) # move the file into the target folder
    k_moved+=1

print('moved '+str(k_moved)+' files')
if skipped:
    print('skipped '+str(len(skipped))+' entries: '+', '.join(skipped))


moved 0 files


## Step 1: Get updated list of Subdirectories

1. Log onto Keeling and go to `/data/keeling/a/cristi/a/esm_data/cmip6`

2. Run the following command to get the most up-to-date list of all subdirectories

`readlink -f $(find . -type d -path '*/r*' -prune) > subdir_list.csv`

## Step 2: Load list of all subdirectories from `/data/keeling/a/cristi/a/esm_data/cmip6` to Notebook

In [3]:
# Read the directory listing (no header row) into a single 'dirpaths' column
subdir_table = pd.read_csv(
    '/data/keeling/a/cristi/a/esm_data/cmip6/subdir_list.csv',
    header=None,
    names=['dirpaths'],
)
filepathlist = subdir_table.values.tolist()

# Flatten the one-item rows into a flat list of path strings
filepath = [entry for row in filepathlist for entry in row]

## Step 3: Create Catalog Builder and Build the Catalog

In [4]:
# Set up the catalog builder over every directory listed in `filepath`
cat_builder = Builder(paths=filepath)

# Run the build, parsing each file with the CMIP6 parser
catalog = cat_builder.build(parsing_func=parse_cmip6)

  new_vars[k] = decode_cf_variable(
  new_vars[k] = decode_cf_variable(
  new_vars[k] = decode_cf_variable(
  new_vars[k] = decode_cf_variable(
  new_vars[k] = decode_cf_variable(
  new_vars[k] = decode_cf_variable(
  new_vars[k] = decode_cf_variable(
  new_vars[k] = decode_cf_variable(
  self.get_assets().parse(


## Step 4: Check and Save the Catalog

In [5]:
# Display the catalog's DataFrame to inspect the parsed entries
catalog.df

Unnamed: 0,activity_id,branch_method,branch_time_in_child,branch_time_in_parent,experiment,experiment_id,frequency,grid,grid_label,institution_id,...,standard_name,long_name,units,vertical_levels,init_year,start_time,end_time,time_range,path,version
0,CMIP,standard,0.0,0.0,abrupt quadrupling of CO2,abrupt-4xCO2,mon,native atmosphere N96 grid (144x192 latxlon),gn,CSIRO-ARCCSS,...,surface_upward_latent_heat_flux,Surface Upward Latent Heat Flux,W m-2,1.0,,0950-01-16 12:00:00,1099-12-16 12:00:00,0950-01-16 12:00:00-1099-12-16 12:00:00,/data/cristi/a/cristi/data/cmip6/ACCESS-CM2/ab...,v0
1,CMIP,standard,0.0,0.0,abrupt quadrupling of CO2,abrupt-4xCO2,mon,native atmosphere N96 grid (144x192 latxlon),gn,CSIRO-ARCCSS,...,surface_upward_sensible_heat_flux,Surface Upward Sensible Heat Flux,W m-2,1.0,,0950-01-16 12:00:00,1099-12-16 12:00:00,0950-01-16 12:00:00-1099-12-16 12:00:00,/data/cristi/a/cristi/data/cmip6/ACCESS-CM2/ab...,v0
2,CMIP,standard,0.0,0.0,abrupt quadrupling of CO2,abrupt-4xCO2,mon,native atmosphere N96 grid (144x192 latxlon),gn,CSIRO-ARCCSS,...,surface_downwelling_longwave_flux_in_air,Surface Downwelling Longwave Radiation,W m-2,1.0,,0950-01-16 12:00:00,1099-12-16 12:00:00,0950-01-16 12:00:00-1099-12-16 12:00:00,/data/cristi/a/cristi/data/cmip6/ACCESS-CM2/ab...,v0
3,CMIP,standard,0.0,0.0,abrupt quadrupling of CO2,abrupt-4xCO2,mon,native atmosphere N96 grid (144x192 latxlon),gn,CSIRO-ARCCSS,...,surface_upwelling_longwave_flux_in_air,Surface Upwelling Longwave Radiation,W m-2,1.0,,0950-01-16 12:00:00,1099-12-16 12:00:00,0950-01-16 12:00:00-1099-12-16 12:00:00,/data/cristi/a/cristi/data/cmip6/ACCESS-CM2/ab...,v0
4,CMIP,standard,0.0,0.0,abrupt quadrupling of CO2,abrupt-4xCO2,mon,native atmosphere N96 grid (144x192 latxlon),gn,CSIRO-ARCCSS,...,toa_outgoing_longwave_flux,TOA Outgoing Longwave Radiation,W m-2,1.0,,0950-01-16 12:00:00,1099-12-16 12:00:00,0950-01-16 12:00:00-1099-12-16 12:00:00,/data/cristi/a/cristi/data/cmip6/ACCESS-CM2/ab...,v0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11431,ScenarioMIP,standard,59400.0,59400.0,update of RCP8.5 based on SSP5,ssp585,mon,Native N96 grid; 192 x 144 longitude/latitude,gn,MOHC,...,surface_downwelling_shortwave_flux_in_air,Surface Downwelling Shortwave Radiation,W m-2,1.0,,2015-01-16 00:00:00,2100-12-16 00:00:00,2015-01-16 00:00:00-2100-12-16 00:00:00,/data/cristi/a/cristi/data/cmip6/UKESM1-0-LL/s...,v0
11432,ScenarioMIP,standard,59400.0,59400.0,update of RCP8.5 based on SSP5,ssp585,mon,Native N96 grid; 192 x 144 longitude/latitude,gn,MOHC,...,toa_incoming_shortwave_flux,TOA Incident Shortwave Radiation,W m-2,1.0,,2015-01-16 00:00:00,2100-12-16 00:00:00,2015-01-16 00:00:00-2100-12-16 00:00:00,/data/cristi/a/cristi/data/cmip6/UKESM1-0-LL/s...,v0
11433,ScenarioMIP,standard,59400.0,59400.0,update of RCP8.5 based on SSP5,ssp585,mon,Native N96 grid; 192 x 144 longitude/latitude,gn,MOHC,...,surface_upwelling_shortwave_flux_in_air,Surface Upwelling Shortwave Radiation,W m-2,1.0,,2015-01-16 00:00:00,2100-12-16 00:00:00,2015-01-16 00:00:00-2100-12-16 00:00:00,/data/cristi/a/cristi/data/cmip6/UKESM1-0-LL/s...,v0
11434,ScenarioMIP,standard,59400.0,59400.0,update of RCP8.5 based on SSP5,ssp585,mon,Native N96 grid; 192 x 144 longitude/latitude,gn,MOHC,...,toa_outgoing_shortwave_flux,TOA Outgoing Shortwave Radiation,W m-2,1.0,,2015-01-16 00:00:00,2100-12-16 00:00:00,2015-01-16 00:00:00-2100-12-16 00:00:00,/data/cristi/a/cristi/data/cmip6/UKESM1-0-LL/s...,v0


In [6]:
# Write the catalog table to disk as CSV, leaving out the DataFrame index
catalog_csv = '/data/keeling/a/cristi/a/data/cmip6_catalog.csv'
catalog.df.to_csv(catalog_csv, index=False)