In [1]:
import os
import yaml
import importlib

import numpy as np
import xarray as xr

import xcollection as xc
import data_catalog

### recipes

In [2]:
# Load the analysis recipes from the project definitions file.
#
# The Loader is passed explicitly: calling `yaml.load` without one is
# deprecated since PyYAML 5.1 and raises TypeError in PyYAML 6.
# NOTE(security): the loaded recipes include Python callables (see the
# `operators` entry in the output), so a loader that can construct Python
# objects is required. `yaml.Loader` can execute arbitrary code while
# parsing -- only ever point this at a trusted, project-owned file.
with open('definitions.yml', 'r') as f:
    defs = yaml.load(f, Loader=yaml.Loader)
recipes = defs['analysis_recipes']
recipes

{'monclim': {'description': 'compute monthly climatology',
  'file_format': 'nc',
  'operators': [<function compute_mon_climatology at 0x2aab66035158>]}}

### sample data

In [3]:
# Create a tiny synthetic dataset (24 monthly steps on a 1x1 grid) and cache it
# on disk; nothing is rebuilt when 'junk.nc' is already present.
if not os.path.exists('junk.nc'):
    # Day offsets of the start and end of each month within one 365-day year.
    start_date = np.array([0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334])
    end_date = np.array([31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365])

    # Append a second year by shifting the first one by 365 days.
    start_date = np.concatenate([start_date, start_date + 365])
    end_date = np.concatenate([end_date, end_date + 365])

    ds = xr.Dataset(coords={"time": 24, "lat": 1, "lon": 1, "d2": 2})

    # Coordinates: time is stamped with the END of each monthly interval.
    ds["time"] = xr.DataArray(end_date, dims="time")
    ds["lat"] = xr.DataArray([0], dims="lat")
    ds["lon"] = xr.DataArray([0], dims="lon")
    ds["d2"] = xr.DataArray([0, 1], dims="d2")

    # One (start, end) pair per time step -> shape (24, 2).
    ds["time_bound"] = xr.DataArray(
        np.column_stack((start_date, end_date)), dims=["time", "d2"]
    )

    # Zeros for the first 12 months, ones for the last 12.
    ds["var_to_average"] = xr.DataArray(
        np.concatenate([np.zeros((12, 1, 1)), np.ones((12, 1, 1))], axis=0),
        dims=["time", "lat", "lon"],
    )

    # Time metadata: units, calendar and the name of the bounds variable.
    ds["time"].attrs.update(
        {
            "units": "days since 0001-01-01 00:00:00",
            "calendar": "noleap",
            "bounds": "time_bound",
        }
    )

    ds.to_netcdf('junk.nc')


### define `datasource`

In [4]:
# Describe the sample file as a catalog-style entry: the variable it provides
# and the file(s) holding it.
# (A development-time `importlib.reload(xc)` used to precede this; `xc` is
# already imported at the top of the notebook, so it is not needed for a clean
# top-to-bottom run.)
entry = {'variable': ['var_to_average'],
         'files': ['junk.nc']}

# Wrap the entry in a datasource named 'junk' and display its repr.
dsrc = xc.datasource('junk', entry)
dsrc

{'name': 'junk', 'variables': array(['var_to_average'], dtype=object), 'ensembles': [0], 'applied_methods': [], 'year_offset': nan, 'files': {0: {'var_to_average': ['junk.nc']}}, 'attrs': {0: {'var_to_average': {}}}}

### analyze datasource

In [5]:
# Apply the 'monclim' recipe to the sample datasource `dsrc` built in the
# previous cell. The module reload and the identical re-creation of `dsrc`
# that used to sit here were development leftovers and are not needed.
#
# clobber_cache=True: the log output shows the previously written result file
# being removed before the operator is applied and the file rewritten.
dc = xc.analyzed_datasource('monclim', recipes['monclim'], dsrc, clobber_cache=True)
dc

INFO:root:removing old /glade/scratch/mclong/calcs/processed_collections/junk.000.monclim.nc
INFO:root:applying operator: <function compute_mon_climatology at 0x2aab66035158>
INFO:root:writing /glade/scratch/mclong/calcs/processed_collections/junk.000.monclim.nc


<xcollection.core.analyzed_datasource at 0x2aab66061da0>

In [6]:
# Materialize the analyzed sample datasource and print a summary of its
# dimensions, variables and global attributes (an xarray.Dataset, per the
# output). NOTE(review): this rebinds `ds`, which the sample-data cell also
# uses for the raw dataset.
ds = dc.to_xarray()
ds.info()

xarray.Dataset {
dimensions:
	d2 = 2 ;
	ens = 1 ;
	lat = 1 ;
	lon = 1 ;
	time = 12 ;

variables:
	int32 d2(d2) ;
	int32 lat(lat) ;
	int32 lon(lon) ;
	object time(time) ;
		time:long_name = time ;
		time:bounds = month_bounds ;
	float64 var_to_average(ens, time, lat, lon) ;
	int32 month(ens, time) ;
		month:long_name = Month ;
		month:units = month ;
	object month_bounds(ens, time, d2) ;
		month_bounds:long_name = month_bounds ;

// global attributes:
	:history = created by mclong on 2019-01-30 05:09:07 ;
	:analysis_name = monclim ;
	:analysis = {'description': 'compute monthly climatology', 'operators': [<function compute_mon_climatology at 0x2aab66035158>], 'sel_kwargs': None, 'isel_kwargs': None} ;
	:applied_methods = [['time:clim_mon']] ;
	:datasource = {'name': 'junk', 'variables': array(['var_to_average'], dtype=object), 'ensembles': [0], 'applied_methods': [], 'year_offset': nan, 'files': {0: {'var_to_average': ['junk.nc']}}, 'attrs': {0: {'var_to_average': {}}}} ;
}

In [7]:
# Select the 'cesm2_runs' catalog; the call reports it as the active catalog.
data_catalog.set_catalog('cesm2_runs')

active catalog: cesm2_runs


In [9]:
# Look up the catalog entries for the FG_CO2 variable of one experiment.
# The result is a dict of parallel lists (case, component, files, ...), one
# element per matching file.
entry = data_catalog.get_entries(
    experiment='g.e21a01d.G1850ECOIAF.T62_g17.extraterr-fe.001',
    variable='FG_CO2',
)
entry

{'case': ['g.e21a01d.G1850ECOIAF.T62_g17.extraterr-fe.001',
  'g.e21a01d.G1850ECOIAF.T62_g17.extraterr-fe.001'],
 'component': ['ocn', 'ocn'],
 'date_range': ["['000101', '024012']", "['024101', '031012']"],
 'ensemble': [0, 0],
 'experiment': ['g.e21a01d.G1850ECOIAF.T62_g17.extraterr-fe.001',
  'g.e21a01d.G1850ECOIAF.T62_g17.extraterr-fe.001'],
 'file_basename': ['g.e21a01d.G1850ECOIAF.T62_g17.extraterr-fe.001.pop.h.FG_CO2.000101-024012.nc',
  'g.e21a01d.G1850ECOIAF.T62_g17.extraterr-fe.001.pop.h.FG_CO2.024101-031012.nc'],
 'files': ['/glade/scratch/mclong/archive/g.e21a01d.G1850ECOIAF.T62_g17.extraterr-fe.001/ocn/proc/tseries/month_1/g.e21a01d.G1850ECOIAF.T62_g17.extraterr-fe.001.pop.h.FG_CO2.000101-024012.nc',
  '/glade/scratch/mclong/archive/g.e21a01d.G1850ECOIAF.T62_g17.extraterr-fe.001/ocn/proc/tseries/month_1/g.e21a01d.G1850ECOIAF.T62_g17.extraterr-fe.001.pop.h.FG_CO2.024101-031012.nc'],
 'grid': ['POP_gx1v7', 'POP_gx1v7'],
 'sequence_order': [0, 0],
 'stream': ['pop.h', 'pop.h'

In [10]:
# Wrap the catalog entry in a named datasource and display its repr, which
# lists the files grouped by ensemble member and variable.
# NOTE(review): rebinds `dsrc`, previously the sample-data datasource.
dsrc = xc.datasource('g.e21a01d.G1850ECOIAF.T62_g17.extraterr-fe.001.FG_CO2', entry)
dsrc

{'name': 'g.e21a01d.G1850ECOIAF.T62_g17.extraterr-fe.001.FG_CO2', 'variables': array(['FG_CO2'], dtype=object), 'ensembles': array([0]), 'applied_methods': [], 'year_offset': 1699.0, 'files': {0: {'FG_CO2': ['/glade/scratch/mclong/archive/g.e21a01d.G1850ECOIAF.T62_g17.extraterr-fe.001/ocn/proc/tseries/month_1/g.e21a01d.G1850ECOIAF.T62_g17.extraterr-fe.001.pop.h.FG_CO2.000101-024012.nc', '/glade/scratch/mclong/archive/g.e21a01d.G1850ECOIAF.T62_g17.extraterr-fe.001/ocn/proc/tseries/month_1/g.e21a01d.G1850ECOIAF.T62_g17.extraterr-fe.001.pop.h.FG_CO2.024101-031012.nc']}}, 'attrs': {0: {'FG_CO2': {'case': ['g.e21a01d.G1850ECOIAF.T62_g17.extraterr-fe.001', 'g.e21a01d.G1850ECOIAF.T62_g17.extraterr-fe.001'], 'component': ['ocn', 'ocn'], 'date_range': ["['000101', '024012']", "['024101', '031012']"], 'experiment': ['g.e21a01d.G1850ECOIAF.T62_g17.extraterr-fe.001', 'g.e21a01d.G1850ECOIAF.T62_g17.extraterr-fe.001'], 'file_basename': ['g.e21a01d.G1850ECOIAF.T62_g17.extraterr-fe.001.pop.h.FG_CO2.00

In [11]:
# Apply the 'monclim' recipe to the FG_CO2 datasource; the log shows the
# operator being applied and the result written to a .monclim.nc file.
# clobber_cache=True presumably forces recomputation instead of reusing an
# existing result file -- confirm against xcollection.
# NOTE(review): the captured output includes a fragment of a warning raised
# around an `np.divide` call; the cause is not visible here -- verify.
dc = xc.analyzed_datasource('monclim', recipes['monclim'], dsrc, clobber_cache=True)
dc

INFO:root:applying operator: <function compute_mon_climatology at 0x2aab66035158>
INFO:root:writing /glade/scratch/mclong/calcs/processed_collections/g.e21a01d.G1850ECOIAF.T62_g17.extraterr-fe.001.FG_CO2.000.monclim.nc
  x = np.divide(x1, x2, out)


<xcollection.core.analyzed_datasource at 0x2aac75d2b080>

In [12]:
# Materialize the analyzed FG_CO2 datasource and print a summary of its
# dimensions, variables and attributes (time has length 12 in the output).
ds = dc.to_xarray()
ds.info()

xarray.Dataset {
dimensions:
	d2 = 2 ;
	ens = 1 ;
	lat_aux_grid = 395 ;
	moc_comp = 3 ;
	moc_z = 61 ;
	nlat = 384 ;
	nlon = 320 ;
	time = 12 ;
	transport_comp = 5 ;
	transport_reg = 2 ;
	z_t = 60 ;
	z_t_150m = 15 ;
	z_w = 60 ;
	z_w_bot = 60 ;
	z_w_top = 60 ;

variables:
	object time(time) ;
		time:long_name = time ;
		time:bounds = month_bounds ;
	float32 lat_aux_grid(lat_aux_grid) ;
		lat_aux_grid:long_name = latitude grid for transport diagnostics ;
		lat_aux_grid:units = degrees_north ;
		lat_aux_grid:valid_min = -79.48815155029297 ;
		lat_aux_grid:valid_max = 90.0 ;
	float32 moc_z(moc_z) ;
		moc_z:long_name = depth from surface to top of layer ;
		moc_z:units = centimeters ;
		moc_z:positive = down ;
		moc_z:valid_min = 0.0 ;
		moc_z:valid_max = 549999.0625 ;
	float32 z_t(z_t) ;
		z_t:long_name = depth from surface to midpoint of layer ;
		z_t:units = centimeters ;
		z_t:positive = down ;
		z_t:valid_min = 500.0 ;
		z_t:valid_max = 537500.0 ;
	float32 z_t_150m(z_t_150m) ;
		z_t_150