In [2]:
import intake

from workflow.scripts.utils import copy_meta_data_CMIP,transelate_aerocom_helper, regrid_global
from pyclim_noresm.general_util_funcs import yearly_avg
import time
import pathlib as pl
import xarray as xr
from functools import partial
import xesmf
import pandas as pd

In [3]:
# Open the intake-esm datastore from the catalogue file that the Snakemake
# rule passes in as its `catalog` input.
catalog_path = snakemake.input.catalog
esm_cat = intake.open_esm_datastore(catalog_path)

In [4]:
# Rule parameters supplied by Snakemake.
params = snakemake.params
kind = params.get("kind", "experiment")
# Variables listed here are converted to yearly totals instead of yearly
# means further down. Fall back to an empty list when the rule does not set
# the parameter, so that the later `var_id in accumlative_vars` membership
# test cannot fail on None.
# NOTE(review): the key is spelled 'accumalative_vars'; it has to match the
# spelling used by the workflow rule, so it is left untouched here.
accumlative_vars = params.get('accumalative_vars', None) or []


In [5]:
# Identify the requested dataset from the rule wildcards.
wc = snakemake.wildcards
cfg = snakemake.config
exp_id, mod_id, var_id, freq = wc.experiment, wc.model, wc.variable, wc.freq

# Table: per-variable entry from the config, otherwise the configured default.
table_id = cfg['table_ids'].get(var_id, cfg['table_id_default'])

# Ensemble member: an experiment- and model-specific entry from the config
# wins over the configured default variant.
variant_overrides = cfg['model_specific_variant'].get(exp_id, None) or {}
memb_id = variant_overrides.get(mod_id, cfg['variant_default'])

In [6]:
# Narrow the catalogue down to the single requested dataset.
query = dict(
    experiment_id=exp_id,
    source_id=mod_id,
    variable_id=var_id,
    member_id=memb_id,
    table_id=table_id,
)
col = esm_cat.search(**query)

# When the catalogue holds several versions of the dataset, keep only the one
# with the largest version value.
n_versions = col.nunique().version
if n_versions > 1:
    latest = max(col.df['version'].unique())
    col = col.search(version=[latest])

In [7]:
# Load the search result; time is decoded with cftime.
dsets = col.to_dataset_dict(xarray_open_kwargs={'use_cftime': True})
if not dsets:
    # Fail with a message that names the request instead of a bare IndexError.
    raise ValueError(
        f'No dataset found in the catalogue for experiment={exp_id}, '
        f'model={mod_id}, variable={var_id}, member={memb_id}, table={table_id}'
    )
# Take the first (expected: only) dataset of the result.
ds = next(iter(dsets.values()))
# `Dataset.drop` is deprecated in xarray; `drop_vars` is the replacement and
# is what the aggregation cell of this notebook already uses.
ds = ds.drop_vars('member_id').squeeze()

In [8]:
def regrid_dataset(ds, grid_params, grid_path):
    """Regrid ``ds`` with ``regrid_global`` onto a target grid.

    Parameters
    ----------
    ds
        Dataset handed on to ``regrid_global``.
    grid_params
        Mapping of regridding options. ``'method'`` selects the regridding
        method (``'conservative'`` when absent). ``'dxdy'`` is only consulted
        when ``grid_path`` is falsy; its first and second elements are passed
        to ``regrid_global`` as ``lon`` and ``lat``.
    grid_path
        Path of a file that is opened with ``xr.open_dataset`` and used as the
        target grid. Takes precedence over ``'dxdy'``.

    Returns
    -------
    The regridded dataset, or ``ds`` unchanged (after printing a notice) when
    neither a grid file nor ``'dxdy'`` is given.
    """
    scheme = grid_params.get('method', 'conservative')

    if grid_path:
        target_grid = xr.open_dataset(grid_path)
        return regrid_global(ds, target_grid, method=scheme)

    if grid_params.get('dxdy', None):
        spacing = grid_params['dxdy']
        return regrid_global(
            ds,
            lon=spacing[0],
            lat=spacing[1],
            method=scheme,
            ignore_degenerate=True,
        )

    print('No outgrid provided!')
    return ds

In [16]:
def check_bounds(ds, variable):
    """Tell whether the bounds of ``variable`` have a 'time' dimension that
    ``variable`` itself does not have.

    Both the bounds and the variable are looked up through the ``ds.cf``
    accessor.
    NOTE(review): this notebook never imports cf_xarray itself; presumably the
    accessor is registered as a side effect of another import - confirm.
    """
    bounds_dims = set(ds.cf.get_bounds(variable).dims)
    variable_dims = set(ds.cf[variable].dims)
    extra_dims = bounds_dims - variable_dims
    return 'time' in extra_dims
# Replace horizontal cell bounds that carry a spurious time dimension by
# freshly generated ones.
if 'lon_bnds' in ds.coords or 'lat_bnds' in ds.coords:
    horizontal_bounds = {'lon_bnds': 'longitude', 'lat_bnds': 'latitude'}
    # Only inspect the axes whose bounds variable really exists: the guard
    # above is an "or", so one of the two may be missing.
    axes_to_check = [axis for name, axis in horizontal_bounds.items() if name in ds.variables]
    if any(check_bounds(ds, axis) for axis in axes_to_check):
        # `Dataset.drop` is deprecated in favour of `drop_vars`;
        # errors='ignore' tolerates a dataset holding only one of the two.
        ds = ds.drop_vars(list(horizontal_bounds), errors='ignore')
        ds = ds.cf.add_bounds('latitude', dim='lat')
        ds = ds.cf.add_bounds('longitude', dim='lon')

In [17]:
# Regrid only when the workflow config defines `regrid_params` and the rule
# has not switched regridding off through its `regrid` parameter.
if snakemake.config.get('regrid_params', None):
    if snakemake.params.get('regrid', True):
        grid_params = snakemake.config['regrid_params']
        target_grid_file = grid_params.get('grid_path', None)
        regrid_func = partial(
            regrid_dataset,
            grid_params=grid_params,
            grid_path=target_grid_file,
        )
        ds = regrid_func(ds)

In [18]:
# Aggregate `ds` in time according to the requested output frequency:
#   'Ayear' - yearly mean, or yearly total for variables in `accumlative_vars`
#   'clim'  - mean per calendar month over the whole record
#   'Amon'  - data passed through unchanged
# Any other value raises a ValueError.
with xr.set_options(keep_attrs=True):
    # Make sure horizontal cell bounds exist; the yearly branch copies them
    # from `ds` into its output.
    if not ds.cf.bounds.get('lon', None):
        ds = ds.cf.add_bounds(['lon', 'lat'])

    # Leave the time bounds out of the temporal aggregation. The original
    # dropped them into `data` and then immediately overwrote `data`, so the
    # drop never took effect.
    ds_agg = ds.drop_vars('time_bnds') if 'time_bnds' in ds.data_vars else ds

    if freq == 'Ayear':
        # `accumlative_vars` may be None when the rule does not define it.
        if var_id in (accumlative_vars or ()):
            vname = ds.variable_id
            # Yearly mean multiplied by the number of seconds in 365 days.
            # NOTE(review): assumes a per-second rate and ignores leap years
            # and non-standard calendars - confirm.
            data = ds_agg[vname].resample(time='Y').mean() * 365 * 24 * 60 * 60
            # Replace the last token of the units string by "year-1".
            data.attrs['units'] = '{} year-1'.format(' '.join(data.attrs['units'].split(' ')[:-1]))
            data = data.to_dataset(name=vname)
            data.attrs['history'] = ds.attrs.get('history', '') + ', accumulated over a year'
        else:
            data = ds_agg.resample(time='Y').mean()
            data.attrs['history'] = data.attrs.get('history', '') + ', annual average'
        # Not used in the visible part of the notebook; kept in case code
        # outside this view relies on it.
        dvar_attrs = copy_meta_data_CMIP(data[var_id].attrs)
        # Re-attach the horizontal cell bounds of the source dataset.
        lon_bounds_name = ds.cf.bounds['lon'][0]
        lat_bounds_name = ds.cf.bounds['lat'][0]
        data = data.assign({lon_bounds_name: ds[lon_bounds_name]})
        data = data.assign({lat_bounds_name: ds[lat_bounds_name]})
    elif freq == 'clim':
        # First and last month of the record, for the history attribute.
        # (The original read both from an undefined `data` and used time[0]
        # for the end of the period as well.)
        t0 = ds.time[0].dt.strftime('%Y/%m').values
        t1 = ds.time[-1].dt.strftime('%Y/%m').values
        data = ds_agg.groupby('time.month').mean('time')
        data[data.variable_id].attrs['history'] = data[data.variable_id].attrs.get('history','') + f', clim mean {t0}-{t1}'
        # NOTE(review): this condition can never hold inside the 'clim'
        # branch. The original tested an undefined `wildcards.freq`; it is
        # kept so the intent (stamping the climatology with month-end dates
        # of 2010) is not lost - confirm which wildcard or parameter should
        # enable it.
        if freq == '2010':
            import cftime
            data = data.rename(month='time')
            cftimes = cftime.date2num(pd.date_range('2010-01-31','2010-12-31', freq='M').to_list(),
                                      'days since 2010-01-01',
                                      has_year_zero=False, calendar='gregorian')
            data = data.assign_coords(time=cftimes)
            data.time.attrs['units'] = 'days since 2010-01-01'
        dvar_attrs = copy_meta_data_CMIP(data[var_id].attrs)
    elif freq == 'Amon':
        data = ds
    else:
        # `wildcards` is not defined in this notebook; report the frequency
        # read from snakemake.wildcards so the intended ValueError is raised.
        raise ValueError(f'{freq} is an invalid frequency')

In [19]:
# Record the requested frequency as a global attribute, then write the result
# to the output path declared by the Snakemake rule.
data.attrs.update(frequency=snakemake.wildcards.freq)
out_file = snakemake.output.outpath
data.to_netcdf(out_file)