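Prepare yearly forcing files based on eartH2Observe (E2O) data: split each multi-year daily file into one file per year and rescale the variable to mm day-1.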

In [None]:
import xarray as xr
import pandas as pd
import os, sys
import numpy as np
from os.path import join, dirname, realpath, basename
from datetime import datetime
import pdb
import glob

# multiprocessing libraries
import dask

In [None]:
# Root directory of the local eartH2Observe (E2O) archive.
ddir = r'/home/dirk/datasets/E2O'
# Dataset identifier; used as sub-directory name and inside the file names.
name = 'metfr'
# Name of the variable that is read from the input files and written out.
var_name = "Runoff"
# scale: factor the raw values are multiplied with; the result is labelled
#        'mm day-1' by the processing step (86400 = seconds per day).
#        NOTE(review): the negative sign presumably flips a negative-runoff
#        sign convention of the source data -- confirm against the E2O docs.
# xdim / ydim / tdim: names of the x, y and time dimensions in the input files.
options = dict(scale=-86400, xdim='lon', ydim='lat', tdim='time')
# Input files live in <ddir>/<name>/org. glob returns matches in arbitrary
# order, so sort them to get a reproducible file (and task) order.
fns = sorted(
    glob.glob(join(ddir, name, 'org', f'e2o_{name}_wrr2_glob15_day_{var_name}_*-*.nc'))
)
fns

In [None]:
import copy
import matplotlib.pyplot as plt

# Take a private copy of the 'Blues' colormap before customising it, so the
# globally registered colormap is not modified in place.
cmap = copy.copy(plt.cm.Blues)
# Values below the lower colour limit (vmin) are drawn in red.
cmap.set_under('red')

In [None]:
@dask.delayed
def main(fn, options, name, ddir, var_name):
    """Split one multi-year file into yearly NetCDF files plus a figure per year.

    For every calendar year found on the time axis of ``fn`` the variable
    ``var_name`` is multiplied by ``options['scale']``, a figure with the
    seasonal means is saved, and the scaled variable is written to its own
    compressed NetCDF file.

    Parameters
    ----------
    fn : str
        Path of the input NetCDF file.
    options : dict
        Must provide ``'tdim'``, ``'xdim'`` and ``'ydim'`` (names of the time,
        x and y dimensions in ``fn``) and ``'scale'`` (multiplication factor).
    name : str
        Dataset identifier; used as sub-directory of ``ddir`` and in the
        output file names.
    ddir : str
        Root directory; output goes to ``<ddir>/<name>`` and figures to
        ``<ddir>/<name>/fig``. Both folders are created when missing.
    var_name : str
        Name of the variable to process.

    Returns
    -------
    None
        The function is wrapped in ``dask.delayed``; calling it only builds a
        lazy task, the work is done when the task is computed.

    Notes
    -----
    Relies on the module-level names ``plt`` and ``cmap``. Negative values are
    not clipped in the output; in the figure (``vmin=0``) they are drawn in
    the colormap's "under" colour.
    """
    tdim, xdim, ydim = options['tdim'], options['xdim'], options['ydim']
    scale = options['scale']  # to 'mm.day-1'
    units = 'mm day-1'

    out_dir = join(ddir, name)
    fig_dir = join(out_dir, 'fig')
    # Creates out_dir as well. exist_ok avoids a failure when several worker
    # processes try to create the folder at the same time.
    os.makedirs(fig_dir, exist_ok=True)

    # Only rename dimensions whose current name differs from the target name.
    targets = {xdim: 'lon', ydim: 'lat', tdim: 'time'}
    renames = {old: new for old, new in targets.items() if old != new}

    # Keep the context-manager target under its own name so the object that
    # gets closed on exit is unambiguous.
    with xr.open_dataset(fn, chunks={tdim: 30, xdim: -1, ydim: -1}) as ds_in:
        ds = ds_in.rename(renames) if renames else ds_in
        yrs = np.unique(ds.time.dt.year)
        for yr in yrs:
            yr = int(yr)
            fn_out = f'e2o_{name}_wrr2_glob15_day_{var_name}_{yr}.nc'
            print(fn_out)

            # ISO 8601 (YYYY-MM-DD) bounds: unambiguous, unlike DD-MM-YYYY.
            # Label-based slicing includes the whole of the end day.
            ds_yr = ds.sel(
                time=slice(f'{yr:04d}-01-01', f'{yr:04d}-12-31')
            ).load()

            # fix data: apply the scale factor (runoff is positive afterwards)
            xvar = ds_yr[var_name] * scale

            # make figure: one panel per season with the seasonal mean
            xvar_seas = xvar.groupby('time.season').mean('time')
            xvar_seas.plot(
                x='lon', y='lat', col='season', vmin=0, vmax=10,
                col_wrap=2, cmap=cmap, figsize=(14, 8),
            )
            fig = plt.gcf()
            fig.suptitle(f'{name} - {yr}', y=1.01, fontsize='large')
            plt.savefig(
                join(fig_dir, fn_out.replace('.nc', '.png')),
                bbox_inches='tight', dpi=320,
            )
            # Release the figure(s); otherwise they pile up over the years.
            plt.close('all')

            # write output
            xvar.name = var_name
            # The multiplication drops the attributes; restore them first ...
            xvar.attrs.update(ds_yr[var_name].attrs)
            xvar.attrs.update(units=units)  # ... then overwrite the unit.
            ds_out = xvar.to_dataset()
            ds_out.attrs.update(ds_yr.attrs)
            encoding = {var_name: {'zlib': True}}

            ds_out.to_netcdf(join(out_dir, fn_out), encoding=encoding)

In [None]:
# Build one lazy task per input file; calling the delayed `main` does not run
# anything yet.
tasks = [
    main(fn, options, name, ddir, var_name)
    for fn in fns
]
# Execute all tasks with the multiprocessing scheduler, three workers at most.
dask.compute(
    *tasks,
    scheduler='processes',
    num_workers=3,
)