# Annual Averages of MOM 0.1 output

In [1]:
%matplotlib inline

import os

import xarray as xr
from glob import glob

from dask.dot import dot_graph
from dask.diagnostics import Profiler, ResourceProfiler, CacheProfiler

  from pandas.tslib import OutOfBoundsDatetime


In [2]:
# Configure Bokeh to emit its figures into notebook cell output.
# NOTE(review): presumably needed for the dask profiler plot drawn by
# rprof.visualize() further down - confirm.
from bokeh.plotting import output_notebook
output_notebook()

In [3]:
# Register a dask progress bar for the whole session; the
# "[####...] | 100% Completed" lines in later cell outputs come from it.
from dask.diagnostics import ProgressBar
pbar = ProgressBar()
pbar.register()

In [4]:
# Root directory of the model output; each experiment is looked up as
# os.path.join(DataDir, <experiment name>) by the cells below.
DataDir = '/g/data3/hh5/tmp/cosima/mom01v5'

# Known experiment names (presumably one sub-directory of DataDir each).
expts = [
    'GFDL50',
    'KDS75',
    'KDS75_UP',
    'KDS75_PI',
    'KDS75_wind',
]

In [5]:
%%time
# Annual mean of u
expt = 'KDS75_UP'
ExpDir = os.path.join(DataDir, expt)
FileList = glob(os.path.join(ExpDir, 'output*/ocean.nc'))
FileList.sort()

CPU times: user 1.91 ms, sys: 2.5 ms, total: 4.41 ms
Wall time: 7.96 ms


In [6]:
import warnings
warnings.filterwarnings("ignore", message="Unable to decode time axis into full numpy.datetime64 objects")

In [7]:
%%time
from dask import bag 
datasets =  bag.from_sequence(FileList)\
              .map(lambda path: (path, xr.open_dataset(path, decode_times=True)))\
              .map(lambda path, ds: (path, ds.time))\
              .groupby(lambda p: p[1].time.data[0].year)\
              .map(lambda k, p: (k, [x[0] for x in p]))
FilesByYear = dict(datasets.compute())

[########################################] | 100% Completed |  5.4s
CPU times: user 926 ms, sys: 158 ms, total: 1.08 s
Wall time: 6.25 s


In [8]:
# Year whose files are averaged below; it must be a key of FilesByYear.
year = 70
# Show the output files that were grouped under that year.
FilesByYear[year]

['/g/data3/hh5/tmp/cosima/mom01v5/KDS75_UP/output267/ocean.nc',
 '/g/data3/hh5/tmp/cosima/mom01v5/KDS75_UP/output269/ocean.nc',
 '/g/data3/hh5/tmp/cosima/mom01v5/KDS75_UP/output268/ocean.nc',
 '/g/data3/hh5/tmp/cosima/mom01v5/KDS75_UP/output266/ocean.nc']

In [22]:
%%time
datasets = [xr.open_dataset(fn,
                         chunks={'time':1, 'st_ocean':7, 'yu_ocean':300, 'xu_ocean': 400},
                        decode_times=False).isel(xu_ocean=slice(0,400)).u for fn in FilesByYear[year]]

u_merged = xr.concat(datasets, 
                     dim='time',
                     coords='all',
            )

u_avg = u_merged.mean(dim='time')


CPU times: user 241 ms, sys: 9.29 ms, total: 250 ms
Wall time: 249 ms


In [57]:
%%time
print('loading...')
u_avg.load()
print('saving...')
u_avg.to_netcdf('u_100.nc')

loading...
[##########                              ] | 27% Completed |  0.3s

  x = np.divide(x1, x2, out)


[########################################] | 100% Completed |  1.2s
[########################################] | 100% Completed |  1.2s
[########################################] | 100% Completed |  1.3s
saving...
CPU times: user 1.31 s, sys: 87.5 ms, total: 1.4 s
Wall time: 1.34 s


In [23]:
# Profile the computation of the time mean (task timings, resource usage
# sampled every 0.25 s, and cache contents).
# The mean is rebuilt from u_merged so that there is real dask work to record:
# once an earlier cell has called u_avg.load(), calling u_avg.load() again
# computes nothing and the profilers would come back empty.
with Profiler() as prof, ResourceProfiler(dt=0.25) as rprof, CacheProfiler() as cprof:
    u_avg = u_merged.mean(dim='time').load()

[                                        ] | 1% Completed |  0.4s

  x = np.divide(x1, x2, out)


[########################################] | 100% Completed | 13.7s


In [24]:
# Plot what the ResourceProfiler (rprof) recorded inside the `with` block above.
rprof.visualize()

In [12]:
# Draw the dask task graph of the time mean, laid out left-to-right.
# The graph is taken from a freshly built lazy mean: after u_avg.load() the
# array held by u_avg is plain in-memory data with no `.dask` graph, so
# `u_avg.data.dask` fails when the notebook is run from top to bottom.
# NOTE(review): the recorded output shows the rendered graph exceeded the
# notebook's IOPub data rate limit - consider a smaller subset for inspection.
dot_graph(u_merged.mean(dim='time').data.dask, rankdir='LR')

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.
