## Accessing NEMO results
This notebook explores different ways to access NEMO results and compares their computational efficiency.

In [1]:
from nco import Nco
import netCDF4 as nc
import xarray as xr
import numpy as np
import datetime as dtm
import dateutil.parser as dparser
import timeit

# Shared Nco instance; skookum_with_NCO() below calls nco.ncrcat() on it
nco = Nco()

### Append arrays directly
These methods perform poorly: they crashed COD several times, and at best I can only assemble about 2-3 days of hourly results.

In [3]:
# Looping into numpy arrays - Method 1

# First file, followed by the 2 days that get appended to it
filepaths = ['/results/SalishSea/nowcast-green/01mar16/SalishSea_1h_20160301_20160301_grid_U.nc']
dates = [dparser.parse('2016 mar 02') + dtm.timedelta(days=day) for day in range(2)]
for date in dates:
    filepaths.append(
        '/results/SalishSea/nowcast-green/{date1}/SalishSea_1h_{date2}_{date3}_grid_U.nc'.format(
            date1=date.strftime('%d%b%y').lower(), date2=date.strftime('%Y%m%d'),
            date3=date.strftime('%Y%m%d')))

# Open every file, then concatenate ONCE along axis 0. Calling np.concatenate
# inside the loop re-copies the whole accumulated array on each iteration.
# The finally clause closes every file handle even if a file is missing or
# the concatenation runs out of memory.
datasets = []
try:
    for filepath in filepaths:
        datasets.append(nc.Dataset(filepath))
    u_grid = np.concatenate([u_vel.variables['vozocrtx'] for u_vel in datasets], axis=0)
finally:
    for u_vel in datasets:
        u_vel.close()

print('u_grid shape: ', u_grid.shape)

u_grid shape:  (72, 40, 898, 398)


In [4]:
# Looping into numpy arrays - Method 2

# Define 2 days
dates = [dparser.parse('2016 mar 02') + dtm.timedelta(days=day) for day in range(2)]

# Keep the Dataset handles (not just their variables) so they can be closed
# once the data has been copied into memory
datasets = []
try:
    for date in dates:
        datasets.append(nc.Dataset(
            '/results/SalishSea/nowcast-green/{date1}/SalishSea_1h_{date2}_{date3}_grid_U.nc'.format(
                date1=date.strftime('%d%b%y').lower(), date2=date.strftime('%Y%m%d'),
                date3=date.strftime('%Y%m%d'))))

    # Define list of netCDF4 variables and concatenate it in a single call
    u_store = [dataset.variables['vozocrtx'] for dataset in datasets]
    u_grid = np.concatenate(u_store, axis=0)
finally:
    # The variables in u_store are only readable while their files are open,
    # so close the files only after the concatenation has finished
    for dataset in datasets:
        dataset.close()

print('u_grid shape: ', u_grid.shape)

u_grid shape:  (48, 40, 898, 398)


### Merge netCDF files using NCO
This method merges netCDF files to a temporary merged file that can be loaded using netCDF4 or xarray. Still expensive, but far superior performance-wise to directly appending numpy arrays.

In [2]:
def skookum_with_NCO(days):
    """Merge hourly U-velocity results from skookum with PyNCO and load them.

    Builds the paths of `days` consecutive daily results files starting at
    2016-03-02, concatenates them with ``nco.ncrcat`` and reads the
    'vozocrtx' variable of the merged file into memory. The shape of the
    loaded array is printed; nothing is returned.

    :arg days: number of consecutive daily files to merge.
    """

    # Define `days` consecutive dates
    dates = [dparser.parse('2016 mar 02') + dtm.timedelta(days=day) for day in range(days)]

    # Build list of filenames
    filenames = ['/results/SalishSea/nowcast-green/{date1}/SalishSea_1h_{date2}_{date3}_grid_U.nc'.format(
                 date1=date.strftime('%d%b%y').lower(), date2=date.strftime('%Y%m%d'),
                 date3=date.strftime('%Y%m%d')) for date in dates]

    # Use nco.ncrcat to merge files to temporary, then load using netCDF4.
    # The with block closes the merged file as soon as the data is in memory.
    with nc.Dataset(nco.ncrcat(filenames)) as u_vel:
        u_grid = u_vel.variables['vozocrtx'][:, :, :, :]

    print('u_grid shape: ', u_grid.shape)

In [3]:
# Time one one-day merge; a callable replaces the stmt/setup strings
t = timeit.timeit(lambda: skookum_with_NCO(1), number=1)
print('Time Elapsed: ', t)

u_grid shape:  (24, 40, 898, 398)
Time Elapsed:  56.09589666198008


### Load ERDDAP results using netCDF4
This is essentially the server-side equivalent of merging with NCO, and it performs about the same.

In [3]:
def ERDDAP_with_netCDF4(days):
    """Load ERDDAP results using netCDF4

    Opens the ubcSSn3DuVelocity1hV1 griddap dataset, finds the time indices
    nearest to the start and end of a `days`-long window beginning at
    2016-03-01 00:30, and reads 'uVelocity' over that index range (end index
    excluded). The shape of the loaded array is printed; nothing is returned.

    :arg days: length of the window to load, in days.
    """

    # Specify date range
    startdate = dparser.parse('2016 mar 01 00:30')
    daterange = [startdate, startdate + dtm.timedelta(days=days)]

    # The parsed datetimes are naive, and .timestamp() on a naive datetime
    # interprets it in the machine's local time zone. Pin them to UTC so the
    # selected window does not shift with the host's time zone.
    # NOTE(review): assumes the dataset's time axis is epoch seconds in UTC,
    # as ERDDAP serves it - confirm against the dataset metadata.
    targets = [date.replace(tzinfo=dtm.timezone.utc).timestamp() for date in daterange]

    # Load
    with nc.Dataset('https://salishsea.eos.ubc.ca/erddap/griddap/ubcSSn3DuVelocity1hV1') as u_vel:
        # Fetch the remote time axis once instead of once per date
        times = u_vel.variables['time'][:]
        index_NEMO = [abs(times - target).argmin() for target in targets]
        u_grid = u_vel.variables['uVelocity'][index_NEMO[0]:index_NEMO[1], :, :, :]

    print('u_grid shape: ', u_grid.shape)

In [4]:
# Time one one-day ERDDAP load; a callable replaces the stmt/setup strings
t = timeit.timeit(lambda: ERDDAP_with_netCDF4(1), number=1)
print('Time Elapsed: ', t)

u_grid shape:  (24, 40, 898, 398)
Time Elapsed:  56.61164977197768
