## `timeseries_tools` Development Notebook
This notebook is for developing a prototype Nowcast timeseries analysis package. The primary goal of this package is to be memory-efficient. As such, the results arrays are flattened to 2-D (time, space) so that land points can be removed. The 2-D dimensions are also ideal for some analyses like PCA. The basic workflow proceeds as follows:
   * Flatten the model grid and mask to 2-D (time, space) and remove land indices
   * Load, process, and flatten hourly Nowcast Results to 2-D (time, space) and remove land indices
   * Concatenate consecutive 24 hour periods
   * Reshape the concatenated `Numpy ndarray` to (time, depth, y, x)

In [5]:
from salishsea_tools import nc_tools
import progressbar
import xarray as xr
import netCDF4 as nc
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

### Functions
To be added to `timeseries_tools.py` in `salishsea_tools`

In [None]:
def build_GEM_mask(grid_GEM, grid_NEMO, mask_NEMO):
    """
    """

    # Preallocate
    ngrid_GEM = grid_GEM['x'].shape[0] * grid_GEM['y'].shape[0]
    mask_GEM = np.zeros(ngrid_GEM, dtype=int)

    # Evaluate each point on GEM grid
    with progressbar.ProgressBar(max_value=ngrid_GEM) as bar:
        for index, coords in enumerate(zip(
            grid_GEM['nav_lon'].values.reshape(ngrid_GEM) - 360,
            grid_GEM['nav_lat'].values.reshape(ngrid_GEM))):

            j, i = geo_tools.find_closest_model_point(coords[0], coords[1], grid_NEMO['nav_lon'], grid_NEMO['nav_lat'])
            if j is np.nan or i is np.nan:
                mask_GEM[index] = 0
            else:
                mask_GEM[index] = mask_NEMO[j, i].values
                
            # Update progress bar
            bar.update(index)

    # Reshape
    mask_GEM = mask_GEM.reshape(grid_GEM['nav_lon'].shape)
    
    return mask_GEM

In [2]:
def reshape_coords(mask_in, dim_in, index=0, spacing=1):
    """Prepare the mask and grid for the selected timeseries slice, and reshape into 1 spatial dimension
    """
    
    # Correct for depth dimension name
    if dim_in.find('depth') is not -1:
        dim = 'deptht'
    else:
        dim = dim_in
    
    # Create full gridded mask, grid and depth Numpy ndarrays
    gridZ, gridY, gridX = np.meshgrid(mask_in.z, mask_in.y, mask_in.x, indexing='ij')
    gridmask = xr.Dataset({
        'tmask': (['deptht', 'y', 'x'], mask_in.tmask.isel(t=0).values.astype(bool)),
        'depth': (['deptht', 'y', 'x'], mask_in.gdept_0.isel(t=0).values),
        'gridZ': (['deptht', 'y', 'x'], gridZ),
        'gridY': (['deptht', 'y', 'x'], gridY),
        'gridX': (['deptht', 'y', 'x'], gridX),},
        coords={'deptht': mask_in.gdept_1d.isel(t=0).values, 'y': mask_in.y, 'x': mask_in.x})
    
    # Slice and subsample mask
    mask = gridmask.tmask.isel(**{dim: index}).values[::spacing, ::spacing]
    
    # Slice and subsample grid and depth into dict
    coords = {
        'depth': gridmask.depth.isel(**{dim: index}).values[::spacing, ::spacing],
        'gridZ': gridmask.gridZ.isel(**{dim: index}).values[::spacing, ::spacing],
        'gridY': gridmask.gridY.isel(**{dim: index}).values[::spacing, ::spacing],
        'gridX': gridmask.gridX.isel(**{dim: index}).values[::spacing, ::spacing],
    }
    
    # Number of grid points
    ngrid = mask.shape[0] * mask.shape[1]
    ngrid_water = mask.sum()
    
    # Reshape mask, grid, and depth
    mask  =  mask.reshape(ngrid)
    coords['depth'] = coords['depth'].reshape(ngrid)[mask]
    coords['gridZ'] = coords['gridZ'].reshape(ngrid)[mask]
    coords['gridY'] = coords['gridY'].reshape(ngrid)[mask]
    coords['gridX'] = coords['gridX'].reshape(ngrid)[mask]
    
    return mask, coords, ngrid, ngrid_water

In [3]:
def reshape_to_ts(data_grid, mask, ngrid, ngrid_water, spacing=1):
    """
    """
        
    # Convert to Numpy ndarray, subsample, and reshape
    data_flat = data_grid[:, ::spacing, ::spacing].reshape((-1, ngrid))
    
    # Preallocate trimmed array
    data_trim = np.zeros((data_flat.shape[0], ngrid_water))
    
    # Trim land points
    for tindex, data_t in enumerate(data_flat):
        data_trim[tindex, :] = data_t[mask]
    
    return data_trim

In [4]:
def reshape_to_grid(data_flat, coords, shape):
    """
    """

    # Preallocate gridded array
    data_grid = np.zeros((data_flat.shape[0],) + shape)
    
    # Reshape flattened data to grid
    for coord1, coord2, data_xyz in zip(*(coords + [data_flat.T])):
        data_grid[:, coord1, coord2] = data_xyz
    
    return data_grid

In [5]:
def load_NEMO_timeseries(filenames, field, mask, dim, index=0, spacing=1, shape='grid', unstagger_dim=None):
    """
    """
    
    # Reshape mask, grid, and depth
    tmask, coords, ngrid, ngrid_water = reshape_coords(mask, dim, index=index, spacing=spacing)
    
    # Initialize output array
    date = np.empty(0, dtype='datetime64[ns]')
    data = np.empty((0, ngrid_water))
    
    # Loop through filenames
    with progressbar.ProgressBar(max_value=len(filenames)) as bar:
        for findex, filename in enumerate(filenames):

            # Open NEMO results and flatten (depth averages would be added here)
            data_grid = xr.open_dataset(filename)[field].isel(**{dim: index})

            # Unstagger if velocity field
            if unstagger_dim is not None:
                data_grid = viz_tools.unstagger_xarray(data_grid, unstagger_dim)

            # Reshape field
            data_trim = reshape_to_ts(data_grid.values, tmask, ngrid, ngrid_water, spacing=spacing)

            # Store trimmed arrays
            date = np.concatenate([date, data_grid.time_counter.values])
            data = np.concatenate([data, data_trim], axis=0)

            # Update progress bar
            bar.update(findex)
    
    # Reshape to grid
    if shape is 'grid':
    
        # Correct for depth dimension name
        if dim.find('depth') is not -1:
            dim1, dim2, dimslice = 'gridY', 'gridX', 'z'
        elif dim.find('y') is not -1:
            dim1, dim2, dimslice = 'depth', 'gridX', 'y'
        elif dim.find('x') is not -1:
            dim1, dim2, dimslice = 'depth', 'gridY', 'x'

        # Reshape data to grid
        data = reshape_to_grid(data, [coords[dim1], coords[dim2]], mask.gdept_0.isel(**{'t': 0, dimslice: 0}).shape)

        # Redefine coords for grid
        coords = {'depth': mask.gdept_1d.values, 'gridZ': mask.z.values, 'gridY': mask.y.values, 'gridX': mask.x.values}
    
    # Coords dict
    coords['date'] = date
    
    return data, coords

### Basic Usage

In [6]:
# Load Temperature
timerange = ['2017 Jan 1 00:00', '2017 Jan 30 23:00']

# Make a list of sequential filenames to loop through
filenames = nc_tools.make_filename_list(timerange, 'T', model='nowcast-green', resolution='h')

mask = xr.open_dataset('/ocean/bmoorema/research/MEOPAR/NEMO-forcing/grid/mesh_mask_downbyone2.nc')

# Load, slice, flatten, etc (example is surface slice)
T_flat, coords = load_NEMO_timeseries(filenames, 'votemper', mask, 'deptht')

# Unflatten
T_grid = reshape_to_grid(T_flat, [coords['gridY'], coords['gridX']], mask.gdept_0.isel(t=0, z=0).shape)

100% (30 of 30) |###################################| Elapsed Time: 0:01:21 Time: 0:01:21


ValueError: could not broadcast input array from shape (898,720) into shape (720)