From 9b06b033d795ad9d71d51eca147b5b45c3986327 Mon Sep 17 00:00:00 2001 From: "Phillip J. Wolfram" Date: Mon, 27 Mar 2017 13:04:29 -0700 Subject: [PATCH 1/6] Removes values selection from moc --- .../ocean/meridional_overturning_circulation.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/mpas_analysis/ocean/meridional_overturning_circulation.py b/mpas_analysis/ocean/meridional_overturning_circulation.py index 71c74a5ab..918111bb3 100644 --- a/mpas_analysis/ocean/meridional_overturning_circulation.py +++ b/mpas_analysis/ocean/meridional_overturning_circulation.py @@ -306,10 +306,9 @@ def _compute_moc_climo_postprocess(config, runStreams, variableMap, calendar, # Compute annual climatology annualClimatology = ds.mean('Time') - # Convert to numpy arrays - # (can result in a memory error for large array size) - horizontalVel = annualClimatology.avgNormalVelocity.values - verticalVel = annualClimatology.avgVertVelocityTop.values + # Convert to numpy arrays + horizontalVel = annualClimatology.avgNormalVelocity + verticalVel = annualClimatology.avgVertVelocityTop velArea = verticalVel * areaCell[:, np.newaxis] # Create dictionary for MOC climatology (NB: need this form @@ -484,8 +483,8 @@ def write_file(dsMOCTimeSeries): first = False date = days_to_datetime(ds.Time[tIndex], calendar=calendar) print ' date: {:04d}-{:02d}'.format(date.year, date.month) - horizontalVel = ds.avgNormalVelocity[tIndex, :, :].values - verticalVel = ds.avgVertVelocityTop[tIndex, :, :].values + horizontalVel = ds.avgNormalVelocity.isel(Time=tIndex) + verticalVel = ds.avgVertVelocityTop.isel(Time=tIndex) velArea = verticalVel * areaCell[:, np.newaxis] transportZ = _compute_transport(maxEdgesInTransect, transectEdgeGlobalIDs, From ffc26c949ed6bfc2e7b478fd4400972db6519883 Mon Sep 17 00:00:00 2001 From: "Phillip J. Wolfram" Date: Mon, 27 Mar 2017 14:56:53 -0700 Subject: [PATCH 2/6] Fixes dictionary that only had last set of entries --- mpas_analysis/ocean/meridional_overturning_circulation.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mpas_analysis/ocean/meridional_overturning_circulation.py b/mpas_analysis/ocean/meridional_overturning_circulation.py index 918111bb3..89c1b79a9 100644 --- a/mpas_analysis/ocean/meridional_overturning_circulation.py +++ b/mpas_analysis/ocean/meridional_overturning_circulation.py @@ -240,6 +240,7 @@ def _compute_moc_climo_postprocess(config, runStreams, variableMap, calendar, raise IOError('Regional masking file for MOC calculation ' 'does not exist') iRegion = 0 + dictRegion = {} for region in regionNames: print '\n Reading region and transect mask for {}...'.format(region) ncFileRegional = netCDF4.Dataset(regionMaskFiles, mode='r') @@ -255,12 +256,14 @@ def _compute_moc_climo_postprocess(config, runStreams, variableMap, calendar, iRegion += 1 indRegion = np.where(regionCellMask == 1) - dictRegion = { + # Milena: this seems like it will only have the last entry in regionNames. + # Is this what we want? + dictRegion.update({ 'ind{}'.format(region): indRegion, '{}CellMask'.format(region): regionCellMask, 'maxEdgesInTransect{}'.format(region): maxEdgesInTransect, 'transectEdgeMaskSigns{}'.format(region): transectEdgeMaskSigns, - 'transectEdgeGlobalIDs{}'.format(region): transectEdgeGlobalIDs} + 'transectEdgeGlobalIDs{}'.format(region): transectEdgeGlobalIDs}) # Add Global regionCellMask=1 everywhere to make the algorithm # for the global moc similar to that of the regional moc dictRegion['GlobalCellMask'] = np.ones(np.size(latCell)) From f4cb87683634826c3375b352351c3d7d2d79ea9b Mon Sep 17 00:00:00 2001 From: "Phillip J. Wolfram" Date: Tue, 28 Mar 2017 10:20:53 -0700 Subject: [PATCH 3/6] Moves MOC computation from numpy to xarray This prevents memory errors and allows for dask-enabled computational threading. --- .../meridional_overturning_circulation.py | 141 +++++++----------- 1 file changed, 56 insertions(+), 85 deletions(-) diff --git a/mpas_analysis/ocean/meridional_overturning_circulation.py b/mpas_analysis/ocean/meridional_overturning_circulation.py index 89c1b79a9..44bb13e88 100644 --- a/mpas_analysis/ocean/meridional_overturning_circulation.py +++ b/mpas_analysis/ocean/meridional_overturning_circulation.py @@ -12,7 +12,7 @@ Last Modified ------------- -03/23/2017 +03/28/2017 """ import xarray as xr @@ -203,23 +203,18 @@ def _load_mesh(runStreams): # {{{ except ValueError: raise IOError('No MPAS-O restart file found: need at least one ' 'restart file for MOC calculation') - ncFile = netCDF4.Dataset(restartFile, mode='r') - dvEdge = ncFile.variables['dvEdge'][:] - areaCell = ncFile.variables['areaCell'][:] - refBottomDepth = ncFile.variables['refBottomDepth'][:] - latCell = ncFile.variables['latCell'][:] + ds = xr.open_dataset(restartFile) + latCell = ds.latCell latCell = latCell * rad_to_deg # convert to degree - ncFile.close() - nVertLevels = len(refBottomDepth) - refTopDepth = np.zeros(nVertLevels+1) - refTopDepth[1:nVertLevels+1] = refBottomDepth[0:nVertLevels] - refLayerThickness = np.zeros(nVertLevels) - refLayerThickness[0] = refBottomDepth[0] - refLayerThickness[1:nVertLevels] = (refBottomDepth[1:nVertLevels] - - refBottomDepth[0:nVertLevels-1]) + refTopDepth = xr.DataArray(np.hstack((0, ds.refBottomDepth.values)), + coords=[ds.nVertLevelsP1]) + refLayerThickness = xr.DataArray(np.hstack((ds.refBottomDepth[0].values, + ds.refBottomDepth.diff('nVertLevels').values)), + coords=[ds.nVertLevels]) - return dvEdge, areaCell, refBottomDepth, latCell, nVertLevels, \ - refTopDepth, refLayerThickness # }}} + return ds.dvEdge, ds.areaCell, ds.refBottomDepth, latCell, \ + ds. nVertLevels, ds.nVertLevelsP1, \ + refTopDepth, refLayerThickness # }}} def _compute_moc_climo_postprocess(config, runStreams, variableMap, calendar, @@ -228,7 +223,7 @@ def _compute_moc_climo_postprocess(config, runStreams, variableMap, calendar, '''compute mean MOC streamfunction as a post-process''' dvEdge, areaCell, refBottomDepth, latCell, nVertLevels, \ - refTopDepth, refLayerThickness = _load_mesh(runStreams) + nVertLevelsP1, refTopDepth, refLayerThickness = _load_mesh(runStreams) variableList = ['avgNormalVelocity', 'avgVertVelocityTop'] @@ -243,30 +238,19 @@ def _compute_moc_climo_postprocess(config, runStreams, variableMap, calendar, dictRegion = {} for region in regionNames: print '\n Reading region and transect mask for {}...'.format(region) - ncFileRegional = netCDF4.Dataset(regionMaskFiles, mode='r') - maxEdgesInTransect = \ - ncFileRegional.dimensions['maxEdgesInTransect'].size + dsRegion = xr.open_dataset(regionMaskFiles) transectEdgeMaskSigns = \ - ncFileRegional.variables['transectEdgeMaskSigns'][:, iRegion] - transectEdgeGlobalIDs = \ - ncFileRegional.variables['transectEdgeGlobalIDs'][iRegion, :] - regionCellMask = \ - ncFileRegional.variables['regionCellMasks'][:, iRegion] - ncFileRegional.close() + dsRegion.transectEdgeMaskSigns.isel(nTransects=iRegion) + regionCellMask = dsRegion.regionCellMasks.isel(nRegions=iRegion) iRegion += 1 - indRegion = np.where(regionCellMask == 1) - # Milena: this seems like it will only have the last entry in regionNames. - # Is this what we want? + # this will only have the last entry in regionNames dictRegion.update({ - 'ind{}'.format(region): indRegion, '{}CellMask'.format(region): regionCellMask, - 'maxEdgesInTransect{}'.format(region): maxEdgesInTransect, - 'transectEdgeMaskSigns{}'.format(region): transectEdgeMaskSigns, - 'transectEdgeGlobalIDs{}'.format(region): transectEdgeGlobalIDs}) + 'transectEdgeMaskSigns{}'.format(region): transectEdgeMaskSigns}) # Add Global regionCellMask=1 everywhere to make the algorithm # for the global moc similar to that of the regional moc - dictRegion['GlobalCellMask'] = np.ones(np.size(latCell)) + dictRegion['GlobalCellMask'] = xr.ones_like(regionCellMask).rename('GlobalCellMask') regionNames[:0] = ['Global'] # Compute and plot annual climatology of MOC streamfunction @@ -311,8 +295,7 @@ def _compute_moc_climo_postprocess(config, runStreams, variableMap, calendar, # Convert to numpy arrays horizontalVel = annualClimatology.avgNormalVelocity - verticalVel = annualClimatology.avgVertVelocityTop - velArea = verticalVel * areaCell[:, np.newaxis] + velArea = annualClimatology.avgVertVelocityTop * areaCell # Create dictionary for MOC climatology (NB: need this form # in order to convert it to xarray dataset later in the script) @@ -321,17 +304,11 @@ def _compute_moc_climo_postprocess(config, runStreams, variableMap, calendar, print ' Compute {} MOC...'.format(region) print ' Compute transport through region southern transect...' if region == 'Global': - transportZ = np.zeros(nVertLevels) + transportZ = xr.zeros_like(nVertLevels).rename('transportZ') else: - maxEdgesInTransect = \ - dictRegion['maxEdgesInTransect{}'.format(region)] - transectEdgeGlobalIDs = \ - dictRegion['transectEdgeGlobalIDs{}'.format(region)] transectEdgeMaskSigns = \ dictRegion['transectEdgeMaskSigns{}'.format(region)] - transportZ = _compute_transport(maxEdgesInTransect, - transectEdgeGlobalIDs, - transectEdgeMaskSigns, + transportZ = _compute_transport(transectEdgeMaskSigns, nVertLevels, dvEdge, refLayerThickness, horizontalVel) @@ -342,12 +319,13 @@ def _compute_moc_climo_postprocess(config, runStreams, variableMap, calendar, if region == 'Global': latBins = np.arange(-90.0, 90.1, latBinSize) else: - indRegion = dictRegion['ind{}'.format(region)] - latBins = latCell[indRegion] + cellMask = dictRegion['{}CellMask'.format(region)] + latBins = latCell.where(cellMask) latBins = np.arange(np.amin(latBins), np.amax(latBins)+latBinSize, latBinSize) - mocTop = _compute_moc(latBins, nVertLevels, latCell, + latBins = xr.DataArray(latBins, coords=[latBins], dims=['latBins']) + mocTop = _compute_moc(latBins, nVertLevelsP1, latCell, regionCellMask, transportZ, velArea) # Store computed MOC to dictionary @@ -365,7 +343,7 @@ def _compute_moc_climo_postprocess(config, runStreams, variableMap, calendar, latBins = mocDictClimo['lat{}'.format(region)]['data'] mocTop = mocDictClimo['moc{}'.format(region)]['data'] ncFile.createDimension('nx{}'.format(region), len(latBins)) - # create variables + # create variables x = ncFile.createVariable('lat{}'.format(region), 'f4', ('nx{}'.format(region),)) x.description = 'latitude bins for MOC {}'\ @@ -376,7 +354,7 @@ def _compute_moc_climo_postprocess(config, runStreams, variableMap, calendar, y.description = 'MOC {} streamfunction, annual'\ ' climatology'.format(region) y.units = 'Sv (10^6 m^3/s)' - # save variables + # save variables x[:] = latBins y[:, :] = mocTop depth = ncFile.createVariable('depth', 'f4', ('nz',)) @@ -414,7 +392,8 @@ def write_file(dsMOCTimeSeries): dsMOCTimeSeries.Time.attrs['units'] = 'days since 0001-01-01' dsMOCTimeSeries.mocAtlantic26.attrs['units'] = 'Sv (10^6 m^3/s)' dsMOCTimeSeries.mocAtlantic26.attrs['description'] = \ - 'Max MOC Atlantic streamfunction nearest to RAPID Array latitude (26.5N)' + ('Max MOC Atlantic streamfunction nearest to ' + 'RAPID Array latitude (26.5N)') dsMOCTimeSeries.to_netcdf(outputFileTseries) return dsMOCTimeSeries @@ -428,7 +407,7 @@ def write_file(dsMOCTimeSeries): 'avgVertVelocityTop'] dvEdge, areaCell, refBottomDepth, latCell, nVertLevels, \ - refTopDepth, refLayerThickness = _load_mesh(runStreams) + nVertLevelsP1, refTopDepth, refLayerThickness = _load_mesh(runStreams) ds = open_multifile_dataset(fileNames=dictTseries['inputFilesTseries'], calendar=calendar, @@ -440,11 +419,7 @@ def write_file(dsMOCTimeSeries): startDate=dictTseries['startDateTseries'], endDate=dictTseries['endDateTseries']) latAtlantic = mocDictClimo['latAtlantic']['data'] - dLat = latAtlantic - 26.5 - indlat26 = np.where(dLat == np.amin(np.abs(dLat))) - maxEdgesInTransect = dictRegion['maxEdgesInTransectAtlantic'] - transectEdgeGlobalIDs = dictRegion['transectEdgeGlobalIDsAtlantic'] transectEdgeMaskSigns = dictRegion['transectEdgeMaskSignsAtlantic'] regionCellMask = dictRegion['AtlanticCellMask'] @@ -487,17 +462,14 @@ def write_file(dsMOCTimeSeries): date = days_to_datetime(ds.Time[tIndex], calendar=calendar) print ' date: {:04d}-{:02d}'.format(date.year, date.month) horizontalVel = ds.avgNormalVelocity.isel(Time=tIndex) - verticalVel = ds.avgVertVelocityTop.isel(Time=tIndex) - velArea = verticalVel * areaCell[:, np.newaxis] - transportZ = _compute_transport(maxEdgesInTransect, - transectEdgeGlobalIDs, - transectEdgeMaskSigns, + velArea = ds.avgVertVelocityTop.isel(Time=tIndex) * areaCell + transportZ = _compute_transport(transectEdgeMaskSigns, nVertLevels, dvEdge, refLayerThickness, horizontalVel) - mocTop = _compute_moc(latAtlantic, nVertLevels, latCell, + mocTop = _compute_moc(latAtlantic, nVertLevelsP1, latCell, regionCellMask, transportZ, velArea) - mocAtlantic26 = np.amax(mocTop[:, indlat26]) + mocAtlantic26 = mocTop.sel(latBins=26.5, method='nearest').max() dictonary = {'Time': {'dims': ('Time'), 'data': [ds.Time[tIndex]]}, 'mocAtlantic26': {'dims': ('Time'), @@ -525,23 +497,17 @@ def write_file(dsMOCTimeSeries): # return (mocDictClimo, mocDictTseries) -def _compute_transport(maxEdgesInTransect, transectEdgeGlobalIDs, - transectEdgeMaskSigns, nz, dvEdge, refLayerThickness, +def _compute_transport(transectEdgeMaskSigns, nz, dvEdge, refLayerThickness, horizontalVel): # {{{ '''compute mass transport across southern transect of ocean basin''' - transportZEdge = np.zeros([nz, maxEdgesInTransect]) - for i in range(maxEdgesInTransect): - if transectEdgeGlobalIDs[i] == 0: - break - # subtract 1 because of python 0-indexing - iEdge = transectEdgeGlobalIDs[i] - 1 - transportZEdge[:, i] = horizontalVel[iEdge, :] * \ - transectEdgeMaskSigns[iEdge, np.newaxis] * \ - dvEdge[iEdge, np.newaxis] * \ - refLayerThickness[np.newaxis, :] - transportZ = transportZEdge.sum(axis=1) + transect = transectEdgeMaskSigns != 0 + transportZ = (refLayerThickness*horizontalVel.where(transect) + *transectEdgeMaskSigns.where(transect) + *dvEdge.where(transect) + ).sum('nEdges') + return transportZ # }}} @@ -549,17 +515,22 @@ def _compute_moc(latBins, nz, latCell, regionCellMask, transportZ, velArea): # {{{ '''compute meridionally integrated MOC streamfunction''' - - mocTop = np.zeros([np.size(latBins), nz+1]) - mocTop[0, range(1, nz+1)] = transportZ.cumsum() - for iLat in range(1, np.size(latBins)): - indlat = np.logical_and(np.logical_and( - regionCellMask == 1, latCell >= latBins[iLat-1]), - latCell < latBins[iLat]) - mocTop[iLat, :] = mocTop[iLat-1, :] + velArea[indlat, :].sum(axis=0) + # note computation should be more cleanly facilitated + # via a multidimensional groupby_bins when available + # (see https://github.com/pydata/xarray/pull/924) + + mocTop = xr.DataArray(np.zeros([len(nz), len(latBins)]), + coords=[nz, latBins]) + mocTop[1:, 0] = transportZ.cumsum(axis=0) + for iLat in np.arange(1, len(latBins)): + mask = np.logical_and( + np.logical_and(latCell >= latBins[iLat-1], + latCell < latBins[iLat]), + regionCellMask > 0) + mocTop[:, iLat] = velArea.where(mask).sum('nCells') + mocTop = mocTop.cumsum('latBins') # convert m^3/s to Sverdrup - mocTop = mocTop * m3ps_to_Sv - mocTop = mocTop.T + mocTop *= m3ps_to_Sv return mocTop # }}} # vim: foldmethod=marker ai ts=4 sts=4 et sw=4 ft=python From d477b9d0580afc4487ba93f64217bcf7ff67f31b Mon Sep 17 00:00:00 2001 From: "Phillip J. Wolfram" Date: Wed, 29 Mar 2017 15:59:55 -0600 Subject: [PATCH 4/6] Sets chunking limits to prevent dask memory errors --- mpas_analysis/shared/generalized_reader/generalized_reader.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mpas_analysis/shared/generalized_reader/generalized_reader.py b/mpas_analysis/shared/generalized_reader/generalized_reader.py index 78531d4a8..c27ecbf91 100644 --- a/mpas_analysis/shared/generalized_reader/generalized_reader.py +++ b/mpas_analysis/shared/generalized_reader/generalized_reader.py @@ -176,6 +176,9 @@ def open_multifile_dataset(fileNames, calendar, config, # select only the data in the specified range of dates ds = ds.sel(Time=slice(startDate, endDate)) + # limit chunk size to prevent memory error + ds = ds.chunk(config.getint('input', 'maxChunkSize')) + # private record of autoclose use ds.attrs['_autoclose'] = autoclose From abff7657226f298f22c50091ec1caa3532096534 Mon Sep 17 00:00:00 2001 From: "Phillip J. Wolfram" Date: Thu, 30 Mar 2017 09:50:57 -0600 Subject: [PATCH 5/6] Adds capability to preload results for performance --- config.default | 3 + .../meridional_overturning_circulation.py | 18 ++++-- .../generalized_reader/generalized_reader.py | 60 ++++++++++++++++++- 3 files changed, 72 insertions(+), 9 deletions(-) diff --git a/config.default b/config.default index 7c53f3081..14b738219 100644 --- a/config.default +++ b/config.default @@ -76,6 +76,9 @@ autocloseFileLimitFraction = 0.5 # with a single time slice. maxChunkSize = 10000 +# Reasonable size for an array to be cached into memory +reasonableArraySizeGB = 3.0 + [output] ## options related to writing out plots, intermediate cached data sets, logs, ## etc. diff --git a/mpas_analysis/ocean/meridional_overturning_circulation.py b/mpas_analysis/ocean/meridional_overturning_circulation.py index 44bb13e88..a4dae2b45 100644 --- a/mpas_analysis/ocean/meridional_overturning_circulation.py +++ b/mpas_analysis/ocean/meridional_overturning_circulation.py @@ -15,6 +15,7 @@ 03/28/2017 """ +from functools import partial import xarray as xr import numpy as np import netCDF4 @@ -28,7 +29,7 @@ from ..shared.io.utility import build_config_full_path from ..shared.generalized_reader.generalized_reader \ - import open_multifile_dataset + import open_multifile_dataset, preload from ..shared.timekeeping.utility import get_simulation_start_time, \ days_to_datetime @@ -293,9 +294,11 @@ def _compute_moc_climo_postprocess(config, runStreams, variableMap, calendar, # Compute annual climatology annualClimatology = ds.mean('Time') - # Convert to numpy arrays - horizontalVel = annualClimatology.avgNormalVelocity - velArea = annualClimatology.avgVertVelocityTop * areaCell + # Cache results if their size is reasonable + load = partial(preload, maxgb=config.getfloat('input', + 'reasonableArraySizeGB')) + horizontalVel = load(annualClimatology.avgNormalVelocity) + velArea = load(annualClimatology.avgVertVelocityTop * areaCell) # Create dictionary for MOC climatology (NB: need this form # in order to convert it to xarray dataset later in the script) @@ -461,8 +464,11 @@ def write_file(dsMOCTimeSeries): first = False date = days_to_datetime(ds.Time[tIndex], calendar=calendar) print ' date: {:04d}-{:02d}'.format(date.year, date.month) - horizontalVel = ds.avgNormalVelocity.isel(Time=tIndex) - velArea = ds.avgVertVelocityTop.isel(Time=tIndex) * areaCell + # Cache results if their size is reasonable + load = partial(preload, maxgb=config.getfloat('input', + 'reasonableArraySizeGB')) + horizontalVel = load(ds.avgNormalVelocity.isel(Time=tIndex)) + velArea = load(ds.avgVertVelocityTop.isel(Time=tIndex) * areaCell) transportZ = _compute_transport(transectEdgeMaskSigns, nVertLevels, dvEdge, refLayerThickness, diff --git a/mpas_analysis/shared/generalized_reader/generalized_reader.py b/mpas_analysis/shared/generalized_reader/generalized_reader.py index c27ecbf91..4b1f270f6 100644 --- a/mpas_analysis/shared/generalized_reader/generalized_reader.py +++ b/mpas_analysis/shared/generalized_reader/generalized_reader.py @@ -176,15 +176,41 @@ def open_multifile_dataset(fileNames, calendar, config, # select only the data in the specified range of dates ds = ds.sel(Time=slice(startDate, endDate)) - # limit chunk size to prevent memory error - ds = ds.chunk(config.getint('input', 'maxChunkSize')) - # private record of autoclose use ds.attrs['_autoclose'] = autoclose return ds # }}} +def preload(xarrayobj, maxgb): # {{{ + """ + Loads the xarray object into memory if it is of an acceptable size. + + Parameters + ---------- + xarrayobj : xarray.DataSet or xarray.DataArray object + xarray object to load into memory if it is of a reasonable size. + + maxgb : float + Maximum size of array that can be loaded into memory. + + Returns + ------- + xarrayobj : xarray.DataSet or xarray.DataArray object + + Authors + ------- + Phillip J. Wolfram + + Last modified + ------------- + 03/30/2017 + """ + if _size_gb(xarrayobj) < maxgb: + xarrayobj.load() + return xarrayobj # }}} + + def _preprocess(ds, calendar, simulationStartTime, timeVariableName, variableList, selValues, iselValues, variableMap, startDate, endDate, maxChunkSize): # {{{ @@ -301,6 +327,34 @@ def _preprocess(ds, calendar, simulationStartTime, timeVariableName, return ds # }}} +def _size_gb(xarrayobj): #{{{ + """ + Computes the size of an xarray object in GB. + + Parameters + ---------- + xarrayobj : xarray.DataSet or xarray.DataArray object + xarray object to compute size in GB. + + Returns + ------- + gbs : float + size of xarrayobj in GB. + + Authors + ------- + Phillip J. Wolfram + + Last modified + ------------- + 03/30/2017 + """ + + gbs = xarrayobj.nbytes * 2 ** -30 + + return gbs #}}} + + def _map_variable_name(variableName, ds, variableMap): # {{{ """ Given a `variableName` in a `variableMap` and an xarray `ds`, From 43f37126c53198e196b3bfa64acfff6dab44f31d Mon Sep 17 00:00:00 2001 From: "Phillip J. Wolfram" Date: Sun, 2 Apr 2017 16:46:40 -0600 Subject: [PATCH 6/6] Optimizes calclations using xarray datastructures --- .../meridional_overturning_circulation.py | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/mpas_analysis/ocean/meridional_overturning_circulation.py b/mpas_analysis/ocean/meridional_overturning_circulation.py index a4dae2b45..820bf89a3 100644 --- a/mpas_analysis/ocean/meridional_overturning_circulation.py +++ b/mpas_analysis/ocean/meridional_overturning_circulation.py @@ -314,7 +314,7 @@ def _compute_moc_climo_postprocess(config, runStreams, variableMap, calendar, transportZ = _compute_transport(transectEdgeMaskSigns, nVertLevels, dvEdge, refLayerThickness, - horizontalVel) + horizontalVel, load) regionCellMask = dictRegion['{}CellMask'.format(region)] latBinSize = config.getExpression(sectionName, @@ -472,7 +472,7 @@ def write_file(dsMOCTimeSeries): transportZ = _compute_transport(transectEdgeMaskSigns, nVertLevels, dvEdge, refLayerThickness, - horizontalVel) + horizontalVel, load) mocTop = _compute_moc(latAtlantic, nVertLevelsP1, latCell, regionCellMask, transportZ, velArea) mocAtlantic26 = mocTop.sel(latBins=26.5, method='nearest').max() @@ -504,17 +504,17 @@ def write_file(dsMOCTimeSeries): def _compute_transport(transectEdgeMaskSigns, nz, dvEdge, refLayerThickness, - horizontalVel): # {{{ + horizontalVel, load): # {{{ '''compute mass transport across southern transect of ocean basin''' - transect = transectEdgeMaskSigns != 0 - transportZ = (refLayerThickness*horizontalVel.where(transect) - *transectEdgeMaskSigns.where(transect) - *dvEdge.where(transect) - ).sum('nEdges') + transect = load(transectEdgeMaskSigns != 0) + transportZ = (refLayerThickness*( + horizontalVel.where(transect, drop=True)* + transectEdgeMaskSigns.where(transect, drop=True)* + dvEdge.where(transect, drop=True)).sum('nEdges')) - return transportZ # }}} + return load(transportZ) # }}} def _compute_moc(latBins, nz, latCell, regionCellMask, transportZ, @@ -525,18 +525,18 @@ def _compute_moc(latBins, nz, latCell, regionCellMask, transportZ, # via a multidimensional groupby_bins when available # (see https://github.com/pydata/xarray/pull/924) - mocTop = xr.DataArray(np.zeros([len(nz), len(latBins)]), - coords=[nz, latBins]) - mocTop[1:, 0] = transportZ.cumsum(axis=0) + mask = regionCellMask.values > 0 + velArea = velArea.values[mask, :] + latCell = latCell.values[mask] + mocTop = np.zeros([len(nz), len(latBins)]) + mocTop[1:, 0] = transportZ.values.cumsum(axis=0) for iLat in np.arange(1, len(latBins)): - mask = np.logical_and( - np.logical_and(latCell >= latBins[iLat-1], - latCell < latBins[iLat]), - regionCellMask > 0) - mocTop[:, iLat] = velArea.where(mask).sum('nCells') - mocTop = mocTop.cumsum('latBins') + mask = np.logical_and(latCell >= latBins[iLat-1].values, + latCell < latBins[iLat].values) + mocTop[:, iLat] = mocTop[:, iLat-1] + \ + velArea[mask, :].sum(axis=0) # convert m^3/s to Sverdrup - mocTop *= m3ps_to_Sv + mocTop = xr.DataArray(mocTop*m3ps_to_Sv, coords=[nz, latBins]) return mocTop # }}} # vim: foldmethod=marker ai ts=4 sts=4 et sw=4 ft=python