Skip to content

Commit

Permalink
handles memory errors
Browse files Browse the repository at this point in the history
  • Loading branch information
François Laurent committed Mar 19, 2018
1 parent a3d91ff commit 7d25b06
Show file tree
Hide file tree
Showing 15 changed files with 151 additions and 42 deletions.
2 changes: 1 addition & 1 deletion doc/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ Where to start
:maxdepth: 1
:hidden:

plugins
api
.. plugins
2 changes: 1 addition & 1 deletion doc/inference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ The posterior probability used to infer the local diffusivity :math:`D_i` and po
P(D_i, V_i | T_i) \propto \prod_j \frac{\textrm{exp}\left(-\frac{\left(\Delta\textbf{r}_j + \frac{D_i\nabla V_i\Delta t_j}{k_BT}\right)^2}{4\left(D_i+\frac{\sigma^2}{\Delta t_j}\right)\Delta t_j}\right)}{4\pi\left(D_i+\frac{\sigma^2}{\Delta t_j}\right)\Delta t_j}P_S(\textbf{D})P_S(\textbf{V})
:math:`P_S(\textbf{D})` and :math:`P_S(\textbf{V})` are smoothing factors for the diffusivity and potential energy respectively.
The :math:`P_S(\textbf{D})` smoothing factor is also available for the other inference mode.
The :math:`P_S(\textbf{D})` smoothing factor is also available for the other inference modes.
These factors are described in a :ref:`dedicated section <inference_smoothing>`.

This mode supports the :ref:`Jeffreys' prior <inference_jeffreys>`.
Expand Down
11 changes: 8 additions & 3 deletions doc/tessellation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,13 @@ The *grid* method
The corresponding tessellation class is :class:`~tramway.tessellation.grid.RegularMesh`.
From the command-line, a key argument is ``--location-count``.
The :func:`~tramway.helper.tessellation.tessellate` helper function converts the desired average location count per cell into a probability (:attr:`~tramway.tessellation.grid.RegularMesh.avg_probability`) that :class:`~tramway.tessellation.grid.RegularMesh` in turn considers to fit the size of the cells.
From the command-line, key arguments are ``--location-count`` and ``--distance``.
Per default, ``--distance`` is set to the average translocation distance.
If ``--location-count`` is not defined, neighbour cells are spaced by twice the value given by ``--distance``.
If ``--location-count`` is defined, the :func:`~tramway.helper.tessellation.tessellate` helper function converts the desired average location count per cell into a probability (:attr:`~tramway.tessellation.grid.RegularMesh.avg_probability`) that :class:`~tramway.tessellation.grid.RegularMesh` in turn considers to fit the size of the cells.
The cell size (or inter-cell distance) is bounded by :math:`0.8` times the value given by ``--distance``.
Distance-based parameters are not used by this plugin.
Expand All @@ -128,7 +133,7 @@ The *kmeans* method
The corresponding tessellation class is :class:`~tramway.tessellation.kmeans.KMeansMesh`.
The algorithm is initialized with a *grid* tessellation.
As a consequence cells scale wrt the `avg_probability` argument (or command-line option ``--location-count``).
As a consequence cells scale wrt the `avg_probability` argument (or command-line option ``--location-count``) or `ref_distance` argument (or command-line option ``--distance``).
The *gwr* method
Expand Down
36 changes: 33 additions & 3 deletions tramway/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,17 @@
def _parse_args(args):
kwargs = dict(args.__dict__)
del kwargs['func']
input_files = []
try:
input_files = kwargs.pop('input')
input_files[0]
except (KeyError, IndexError):
print('please specify input file(s) with -i')
except KeyError:
pass
try:
input_files += kwargs.pop('input_file')
except KeyError:
pass
if not input_files:
print('please specify input file(s)')
sys.exit(1)
for input_file in input_files:
if not os.path.isfile(input_file):
Expand Down Expand Up @@ -265,6 +271,10 @@ def main():
method_parser.add_argument('--seed', nargs='?', default=False, \
help='random generator seed (for testing purposes)')
translations = add_arguments(method_parser, setup.get('make_arguments', {}), name=method)
try:
method_parser.add_argument('input_file', nargs='*', help='path to input file(s)')
except:
pass
method_parser.set_defaults(func=_sample(method, translations))


Expand All @@ -291,6 +301,10 @@ def main():
pass
mode_parser.add_argument('--seed', nargs='?', default=False, help='random generator seed (for testing purposes)')
mode_parser.add_argument('--profile', nargs='?', default=False, help='profile each individual child process if any')
try:
mode_parser.add_argument('input_file', nargs='?', help='path to input file')
except:
pass
mode_parser.set_defaults(func=_infer(mode))


Expand All @@ -299,6 +313,10 @@ def main():
dump_parser.set_defaults(func=_dump_rwa)
for arg1, arg2, kwargs in global_arguments:
dump_parser.add_argument(arg1, arg2, dest=arg1[1]+'post', **kwargs)
try:
dump_parser.add_argument('input_file', nargs='?', help='path to input file')
except:
pass


# extract features
Expand All @@ -317,6 +335,10 @@ def main():
curl_parser.add_argument('-L', '--label', '--input-label', help='comma-separated list of input labels')
curl_parser.add_argument('-l', '--output-label', help='output label')
curl_parser.add_argument('-r', '--radius', '-d', '--distance', type=int, default=1, help='radius in number of cells')
try:
curl_parser.add_argument('input_file', nargs='?', help='path to input file')
except:
pass


# plot artefacts
Expand All @@ -338,6 +360,10 @@ def main():
cells_parser.add_argument('-D', '--delaunay', action='store_true', help='plot the Delaunay graph instead of the Voronoi')
cells_parser.add_argument('-H', '--histogram', help="plot/print additional histogram(s); any combination of 'c' (cell count histogram), 'd' (distance between neighboring centers) and 'p' (distance between any pair of locations from distinct neighboring centers)")
cells_parser.add_argument('-p', '--print', choices=fig_formats, help='print figure(s) on disk instead of plotting')
try:
cells_parser.add_argument('input_file', nargs='?', help='path to input file')
except:
pass

# plot map(s)
map_parser = psub.add_parser('map')
Expand All @@ -351,6 +377,10 @@ def main():
map_parser.add_argument('-c', '--clip', type=float, nargs='?', default=0., help='clip map by absolute values; clipping threshold can be specified as a number of interquartile distances above the median')
map_parser.add_argument('-cb', '--colorbar', action='store_false', help='do not plot colorbar')
map_parser.add_argument('-p', '--print', choices=fig_formats, help='print figure(s) on disk instead of plotting')
try:
map_parser.add_argument('input_file', nargs='?', help='path to input file')
except:
pass



Expand Down
7 changes: 6 additions & 1 deletion tramway/core/analyses/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ def autoindex(self, pattern=None):
i += 1
return f(i)

def add(self, analysis, label=None, comment=None):
def add(self, analysis, label=None, comment=None, raw=False):
"""
Add an analysis.
Expand All @@ -192,8 +192,13 @@ def add(self, analysis, label=None, comment=None):
comment (str): associated comment.
raw (bool): if `analysis` is not an :class:`Analyses`, it is wrapped into
such a container object; set `raw` to ``True`` to prevent wrapping.
"""
label = self.autoindex(label)
if not (raw or isinstance(analysis, Analyses)):
analysis = type(self)(analysis)
self.instances[label] = analysis
if comment:
self.comments[label] = comment
Expand Down
3 changes: 2 additions & 1 deletion tramway/core/hdf5/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@
else:
regular_mesh_exposes = voronoi_exposes + \
['_diagonal_adjacency', 'lower_bound', 'upper_bound', 'count_per_dim', \
'min_probability', 'max_probability', 'avg_probability', 'grid']
'min_probability', 'max_probability', 'avg_probability', \
'min_distance', 'avg_distance', 'grid']
hdf5_storable(default_storable(RegularMesh, exposes=regular_mesh_exposes), agnostic=True)

# KDTreeMesh
Expand Down
7 changes: 6 additions & 1 deletion tramway/core/xyt.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,12 @@ def load_xyt(path, columns=['n', 'x', 'y', 't'], concat=True, return_paths=False
sample_dt = sample['t'].diff()[1:]
if not all(0 < sample_dt):
if any(0 == sample_dt):
print(sample)
try:
conflicting = sample_dt.values == 0
conflicting = np.logical_or(np.r_[False, conflicting], np.r_[conflicting, False])
print(sample.loc[conflicting])
except:
pass
raise ValueError("some indices refer to multiple simultaneous trajectories in table: '{}'".format(f))
else:
warnings.warn(EfficiencyWarning("table '{}' is not properly ordered".format(f)))
Expand Down
10 changes: 4 additions & 6 deletions tramway/helper/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@
# in the case matplotlib's backend is interactive


#sub_extensions = dict([(ext.upper(), ext) for ext in ['d', 'df', 'dd', 'dv', 'dx']])


def infer(cells, mode='D', output_file=None, partition={}, verbose=False, \
localization_error=None, diffusivity_prior=None, potential_prior=None, jeffreys_prior=None, \
Expand Down Expand Up @@ -135,12 +133,12 @@ def infer(cells, mode='D', output_file=None, partition={}, verbose=False, \
if isinstance(input_label, (tuple, list)):
if input_label[1:]:
analysis = all_analyses
for label in input_label[:-1]:
for label in input_label:#[:-1]
analysis = analysis[label]
cells = analysis.data
analysis = analysis[input_label[-1]]
if not isinstance(cells, CellStats):
cells = analysis.data
#analysis = analysis[input_label[-1]]
#if not isinstance(cells, CellStats):
# cells = analysis.data
else:
input_label = input_label[0]
if cells is None:
Expand Down
22 changes: 14 additions & 8 deletions tramway/helper/tessellation.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,14 +425,20 @@ def _filter_i(voronoi, cell, x):
partition_kwargs['filter'] = _filter_fghi
if 'filter_descriptors_only' not in partition_kwargs:
partition_kwargs['filter_descriptors_only'] = True
if knn is None:
cell_index = tess.cell_index(xyt_data, **partition_kwargs)
else:
if 'min_location_count' not in partition_kwargs:
partition_kwargs['min_location_count'] = min_location_count
if 'metric' not in partition_kwargs:
partition_kwargs['metric'] = 'euclidean'
cell_index = tess.cell_index(xyt_data, knn=knn, **partition_kwargs)
try:
if knn is None:
cell_index = tess.cell_index(xyt_data, **partition_kwargs)
else:
if 'min_location_count' not in partition_kwargs:
partition_kwargs['min_location_count'] = min_location_count
if 'metric' not in partition_kwargs:
partition_kwargs['metric'] = 'euclidean'
cell_index = tess.cell_index(xyt_data, knn=knn, **partition_kwargs)
except MemoryError:
if verbose:
print(traceback.format_exc())
warn('memory error: cannot assign points to cells', RuntimeWarning)
cell_index = None

stats = CellStats(xyt_data, tess, cell_index)

Expand Down
11 changes: 8 additions & 3 deletions tramway/inference/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import numpy as np
import pandas as pd
import scipy.sparse as sparse
import scipy.spatial.qhull
from copy import copy
from collections import OrderedDict
from multiprocessing import Pool, Lock
Expand Down Expand Up @@ -430,10 +431,11 @@ def group(self, ngroups=None, max_cell_count=None, cell_centers=None, \
objects as cells.
"""
ncells = self.adjacency.shape[0]
new = copy(self)
if ngroups or max_cell_count or cell_centers is not None:
if ngroups or (max_cell_count and max_cell_count < ncells) or cell_centers is not None:
any_cell = next(iter(self.cells.values()))
points = np.zeros((self.adjacency.shape[0], self.dim), dtype=any_cell.center.dtype)
points = np.zeros((ncells, self.dim), dtype=any_cell.center.dtype)
ok = np.zeros(points.shape[0], dtype=bool)
for i in self.cells:
points[i] = self.cells[i].center
Expand All @@ -452,6 +454,8 @@ def group(self, ngroups=None, max_cell_count=None, cell_centers=None, \
grid = kmeans.KMeansMesh(avg_probability=avg_probability)
try:
grid.tessellate(points[ok])
except scipy.spatial.qhull.QhullError:
return new
except ValueError:
print(points)
raise
Expand Down Expand Up @@ -498,7 +502,8 @@ def group(self, ngroups=None, max_cell_count=None, cell_centers=None, \
new.ccount = self.ccount
# _tcount is not supposed to change
else:
raise KeyError('`group` expects more input arguments')
pass
#raise KeyError('`group` expects more input arguments')
return new

def run(self, function, *args, **kwargs):
Expand Down
43 changes: 40 additions & 3 deletions tramway/tessellation/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -741,13 +741,46 @@ def cell_index(self, points, format=None, select=None, knn=None,
min_nn, max_nn = knn, None
points = self.scaler.scale_point(points, inplace=False)
X = self.descriptors(points, asarray=True)
D = cdist(X, self._cell_centers, metric, **kwargs)
Y = self._cell_centers
try:
D = cdist(X, Y, metric, **kwargs)
except MemoryError as memory_error:
# slice X to process less rows at a time
if metric != 'euclidean':
raise #NotImplementedError
K = np.zeros(X.shape[0], dtype=int)
X2 = np.sum(X * X, axis=1, keepdims=True).astype(np.float32)
Y2 = np.sum(Y * Y, axis=1, keepdims=True).astype(np.float32)
X, Y = X.astype(np.float32), Y.astype(np.float32)
n = 0
while True:
n += 1
block = int(ceil(X.shape[0] * 2**(-n)))
try:
np.empty((block, Y.shape[0]), dtype=X.dtype)
except MemoryError:
pass # continue
else:
break
n += 2 # safer
block = int(ceil(X.shape[0] * 2**(-n)))
for i in range(0, X.shape[0], block):
j = min(i+block, X2.size)
Di = np.dot(np.float32(-2.)* X[i:j], Y.T)
Di += X2[i:j]
Di += Y2.T
K[i:j] = np.argmin(Di, axis=1)
D = None
else:
K = None
#
ncells = self._cell_centers.shape[0]
if format == 'force array':
min_nn = None
format = 'array' # for later call to :func:`format_cell_index`
if max_nn or min_nn or min_location_count or filter is not None:
K = np.argmin(D, axis=1) # cell indices
if K is None:
K = np.argmin(D, axis=1) # cell indices
nonempty, positive_count = np.unique(K, return_counts=True)
if filter is not None:
for c in nonempty:
Expand All @@ -770,6 +803,8 @@ def cell_index(self, points, format=None, select=None, knn=None,
if max_nn:
large, = (max_nn < positive_count).nonzero()
if large.size:
if D is None:
raise memory_error
for c in nonempty[large]:
cell = K == c
I = np.argsort(D[cell, c])
Expand All @@ -785,6 +820,8 @@ def cell_index(self, points, format=None, select=None, knn=None,
if min_location_count:
small = np.logical_and(small, min_location_count <= count)
if np.any(small):
if D is None:
raise memory_error
# small and missing cells
I = np.argsort(D[:,small], axis=0)[:min_nn].flatten()
small, = small.nonzero()
Expand All @@ -801,7 +838,7 @@ def cell_index(self, points, format=None, select=None, knn=None,
Jc = Jc[0 <= Jc]
#
K = (np.concatenate((I, Ic)), np.concatenate((J, Jc)))
else:
elif K is None:
K = np.argmin(D, axis=1) # cell indices
point_count = points.shape[0]
#if isinstance(points, pd.DataFrame):
Expand Down

0 comments on commit 7d25b06

Please sign in to comment.