handles memory errors

DecBayComp · Mar 19, 2018 · 7d25b06 · 7d25b06
1 parent a3d91ff
commit 7d25b06
Show file tree

Hide file tree

Showing 15 changed files with 151 additions and 42 deletions.
diff --git a/doc/index.rst b/doc/index.rst
@@ -96,7 +96,7 @@ Where to start
 	:maxdepth: 1
 	:hidden:
 
-	plugins
 	api
+..	plugins
 
 
diff --git a/doc/inference.rst b/doc/inference.rst
@@ -264,7 +264,7 @@ The posterior probability used to infer the local diffusivity :math:`D_i` and po
 	P(D_i, V_i | T_i) \propto \prod_j \frac{\textrm{exp}\left(-\frac{\left(\Delta\textbf{r}_j + \frac{D_i\nabla V_i\Delta t_j}{k_BT}\right)^2}{4\left(D_i+\frac{\sigma^2}{\Delta t_j}\right)\Delta t_j}\right)}{4\pi\left(D_i+\frac{\sigma^2}{\Delta t_j}\right)\Delta t_j}P_S(\textbf{D})P_S(\textbf{V})
 
 :math:`P_S(\textbf{D})` and :math:`P_S(\textbf{V})` are smoothing factors for the diffusivity and potential energy respectively.
-The :math:`P_S(\textbf{D})` smoothing factor is also available for the other inference mode.
+The :math:`P_S(\textbf{D})` smoothing factor is also available for the other inference modes.
 These factors are described in a :ref:`dedicated section <inference_smoothing>`.
 
 This mode supports the :ref:`Jeffreys' prior <inference_jeffreys>`.

diff --git a/doc/tessellation.rst b/doc/tessellation.rst
@@ -100,8 +100,13 @@ The *grid* method
 
 The corresponding tessellation class is :class:`~tramway.tessellation.grid.RegularMesh`.
 
-From the command-line, a key argument is ``--location-count``.
-The :func:`~tramway.helper.tessellation.tessellate` helper function converts the desired average location count per cell into a probability (:attr:`~tramway.tessellation.grid.RegularMesh.avg_probability`) that :class:`~tramway.tessellation.grid.RegularMesh` in turn considers to fit the size of the cells.
+From the command-line, key arguments are ``--location-count`` and ``--distance``.
+
+Per default, ``--distance`` is set to the average translocation distance.
+If ``--location-count`` is not defined, neighbour cells are spaced by twice the value given by ``--distance``.
+
+If ``--location-count`` is defined, the :func:`~tramway.helper.tessellation.tessellate` helper function converts the desired average location count per cell into a probability (:attr:`~tramway.tessellation.grid.RegularMesh.avg_probability`) that :class:`~tramway.tessellation.grid.RegularMesh` in turn considers to fit the size of the cells.
+The cell size (or inter-cell distance) is bounded by :math:`0.8` times the value given by ``--distance``.
 
 Distance-based parameters are not used by this plugin.
 
@@ -128,7 +133,7 @@ The *kmeans* method
 The corresponding tessellation class is :class:`~tramway.tessellation.kmeans.KMeansMesh`.
 
 The algorithm is initialized with a *grid* tessellation.
-As a consequence cells scale wrt the `avg_probability` argument (or command-line option ``--location-count``).
+As a consequence cells scale wrt the `avg_probability` argument (or command-line option ``--location-count``) or `ref_distance` argument (or command-line option ``--distance``).
 
 
 The *gwr* method

diff --git a/tramway/__main__.py b/tramway/__main__.py
@@ -28,11 +28,17 @@
 def _parse_args(args):
 	kwargs = dict(args.__dict__)
 	del kwargs['func']
+	input_files = []
 	try:
 		input_files = kwargs.pop('input')
-		input_files[0]
-	except (KeyError, IndexError):
-		print('please specify input file(s) with -i')
+	except KeyError:
+		pass
+	try:
+		input_files += kwargs.pop('input_file')
+	except KeyError:
+		pass
+	if not input_files:
+		print('please specify input file(s)')
 		sys.exit(1)
 	for input_file in input_files:
 		if not os.path.isfile(input_file):
@@ -265,6 +271,10 @@ def main():
 		method_parser.add_argument('--seed', nargs='?', default=False, \
 			help='random generator seed (for testing purposes)')
 		translations = add_arguments(method_parser, setup.get('make_arguments', {}), name=method)
+		try:
+			method_parser.add_argument('input_file', nargs='*', help='path to input file(s)')
+		except:
+			pass
 		method_parser.set_defaults(func=_sample(method, translations))
 
 
@@ -291,6 +301,10 @@ def main():
 			pass
 		mode_parser.add_argument('--seed', nargs='?', default=False, help='random generator seed (for testing purposes)')
 		mode_parser.add_argument('--profile', nargs='?', default=False, help='profile each individual child process if any')
+		try:
+			mode_parser.add_argument('input_file', nargs='?', help='path to input file')
+		except:
+			pass
 		mode_parser.set_defaults(func=_infer(mode))
 
 
@@ -299,6 +313,10 @@ def main():
 	dump_parser.set_defaults(func=_dump_rwa)
 	for arg1, arg2, kwargs in global_arguments:
 		dump_parser.add_argument(arg1, arg2, dest=arg1[1]+'post', **kwargs)
+	try:
+		dump_parser.add_argument('input_file', nargs='?', help='path to input file')
+	except:
+		pass
 
 
 	# extract features
@@ -317,6 +335,10 @@ def main():
 	curl_parser.add_argument('-L', '--label', '--input-label', help='comma-separated list of input labels')
 	curl_parser.add_argument('-l', '--output-label', help='output label')
 	curl_parser.add_argument('-r', '--radius', '-d', '--distance', type=int, default=1, help='radius in number of cells')
+	try:
+		curl_parser.add_argument('input_file', nargs='?', help='path to input file')
+	except:
+		pass
 
 
 	# plot artefacts
@@ -338,6 +360,10 @@ def main():
 	cells_parser.add_argument('-D', '--delaunay', action='store_true', help='plot the Delaunay graph instead of the Voronoi')
 	cells_parser.add_argument('-H', '--histogram', help="plot/print additional histogram(s); any combination of 'c' (cell count histogram), 'd' (distance between neighboring centers) and 'p' (distance between any pair of locations from distinct neighboring centers)")
 	cells_parser.add_argument('-p', '--print', choices=fig_formats, help='print figure(s) on disk instead of plotting')
+	try:
+		cells_parser.add_argument('input_file', nargs='?', help='path to input file')
+	except:
+		pass
 
 	# plot map(s)
 	map_parser = psub.add_parser('map')
@@ -351,6 +377,10 @@ def main():
 	map_parser.add_argument('-c', '--clip', type=float, nargs='?', default=0., help='clip map by absolute values; clipping threshold can be specified as a number of interquartile distances above the median')
 	map_parser.add_argument('-cb', '--colorbar', action='store_false', help='do not plot colorbar')
 	map_parser.add_argument('-p', '--print', choices=fig_formats, help='print figure(s) on disk instead of plotting')
+	try:
+		map_parser.add_argument('input_file', nargs='?', help='path to input file')
+	except:
+		pass
 
 
 

diff --git a/tramway/core/analyses/base.py b/tramway/core/analyses/base.py
@@ -177,7 +177,7 @@ def autoindex(self, pattern=None):
 				i += 1
 		return f(i)
 
-	def add(self, analysis, label=None, comment=None):
+	def add(self, analysis, label=None, comment=None, raw=False):
 		"""
 		Add an analysis.
 
@@ -192,8 +192,13 @@ def add(self, analysis, label=None, comment=None):
 
 			comment (str): associated comment.
 
+			raw (bool): if `analysis` is not an :class:`Analyses`, it is wrapped into
+				such a container object; set `raw` to ``True`` to prevent wrapping.
+
 		"""
 		label = self.autoindex(label)
+		if not (raw or isinstance(analysis, Analyses)):
+			analysis = type(self)(analysis)
 		self.instances[label] = analysis
 		if comment:
 			self.comments[label] = comment

diff --git a/tramway/core/hdf5/rules.py b/tramway/core/hdf5/rules.py
@@ -37,7 +37,8 @@
 else:
 	regular_mesh_exposes = voronoi_exposes + \
 		['_diagonal_adjacency', 'lower_bound', 'upper_bound', 'count_per_dim', \
-			'min_probability', 'max_probability', 'avg_probability', 'grid']
+			'min_probability', 'max_probability', 'avg_probability', \
+			'min_distance', 'avg_distance', 'grid']
 	hdf5_storable(default_storable(RegularMesh, exposes=regular_mesh_exposes), agnostic=True)
 
 # KDTreeMesh

diff --git a/tramway/core/xyt.py b/tramway/core/xyt.py
@@ -73,7 +73,12 @@ def load_xyt(path, columns=['n', 'x', 'y', 't'], concat=True, return_paths=False
 			sample_dt = sample['t'].diff()[1:]
 			if not all(0 < sample_dt):
 				if any(0 == sample_dt):
-					print(sample)
+					try:
+						conflicting = sample_dt.values == 0
+						conflicting = np.logical_or(np.r_[False, conflicting], np.r_[conflicting, False])
+						print(sample.loc[conflicting])
+					except:
+						pass
 					raise ValueError("some indices refer to multiple simultaneous trajectories in table: '{}'".format(f))
 				else:
 					warnings.warn(EfficiencyWarning("table '{}' is not properly ordered".format(f)))

diff --git a/tramway/helper/inference.py b/tramway/helper/inference.py
@@ -25,8 +25,6 @@
 # in the case matplotlib's backend is interactive
 
 
-#sub_extensions = dict([(ext.upper(), ext) for ext in ['d', 'df', 'dd', 'dv', 'dx']])
-
 
 def infer(cells, mode='D', output_file=None, partition={}, verbose=False, \
 	localization_error=None, diffusivity_prior=None, potential_prior=None, jeffreys_prior=None, \
@@ -135,12 +133,12 @@ def infer(cells, mode='D', output_file=None, partition={}, verbose=False, \
 		if isinstance(input_label, (tuple, list)):
 			if input_label[1:]:
 				analysis = all_analyses
-				for label in input_label[:-1]:
+				for label in input_label:#[:-1]
 					analysis = analysis[label]
 				cells = analysis.data
-				analysis = analysis[input_label[-1]]
-				if not isinstance(cells, CellStats):
-					cells = analysis.data
+				#analysis = analysis[input_label[-1]]
+				#if not isinstance(cells, CellStats):
+				#	cells = analysis.data
 			else:
 				input_label = input_label[0]
 		if cells is None:

diff --git a/tramway/helper/tessellation.py b/tramway/helper/tessellation.py
@@ -425,14 +425,20 @@ def _filter_i(voronoi, cell, x):
 		partition_kwargs['filter'] = _filter_fghi
 		if 'filter_descriptors_only' not in partition_kwargs:
 			partition_kwargs['filter_descriptors_only'] = True
-	if knn is None:
-		cell_index = tess.cell_index(xyt_data, **partition_kwargs)
-	else:
-		if 'min_location_count' not in partition_kwargs:
-			partition_kwargs['min_location_count'] = min_location_count
-		if 'metric' not in partition_kwargs:
-			partition_kwargs['metric'] = 'euclidean'
-		cell_index = tess.cell_index(xyt_data, knn=knn, **partition_kwargs)
+	try:
+		if knn is None:
+			cell_index = tess.cell_index(xyt_data, **partition_kwargs)
+		else:
+			if 'min_location_count' not in partition_kwargs:
+				partition_kwargs['min_location_count'] = min_location_count
+			if 'metric' not in partition_kwargs:
+				partition_kwargs['metric'] = 'euclidean'
+			cell_index = tess.cell_index(xyt_data, knn=knn, **partition_kwargs)
+	except MemoryError:
+		if verbose:
+			print(traceback.format_exc())
+		warn('memory error: cannot assign points to cells', RuntimeWarning)
+		cell_index = None
 
 	stats = CellStats(xyt_data, tess, cell_index)
 

diff --git a/tramway/inference/base.py b/tramway/inference/base.py
@@ -18,6 +18,7 @@
 import numpy as np
 import pandas as pd
 import scipy.sparse as sparse
+import scipy.spatial.qhull
 from copy import copy
 from collections import OrderedDict
 from multiprocessing import Pool, Lock
@@ -430,10 +431,11 @@ def group(self, ngroups=None, max_cell_count=None, cell_centers=None, \
 				objects as cells.
 
 		"""
+		ncells = self.adjacency.shape[0]
 		new = copy(self)
-		if ngroups or max_cell_count or cell_centers is not None:
+		if ngroups or (max_cell_count and max_cell_count < ncells) or cell_centers is not None:
 			any_cell = next(iter(self.cells.values()))
-			points = np.zeros((self.adjacency.shape[0], self.dim), dtype=any_cell.center.dtype)
+			points = np.zeros((ncells, self.dim), dtype=any_cell.center.dtype)
 			ok = np.zeros(points.shape[0], dtype=bool)
 			for i in self.cells:
 				points[i] = self.cells[i].center
@@ -452,6 +454,8 @@ def group(self, ngroups=None, max_cell_count=None, cell_centers=None, \
 					grid = kmeans.KMeansMesh(avg_probability=avg_probability)
 					try:
 						grid.tessellate(points[ok])
+					except scipy.spatial.qhull.QhullError:
+						return new
 					except ValueError:
 						print(points)
 						raise
@@ -498,7 +502,8 @@ def group(self, ngroups=None, max_cell_count=None, cell_centers=None, \
 			new.ccount = self.ccount
 			# _tcount is not supposed to change
 		else:
-			raise KeyError('`group` expects more input arguments')
+			pass
+			#raise KeyError('`group` expects more input arguments')
 		return new
 
 	def run(self, function, *args, **kwargs):

diff --git a/tramway/tessellation/base.py b/tramway/tessellation/base.py
@@ -741,13 +741,46 @@ def cell_index(self, points, format=None, select=None, knn=None,
 			min_nn, max_nn = knn, None
 		points = self.scaler.scale_point(points, inplace=False)
 		X = self.descriptors(points, asarray=True)
-		D = cdist(X, self._cell_centers, metric, **kwargs)
+		Y = self._cell_centers
+		try:
+			D = cdist(X, Y, metric, **kwargs)
+		except MemoryError as memory_error:
+			# slice X to process less rows at a time
+			if metric != 'euclidean':
+				raise #NotImplementedError
+			K = np.zeros(X.shape[0], dtype=int)
+			X2 = np.sum(X * X, axis=1, keepdims=True).astype(np.float32)
+			Y2 = np.sum(Y * Y, axis=1, keepdims=True).astype(np.float32)
+			X, Y = X.astype(np.float32), Y.astype(np.float32)
+			n = 0
+			while True:
+				n += 1
+				block = int(ceil(X.shape[0] * 2**(-n)))
+				try:
+					np.empty((block, Y.shape[0]), dtype=X.dtype)
+				except MemoryError:
+					pass # continue
+				else:
+					break
+			n += 2 # safer
+			block = int(ceil(X.shape[0] * 2**(-n)))
+			for i in range(0, X.shape[0], block):
+				j = min(i+block, X2.size)
+				Di = np.dot(np.float32(-2.)* X[i:j], Y.T)
+				Di += X2[i:j]
+				Di += Y2.T
+				K[i:j] = np.argmin(Di, axis=1)
+			D = None
+		else:
+			K = None
+		#
 		ncells = self._cell_centers.shape[0]
 		if format == 'force array':
 			min_nn = None
 			format = 'array' # for later call to :func:`format_cell_index`
 		if max_nn or min_nn or min_location_count or filter is not None:
-			K = np.argmin(D, axis=1) # cell indices
+			if K is None:
+				K = np.argmin(D, axis=1) # cell indices
 			nonempty, positive_count = np.unique(K, return_counts=True)
 			if filter is not None:
 				for c in nonempty:
@@ -770,6 +803,8 @@ def cell_index(self, points, format=None, select=None, knn=None,
 			if max_nn:
 				large, = (max_nn < positive_count).nonzero()
 				if large.size:
+					if D is None:
+						raise memory_error
 					for c in nonempty[large]:
 						cell = K == c
 						I = np.argsort(D[cell, c])
@@ -785,6 +820,8 @@ def cell_index(self, points, format=None, select=None, knn=None,
 				if min_location_count:
 					small = np.logical_and(small, min_location_count <= count)
 				if np.any(small):
+					if D is None:
+						raise memory_error
 					# small and missing cells
 					I = np.argsort(D[:,small], axis=0)[:min_nn].flatten()
 					small, = small.nonzero()
@@ -801,7 +838,7 @@ def cell_index(self, points, format=None, select=None, knn=None,
 					Jc = Jc[0 <= Jc]
 					#
 					K = (np.concatenate((I, Ic)), np.concatenate((J, Jc)))
-		else:
+		elif K is None:
 			K = np.argmin(D, axis=1) # cell indices
 		point_count = points.shape[0]
 		#if isinstance(points, pd.DataFrame):