Fixed bug in spade, linked to missing call of min_neu, added test (NeuralEnsemble#575)

* fixed bug in spade and added test
* code cleanup, strings converted to f-strings, updated references for docs
pbouss committed Jun 29, 2023
1 parent 2651da9 commit 844eb82
Showing 4 changed files with 89 additions and 74 deletions.
8 changes: 6 additions & 2 deletions doc/conf.py
@@ -349,8 +349,12 @@

# configuration for intersphinx: refer to Viziphant
intersphinx_mapping = {
'viziphant': ('https://viziphant.readthedocs.io/en/latest/', None),
'numpy': ('https://numpy.org/doc/stable', None)
'viziphant': ('https://viziphant.readthedocs.io/en/stable/', None),
'numpy': ('https://numpy.org/doc/stable', None),
'neo': ('https://neo.readthedocs.io/en/stable/', None),
'quantities': ('https://python-quantities.readthedocs.io/en/stable/', None),
'python': ('https://docs.python.org/3/', None),
'scipy': ('https://docs.scipy.org/doc/scipy/', None)
}
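
As context for the new entries: mapping 'neo', 'quantities', 'python', and 'scipy' lets Sphinx resolve the :class: cross-references introduced in the docstrings below to the external projects' own documentation. A minimal, hypothetical docstring sketch (the function name is invented for illustration; only the role syntax is taken from this commit):

def example_analysis(spiketrains):
    """Hypothetical docstring, for illustration only.

    Parameters
    ----------
    spiketrains : list of :class:`neo.core.SpikeTrain`
        With the 'neo' entry present in ``intersphinx_mapping``, Sphinx links
        this role to the SpikeTrain page of the neo documentation at build
        time instead of leaving a dangling reference.
    """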


103 changes: 46 additions & 57 deletions elephant/spade.py
@@ -110,12 +110,14 @@
from functools import reduce
from itertools import chain, combinations

import neo
from neo.core.spiketrainlist import SpikeTrainList
import numpy as np
import quantities as pq
from scipy import sparse

import quantities as pq

import neo
from neo.core.spiketrainlist import SpikeTrainList

import elephant.conversion as conv
import elephant.spike_train_surrogates as surr
from elephant.spade_src import fast_fca
@@ -169,7 +171,7 @@ def spade(spiketrains, bin_size, winlen, min_spikes=2, min_occ=2,
Parameters
----------
spiketrains : list of neo.SpikeTrain
spiketrains : list of :class:`neo.core.SpikeTrain`
List containing the parallel spike trains to analyze
bin_size : pq.Quantity
The time precision used to discretize the spiketrains (binning).
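
For orientation between the docstring hunks, a minimal usage sketch of spade() with the parameters discussed here; the toy data, the keyword values, and the 'neurons'/'times' result keys are assumptions based on the Elephant documentation rather than on this diff:

import numpy as np
import quantities as pq
import neo
from elephant.spade import spade

np.random.seed(0)
# three toy spike trains, 1 s long each
spiketrains = [
    neo.SpikeTrain(np.sort(np.random.uniform(0, 950, 40)) * pq.ms,
                   t_stop=1 * pq.s)
    for _ in range(3)
]
# mine patterns in 5 ms bins with a window of a single bin (synchrony only)
result = spade(spiketrains, bin_size=5 * pq.ms, winlen=1,
               min_spikes=2, min_occ=2)
for pattern in result['patterns']:
    print(pattern['neurons'], pattern['times'])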
@@ -200,7 +202,7 @@ def spade(spiketrains, bin_size, winlen, min_spikes=2, min_occ=2,
'n_subsets': int
Number of subsets of a concept used to approximate its stability.
If `n_subsets` is 0, it is calculated according to to the formula
If `n_subsets` is 0, it is calculated according to the formula
given in Babin, Kuznetsov (2012), proposition 6:
.. math::
@@ -304,7 +306,7 @@ def spade(spiketrains, bin_size, winlen, min_spikes=2, min_occ=2,
* p-value
'non_sgnf_sgnt': list
Non significant signatures of 'pvalue_spectrum'.
Non-significant signatures of 'pvalue_spectrum'.
Notes
-----
@@ -355,7 +357,7 @@ def spade(spiketrains, bin_size, winlen, min_spikes=2, min_occ=2,
min_occ=min_occ, max_spikes=max_spikes, max_occ=max_occ,
min_neu=min_neu, report='a')
time_mining = time.time() - time_mining
print("Time for data mining: {}".format(time_mining))
print(f"Time for data mining: {time_mining}")

# Decide if compute the approximated stability
if compute_stability:
@@ -368,7 +370,7 @@ def spade(spiketrains, bin_size, winlen, min_spikes=2, min_occ=2,
concepts = approximate_stability(
concepts, rel_matrix, **approx_stab_pars)
time_stability = time.time() - time_stability
print("Time for stability computation: {}".format(time_stability))
print(f"Time for stability computation: {time_stability}")
# Filtering the concepts using stability thresholds
if stability_thresh is not None:
concepts = [concept for concept in concepts
@@ -386,8 +388,7 @@ def spade(spiketrains, bin_size, winlen, min_spikes=2, min_occ=2,
max_occ=max_occ, min_neu=min_neu, spectrum=spectrum,
surr_method=surr_method, **surr_kwargs)
time_pvalue_spectrum = time.time() - time_pvalue_spectrum
print("Time for pvalue spectrum computation: {}".format(
time_pvalue_spectrum))
print(f"Time for pvalue spectrum computation: {time_pvalue_spectrum}")
# Storing pvalue spectrum
output['pvalue_spectrum'] = pv_spec

@@ -534,7 +535,7 @@ def _check_input(
# Check surr_method
if surr_method not in surr.SURR_METHODS:
raise ValueError(
'specified surr_method (=%s) not valid' % surr_method)
f'specified surr_method (={surr_method}) not valid')

# Check psr_param
if psr_param is not None:
@@ -565,7 +566,8 @@ def concepts_mining(spiketrains, bin_size, winlen, min_spikes=2, min_occ=2,
Parameters
----------
spiketrains : list of neo.SpikeTrain or conv.BinnedSpikeTrain
spiketrains : list of :class:`neo.core.SpikeTrain` or
:class:`elephant.conversion.BinnedSpikeTrain`
Either list of the spiketrains to analyze or
BinningSpikeTrain object containing the binned spiketrains to analyze
bin_size : pq.Quantity
@@ -590,7 +592,7 @@ def concepts_mining(spiketrains, bin_size, winlen, min_spikes=2, min_occ=2,
pattern. If None, no maximal number of occurrences is considered.
Default: None
min_neu : int, optional
Minimum number of neurons in a sequence to considered a pattern.
Minimum number of neurons in a sequence to be considered a pattern.
Default: 1
report : {'a', '#', '3d#'}, optional
Indicates the output of the function.
@@ -611,7 +613,7 @@ def concepts_mining(spiketrains, bin_size, winlen, min_spikes=2, min_occ=2,
Returns
-------
mining_results : np.ndarray
mining_results : :class:`numpy.ndarray`
If report == 'a':
Numpy array of all the pattern candidates (concepts) found in the
`spiketrains`. Each pattern is represented as a tuple containing
@@ -626,11 +628,11 @@ def concepts_mining(spiketrains, bin_size, winlen, min_spikes=2, min_occ=2,
The pattern spectrum is represented as a numpy array of
quadruplets (pattern size, number of occurrences, difference
between last and first spike of the pattern, number of patterns)
rel_matrix : sparse.coo_matrix
rel_matrix : :class:`scipy.sparse.coo_matrix`
A binary matrix of shape (number of windows, winlen*len(spiketrains)).
Each row corresponds to a window (order
according to their position in time). Each column corresponds to one
bin and one neuron and it is 0 if no spikes or 1 if one or more spikes
bin and one neuron, and it is 0 if no spikes or 1 if one or more spikes
occurred in that bin for that particular neuron. For example, the entry
[0,0] of this matrix corresponds to the first bin of the first window
position for the first neuron, the entry `[0,winlen]` to the first
@@ -639,7 +641,7 @@ def concepts_mining(spiketrains, bin_size, winlen, min_spikes=2, min_occ=2,
if report not in ('a', '#', '3d#'):
raise ValueError(
"report has to assume of the following values:" +
" 'a', '#' and '3d#,' got {} instead".format(report))
f" 'a', '#' and '3d#,' got {report} instead")
# if spiketrains is list of neo.SpikeTrain convert to conv.BinnedSpikeTrain
if isinstance(spiketrains, (list, SpikeTrainList)) and \
isinstance(spiketrains[0], neo.SpikeTrain):
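
Regarding the conversion branch started above (its body is collapsed in this view): lists of neo.SpikeTrain objects are binned before mining. A minimal pre-binning sketch, assuming the elephant.conversion.BinnedSpikeTrain API; the values are invented for illustration:

import quantities as pq
import neo
import elephant.conversion as conv

sts = [neo.SpikeTrain([10, 20, 55] * pq.ms, t_stop=0.1 * pq.s),
       neo.SpikeTrain([11, 21, 80] * pq.ms, t_stop=0.1 * pq.s)]
# one row per neuron, one column per 5 ms bin
bst = conv.BinnedSpikeTrain(sts, bin_size=5 * pq.ms)
print(bst.to_bool_array().shape)  # (2, 20)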
@@ -720,7 +722,7 @@ def _build_context(binary_matrix, winlen):
A binary matrix with shape (number of windows,
winlen*len(spiketrains)). Each row corresponds to a window (order
according to their position in time).
Each column corresponds to one bin and one neuron and it is 0 if no
Each column corresponds to one bin and one neuron, and it is 0 if no
spikes or 1 if one or more spikes occurred in that bin for that
particular neuron.
E.g. the entry [0,0] of this matrix corresponds to the first bin of the
@@ -836,7 +838,7 @@ def _fpgrowth(transactions, min_c=2, min_z=2, max_z=None,
A binary matrix with shape (number of windows,
winlen*len(spiketrains)). Each row corresponds to a window (order
according to their position in time).
Each column corresponds to one bin and one neuron and it is 0 if no
Each column corresponds to one bin and one neuron, and it is 0 if no
spikes or 1 if one or more spikes occurred in that bin for that
particular neuron.
E.g. the entry [0,0] of this matrix corresponds to the first bin of the
@@ -852,7 +854,7 @@ def _fpgrowth(transactions, min_c=2, min_z=2, max_z=None,
last spike) is then given by winlen*bin_size
Default: 1
min_neu: int
Minimum number of neurons in a sequence to considered a
Minimum number of neurons in a sequence to be considered a
potential pattern.
Default: 1
@@ -882,7 +884,7 @@ def _fpgrowth(transactions, min_c=2, min_z=2, max_z=None,
# By default, set the maximum pattern size to the number of spiketrains
if max_z is None:
max_z = max(max(map(len, transactions)), min_z + 1)
# By default set maximum number of data to number of bins
# By default, set maximum number of data to number of bins
if max_c is None:
max_c = len(transactions)

@@ -912,6 +914,7 @@ def _fpgrowth(transactions, min_c=2, min_z=2, max_z=None,
report='a',
algo='s',
winlen=winlen,
min_neu=min_neu,
threads=0,
verbose=4)
break
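
The min_neu=min_neu keyword added to the call above is the fix named in the commit title: the user-supplied threshold is now forwarded to the frequent-itemset-mining call instead of being dropped. The constraint it enforces corresponds to the check preserved in the commented-out filter further down; a standalone sketch of that check, assuming the item encoding neuron_id * winlen + bin_id described in the docstrings:

import numpy as np

def satisfies_min_neu(intent, winlen, min_neu):
    # keep a pattern only if it spans at least `min_neu` distinct neurons;
    # items encode spikes as neuron_id * winlen + bin_id
    n_neurons = np.unique(np.asarray(intent) // winlen).shape[0]
    return n_neurons >= min_neu

# with winlen=5: items 2 and 4 both belong to neuron 0, item 7 to neuron 1
assert not satisfies_min_neu([2, 4], winlen=5, min_neu=2)
assert satisfies_min_neu([2, 7], winlen=5, min_neu=2)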
@@ -920,7 +923,7 @@ def _fpgrowth(transactions, min_c=2, min_z=2, max_z=None,
# Applying min/max conditions and computing extent (window positions)
# fpgrowth_output = [concept for concept in fpgrowth_output
# if _fpgrowth_filter(concept, winlen, max_c, min_neu)]
# filter out subsets of patterns that are found as a side-effect
# filter out subsets of patterns that are found as a side effect
# of using the moving window strategy
fpgrowth_output = _filter_for_moving_window_subsets(
fpgrowth_output, winlen)
@@ -967,20 +970,6 @@ def _fpgrowth(transactions, min_c=2, min_z=2, max_z=None,
return spectrum


# def _fpgrowth_filter(concept, winlen, max_c, min_neu):
# """
# Filter for selecting closed frequent items set with a minimum number of
# neurons and a maximum number of occurrences and first spike in the first
# bin position
# """
# intent = np.array(concept[0])
# keep_concept = (min(intent % winlen) == 0
# and concept[1] <= max_c
# and np.unique(intent // winlen).shape[0] >= min_neu
# )
# return keep_concept


def _rereference_to_last_spike(transactions, winlen):
"""
Converts transactions from the default format
@@ -1008,7 +997,7 @@ def _rereference_to_last_spike(transactions, winlen):

def _filter_for_moving_window_subsets(concepts, winlen):
"""
Since we're using a moving window subpatterns starting from
Since we're using a moving window, sub patterns starting from
subsequent spikes after the first pattern spike will also be found.
This filter removes them if they do not occur on their own in
addition to the occurrences explained by their superset.
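
A toy illustration of the duplication described above (numbers invented for this sketch, not elephant's data structures): with winlen = 3, spikes occupying bin offsets (0, 1, 2) of one window position reappear one window later as the shifted suffix (0, 1); that suffix is only kept if it also occurs on its own.

winlen = 3
full_pattern = (0, 1, 2)  # bin offsets of the pattern inside window t
# seen from window t+1, the leading spike falls outside and the rest shift left
suffix_in_next_window = tuple(b - 1 for b in full_pattern if b >= 1)
assert suffix_in_next_window == (0, 1)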
@@ -1106,7 +1095,7 @@ def _fast_fca(context, min_c=2, min_z=2, max_z=None,
last spike) is then given by winlen*bin_size
Default: 1
min_neu: int
Minimum number of neurons in a sequence to considered a
Minimum number of neurons in a sequence to be considered a
potential pattern.
Default: 1
@@ -1134,10 +1123,10 @@ def _fast_fca(context, min_c=2, min_z=2, max_z=None,
# Check parameters
if min_neu < 1:
raise ValueError('min_neu must be an integer >=1')
# By default set maximum number of attributes
# By default, set maximum number of attributes
if max_z is None:
max_z = len(context)
# By default set maximum number of data to number of bins
# By default, set maximum number of data to number of bins
if max_c is None:
max_c = len(context)
if report == '#':
@@ -1223,7 +1212,7 @@ def pvalue_spectrum(
Parameters
----------
spiketrains : list of neo.SpikeTrain
spiketrains : list of :class:`neo.core.SpikeTrain`
List containing the parallel spike trains to analyze
bin_size : pq.Quantity
The time precision used to discretize the `spiketrains` (binning).
@@ -1259,7 +1248,7 @@ def pvalue_spectrum(
pattern. If None, no maximal number of occurrences is considered.
Default: None
min_neu : int, optional
Minimum number of neurons in a sequence to considered a pattern.
Minimum number of neurons in a sequence to be considered a pattern.
Default: 1
spectrum : {'#', '3d#'}, optional
Defines the signature of the patterns.
@@ -1308,9 +1297,9 @@ def pvalue_spectrum(
raise ValueError('n_surr has to be >0')
if surr_method not in surr.SURR_METHODS:
raise ValueError(
'specified surr_method (=%s) not valid' % surr_method)
f'specified surr_method (={surr_method}) not valid')
if spectrum not in ('#', '3d#'):
raise ValueError("Invalid spectrum: '{}'".format(spectrum))
raise ValueError(f"Invalid spectrum: '{spectrum}'")

len_partition = n_surr // size # length of each MPI task
len_remainder = n_surr % size
@@ -1356,7 +1345,7 @@ def pvalue_spectrum(
return []

# The gather operator gives a list out. This is rearranged as a 2 resp.
# 3 dimensional numpy-array.
# 3-dimensional numpy-array.
max_occs = np.vstack(max_occs)

# Compute the p-value spectrum, and return it
@@ -1448,7 +1437,7 @@ def _get_pvalue_spec(max_occs, min_spikes, max_spikes, min_occ, n_surr, winlen,
[pattern_size, pattern_occ, pattern_dur, p_value]
"""
if spectrum not in ('#', '3d#'):
raise ValueError("Invalid spectrum: '{}'".format(spectrum))
raise ValueError(f"Invalid spectrum: '{spectrum}'")

pv_spec = []
if spectrum == '#':
@@ -1487,9 +1476,9 @@ def _get_max_occ(surr_concepts, min_spikes, max_spikes, winlen, spectrum):
Returns
-------
np.ndarray
Two-dimensional array. Each element corresponds to a highest occurrence
for a specific pattern size (which range from min_spikes to max_spikes)
and pattern duration (which range from 0 to winlen-1).
Two-dimensional array. Each element corresponds to the highest
occurrence for a specific pattern size (which range from min_spikes to
max_spikes) and pattern duration (which range from 0 to winlen-1).
The first axis corresponds to the pattern size the second to the
duration.
"""
@@ -1517,7 +1506,7 @@ def _get_max_occ(surr_concepts, min_spikes, max_spikes, winlen, spectrum):

def _stability_filter(concept, stability_thresh):
"""Criteria by which to filter concepts from the lattice"""
# stabilities larger then stability_thresh
# stabilities larger than stability_thresh
keep_concept = \
concept[2] > stability_thresh[0]\
or concept[3] > stability_thresh[1]
@@ -1663,12 +1652,12 @@ def test_signature_significance(pv_spec, concepts, alpha, winlen,
return []

if spectrum not in ('#', '3d#'):
raise ValueError("spectrum must be either '#' or '3d#', "
"got {} instead".format(spectrum))
raise ValueError("spectrum must be either '#' or '3d#', " +
f"got {spectrum} instead")
if report not in ('spectrum', 'significant', 'non_significant'):
raise ValueError("report must be either 'spectrum'," +
" 'significant' or 'non_significant'," +
"got {} instead".format(report))
f"got {report} instead")
if corr not in ('bonferroni', 'sidak', 'holm-sidak', 'holm',
'simes-hochberg', 'hommel', 'fdr_bh', 'fdr_by',
'fdr_tsbh', 'fdr_tsbky', '', 'no'):
@@ -1685,7 +1674,7 @@

if len(pvalues_totest) > 0:

# Compute significance for only the non trivial tests
# Compute significance for only the non-trivial tests
if corr in ['', 'no']: # ...without statistical correction
tests_selected = pvalues_totest <= alpha
else:
@@ -1762,11 +1751,11 @@ def approximate_stability(concepts, rel_matrix, n_subsets=0,
of the occurrences of the pattern). The spike IDs are defined as:
`spike_id=neuron_id*bin_id` with `neuron_id` in `[0, len(spiketrains)]`
and `bin_id` in `[0, winlen]`.
rel_matrix : sparse.coo_matrix
rel_matrix : :class:`scipy.sparse.coo_matrix`
A binary matrix with shape (number of windows,
winlen*len(spiketrains)). Each row corresponds to a window (order
according to their position in time).
Each column corresponds to one bin and one neuron and it is 0 if
Each column corresponds to one bin and one neuron, and it is 0 if
no spikes or 1 if one or more spikes occurred in that bin for that
particular neuron. For example, the entry [0,0] of this matrix
corresponds to the first bin of the first window position for the first
@@ -1867,7 +1856,7 @@ def _calculate_single_stability_parameter(intent, extent,
"""
Calculates the stability parameter for extent or intent.
For detailed describtion see approximate_stabilty
For detailed description see approximate_stabilty
Parameters
----------
