diff --git a/.github/workflows/all-tests.yml b/.github/workflows/all-tests.yml index 08e54d1909..0d242b759a 100644 --- a/.github/workflows/all-tests.yml +++ b/.github/workflows/all-tests.yml @@ -105,7 +105,7 @@ jobs: run: echo "dataset_hash=$(git ls-remote https://gin.g-node.org/NeuralEnsemble/ephy_testing_data.git HEAD | cut -f1)" >> $GITHUB_OUTPUT - name: Cache datasets - if: env.RUN_EXTRACTORS_TESTS == 'true' + if: env.RUN_EXTRACTORS_TESTS == 'true' || env.RUN_PREPROCESSING_TESTS == 'true' id: cache-datasets uses: actions/cache/restore@v4 with: @@ -115,7 +115,7 @@ jobs: - name: Install git-annex shell: bash - if: env.RUN_EXTRACTORS_TESTS == 'true' + if: env.RUN_EXTRACTORS_TESTS == 'true' || env.RUN_PREPROCESSING_TESTS == 'true' run: | pip install datalad-installer if [ ${{ runner.os }} = 'Linux' ]; then diff --git a/doc/how_to/auto_label_units.rst b/doc/how_to/auto_label_units.rst index 110b9bbfc3..a241613845 100644 --- a/doc/how_to/auto_label_units.rst +++ b/doc/how_to/auto_label_units.rst @@ -295,8 +295,9 @@ page `__. .. image:: auto_label_units_files/auto_label_units_27_1.png - **NOTE:** If you want to train your own models, see the `UnitRefine - repo <%60https://github.com/anoushkajain/UnitRefine%60>`__ for +.. note:: + If you want to train your own models, see the `UnitRefine + repo `__ for instructions! This “How To” demonstrated how to automatically label units after spike diff --git a/doc/index.rst b/doc/index.rst index 85570a56cd..b8410af765 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -12,7 +12,7 @@ amazing algorithms and formats that we interface with. See them all, and how to `references page `_. In the past year, we have added support for the following tools: -- Bombcell. `Bombcell: automated curation and cell classification of spike-sorted electrophysiology data. 2023. `_ (`docs `_) +- Bombcell. `Bombcell: automated curation and cell classification of spike-sorted electrophysiology data. `_ (`docs `_) - SLAy. 
`SLAy-ing oversplitting errors in high-density electrophysiology spike sorting `_ (`docs `_) - Lupin, Spykingcicus2 and Tridesclous2. `Opening the black box: a modular approach to spike sorting `_ (`docs `_) - RT-Sort. `RT-Sort: An action potential propagation-based algorithm for real time spike detection and sorting with millisecond latencies `_ (`docs `_) diff --git a/doc/modules/benchmark.rst b/doc/modules/benchmark.rst index c11baba6ca..b02f1c4385 100755 --- a/doc/modules/benchmark.rst +++ b/doc/modules/benchmark.rst @@ -2,31 +2,31 @@ Benchmark module ================ -Historically, this module was used to compare/benchmark sorters against ground truth +Historically, this module was used to compare/benchmark sorters against ground truth. With this, sorters can be challenge in multiple situations (noise, drift, small/high snr, small/high spike rate, high/small probe density, ...). The main idea is to generate a synthetic recording using the internal generators :py:func:`~spikeinterface.generation.generate_drifting_recording` or external tools -like ***mearec**. And then to compare the output of each sorter to the ground truth sorting. -Then, theses comparisons can be plotted in various ways to explore all strengths and weakness of +like **mearec**. And then to compare the output of each sorter to the ground truth sorting. +Then, these comparisons can be plotted in various ways to explore all strengths and weakness of sorters tools. The very first paper of spikeinterface was about that, see [Buccino]_. Since version, 0.102.0 the concept of *benchmark* has been extended to challenge/study specific -steps of the sorting pipeline, for instance the motion estimation methods has been carrfully studied +steps of the sorting pipeline, for instance the motion estimation methods has been carefully studied in [Garcia2024]_ or some localisation methods has been compared in [Scopin2024]_. 
-Also, very specific details (the ability for a sorting to recover collision spike) has been +Also, very specific details (the ability for a sorting to recover collision spikes) has been studied in [Garcia2022]_. -Now, almost all steps of the spike sorting step has implemented in spikeinterface and then -all this steps can be benchmarked more or less the same way with dedicated classes: +Now, almost all steps of the spike sorting pipeline have been implemented in spikeinterface and then +all these steps can be benchmarked more or less the same way with dedicated classes: * :py:func:`~spikeinterface.sortingcomponents.peak_detection.detect_peaks()` methods can be compared with :py:class:`~spikeinterface.benchmark.benchmark_peak_detection.PeakDetectionStudy` * :py:func:`~spikeinterface.sortingcomponents.peak_localization.localize_peaks()` methods can be compared with :py:class:`~spikeinterface.benchmark.benchmark_peak_localization.PeakLocalizationStudy` * :py:func:`~spikeinterface.sortingcomponents.motion.estimate_motion()` - methods can be compared with :py:class:`~spikeinterface.benchmark.benchmark_motion_estimation.MotionEstimationStudyStudy` + methods can be compared with :py:class:`~spikeinterface.benchmark.benchmark_motion_estimation.MotionEstimationStudy` * :py:func:`~spikeinterface.sortingcomponents.clustering.find_clusters_from_peaks()` methods can be compared with :py:class:`~spikeinterface.benchmark.benchmark_clustering.ClusteringStudy` * :py:func:`~spikeinterface.sortingcomponents.matching.find_spikes_from_templates()` @@ -41,19 +41,19 @@ All theses benchmark study classes share the same design : * They accept as input a dict of "cases". A case being a mix of **one method** (or one sorter) in a **particular situation** (drift or not, low/high snr, ...) with **some parameters**. - With this in mind, this is very easy to test either algorithm but also there parameters. 
- * Study classes has 4 steps : create cases, run methods, compute results and plot results. + With this in mind, it is very easy to test either algorithms or their parameters. + * Study classes have 4 steps : create cases, run methods, compute results and plot results. * Study classes have dedicated plot functions or more general plotting (for instance accuracy vs snr) - * Study classes also cases handle the concept of "levels" : this allows you to compare several + * Study classes also handle the concept of "levels" : this allows you to compare several complexities at the same time. For instance, compare kilosort4 vs kilsort2.5 (level 0) for different noises amplitudes (level 1) combined with several motion vectors (level 2). * When plotting levels can be grouped to make averages. * Internally, they almost all use the :py:mod:`~spikeinterface.comparison` module. - In short this module can compare a set of spiketrains against ground truth spiketrains. - The van diagram (True Posistive, False positive, False negative) against each ground truth units is + In short, this module can compare a set of spiketrains against ground truth spiketrains. + The Venn diagram (True positive, False positive, False negative) against each ground truth unit is performed. An internal agreement matrix is also constructed. With this machinery many metrics can be taken - to estimate the quality of a methods : accuracy, recall, precision + to estimate the quality of the methods : accuracy, recall, precision. * Study classes are persistent on disk. The mechanism is based on an intrinsic organization into a "study_folder" with several subfolders: results, sorting_analyzer, run_logs, cases...
@@ -158,8 +158,8 @@ Here a simple code block to generate The :py:func:`~spikeinterface.sortingcomponents.peak_detection.detect_peaks()` function propose mainly (with some variants) 2 main methods : - * "locally_exclussive" : a multichannel peak detection by threhold crossing that taken - in account the neighbor channels + * "locally_exclusive" : a multichannel peak detection by threshold crossing that takes into + account the neighbor channels. * "matched_filtering" : a method based on convolution by a kernel that "looks like a spike" at several spatial scales. @@ -256,9 +256,9 @@ version of spikeinterface for benchmark but re-generating the same figures shoul new version of spikeinterface. Note that since this puplication, new methods has been published (DREDGe and MEDiCINe) and implemented in spikeinterface -so runnning a new comparison could make sens. +so running a new comparison could make sense. -Lets be *open-and-reproducible-science*, this is so trendy. This 120 lines script will make the same +Let's be *open-and-reproducible-science*, this is so trendy. This 120-line script will make the same job done [Garcia2024]_. diff --git a/doc/modules/curation.rst b/doc/modules/curation.rst index f8540accbe..5d2e8c968c 100644 --- a/doc/modules/curation.rst +++ b/doc/modules/curation.rst @@ -284,7 +284,7 @@ This format has two part: * "format_version" : format specification * "unit_ids" : the list of unit_ds * "label_definitions" : list of label categories and possible labels per category. - Every category can be *exclusive=True* onely one label or *exclusive=False* several labels possible + Every category can be *exclusive=True* (only one label) or *exclusive=False* (several labels possible).
* **manual output** curation with the folowing keys: diff --git a/doc/modules/generation.rst b/doc/modules/generation.rst index 8c0d926667..5747e0ce3c 100644 --- a/doc/modules/generation.rst +++ b/doc/modules/generation.rst @@ -17,7 +17,7 @@ Brain Laboratory - Brain Wide Map (available on You can check out this collection of over 600 templates from this `web app `_. The :py:mod:`spikeinterface.generation` module offers tools to interact with this database to select and download templates, -manupulating (e.g. rescaling and relocating them), and construct hybrid recordings with them. +manipulating (e.g. rescaling and relocating them), and construct hybrid recordings with them. Importantly, recordings from long-shank probes, such as Neuropixels, usually experience drifts. Such drifts can be taken into account in order to smoothly inject spikes into the recording. diff --git a/doc/modules/metrics.rst b/doc/modules/metrics.rst index 89008a796a..1095553852 100644 --- a/doc/modules/metrics.rst +++ b/doc/modules/metrics.rst @@ -13,7 +13,7 @@ Currently, it contains the following submodules: All metrics extensions inherit from the :py:class:`~spikeinterface.core.analyzer_extension_core.BaseMetricExtension` base class, which provides a common interface for computing and retrieving metrics and has convenience method to access -metric information. For example, you can get the list of available metrics using the and their descriptions with: +metric information. For example, you can get the list of available metrics and their descriptions with: .. code-block:: python @@ -67,18 +67,18 @@ metric information. For example, you can get the list of available metrics using 'extremum channel (1/um). 
Uses exponential or linear fit based ' 'on linear_fit parameter.', 'main_peak_to_trough_ratio': 'Ratio of main peak amplitude to trough amplitude', - 'main_to_next_extremum_duration': 'Duration in seconds from main extremum to next extremum.', + 'main_to_next_extremum_duration': 'Duration in seconds from main extremum to next extremum.', 'num_negative_peaks': 'Number of negative peaks (troughs) in the template', 'num_positive_peaks': 'Number of positive peaks in the template', - 'peak_after_to_trough_ratio': 'Ratio of peak after amplitude to trough amplitude', + 'peak_after_to_trough_ratio': 'Ratio of peak after amplitude to trough amplitude', 'peak_after_width': 'Width of the main peak after trough in seconds', - 'peak_before_to_peak_after_ratio': 'Ratio of peak before amplitude to peak after amplitude', - 'peak_before_to_trough_ratio': 'Ratio of peak before amplitude to trough amplitude', + 'peak_before_to_peak_after_ratio': 'Ratio of peak before amplitude to peak after amplitude', + 'peak_before_to_trough_ratio': 'Ratio of peak before amplitude to trough amplitude', 'peak_before_width': 'Width of the main peak before trough in seconds', - 'peak_half_width': 'Duration in s at half the amplitude of the peak (maximum) of the template.', - 'peak_to_trough_duration': 'Duration in seconds between the trough (minimum) and the next peak (maximum) of the template.', - 'recovery_slope': 'Slope of the recovery phase of the template, after the peak (maximum) returning to baseline in uV/s.', - 'repolarization_slope': 'Slope of the repolarization phase of the template, between the trough (minimum) and return to baseline in uV/s.', + 'peak_half_width': 'Duration in s at half the amplitude of the peak (maximum) of the template.', + 'peak_to_trough_duration': 'Duration in seconds between the trough (minimum) and the next peak (maximum) of the template.', + 'recovery_slope': 'Slope of the recovery phase of the template, after the peak (maximum) returning to baseline in uV/s.', + 
'repolarization_slope': 'Slope of the repolarization phase of the template, between the trough (minimum) and return to baseline in uV/s.', 'spread': 'Spread of the template amplitude in um, calculated as the distance between channels whose templates exceed the spread_threshold.', 'trough_half_width': 'Duration in s at half the amplitude of the trough (minimum) of the template.', 'trough_width': 'Width of the main trough in seconds', diff --git a/doc/modules/metrics/quality_metrics.rst b/doc/modules/metrics/quality_metrics.rst index bfc4897a44..3f76af1717 100644 --- a/doc/modules/metrics/quality_metrics.rst +++ b/doc/modules/metrics/quality_metrics.rst @@ -12,7 +12,7 @@ Completeness metrics (or 'false negative'/'type II' metrics) aim to identify whe Examples include: presence ratio, amplitude cutoff, NN-miss rate. Drift metrics aim to identify changes in waveforms which occur when spike sorters fail to successfully track neurons in the case of electrode drift. -The quality metrics are saved as an extension of a :doc:`SortingAnalyzer <../postprocessing>`. Some metrics can only be computed if certain extensions have been computed first. For example the drift metrics can only be computed the spike locations extension has been computed. By default, as many metrics as possible are computed. Which ones are computed depends on which other extensions have +The quality metrics are saved as an extension of a :doc:`SortingAnalyzer <../postprocessing>`. Some metrics can only be computed if certain extensions have been computed first. For example the drift metrics can only be computed if the spike locations extension has been computed. By default, as many metrics as possible are computed. Which ones are computed depends on which other extensions have been computed. 
In detail, the default metrics are (click on each metric to find out more about them!): diff --git a/doc/modules/metrics/spiketrain_metrics.rst b/doc/modules/metrics/spiketrain_metrics.rst index 867af567d7..752afdb769 100644 --- a/doc/modules/metrics/spiketrain_metrics.rst +++ b/doc/modules/metrics/spiketrain_metrics.rst @@ -7,4 +7,4 @@ Currently, the following metrics are implemented: - "num_spikes": number of spikes in the spike train. - "firing_rate": firing rate of the spike train (spikes per second). -# TODO: Add more metrics such as ISI distribution, CV, etc. +.. TODO: Add more metrics such as ISI distribution, CV, etc. diff --git a/doc/modules/metrics/template_metrics.rst b/doc/modules/metrics/template_metrics.rst index 983e80e381..a0827bbb22 100644 --- a/doc/modules/metrics/template_metrics.rst +++ b/doc/modules/metrics/template_metrics.rst @@ -134,6 +134,12 @@ template across the probe (these are computed by default if the number of channe is greater than 64, but can be forced on or off with the :code:`include_multi_channel_metrics` parameter). + +.. code-block:: python + + tm = sorting_analyzer.compute(input="template_metrics", include_multi_channel_metrics=True) + + These are the multi-channel metrics that can be computed: velocity_fits @@ -159,9 +165,4 @@ above 20% of the maximum amplitude (default). Template amplitudes are normalized and optionally smoothed over space using a Gaussian filter (default sigma is 20µm). -.. 
code-block:: python - - tm = sorting_analyzer.compute(input="template_metrics", include_multi_channel_metrics=True) - - For more information, see :py:func:`~spikeinterface.postprocessing.compute_template_metrics` diff --git a/doc/modules/motion_correction.rst b/doc/modules/motion_correction.rst index 189a467d88..e29ce4ca10 100644 --- a/doc/modules/motion_correction.rst +++ b/doc/modules/motion_correction.rst @@ -19,7 +19,7 @@ of the algorithm is create an "image" via the activity profile of the cells duri activity profile should be kept constant over time, the motion can be estimated, by blocks, along the probe's insertion axis (i.e. depth) so that we can interpolate the traces to compensate for this estimated motion. -There are now several algorithms which try to correct for drift as a preprocessing step: the Paninski +There are now several algorithms that try to correct for drift as a preprocessing step: the Paninski group from Columbia University introduced DREDGE (see [Varol2021]_ and [Windolf2023]_), and the Jazayeri lab introduced MEDiCINe ([Watters]_). diff --git a/doc/modules/sorters_internal.rst b/doc/modules/sorters_internal.rst index 4cd6439f98..0fa068bd75 100644 --- a/doc/modules/sorters_internal.rst +++ b/doc/modules/sorters_internal.rst @@ -6,7 +6,7 @@ Internal sorters :py:mod:`spikeinterface.sortingcomponents` implement algorithms to break a sorting pipeline into individual components. With this components it is easy to develop a new sorter. -These components and sorters havs been benchmarked [here](https://github.com/samuelgarcia/sorting_components_benchmark_paper). +These components and sorters have been benchmarked `here `_. At the moment, there are 4 internal sorters implemented in ``spikeinterface``: @@ -20,8 +20,8 @@ At the moment, there are 4 internal sorters implemented in ``spikeinterface``: Lupin ----- -Lupin is components-based sorters, it combine components that give the best reults on benchmarks -for each steps. 
It is theorically the "best" sorter that ``spikeinterface`` can offer internally. +Lupin is a components-based sorter, it combines components that give the best results on benchmarks +for each step. It is theoretically the "best" sorter that ``spikeinterface`` can offer internally. Lupin components are: * preprocessing (filtering, CMR, whitening) diff --git a/doc/modules/sortingcomponents.rst b/doc/modules/sortingcomponents.rst index 5549fd0317..9cab4d8a08 100644 --- a/doc/modules/sortingcomponents.rst +++ b/doc/modules/sortingcomponents.rst @@ -128,7 +128,7 @@ Currently, the following methods are implemented: see also `here `_ **'monopolar_triangulation'** has some variant with differents optimizers (default is 'minimize_with_log_penality') * **'grid_convolution'** : inspired by the Kilosort approach. This consists of a convolution of traces with waveform - prototypes with varying local spatial footprint on the probe. + prototypes with varying local spatial footprint on the probe. Please have a look at [Scopin2024]_, for details on these methods. @@ -313,7 +313,7 @@ We could now check the ``motion`` object and see if we need to apply a correctio Availables methods are: - * **'dredge_ap'** : the most mature method at the moement, done by [Windolf_b]_ + * **'dredge_ap'** : the most mature method at the moment, done by [Windolf_b]_ * **'decentralized'** : more or less the ancestor of 'dredge_ap' * **'iterative_template'** : this mimics the kilosort approach. * **'medicine'** : a more recent approach done in [Watters]_. @@ -367,16 +367,16 @@ a label for every peak. Some methods have been implemented with various ideas in mind. We really hope that this list will be extended soon by talented people willing to improve it. This is a crucial and not totally resolved step. - * **'iterative-hdbscan'** : method used in spkyking-circus2. This performs local hdbscan clusetrings on - svd waveforms features. - * **'iterative-isosplit'** : method used in tridesclous2. 
This performs local isosplit clusetrings on - svd waveforms features. - * **'hdbscan-positions'** : This performs a hdbscan clusetring based on the localizations of the spikes. + * **'iterative-hdbscan'** : method used in spyking-circus2. This performs local hdbscan clustering on + svd waveforms features. + * **'iterative-isosplit'** : method used in tridesclous2. This performs local isosplit clustering on + svd waveforms features. + * **'hdbscan-positions'** : This performs a hdbscan clustering based on the localizations of the spikes. This mimics the herdingspikes approach : make the clustering on spike position only but more flexible because more localization methods are availables. * **'random-projections'** : attempt to make the feature from waveforms with random projections instead of the good-old-school-pca. - * **'graph-clustering'** : attempt to resolve the clusetring globally and not locally. This constructs a global + * **'graph-clustering'** : attempt to resolve the clustering globally and not locally. This constructs a global but sparse distance matrix between all spikes. Can be slow. Then it performs 'classical' algos on graph (Louvain, Leiden or even HDBSCAN). Promising method but not as efficient as the 'iterative-isosplit' or 'iterative-hdbscan'.
diff --git a/src/spikeinterface/preprocessing/motion.py b/src/spikeinterface/preprocessing/motion.py index 9fe22fdbcd..4cb2d8e6ec 100644 --- a/src/spikeinterface/preprocessing/motion.py +++ b/src/spikeinterface/preprocessing/motion.py @@ -79,7 +79,7 @@ border_mode="force_extrapolate", spatial_interpolation_method="kriging", sigma_um=20.0, p=2 ), }, - # This preset is the encestor of dredge + # This preset is the ancestor of dredge "nonrigid_accurate": { "doc": "method by Paninski lab (monopolar_triangulation + decentralized)", "detect_kwargs": dict( diff --git a/src/spikeinterface/preprocessing/preprocessing_tools.py b/src/spikeinterface/preprocessing/preprocessing_tools.py index 5898f203db..939d2d08b6 100644 --- a/src/spikeinterface/preprocessing/preprocessing_tools.py +++ b/src/spikeinterface/preprocessing/preprocessing_tools.py @@ -138,15 +138,15 @@ def get_kriging_kernel_distance(locations_1, locations_2, sigma_um, p, distance_ import scipy dist = scipy.spatial.distance.cdist(locations_1, locations_2, metric=distance_metric) - kernal_dist = np.exp(-((dist / sigma_um) ** p)) + kernel_dist = np.exp(-((dist / sigma_um) ** p)) else: # this mimic the kilosort case where a sigma on x and y are diffrents. 
# note that in that case the distance metric become a cityblock sigma_x, sigma_y = sigma_um distx = np.abs(locations_1[:, 0][:, np.newaxis] - locations_2[:, 0][np.newaxis, :]) disty = np.abs(locations_1[:, 1][:, np.newaxis] - locations_2[:, 1][np.newaxis, :]) - kernal_dist = np.exp(-((distx / sigma_x) ** p) - (disty / sigma_y) ** p) - return kernal_dist + kernel_dist = np.exp(-((distx / sigma_x) ** p) - (disty / sigma_y) ** p) + return kernel_dist def get_kriging_channel_weights(contact_positions1, contact_positions2, sigma_um, p, weight_threshold=0.005): diff --git a/src/spikeinterface/sortingcomponents/motion/motion_interpolation.py b/src/spikeinterface/sortingcomponents/motion/motion_interpolation.py index 9af2b82cd2..a50b9609b9 100644 --- a/src/spikeinterface/sortingcomponents/motion/motion_interpolation.py +++ b/src/spikeinterface/sortingcomponents/motion/motion_interpolation.py @@ -302,9 +302,9 @@ class InterpolateMotionRecording(BasePreprocessor): * "nearest" : use neareast channel sigma_um : float, default: 20.0 - Used in the "kriging" formula + Used in the "kriging" formula. p : int, default: 1 - Used in the "kriging" formula + Used in the "kriging" formula to control the decay of the gaussian kernel. num_closest : int, default: 3 Number of closest channels used by "idw" method for interpolation. 
border_mode : "remove_channels" | "force_extrapolate" | "force_zeros", default: "remove_channels" diff --git a/src/spikeinterface/sortingcomponents/peak_detection/matched_filtering.py b/src/spikeinterface/sortingcomponents/peak_detection/matched_filtering.py index d7d50277ed..509c3f76f8 100644 --- a/src/spikeinterface/sortingcomponents/peak_detection/matched_filtering.py +++ b/src/spikeinterface/sortingcomponents/peak_detection/matched_filtering.py @@ -30,7 +30,7 @@ class MatchedFilteringPeakDetector(PeakDetector): prototype : array The canonical waveform of action potentials ms_before : float - The time in ms before the maximial value of the absolute prototype + The time in ms before the maximal value of the absolute prototype weight_method : dict Parameter that should be provided to the get_convolution_weights() function in order to know how to estimate the positions. One argument is mode that could