From 90a014298778079d1b290d388c6ed366dd658826 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20WYNGAARD?= Date: Tue, 4 Jun 2024 11:47:09 +0200 Subject: [PATCH 1/3] Fix bug with nan values When the first value of an object-dtype column was `NaN` (a blank cell), the dtype was inferred from that `NaN` and came out as `float` instead of the column's actual type (e.g. `str`). --- src/spikeinterface/extractors/phykilosortextractors.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/spikeinterface/extractors/phykilosortextractors.py b/src/spikeinterface/extractors/phykilosortextractors.py index 7fdd77e703..be2483cada 100644 --- a/src/spikeinterface/extractors/phykilosortextractors.py +++ b/src/spikeinterface/extractors/phykilosortextractors.py @@ -172,7 +172,12 @@ def __init__( if load_all_cluster_properties: # pandas loads strings as objects if cluster_info[prop_name].values.dtype.kind == "O": - prop_dtype = type(cluster_info[prop_name].values[0]) + for value in cluster_info[prop_name].values: + if isinstance(value, (np.floating, float)) and np.isnan(value): # Blank values are encoded as 'NaN'. 
+ continue + + prop_dtype = type(value) + break values_ = cluster_info[prop_name].values.astype(prop_dtype) else: values_ = cluster_info[prop_name].values From 8fa1153c8fb6a83ee20602511fac9f0b9e93a197 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 4 Jun 2024 09:48:09 +0000 Subject: [PATCH 2/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/spikeinterface/extractors/phykilosortextractors.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/spikeinterface/extractors/phykilosortextractors.py b/src/spikeinterface/extractors/phykilosortextractors.py index be2483cada..0a96b9d5af 100644 --- a/src/spikeinterface/extractors/phykilosortextractors.py +++ b/src/spikeinterface/extractors/phykilosortextractors.py @@ -173,9 +173,11 @@ def __init__( # pandas loads strings as objects if cluster_info[prop_name].values.dtype.kind == "O": for value in cluster_info[prop_name].values: - if isinstance(value, (np.floating, float)) and np.isnan(value): # Blank values are encoded as 'NaN'. + if isinstance(value, (np.floating, float)) and np.isnan( + value + ): # Blank values are encoded as 'NaN'. 
continue - + prop_dtype = type(value) break values_ = cluster_info[prop_name].values.astype(prop_dtype) From dc37bc02dc95a90134b70ca30328a43e79714e13 Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Wed, 5 Jun 2024 11:54:48 +0200 Subject: [PATCH 3/3] Add safeguard for prop_dtype --- src/spikeinterface/extractors/phykilosortextractors.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/spikeinterface/extractors/phykilosortextractors.py b/src/spikeinterface/extractors/phykilosortextractors.py index 0a96b9d5af..e65ff0adfb 100644 --- a/src/spikeinterface/extractors/phykilosortextractors.py +++ b/src/spikeinterface/extractors/phykilosortextractors.py @@ -170,7 +170,8 @@ def __init__( self.set_property(key="quality", values=cluster_info[prop_name]) else: if load_all_cluster_properties: - # pandas loads strings as objects + # pandas loads strings with empty values as objects with NaNs + prop_dtype = None if cluster_info[prop_name].values.dtype.kind == "O": for value in cluster_info[prop_name].values: if isinstance(value, (np.floating, float)) and np.isnan( @@ -180,7 +181,11 @@ def __init__( prop_dtype = type(value) break - values_ = cluster_info[prop_name].values.astype(prop_dtype) + if prop_dtype is not None: + values_ = cluster_info[prop_name].values.astype(prop_dtype) + else: + # Could not find a valid dtype for the column. Skip it. + continue else: values_ = cluster_info[prop_name].values self.set_property(key=prop_name, values=values_)