## Curation

**Note: make a copy of this notebook and run the copy to avoid git conflicts in the future**

This is the second in a multi-part tutorial on the Spyglass pipeline used in Loren Frank's lab, UCSF. It demonstrates how to curate the results of spike sorting.

Finish [tutorial 0](0_intro.ipynb) and [tutorial 1](1_spikesorting.ipynb) before proceeding.

Let's start by importing the `spyglass` package, along with a few others. 

In [2]:
import copy
import itertools
import os
import re
import numpy as np
import scipy as sp
import operator
import scipy.stats as stats
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from scipy import stats
import spikeinterface as si
import spikeinterface.extractors as se
import spikeinterface.sorters as ss
import spikeinterface.toolkit as st
import spikeinterface.comparison as sc
import spikeinterface.widgets as sw
import sortingview
import warnings
warnings.simplefilter("ignore", category=DeprecationWarning)
warnings.simplefilter("ignore", category=ResourceWarning)
import webbrowser
import pickle

import pynwb
import datajoint as dj
import spyglass
from spyglass.common import (Session, IntervalList, LabMember, LabTeam, Raw, Session, Nwbfile, Electrode)
from spyglass.spikesorting import (SortGroup,
                                   SortInterval,
                                   SpikeSortingPreprocessingParameters,
                                   SpikeSortingRecording,
                                   SpikeSorterParameters,
                                   SpikeSortingRecordingSelection,
                                   ArtifactDetectionParameters, ArtifactDetectionSelection,
                                   ArtifactRemovedIntervalList, ArtifactDetection,
                                   SpikeSortingSelection, SpikeSorting, CurationFigurl,
                                   CurationFigurlSelection)
from spyglass.spikesorting.spikesorting_curation import (AutomaticCuration,
                                                         AutomaticCurationParameters,
                                                         AutomaticCurationSelection,
                                                         CuratedSpikeSorting,
                                                         CuratedSpikeSortingSelection, Curation,
                                                         MetricParameters, MetricSelection,
                                                         QualityMetrics, WaveformParameters,
                                                         Waveforms, WaveformSelection)
from collections import namedtuple

from IPython.core.display import display, HTML

  import spikeinterface.toolkit as st
[2023-03-17 15:30:10,545][INFO]: Connecting xulu@lmf-db.cin.ucsf.edu:3306
[2023-03-17 15:30:10,603][INFO]: Connected xulu@lmf-db.cin.ucsf.edu:3306


In [3]:
# Identify the recording to curate: use the same NWB file and sort group
# that you used in the spikesorting tutorial.
nwb_file_name = "j1620210715.nwb"  # Import your own dataset.
sort_group_id = 31  # Change this to your to-be-curated sort group.

# Derive the second file name by inserting an underscore before the extension
# ("j1620210715.nwb" -> "j1620210715_.nwb"); the table keys below use it.
filename, file_extension = os.path.splitext(nwb_file_name)
nwb_file_name2 = f"{filename}_{file_extension}"

First, make sure that the results of your sorting from tutorial 1 are stored in the `SpikeSorting` table.

In [4]:
# Key describing the sorting that was produced in the spikesorting tutorial.
spikesorting_key = {
    'nwb_file_name': nwb_file_name2,
    'sort_group_id': sort_group_id,
    # These params are for sorting cortex data. Adjust it based on your brain region.
    'sorter_params_name': 'franklab_probe_ctx_30KHz_115rad_new_mountainsort2',
    # These params are for sorting hippo data. Adjust it based on your brain region.
    # 'sorter_params_name': 'franklab_tetrode_hippocampus_30KHz',
    # Change this to your own sort interval.
    'sort_interval_name': 'sleeps_runs_noPrePostTrialTimes raw data valid times',
}
# Display the SpikeSorting entries that match this key.
SpikeSorting & spikesorting_key

nwb_file_name  name of the NWB file,sort_group_id  identifier for a group of electrodes,sort_interval_name  name for this interval,preproc_params_name,team_name,sorter,sorter_params_name,artifact_removed_interval_list_name,sorting_path,"time_of_sort  in Unix time, to the nearest second"
j1620210715_.nwb,31,sleeps_runs_noPrePostTrialTimes raw data valid times,default,ac_em_xs,mountainsort4,franklab_probe_ctx_30KHz_115rad_new_mountainsort2,j1620210715_.nwb_sleeps_runs_noPrePostTrialTimes raw data valid times_31_default_none_artifact_removed_valid_times,/stelmo/nwb/sorting/j1620210715_.nwb_sleeps_runs_noPrePostTrialTimes raw data valid times_31_default_1528cd92_spikesorting,1676446417


In [9]:
# The curation key is the primary key of the sorting plus a curation_id;
# this notebook starts from curation_id 0.
curation_id = 0
curation_key = (SpikeSorting & spikesorting_key).fetch1('KEY')
curation_key.update(curation_id=curation_id)


In [85]:
# Register this sorting in the Curation table through its insert helper.
# The dictionary shown as the cell output is the value the call returns.
Curation.insert_curation(curation_key)


{'curation_id': 0,
 'nwb_file_name': 'j1620210715_.nwb',
 'sort_group_id': 31,
 'sort_interval_name': 'sleeps_runs_noPrePostTrialTimes raw data valid times',
 'preproc_params_name': 'default',
 'team_name': 'ac_em_xs',
 'sorter': 'mountainsort4',
 'sorter_params_name': 'franklab_probe_ctx_30KHz_115rad_new_mountainsort2',
 'artifact_removed_interval_list_name': 'j1620210715_.nwb_sleeps_runs_noPrePostTrialTimes raw data valid times_31_default_none_artifact_removed_valid_times'}

In [10]:
# Start from the primary key of the curation and add the name of the
# waveform extraction parameter set to use.
waveform_key = (Curation & curation_key).fetch1("KEY")
waveform_key.update(
    # Can be used for cortex data.
    waveform_params_name='default_whitened_20000spikes_20jobs',
    # This is used for merge stats for hippocampal data:
    # waveform_params_name='RSN_whitened_float',
)

In [11]:
# Make sure the waveform_key looks right: it should contain the curation's
# primary key fields plus 'waveform_params_name'.
waveform_key

{'curation_id': 0,
 'nwb_file_name': 'j1620210715_.nwb',
 'sort_group_id': 31,
 'sort_interval_name': 'sleeps_runs_noPrePostTrialTimes raw data valid times',
 'preproc_params_name': 'default',
 'team_name': 'ac_em_xs',
 'sorter': 'mountainsort4',
 'sorter_params_name': 'franklab_probe_ctx_30KHz_115rad_new_mountainsort2',
 'artifact_removed_interval_list_name': 'j1620210715_.nwb_sleeps_runs_noPrePostTrialTimes raw data valid times_31_default_none_artifact_removed_valid_times',
 'waveform_params_name': 'default_whitened_20000spikes_20jobs'}

In [None]:
# Queue the waveform extraction for this key; skip_duplicates makes the
# insert safe to re-run.
WaveformSelection.insert1(waveform_key, skip_duplicates=True)

# Populate Waveforms only for the selection entry that matches waveform_key.
waveform_selection = (WaveformSelection & waveform_key).proj()
Waveforms.populate([waveform_selection])

# Fetch the contents of WaveformParameters into `wp` for inspection.
wp = WaveformParameters().fetch()

Extracting waveforms...


In [12]:
# Start from the primary key of the extracted waveforms and add the name of
# the quality-metric parameter set to use.
metrics_key = (Waveforms & waveform_key).fetch1("KEY")
metrics_key.update(
    # params for cortex data.
    metric_params_name='peak_offset_num_spikes_20000spikes',
    # params potentially working for hippo data. Ask others for hippo data metrics or define your own:
    # metric_params_name='peak_offest_num_spikes',
)


In [None]:
# Add the waveform + metric parameter combination to MetricSelection;
# skip_duplicates makes the insert safe to re-run.
MetricSelection.insert1(metrics_key, skip_duplicates=True)

In [15]:
# Check whether the metric selection table contains the newly inserted key
# by displaying the MetricSelection entries restricted to metrics_key.
MetricSelection() & metrics_key

curation_id  a number correponding to the index of this curation,nwb_file_name  name of the NWB file,sort_group_id  identifier for a group of electrodes,sort_interval_name  name for this interval,preproc_params_name,team_name,sorter,sorter_params_name,artifact_removed_interval_list_name,waveform_params_name  name of waveform extraction parameters,metric_params_name
0,j1620210715_.nwb,31,sleeps_runs_noPrePostTrialTimes raw data valid times,default,ac_em_xs,mountainsort4,franklab_probe_ctx_30KHz_115rad_new_mountainsort2,j1620210715_.nwb_sleeps_runs_noPrePostTrialTimes raw data valid times_31_default_none_artifact_removed_valid_times,default_whitened_20000spikes_20jobs,peak_offset_num_spikes_20000spikes


In [None]:
# Populate QualityMetrics only for the selection entry that matches metrics_key.
metric_selection = (MetricSelection & metrics_key).proj()
QualityMetrics.populate([metric_selection])

# Display the QualityMetrics entries for this file and sort group.
QualityMetrics() & dict(nwb_file_name=nwb_file_name2, sort_group_id=sort_group_id)

In [16]:
# Name of the automatic-curation parameter set to apply.
# auto_curation_params_name = 'mike_noise_03_offset_2_isi_0025_mua' # params for hippocampal data from Mike.
auto_curation_params_name = 'noise0.03_isi0.0025_offset2' # params for cortex data

# Build the key as a NEW dict rather than aliasing metrics_key. With a plain
# `autocuration_key = metrics_key`, adding the extra field below would also
# modify metrics_key, so earlier cells that use metrics_key (for example the
# MetricSelection insert) would see a different key when re-run.
autocuration_key = {
    **metrics_key,
    'auto_curation_params_name': auto_curation_params_name,
}

In [17]:
# Make sure the autocuration_key looks right: it should contain the metrics
# key fields plus 'auto_curation_params_name'.
autocuration_key

{'curation_id': 0,
 'nwb_file_name': 'j1620210715_.nwb',
 'sort_group_id': 31,
 'sort_interval_name': 'sleeps_runs_noPrePostTrialTimes raw data valid times',
 'preproc_params_name': 'default',
 'team_name': 'ac_em_xs',
 'sorter': 'mountainsort4',
 'sorter_params_name': 'franklab_probe_ctx_30KHz_115rad_new_mountainsort2',
 'artifact_removed_interval_list_name': 'j1620210715_.nwb_sleeps_runs_noPrePostTrialTimes raw data valid times_31_default_none_artifact_removed_valid_times',
 'waveform_params_name': 'default_whitened_20000spikes_20jobs',
 'metric_params_name': 'peak_offset_num_spikes_20000spikes',
 'auto_curation_params_name': 'noise0.03_isi0.0025_offset2'}

In [None]:
# Populate the autocuration table.
# First add the key to the selection table (safe to re-run thanks to
# skip_duplicates), then populate only the matching selection entry.
AutomaticCurationSelection.insert1(autocuration_key, skip_duplicates=True)
autocuration_selection = (AutomaticCurationSelection & autocuration_key).proj()
AutomaticCuration.populate([autocuration_selection])


In [18]:
# Check that we've successfully populated the AutomaticCuration table by
# displaying its entries restricted to autocuration_key.
AutomaticCuration & autocuration_key

curation_id  a number correponding to the index of this curation,nwb_file_name  name of the NWB file,sort_group_id  identifier for a group of electrodes,sort_interval_name  name for this interval,preproc_params_name,team_name,sorter,sorter_params_name,artifact_removed_interval_list_name,waveform_params_name  name of waveform extraction parameters,metric_params_name,auto_curation_params_name  name of this parameter set,auto_curation_key  the key to the curation inserted by make
0,j1620210715_.nwb,31,sleeps_runs_noPrePostTrialTimes raw data valid times,default,ac_em_xs,mountainsort4,franklab_probe_ctx_30KHz_115rad_new_mountainsort2,j1620210715_.nwb_sleeps_runs_noPrePostTrialTimes raw data valid times_31_default_none_artifact_removed_valid_times,default_whitened_20000spikes_20jobs,peak_offset_num_spikes_20000spikes,noise0.03_isi0.0025_offset2,=BLOB=


In [20]:
# 'auto_curation_key' is described in the table heading as "the key to the
# curation inserted by make", i.e. it points at the curation that
# AutomaticCuration created.
auto_curation_id = (AutomaticCuration & autocuration_key).fetch1('auto_curation_key')
# Fetch the primary key of that curation from the Curation table.
auto_curation_out_key = (Curation & auto_curation_id).fetch1("KEY")

In [21]:
# Check that the auto_curation_out_key looks right. In the example output its
# curation_id is 1, i.e. a different curation from the initial one (0).
auto_curation_out_key

{'curation_id': 1,
 'nwb_file_name': 'j1620210715_.nwb',
 'sort_group_id': 31,
 'sort_interval_name': 'sleeps_runs_noPrePostTrialTimes raw data valid times',
 'preproc_params_name': 'default',
 'team_name': 'ac_em_xs',
 'sorter': 'mountainsort4',
 'sorter_params_name': 'franklab_probe_ctx_30KHz_115rad_new_mountainsort2',
 'artifact_removed_interval_list_name': 'j1620210715_.nwb_sleeps_runs_noPrePostTrialTimes raw data valid times_31_default_none_artifact_removed_valid_times'}

In [None]:
# Ask Mike for help with setting up your github with figurl & manual curation.

# Location of the manual-curation JSON in the lab's sorting-curations repo.
# NOTE(review): 'xulu' in the path looks like the author's personal folder --
# presumably you should replace it with your own; confirm with the lab.
sort_interval_name = 'sleeps_runs_noPrePostTrialTimes raw data valid times' # Again use your own sort interval.
session_id = nwb_file_name+'_'+sort_interval_name
github_url = (f'gh://LorenFrankLab/sorting-curations/main/xulu/{session_id}/'
              f'{sort_group_id}/curation.json')
print('oringal key',auto_curation_out_key)

# Keep a clean key (without 'new_curation_uri') for the later insert into
# CuratedSpikeSortingSelection. Filtering the field out, rather than taking a
# plain copy, keeps this cell idempotent: on a re-run auto_curation_out_key
# already contains 'new_curation_uri' and a plain copy would carry it over.
curated_spike_sorting_key = {
    field: value
    for field, value in auto_curation_out_key.items()
    if field != 'new_curation_uri'
}
# The figurl tables below are keyed with the curation URI included.
auto_curation_out_key['new_curation_uri'] = github_url

In [None]:
# Generate the figURL for manual curation.
# The key used here includes 'new_curation_uri' (set in the previous cell);
# skip_duplicates makes the insert safe to re-run.
CurationFigurlSelection.insert1(auto_curation_out_key,skip_duplicates=True)
CurationFigurl.populate(auto_curation_out_key)

In [22]:
# Check that you've successfully inserted the auto-curated dataset into the curation figurl table; use the url to load figurl for manual curation.
# The 'url' column of the displayed row holds the link to open.
CurationFigurl & auto_curation_out_key


curation_id  a number correponding to the index of this curation,nwb_file_name  name of the NWB file,sort_group_id  identifier for a group of electrodes,sort_interval_name  name for this interval,preproc_params_name,team_name,sorter,sorter_params_name,artifact_removed_interval_list_name,url,initial_curation_uri,new_curation_uri
1,j1620210715_.nwb,31,sleeps_runs_noPrePostTrialTimes raw data valid times,default,ac_em_xs,mountainsort4,franklab_probe_ctx_30KHz_115rad_new_mountainsort2,j1620210715_.nwb_sleeps_runs_noPrePostTrialTimes raw data valid times_31_default_none_artifact_removed_valid_times,"https://figurl.org/f?v=gs://figurl/spikesortingview-10&d=sha1://d9ed6aef020c9a0b847ee46b85c6e594ba3c9b61&s={""initialSortingCuration"":""sha1://b6b2c7a0c8a1f2bdd4f014b16796a491e5d2509f"",""sortingCuration"":""gh://LorenFrankLab/sorting-curations/main/xulu/j1620210715.nwb_sleeps_runs_noPrePostTrialTimes raw data valid times/31/curation.json""}&label=j1620210715_.nwb_sleeps_runs_noPrePostTrialTimes%20raw%20data%20valid%20times_31_default%20j1620210715_.nwb_sleeps_runs_noPrePostTrialTimes%20raw%20data%20valid%20times_31_default_8e3b49fd_spikesorting",sha1://b6b2c7a0c8a1f2bdd4f014b16796a491e5d2509f,gh://LorenFrankLab/sorting-curations/main/xulu/j1620210715.nwb_sleeps_runs_noPrePostTrialTimes raw data valid times/31/curation.json


In [None]:
# After manual curation, populate CuratedSpikeSorting.
# Insert the key that was copied before 'new_curation_uri' was added;
# skip_duplicates makes the insert safe to re-run.
CuratedSpikeSortingSelection.insert1(curated_spike_sorting_key, skip_duplicates=True)
# Display the matching selection entries.
# NOTE(review): this restricts by auto_curation_out_key, which also carries
# 'new_curation_uri'; presumably DataJoint ignores dict keys that are not
# attributes of the table -- confirm, or restrict by curated_spike_sorting_key.
CuratedSpikeSortingSelection() & auto_curation_out_key


In [None]:
# Populate CuratedSpikeSorting, restricted to the auto-curated sorting's key.
CuratedSpikeSorting.populate(auto_curation_out_key)


In [23]:
# Check that the curated spike table is successfully populated by displaying
# the CuratedSpikeSorting entries that match the key.
CuratedSpikeSorting() & auto_curation_out_key

curation_id  a number correponding to the index of this curation,nwb_file_name  name of the NWB file,sort_group_id  identifier for a group of electrodes,sort_interval_name  name for this interval,preproc_params_name,team_name,sorter,sorter_params_name,artifact_removed_interval_list_name,analysis_file_name  name of the file,units_object_id
1,j1620210715_.nwb,31,sleeps_runs_noPrePostTrialTimes raw data valid times,default,ac_em_xs,mountainsort4,franklab_probe_ctx_30KHz_115rad_new_mountainsort2,j1620210715_.nwb_sleeps_runs_noPrePostTrialTimes raw data valid times_31_default_none_artifact_removed_valid_times,j1620210715_E0ELZDTVJX.nwb,b5d5d773-b094-48c3-b3ad-74a0f6464152


In [108]:
# Load the curated data of interest.
# The restriction names the file, sort group and sort interval; the 'units'
# entry of the first fetched NWB record is kept.
# NOTE(review): curation_id is not part of the restriction, so if several
# curations match, [0] presumably picks whichever comes first -- confirm.
units_restriction = dict(
    nwb_file_name=nwb_file_name2,
    sort_group_id=sort_group_id,
    sort_interval_name='sleeps_runs_noPrePostTrialTimes raw data valid times',
)
units = (CuratedSpikeSorting() & units_restriction).fetch_nwb()[0]['units']

In [111]:
# Display the 'spike_times' column of the loaded units. Judging by the output
# below, there is one array of spike times per unit id.
units['spike_times']

id
3     [1626366503.5269132, 1626366525.8692486, 16263...
5     [1626366503.4160469, 1626366503.4628134, 16263...
6     [1626366503.8008463, 1626366504.0010128, 16263...
7     [1626366503.3358803, 1626366503.8239129, 16263...
9     [1626366503.51438, 1626366503.6476798, 1626366...
10    [1626366505.7951767, 1626366506.8059087, 16263...
11    [1626366504.1870124, 1626366504.2431457, 16263...
16    [1626366720.2088737, 1626366721.3888388, 16263...
17    [1626366517.1241608, 1626366518.7990918, 16263...
24    [1626366525.250716, 1626366565.0279932, 162636...
25    [1626366503.3512802, 1626366508.4144397, 16263...
28    [1626366508.0717735, 1626366508.2400398, 16263...
29    [1626366752.0725954, 1626366794.930768, 162636...
36    [1626366506.869442, 1626366507.1153083, 162636...
37    [1626366504.019246, 1626366515.0441303, 162636...
39    [1626366503.3300803, 1626366504.3431456, 16263...
40    [1626366504.125846, 1626366504.2081125, 162636...
45    [1626366686.8226876, 1626366897.1277237