Connect to db

In [1]:
import datajoint as dj
import os
from pathlib import Path

# set dirs
base_dir = Path('/hdd/dj') # change this to your desired directory
raw_dir = base_dir / 'raw'
analysis_dir = base_dir / 'analysis'
tmp_dir = base_dir / 'tmp'
# Path.mkdir(parents=True, exist_ok=True) also creates missing parent directories
# (e.g. /hdd) and does nothing when the directory already exists, so this cell
# can be re-run safely.
for data_dir in (base_dir, raw_dir, analysis_dir, tmp_dir):
    data_dir.mkdir(parents=True, exist_ok=True)

# set dj config
# NOTE(review): these look like credentials for a local tutorial database;
# do not commit real credentials to a notebook.
dj.config['database.host'] = 'localhost'
dj.config['database.user'] = 'root'
dj.config['database.password'] = 'tutorial'
dj.config['database.port'] = 3306
# two file-based stores, each using its directory as both location and stage
dj.config['stores'] = {
  'raw': {
    'protocol': 'file',
    'location': str(raw_dir),
    'stage': str(raw_dir)
  },
  'analysis': {
    'protocol': 'file',
    'location': str(analysis_dir),
    'stage': str(analysis_dir)
  }
}
dj.config["enable_python_native_blobs"] = True


# set env vars
os.environ['SPYGLASS_BASE_DIR'] = str(base_dir)
os.environ['SPYGLASS_TEMP_DIR'] = str(tmp_dir)
# NOTE(review): user-specific absolute path; change this to your own kachery-cloud directory
os.environ['KACHERY_CLOUD_DIR'] = '/home/kyu/.kachery-cloud'
os.environ['DJ_SUPPORT_FILEPATH_MANAGEMENT'] = 'TRUE'

# IPython autoreload extension: mode 2 reloads imported modules before each cell runs,
# so edits to library source are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2

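Import Spyglass. This must come after the DataJoint config above, because importing connects to the database.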

In [2]:
import spyglass as sg
import spyglass.common as sgc
import spyglass.spikesorting.v1 as sgs
import spyglass.data_import as sgi

[2023-11-15 15:43:38,458][INFO]: Connecting root@localhost:3306
[2023-11-15 15:43:38,532][INFO]: Connected root@localhost:3306


insert LabMember and Session

In [3]:
# original NWB file for the session
nwb_file_name = "wilbur20210326.nwb"
# Name of the copy that the tables below are keyed on: the original stem plus a
# trailing underscore. Derived from nwb_file_name so the two cannot drift apart.
_stem, _ext = os.path.splitext(nwb_file_name)
nwb_file_name2 = f"{_stem}_{_ext}"

In [4]:
# populate LabMember from the NWB file (presumably from the experimenters it lists)
sgc.LabMember.insert_from_nwbfile(nwb_file_name)

Please add the Google user ID for Alison Comrie in the LabMember.LabMemberInfo table to help manage permissions.


In [5]:
# insert the session using the original file name; later cells key on the copy name (nwb_file_name2)
sgi.insert_sessions(nwb_file_name)

insert SortGroup

In [6]:
# create the sort groups for this session (grouped by shank, per the method name)
sgs.SortGroup.set_group_by_shank(nwb_file_name=nwb_file_name2)

Insert into SpikeSortingRecordingSelection using the `insert_selection` method, which automatically generates a unique recording id.

In [7]:
# selection key for one recording: session copy, sort group, interval list,
# preprocessing parameter set, and team name
key = dict(
    nwb_file_name=nwb_file_name2,
    sort_group_name="0",
    interval_list_name="03_r1",
    preproc_param_name="default",
    team_name="Alison Comrie",
)

In [8]:
# returns the full key including the generated recording_id (see the output below)
sgs.SpikeSortingRecordingSelection.insert_selection(key)

This row has already been inserted into SpikeSortingRecordingSelection.


{'recording_id': 'wilbur20210326_.nwb_R_9492dd',
 'nwb_file_name': 'wilbur20210326_.nwb',
 'sort_group_name': '0',
 'interval_list_name': '03_r1',
 'preproc_param_name': 'default',
 'team_name': 'Alison Comrie'}

In [9]:
# display the current rows of SpikeSortingRecordingSelection
sgs.SpikeSortingRecordingSelection()

recording_id,nwb_file_name  name of the NWB file,sort_group_name,interval_list_name  descriptive name of this interval list,preproc_param_name,team_name
wilbur20210326_.nwb_R_9492dd,wilbur20210326_.nwb,0,03_r1,default,Alison Comrie


preprocess recording (filtering and referencing)

In [10]:
# DataJoint populate: compute and store entries for selections not yet processed
sgs.SpikeSortingRecording.populate()

In [11]:
# display the current rows of SpikeSortingRecording
sgs.SpikeSortingRecording()

recording_id,analysis_file_name  name of the file,object_id  Object ID for the processed recording in NWB file
wilbur20210326_.nwb_R_9492dd,wilbur20210326_XPEBDC33L0.nwb,837bc39a-82fc-492b-b366-d51cb98bc716


insert ArtifactDetectionSelection

In [12]:
# NOTE: recording_id is copied from the output of the
# SpikeSortingRecordingSelection.insert_selection call earlier in this notebook;
# use the id returned there if yours differs.
sgs.ArtifactDetectionSelection.insert_selection(
    {
        "recording_id": "wilbur20210326_.nwb_R_9492dd",
        "artifact_param_name": "default",
    }
)

This row has already been inserted into ArtifactDetectionSelection.


{'artifact_id': 'wilbur20210326_.nwb_A_1745f2',
 'recording_id': 'wilbur20210326_.nwb_R_9492dd',
 'artifact_param_name': 'default'}

Detect artifacts. Note that the output is stored in IntervalList; the artifact id is used as the `interval_list_name` when selecting the sorting below.

In [13]:
# DataJoint populate: run artifact detection for selections not yet processed
sgs.ArtifactDetection.populate()

In [14]:
# display the current rows of ArtifactDetection
sgs.ArtifactDetection()

artifact_id
wilbur20210326_.nwb_A_1745f2


insert SpikeSortingSelection. again use `insert_selection` method

In [15]:
# selection key for the sorting; interval_list_name is the artifact_id
# produced by the artifact detection step
key = dict(
    recording_id="wilbur20210326_.nwb_R_9492dd",
    sorter="mountainsort4",
    sorter_param_name="franklab_tetrode_hippocampus_30KHz",
    nwb_file_name=nwb_file_name2,
    interval_list_name="wilbur20210326_.nwb_A_1745f2",
)

In [16]:
# returns the full key including the generated sorting_id (see the output below)
sgs.SpikeSortingSelection.insert_selection(key)

This row has already been inserted into SpikeSortingSelection.


{'sorting_id': 'wilbur20210326_.nwb_S_f78ec9',
 'recording_id': 'wilbur20210326_.nwb_R_9492dd',
 'sorter': 'mountainsort4',
 'sorter_param_name': 'franklab_tetrode_hippocampus_30KHz',
 'nwb_file_name': 'wilbur20210326_.nwb',
 'interval_list_name': 'wilbur20210326_.nwb_A_1745f2'}

In [17]:
# display the current rows of SpikeSortingSelection
sgs.SpikeSortingSelection()

sorting_id,recording_id,sorter,sorter_param_name,nwb_file_name  name of the NWB file,interval_list_name  descriptive name of this interval list
wilbur20210326_.nwb_S_f78ec9,wilbur20210326_.nwb_R_9492dd,mountainsort4,franklab_tetrode_hippocampus_30KHz,wilbur20210326_.nwb,wilbur20210326_.nwb_A_1745f2


run spike sorting

In [18]:
# DataJoint populate: run the sorter for selections not yet processed
sgs.SpikeSorting.populate()

In [19]:
# display the current rows of SpikeSorting
sgs.SpikeSorting()

sorting_id,analysis_file_name  name of the file,object_id  Object ID for the sorting in NWB file,"time_of_sort  in Unix time, to the nearest second"
wilbur20210326_.nwb_S_f78ec9,wilbur20210326_FRIEZ5O5ZD.nwb,d160ce6c-9cd2-4d5a-962a-21a958045843,1700028234


There are two main ways of curating a spike sorting: computing quality metrics and applying thresholds, and manually applying curation labels. In either case, we first insert the sorting into CurationV1 using the `insert_curation` method.

In [20]:
# initial curation entry for the sorting; no parent curation, labels, or merge groups given
sgs.CurationV1.insert_curation(
    sorting_id="wilbur20210326_.nwb_S_f78ec9",
    description="testing sort",
)

[{'sorting_id': 'wilbur20210326_.nwb_S_f78ec9', 'curation_id': 0}]

In [21]:
# display the current rows of CurationV1
sgs.CurationV1()

sorting_id,curation_id,parent_curation_id,analysis_file_name  name of the file,object_id,merges_applied,description
wilbur20210326_.nwb_S_f78ec9,0,-1,wilbur20210326_VUWVJ7HJBL.nwb,a644b8c6-5f3b-4914-bb0b-ea349ef53826,False,testing sort
wilbur20210326_.nwb_S_f78ec9,1,0,wilbur20210326_TCDLNG711X.nwb,7b20aa0a-08d3-4dd2-b294-f557f82fa632,False,after metric curation


we will first do an automatic curation based on quality metrics

In [22]:
# selection key for metric-based curation of curation 0 of the sorting
key = dict(
    sorting_id="wilbur20210326_.nwb_S_f78ec9",
    curation_id=0,
    waveform_param_name="default_not_whitened",
    metric_param_name="franklab_default",
    metric_curation_param_name="default",
)

In [23]:
# returns the full key including the generated metric_curation_id (see the output below)
sgs.MetricCurationSelection.insert_selection(key)

This row has already been inserted into MetricCurationSelection.


{'metric_curation_id': 'wilbur20210326_.nwb_MC_d9e746',
 'sorting_id': 'wilbur20210326_.nwb_S_f78ec9',
 'curation_id': 0,
 'waveform_param_name': 'default_not_whitened',
 'metric_param_name': 'franklab_default',
 'metric_curation_param_name': 'default'}

In [24]:
# display the current rows of MetricCurationSelection
sgs.MetricCurationSelection()

metric_curation_id,sorting_id,curation_id,waveform_param_name  name of waveform extraction parameters,metric_param_name,metric_curation_param_name
wilbur20210326_.nwb_MC_d9e746,wilbur20210326_.nwb_S_f78ec9,0,default_not_whitened,franklab_default,default


In [25]:
# DataJoint populate: compute metric curation for selections not yet processed
sgs.MetricCuration.populate()

In [26]:
# display the current rows of MetricCuration
sgs.MetricCuration()

metric_curation_id,analysis_file_name  name of the file,object_id  Object ID for the metrics in NWB file
wilbur20210326_.nwb_MC_d9e746,wilbur20210326_6F8ZNYCG6E.nwb,2366e71b-1a6a-4031-8fe4-b3c2b240841f


To do another round of curation, fetch the relevant info (labels, merge groups, and metrics) and insert it back into CurationV1 using `insert_curation`.

In [27]:
# restriction key identifying the metric curation whose results are fetched next
key = dict(metric_curation_id="wilbur20210326_.nwb_MC_d9e746")

In [28]:
# curation labels from the metric curation; passed to insert_curation below
labels = sgs.MetricCuration.get_labels(key)

In [29]:
# merge groups from the metric curation; passed to insert_curation below
merge_groups = sgs.MetricCuration.get_merge_groups(key)

In [30]:
# quality metrics from the metric curation; reused for insert_curation and for the figurl metric list
metrics = sgs.MetricCuration.get_metrics(key)

In [31]:
# Store the metric-curation results as a child of curation 0.
# NOTE: in the recorded run this added a new row (curation_id 2) next to an
# existing row with the same parent and description, so re-running this cell
# appears to create duplicates.
sgs.CurationV1.insert_curation(
    sorting_id="wilbur20210326_.nwb_S_f78ec9",
    parent_curation_id=0,
    labels=labels,
    merge_groups=merge_groups,
    metrics=metrics,
    description="after metric curation",
)

Writing new NWB file wilbur20210326_VVA1RZ3YJQ.nwb


{'sorting_id': 'wilbur20210326_.nwb_S_f78ec9',
 'curation_id': 2,
 'parent_curation_id': 0,
 'analysis_file_name': 'wilbur20210326_VVA1RZ3YJQ.nwb',
 'object_id': 'c4acc8c7-8099-4ebf-a029-23bfd382e6be',
 'merges_applied': 'False',
 'description': 'after metric curation'}

In [32]:
# Optional cleanup, left disabled: uncommenting the next line deletes the CurationV1 rows
# with curation_id 1 (DataJoint deletes presumably cascade to dependent rows; confirm first).
# (sgs.CurationV1()&{'curation_id':1}).delete()

In [33]:
# display CurationV1 again to see the row added by the metric curation
sgs.CurationV1()

sorting_id,curation_id,parent_curation_id,analysis_file_name  name of the file,object_id,merges_applied,description
wilbur20210326_.nwb_S_f78ec9,0,-1,wilbur20210326_VUWVJ7HJBL.nwb,a644b8c6-5f3b-4914-bb0b-ea349ef53826,False,testing sort
wilbur20210326_.nwb_S_f78ec9,1,0,wilbur20210326_TCDLNG711X.nwb,7b20aa0a-08d3-4dd2-b294-f557f82fa632,False,after metric curation
wilbur20210326_.nwb_S_f78ec9,2,0,wilbur20210326_VVA1RZ3YJQ.nwb,c4acc8c7-8099-4ebf-a029-23bfd382e6be,False,after metric curation


Next we will do manual curation, which is done with figurl. To incorporate info from other stages of processing (e.g. metrics), we have to store it with kachery cloud and get a curation URI referring to it. This can be done with `generate_curation_uri`.

In [34]:
# generate a curation URI for curation 1 of the sorting
curation_uri = sgs.FigURLCurationSelection.generate_curation_uri(
    {
        "sorting_id": "wilbur20210326_.nwb_S_f78ec9",
        "curation_id": 1,
    }
)

In [35]:
# FigURL selection key: the generated URI plus the names of all fetched metrics to display
key = dict(
    sorting_id="wilbur20210326_.nwb_S_f78ec9",
    curation_id=1,
    curation_uri=curation_uri,
    metrics_figurl=list(metrics.keys()),
)

In [36]:
# skip_duplicates=True makes re-running this cell a no-op when the row already exists
sgs.FigURLCurationSelection.insert1(key,skip_duplicates=True)

In [37]:
# display the current rows of FigURLCurationSelection
sgs.FigURLCurationSelection()

sorting_id,curation_id,curation_uri  GitHub-based URI to a file to which the manual curation will be saved,metrics_figurl  metrics to display in the figURL
wilbur20210326_.nwb_S_f78ec9,1,gh://LorenFrankLab/sorting-curations/main/khl02007/test/curation.json,=BLOB=
wilbur20210326_.nwb_S_f78ec9,1,sha1://2800ea072728fd141d8e5bc88525ac0c6c137d04,=BLOB=


In [38]:
# DataJoint populate: generate the figurl for selections not yet processed
sgs.FigURLCuration.populate()

In [39]:
# display the current rows of FigURLCuration; the `url` column holds the figurl link
sgs.FigURLCuration()

sorting_id,curation_id,curation_uri  GitHub-based URI to a file to which the manual curation will be saved,url
wilbur20210326_.nwb_S_f78ec9,1,gh://LorenFrankLab/sorting-curations/main/khl02007/test/curation.json,"https://figurl.org/f?v=gs://figurl/sortingview-11&d=sha1://cdff947c7c78c23c6f75e73055454431d1e4a0d0&s={""initialSortingCuration"":""gh://LorenFrankLab/sorting-curations/main/khl02007/test/curation.json"",""sortingCuration"":""gh://LorenFrankLab/sorting-curations/main/khl02007/test/curation.json""}&label=wilbur20210326_.nwb_R_9492dd%20wilbur20210326_.nwb_S_f78ec9"
wilbur20210326_.nwb_S_f78ec9,1,sha1://2800ea072728fd141d8e5bc88525ac0c6c137d04,"https://figurl.org/f?v=gs://figurl/sortingview-11&d=sha1://cddad5ccdab9c0548feb03a8c10b67b3907530a3&s={""initialSortingCuration"":""sha1://2800ea072728fd141d8e5bc88525ac0c6c137d04"",""sortingCuration"":""sha1://2800ea072728fd141d8e5bc88525ac0c6c137d04""}&label=wilbur20210326_.nwb_R_9492dd%20wilbur20210326_.nwb_S_f78ec9"


Alternatively, you can specify the curation URI manually if you already have a `curation.json`.

In [40]:
# GitHub-based URI of an existing curation.json
gh_curation_uri = "gh://LorenFrankLab/sorting-curations/main/khl02007/test/curation.json"

# FigURL selection key using the manually specified URI; no metrics are displayed
key = {"sorting_id" : "wilbur20210326_.nwb_S_f78ec9",
       "curation_id" : 1,
       "curation_uri" : gh_curation_uri,
       "metrics_figurl" : []}

In [41]:
# skip_duplicates=True makes re-running this cell a no-op when the row already exists
sgs.FigURLCurationSelection.insert1(key,skip_duplicates=True)

In [42]:
# DataJoint populate: generate the figurl for the newly inserted selection
sgs.FigURLCuration.populate()

In [43]:
# display the current rows of FigURLCuration; the `url` column holds the figurl link
sgs.FigURLCuration()

sorting_id,curation_id,curation_uri  GitHub-based URI to a file to which the manual curation will be saved,url
wilbur20210326_.nwb_S_f78ec9,1,gh://LorenFrankLab/sorting-curations/main/khl02007/test/curation.json,"https://figurl.org/f?v=gs://figurl/sortingview-11&d=sha1://cdff947c7c78c23c6f75e73055454431d1e4a0d0&s={""initialSortingCuration"":""gh://LorenFrankLab/sorting-curations/main/khl02007/test/curation.json"",""sortingCuration"":""gh://LorenFrankLab/sorting-curations/main/khl02007/test/curation.json""}&label=wilbur20210326_.nwb_R_9492dd%20wilbur20210326_.nwb_S_f78ec9"
wilbur20210326_.nwb_S_f78ec9,1,sha1://2800ea072728fd141d8e5bc88525ac0c6c137d04,"https://figurl.org/f?v=gs://figurl/sortingview-11&d=sha1://cddad5ccdab9c0548feb03a8c10b67b3907530a3&s={""initialSortingCuration"":""sha1://2800ea072728fd141d8e5bc88525ac0c6c137d04"",""sortingCuration"":""sha1://2800ea072728fd141d8e5bc88525ac0c6c137d04""}&label=wilbur20210326_.nwb_R_9492dd%20wilbur20210326_.nwb_S_f78ec9"


Once you have applied manual curation (curation labels and merge groups), you can store the result as NWB by inserting another row into CurationV1. You can then do more rounds of curation if you want.

In [44]:
# NOTE: in the recorded run this raised "File not found" (see the output below); a
# curation.json must exist at this URI before labels can be fetched. On success this
# rebinds `labels`, replacing the value fetched from MetricCuration.
labels = sgs.FigURLCuration.get_labels("gh://LorenFrankLab/sorting-curations/main/khl02007/test/curation.json")

Exception: File not found: https://raw.githubusercontent.com/LorenFrankLab/sorting-curations/main/khl02007/test/curation.json

In [None]:
# fetch merge groups from the same curation.json; rebinds `merge_groups` from the metric curation step
merge_groups = sgs.FigURLCuration.get_merge_groups("gh://LorenFrankLab/sorting-curations/main/khl02007/test/curation.json")

In [None]:
# Store the manual (figurl) curation as a child of curation 1.
# `metrics` is still the value fetched from MetricCuration earlier in the notebook.
sgs.CurationV1.insert_curation(
    sorting_id="wilbur20210326_.nwb_S_f78ec9",
    parent_curation_id=1,
    labels=labels,
    merge_groups=merge_groups,
    metrics=metrics,
    description="after figurl curation",
)