# Sorting Notebook

This notebook downloads and spike-sorts electrophysiology data recorded with an Intan headstage and stored in the .rhd format.

The data is an intracranial mouse recording from a 16-channel microarray. The paper can be found here: https://doi.org/10.1371/journal.pone.0221510


# Getting Set Up

Open a terminal and make sure the `sorter` conda environment is active.

```
conda deactivate
conda activate sorter
```

Navigate to the codespace directory and list the folders available on Box.

```
cd ~/codespace
box folders:items 352606395707
```

Download the patient-level folder, which contains multiple session folders.
Replace `123456789` with the correct Box folder ID.
```
box folders:download 123456789 --destination="data"
```

When finished, upload the sorted data back to Box:

```
box folders:upload "/home/marco/codespace/data/Intan_RDH_2000/Session2/sorted"   --parent-folder 352604968054
```

In [19]:
# List the items in Box folder 352606395707 to find the patient folder IDs.
!box folders:items 352606395707

[2m----- Folder 352606396623 -----[22m
[36mType:[39m folder
[36mID:[39m '352606396623'
[36mSequence ID:[39m '0'
[36mETag:[39m '0'
[36mName:[39m Intan_RDH_2000

[2m----- Folder 354522525287 -----[22m
[36mType:[39m folder
[36mID:[39m '354522525287'
[36mSequence ID:[39m '0'
[36mETag:[39m '0'
[36mName:[39m Intan_RDH_2000 (1)


In [None]:
# Optional: add helpers to the import path if needed.
# Uncomment the lines below when the helper modules under
# ~/codespace/sorting_script are not importable; the last line displays
# the resulting sys.path so you can confirm the entry was added.

# import sys
# from pathlib import Path
# sys.path.append(str(Path.home() / "codespace/sorting_script"))
# sys.path


['/home/marco/miniconda3/envs/sorter/lib/python311.zip',
 '/home/marco/miniconda3/envs/sorter/lib/python3.11',
 '/home/marco/miniconda3/envs/sorter/lib/python3.11/lib-dynload',
 '',
 '/home/marco/miniconda3/envs/sorter/lib/python3.11/site-packages',
 '/home/marco/codespace',
 '/home/marco/codespace/sorting_script/scripts',
 '/home/marco/codespace/sorting_script']

In [None]:
from scripts import box_managment

In [None]:
# Download data if needed

from pathlib import Path
import os

# ---- USER INPUT ONLY ----
# Name of the patient folder to fetch from Box.
patient_name = "Intan_RDH_2000"
# -------------------------

# 1) Download full patient folder (all sessions) from Box
patient_local_root = box_managment.download_patient(patient_name)

# 2) When you're done sorting all the sessions you care about,
#    push ALL sessions' `sorted/` folders back to Box:
# NOTE(review): `upload_all_sorted` is not imported in this notebook;
# presumably it lives in `box_managment` — confirm before uncommenting.
# upload_all_sorted(patient_name)


In [1]:
# Set Patient and Session
# These two names select the local session folder
# (~/codespace/data/<patient>/<session>) used by the cells below.
patient = "Intan_RDH_2000"
session = "Session1"

In [None]:
import os
from pathlib import Path

# Set base paths. Everything is derived from `patient` and `session`, so
# switching sessions only requires editing the configuration cell.
codespace = Path.home() / "codespace"
base_folder = codespace / "data"
session_location = base_folder / patient / session
sorted_data = session_location / "sorted"
sorter_output_folder = sorted_data / "sorter_folder"

analyzer_folder = sorted_data / "analyzer_folder"

# Work from inside the session folder (raises if it has not been downloaded).
os.chdir(session_location)

# Dynamically retrieve the .rhd file in the session folder.
# glob() yields entries in arbitrary filesystem order, so sort the matches:
# "the first one" is then the same file on every run and every machine.
raw_folder = session_location / "raw"
rhd_files = sorted(raw_folder.glob("*.rhd"))
if len(rhd_files) == 0:
    raise FileNotFoundError(f"No .rhd file found in {raw_folder}")
elif len(rhd_files) > 1:
    print(f"Warning: Multiple .rhd files found in {raw_folder}, using the first one: {rhd_files[0].name}")
intan_file = rhd_files[0]
print(f"Found Intan file: {intan_file}")

Found Intan file: /home/marco/codespace/data/Intan_RDH_2000/Session1/raw/Intan RHD file1.rhd


# Load recording into spike interface

In [None]:
# Load Intan
from spikeinterface.sorters import run_sorter
import spikeinterface.full as si
import probeinterface as pi
from pathlib import Path

# Load Recording, creates recording object
# `intan_file` is the .rhd path located by the path-setup cell.
# NOTE(review): stream_id "0" is presumably the amplifier-channel stream —
# confirm for this file.
rec = si.read_intan(intan_file, stream_id = "0")
# Bare expression: shows the recording summary as the cell output.
rec

In [None]:
# Optional: create a custom probe geometry if needed.
# Uncomment this cell only when the probe JSON file does not exist yet.
# It builds a 2D probe with 16 circular contacts in a single column
# (x = 0, y = i * 50), maps contact i to device channel i, and writes the
# result to `probe_path`.

# import probeinterface as pi

# probe = pi.Probe(ndim=2)
# positions = []

# for i in range(16):
#     positions.append([0, i * 50])
# probe.set_contacts(positions = positions, shapes = "circle", shape_params = {'radius':5})

# probe.set_device_channel_indices(range(16))
# probe.set_contact_ids([f"ch{i}" for i in range(16)])

# probe_path = codespace / "sorting_script/Custom_Probes/neuronexus-A16x1_2mm_50_177_A16.json"
# pi.write_probeinterface(probe_path, probe)


In [38]:
# Attach probe to recording object

import probeinterface as pi
# from probeinterface.plotting import plot_probe, plot_probegroup

probe_path = codespace / "sorting_script/Custom_Probes/neuronexus-A16x1_2mm_50_177_A16.json"

# Load from JSON
probegroup = pi.read_probeinterface(probe_path)

# Extract the single Probe for SpikeInterface
probe = probegroup.probes[0]

# Validate BEFORE attaching. `set_probe` returns a recording restricted to the
# channels wired to the probe (or raises on out-of-range channel indices), so
# a check made after `rec` is reassigned can no longer see a mismatch.
n_rec = rec.get_num_channels()
n_probe = probe.get_contact_count()

if n_probe != n_rec:
    raise ValueError(f"Probe contacts ({n_probe}) != recording channels ({n_rec}). "
                     f"Pick the correct probe variant or subset/remap accordingly.")

# Attach to recording
rec = rec.set_probe(probe)


# Load sorter and analyzer if they exist

In [None]:
# Load sorter and analyzer if they exist
import spikeinterface.full as si
import spikeinterface.sorters as ss

# Reuse the folders derived from `patient` / `session` in the path-setup cell
# instead of hard-coded absolute paths, so this cell follows the selected
# session and `analyzer_folder` stays a Path for the cells below.
sorter_folder = sorter_output_folder
sorting_KS4 = ss.read_sorter_folder(sorter_folder)

sorting_analyzer = si.load_sorting_analyzer(analyzer_folder)

# If they do not exist, run the sorter and create analyzer

In [None]:
# Run Kilosort 4 on the probe-attached recording. This produces both the
# in-memory sorting object and the on-disk sorter folder.
# NOTE(review): the analyzer cell applies `unsigned_to_signed` to its copy of
# the recording, but `rec` is passed here without it — confirm that is intended.
kilosort_options = dict(
    sorter_name="kilosort4",
    recording=rec,
    folder=sorter_output_folder,
    remove_existing_folder=True,
    verbose=True,
)

sorting_KS4 = run_sorter(**kilosort_options)

write_binary_recording (no parallelization):   0%|          | 0/1201 [00:00<?, ?it/s]

kilosort.run_kilosort:  
kilosort.run_kilosort: Computing preprocessing variables.
kilosort.run_kilosort: ----------------------------------------
kilosort.run_kilosort: N samples: 24000480
kilosort.run_kilosort: N seconds: 1200.024
kilosort.run_kilosort: N batches: 401
kilosort.run_kilosort: Preprocessing filters computed in 0.94s; total 0.94s
kilosort.run_kilosort:  
kilosort.run_kilosort: Resource usage after preprocessing
kilosort.run_kilosort: ********************************************************
kilosort.run_kilosort: CPU usage:     6.70 %
kilosort.run_kilosort: Mem used:      5.20 %     |       3.22 GB
kilosort.run_kilosort: Mem avail:    58.88 / 62.10 GB
kilosort.run_kilosort: ------------------------------------------------------
kilosort.run_kilosort: GPU usage:    `conda install pynvml` for GPU usage
kilosort.run_kilosort: GPU memory:    1.86 %     |      0.27   /    14.58 GB
kilosort.run_kilosort: Allocated:     0.06 %     |      0.01   /    14.58 GB
kilosort.run_kilosor

kilosort4 run time 43.83s


In [None]:
# Create Sorting Analyzer
import spikeinterface.full as si

# Re-read the raw file and build the signal chain the analyzer works on:
# attach probe -> unsigned-to-signed conversion -> band-pass filter.
recording = si.read_intan(intan_file, stream_id="0")
recording = recording.set_probe(probe, in_place=False)
recording = si.unsigned_to_signed(recording)
recording_filtered = si.bandpass_filter(recording)

# Shared chunking / parallelism settings for the heavy computations.
job_kwargs = dict(n_jobs=-1, progress_bar=True, chunk_duration="1s")

sorting_analyzer = si.create_sorting_analyzer(
    sorting=sorting_KS4,
    recording=recording_filtered,
    folder=analyzer_folder,
    overwrite=True,
    format="binary_folder",
    **job_kwargs,
)

# Extensions are computed in this exact order; each entry is
# (extension name, keyword arguments passed to `compute`).
extension_plan = [
    ("random_spikes", dict(method="uniform", max_spikes_per_unit=500)),
    ("waveforms", job_kwargs),
    ("templates", job_kwargs),
    ("noise_levels", {}),
    ("unit_locations", dict(method="monopolar_triangulation")),
    ("isi_histograms", {}),
    ("correlograms", dict(window_ms=100, bin_ms=5)),
    ("principal_components", dict(n_components=3, mode="by_channel_global", whiten=True, **job_kwargs)),
    ("quality_metrics", dict(metric_names=["snr", "firing_rate"])),
    ("template_similarity", {}),
]
for extension_name, extension_params in extension_plan:
    sorting_analyzer.compute(extension_name, **extension_params)

# Kept as the final bare expression so the cell still displays its result.
sorting_analyzer.compute("spike_amplitudes", **job_kwargs)

# Now that the sorter output and the analyzer both exist, run the curation GUI from the terminal
## Make sure to replace the path with the correct analyzer folder


```

sigui --mode=web --curation "/home/marco/codespace/data/Intan_RDH_2000/Session1/sorted/analyzer_folder"

```

In [None]:
# Load the curation saved from the GUI session, then reload the analyzer

import json
# Import the same `spikeinterface.full` namespace as the other cells, so `si`
# is not rebound to a different module when cells are re-run out of order.
import spikeinterface.full as si
from spikeinterface.curation import apply_curation
from pathlib import Path

# Curation file expected inside the analyzer folder.
curation_filepath = Path(analyzer_folder) / "spikeinterface_gui" / "curation_data.json"
if not curation_filepath.is_file():
    raise FileNotFoundError(
        f"No curation file found at {curation_filepath}. "
        "Run the curation GUI and save the curation before running this cell."
    )

with open(curation_filepath, "r") as f:
    curation_dict = json.load(f)

sorting_analyzer = si.load_sorting_analyzer(folder=analyzer_folder)

In [18]:
# Apply the manual curation and persist the curated analyzer as Zarr.

out = Path(sorted_data) / "cleaned_analyzer"

curated_in_memory = apply_curation(sorting_analyzer, curation_dict_or_model=curation_dict)
clean_analyzer = curated_in_memory.save_as(format="zarr", folder=out)

# Report the destination with a ".zarr" suffix, adding it only when `out`
# does not already carry one.
if out.suffix == ".zarr":
    written_path = out
else:
    written_path = out.with_suffix(".zarr")
print("Wrote:", written_path)

Wrote: /home/marco/codespace/data/Intan_RDH_2000/Session1/sorted/cleaned_analyzer.zarr


In [43]:
# Take the sorting object from the curated analyzer; the cells below query it
# for unit ids and spike trains.
sorting_obj = clean_analyzer.sorting

In [44]:
# Summarize the curated sorting: its sampling rate and the unit ids it holds.
sampling_frequency = sorting_obj.sampling_frequency
unit_ids = sorting_obj.unit_ids

print(f"Unit IDs: {unit_ids}")
print(f"Sampling Frequency: {sampling_frequency} Hz")

Unit IDs: [0 2]
Sampling Frequency: 20000.0 Hz


In [51]:
# Check out the spike times for the unit

# Curation can leave no units at all; fail with a clear message rather than
# an IndexError on `unit_ids[0]`.
if len(unit_ids) == 0:
    raise ValueError("The curated sorting contains no units; nothing to inspect.")

unit_to_get = unit_ids[0] # Get the first unit

# Fetch the spike train once, with an explicit segment. The values are sample
# indices; dividing by the sampling frequency below converts them to seconds.
spike_train_indices = sorting_obj.get_unit_spike_train(unit_id=unit_to_get, segment_index=0) # segment_index=0 for single-segment data

# Alias kept so code that uses the old name still works (previously this was
# a second, identical fetch).
spike_times = spike_train_indices

sampling_frequency = sorting_obj.get_sampling_frequency()

spike_times_sec = spike_train_indices / sampling_frequency

print(spike_times_sec[:10])



[ 36.63165 134.88705 165.07295 290.03875 356.3038  385.10455 393.86645
 432.13435 449.13645 471.8905 ]


In [52]:
# List Box folder 352606395707 again to look up the patient folder ID.
!box folders:items 352606395707

[2m----- Folder 352606396623 -----[22m
[36mType:[39m folder
[36mID:[39m '352606396623'
[36mSequence ID:[39m '0'
[36mETag:[39m '0'
[36mName:[39m Intan_RDH_2000

[2m----- Folder 354522525287 -----[22m
[36mType:[39m folder
[36mID:[39m '354522525287'
[36mSequence ID:[39m '0'
[36mETag:[39m '0'
[36mName:[39m Intan_RDH_2000 (1)


In [53]:
# List the session folders inside the Intan_RDH_2000 patient folder.
!box folders:items 352606396623

[2m----- Folder 352605477299 -----[22m
[36mType:[39m folder
[36mID:[39m '352605477299'
[36mSequence ID:[39m '0'
[36mETag:[39m '0'
[36mName:[39m Session1

[2m----- Folder 352604968054 -----[22m
[36mType:[39m folder
[36mID:[39m '352604968054'
[36mSequence ID:[39m '0'
[36mETag:[39m '0'
[36mName:[39m Session2


In [54]:
# List the contents of the Session1 folder to find the ID of its `sorted` folder.
!box folders:items 352605477299

[2m----- Folder 352607353389 -----[22m
[36mType:[39m folder
[36mID:[39m '352607353389'
[36mSequence ID:[39m '0'
[36mETag:[39m '0'
[36mName:[39m raw

[2m----- Folder 354623627238 -----[22m
[36mType:[39m folder
[36mID:[39m '354623627238'
[36mSequence ID:[39m '0'
[36mETag:[39m '0'
[36mName:[39m sorted


In [None]:
!box folders:upload 354623627238

[2m----- Folder 354623330278 -----[22m
[36mType:[39m folder
[36mID:[39m '354623330278'
[36mSequence ID:[39m '0'
[36mETag:[39m '0'
[36mName:[39m analyzer_folder

[2m----- Folder 354622496806 -----[22m
[36mType:[39m folder
[36mID:[39m '354622496806'
[36mSequence ID:[39m '0'
[36mETag:[39m '0'
[36mName:[39m sorter_folder


In [58]:
# Upload the curated Zarr analyzer into the Session1 `sorted` folder on Box
# (-p gives the parent folder ID).
!box folders:upload "/home/marco/codespace/data/Intan_RDH_2000/Session1/sorted/cleaned_analyzer.zarr" -p 354623627238

[36mType:[39m folder
[36mID:[39m '355655922049'
[36mSequence ID:[39m '0'
[36mETag:[39m '0'
[36mName:[39m cleaned_analyzer.zarr
[36mCreated At:[39m '2025-12-12T11:40:22-08:00'
[36mModified At:[39m '2025-12-12T11:40:22-08:00'
[36mDescription:[39m ''
[36mSize:[39m 0
[36mPath Collection:[39m
[36m    Total Count:[39m 5
[36m    Entries:[39m
[36m        -[39m
[36m            Type:[39m folder
[36m            ID:[39m '0'
[36m            Sequence ID:[39m null
[36m            ETag:[39m null
[36m            Name:[39m All Files
[36m        -[39m
[36m            Type:[39m folder
[36m            ID:[39m '352606395707'
[36m            Sequence ID:[39m '0'
[36m            ETag:[39m '0'
[36m            Name:[39m Cloud_Sorter
[36m        -[39m
[36m            Type:[39m folder
[36m            ID:[39m '352606396623'
[36m            Sequence ID:[39m '0'
[36m            ETag:[39m '0'
[36m            Name:[39m Intan_RDH_2000
[36m        -[39m
[36m