# Sorting Notebook

This notebook will download and sort electrophysiology collected using an Intan headstage, in the glx format

The data is intracranial human recordings, and can be found here: https://datadryad.org/dataset/doi%3A10.7272/Q6ST7N3B#methods




In [1]:
from pathlib import Path
import requests, zipfile, io

url = "https://datadryad.org/downloads/file_stream/1742565"
base_folder = Path.cwd() / "Data"
zip_path = base_folder / "NP03_B02_g0_t0.imec0.ap.bin"
# extract_to = base_folder / "intan_data"


print("Downloading File...")
response = requests.get(url, stream=True)
response.raise_for_status()

# with open(zip_path, "wb") as f:
#     for chunk in response.iter_content(chunk_size=8192):
#         f.write(chunk)

print(f"Download complete: {zip_path}")

# 3. Unzip to a folder
# print("Extracting files...")
# with zipfile.ZipFile(zip_path, 'r') as zip_ref:
#     zip_ref.extractall(extract_to)

# print(f"Files extracted to: {extract_to}")

Downloading File...


HTTPError: 403 Client Error: Forbidden for url: https://datadryad.org/downloads/file_stream/1742565

# Load recording into spike interface

In [1]:
# Load Intan
from spikeinterface.sorters import run_sorter
import spikeinterface.full as si
import probeinterface as pi
from pathlib import Path

base_folder = Path.cwd() / "Data"
glx_folder = base_folder / "glx"

# Load Recording
rec = si.read_spikeglx(folder_path = glx_folder, stream_id="imec0.ap")
rec


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
probegroup = rec.get_probegroup()
probe = probegroup.probes[0]

n_rec = rec.get_num_channels()
n_probe_contacts = probe.get_contact_count()

assert n_rec <= n_probe_contacts, (
    f"Recording channels ({n_rec}) cannot exceed probe sites ({n_probe_contacts})."
)


In [None]:
import os
from pathlib import Path
import spikeinterface as si
import spikeinterface.preprocessing as spre
from spikeinterface.sorters import run_sorter

# (optional) reduce CUDA fragmentation before importing torch-heavy bits elsewhere
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True,max_split_size_mb:256"

# ----- cache to on-disk binary (parallelized) -----
cache_dir = Path("cache_np1_binary")
cache_dir.mkdir(parents=True, exist_ok=True)

rec_pp = spre.bandpass_filter(rec, freq_min=300, freq_max=6000)
# If you common-reference here, don't also CR inside KS4.
# rec_pp = spre.common_reference(rec_pp, operator="median")

rec_pp.save_to_folder(
    folder=str(cache_dir),
    format="binary",          # <-- your SI wants "binary" (not "binary_folder")
    n_jobs=12,
    chunk_duration="1s",
    progress_bar=True,
    overwrite=True
)

# ----- reopen cached extractor (portable across SI versions) -----
try:
    # Newer convenience (may not exist on your build)
    rec_cached = si.read_binary_folder(cache_dir)
except Exception:
    try:
        # Works on many SI versions; auto-detects the saved extractor type
        rec_cached = si.load_extractor(cache_dir)
    except Exception:
        # Lowest-level fallback
        from spikeinterface.core import BinaryFolderRecording
        rec_cached = BinaryFolderRecording(cache_dir)

# ----- run Kilosort4 with tighter VRAM budget -----
ks4_params = dict(
    batch_size=64_000,   # if OOM, drop to 48_000 or 32_000
    # device='cuda',
    # num_workers=4,      # only if your SI build exposes it
)

sorting = run_sorter(
    sorter_name="kilosort4",
    recording=rec_cached,
    folder="Data/glx_output",
    verbose=True,
    **ks4_params
)


write_binary_recording 
engine=process - n_jobs=12 - samples_per_chunk=30,000 - chunk_memory=21.97 MiB - total_memory=263.67 MiB - chunk_duration=1.00s (999.98 ms)


write_binary_recording (workers: 12 processes): 100%|██████████| 765/765 [02:14<00:00,  5.69it/s]
kilosort.run_kilosort:  
kilosort.run_kilosort: Computing preprocessing variables.
kilosort.run_kilosort: ----------------------------------------
kilosort.run_kilosort: N samples: 22942599
kilosort.run_kilosort: N seconds: 764.7381729422808
kilosort.run_kilosort: N batches: 359
kilosort.run_kilosort: Preprocessing filters computed in 1.84s; total 1.85s
kilosort.run_kilosort:  
kilosort.run_kilosort: Resource usage after preprocessing
kilosort.run_kilosort: ********************************************************
kilosort.run_kilosort: CPU usage:     6.60 %
kilosort.run_kilosort: Mem used:      7.80 %     |       4.85 GB
kilosort.run_kilosort: Mem avail:    57.25 / 62.10 GB
kilosort.run_kilosort: ------------------------------------------------------
kilosort.run_kilosort: GPU usage:    `conda install pynvml` for GPU usage
kilosort.run_kilosort: GPU memory:   38.63 %     |      5.63   /   

In [None]:
# Run Kilosort
sorting_KS4 = run_sorter(
    sorter_name="kilosort4",
    recording=rec,
    folder="Data/glx_output",
    verbose=True
)

write_binary_recording (no parallelization): 100%|██████████| 765/765 [02:13<00:00,  5.71it/s]
kilosort.run_kilosort:  
kilosort.run_kilosort: Computing preprocessing variables.
kilosort.run_kilosort: ----------------------------------------
kilosort.run_kilosort: N samples: 22942599
kilosort.run_kilosort: N seconds: 764.7381729422808
kilosort.run_kilosort: N batches: 383
kilosort.run_kilosort: Preprocessing filters computed in 2.10s; total 2.10s
kilosort.run_kilosort:  
kilosort.run_kilosort: Resource usage after preprocessing
kilosort.run_kilosort: ********************************************************
kilosort.run_kilosort: CPU usage:     7.50 %
kilosort.run_kilosort: Mem used:      6.60 %     |       4.10 GB
kilosort.run_kilosort: Mem avail:    58.00 / 62.10 GB
kilosort.run_kilosort: ------------------------------------------------------
kilosort.run_kilosort: GPU usage:    `conda install pynvml` for GPU usage
kilosort.run_kilosort: GPU memory:   12.91 %     |      1.88   /    14

In [None]:
# Create Sorting Analyzer
import spikeinterface.full as si

base_folder = Path.cwd() / "Data"
intan_file = base_folder / "intan_data/Intan RHD 2000 file of electrophysiological recordings/Intan RHD file1.rhd"

# Load Recording
recording = si.read_intan(intan_file, stream_id = "0")
recording = recording.set_probe(probe, in_place=False)
recording = si.unsigned_to_signed(recording)
recording_filtered = si.bandpass_filter(recording)

job_kwargs = dict(n_jobs=-1, progress_bar=True, chunk_duration="1s")

sorting_analyzer = si.create_sorting_analyzer(sorting_KS4, recording_filtered, overwrite = True,
format="binary_folder", folder="Data/intan_analyzer",
**job_kwargs )
sorting_analyzer.compute("random_spikes", method="uniform", max_spikes_per_unit=500)
sorting_analyzer.compute("waveforms", **job_kwargs)
sorting_analyzer.compute("templates", **job_kwargs)
sorting_analyzer.compute("noise_levels")
sorting_analyzer.compute("unit_locations", method = "monopolar_triangulation")
sorting_analyzer.compute("isi_histograms")
sorting_analyzer.compute("correlograms", window_ms=100, bin_ms=5)
sorting_analyzer.compute("principal_components", n_components=3, mode="by_channel_global", whiten=True, **job_kwargs)
sorting_analyzer.compute("quality_metrics", metric_names=["snr", "firing_rate"])
sorting_analyzer.compute("template_similarity")
sorting_analyzer.compute("spike_amplitudes", **job_kwargs)

estimate_sparsity (workers: 16 processes): 100%|██████████| 1201/1201 [00:00<00:00, 1709.69it/s]
compute_waveforms (workers: 16 processes): 100%|██████████| 1201/1201 [00:01<00:00, 1047.96it/s]
noise_level (no parallelization): 100%|██████████| 20/20 [00:00<00:00, 96.59it/s]
Fitting PCA: 100%|██████████| 29/29 [00:00<00:00, 91.85it/s]
Projecting waveforms: 100%|██████████| 29/29 [00:00<00:00, 549.37it/s]
spike_amplitudes (workers: 16 processes): 100%|██████████| 1201/1201 [00:01<00:00, 648.90it/s]


<spikeinterface.postprocessing.spike_amplitudes.ComputeSpikeAmplitudes at 0x78cb58ff4790>

In [None]:
# Run Curation GUI
import spikeinterface.full as si
from spikeinterface_gui import run_mainwindow

sorting_analyzer = si.load_sorting_analyzer(folder="Data/intan_analyzer")

run_mainwindow(sorting_analyzer, mode="web", curation=True)


   pip install jupyter_bokeh

or:
    conda install jupyter_bokeh

and try again.
  pn.extension("tabulator")



   pip install jupyter_bokeh

or:
    conda install jupyter_bokeh

and try again.
  pn.extension("tabulator")



   pip install jupyter_bokeh

or:
    conda install jupyter_bokeh

and try again.
  pn.extension("tabulator")



   pip install jupyter_bokeh

or:
    conda install jupyter_bokeh

and try again.
  pn.extension("tabulator")



   pip install jupyter_bokeh

or:
    conda install jupyter_bokeh

and try again.
  pn.extension("gridstack")


Found available port: 56439
Launching server at http://localhost:56439


<spikeinterface_gui.backend_panel.PanelMainWindow at 0x78cbf0144c10>

ERROR:tornado.application:Exception in callback functools.partial(<bound method IOLoop._discard_future_result of <tornado.platform.asyncio.AsyncIOMainLoop object at 0x78cc081efd10>>, <Task finished name='Task-31649' coro=<ServerSession.with_document_locked() done, defined at /home/ubuntu/miniconda3/envs/sorter/lib/python3.11/site-packages/bokeh/server/session.py:77> exception=RuntimeError("Models must be owned by only a single document, Range1d(id='33ba933f-b522-4932-91cf-a18b40c98108', ...) is already in a doc")>)
Traceback (most recent call last):
  File "/home/ubuntu/miniconda3/envs/sorter/lib/python3.11/site-packages/tornado/ioloop.py", line 758, in _run_callback
    ret = callback()
          ^^^^^^^^^^
  File "/home/ubuntu/miniconda3/envs/sorter/lib/python3.11/site-packages/tornado/ioloop.py", line 782, in _discard_future_result
    future.result()
  File "/home/ubuntu/miniconda3/envs/sorter/lib/python3.11/site-packages/bokeh/server/session.py", line 94, in _needs_document_lock_w

In [2]:
# python
from one.api import ONE
from pathlib import Path

DATA = Path("/home/ubuntu/ibl_cache")
DATA.mkdir(parents=True, exist_ok=True)

one = ONE(base_url='https://openalyx.internationalbrainlab.org', cache_dir=DATA)

# Find probe insertions that include hippocampal CA1
ins = one.alyx.rest('insertions', 'list', atlas_acronym='CA1', project='brainwide')
assert ins, "No CA1 insertions found."

pid = ins[0]['id']                         # pick one
eid = one.pid2eid(pid)[0]                  # session id
probe_label = ins[0]['name']               # e.g. 'probe00'

print("Picked:", pid, eid, probe_label)

# List raw AP datasets for that probe (SpikeGLX compressed triplet)
dsets = one.list_datasets(
    eid,
    collection=f'raw_ephys_data/{probe_label}',
    filename='*_imec0.ap.*'
)
print("\nRaw AP files in collection:")
for d in dsets:
    print(" -", d)

# Download them (this will fetch .cbin, .meta, .ch)
files, _ = one.load_datasets(eid, dsets, download_only=True)
print("\nDownloaded:")
for f in files:
    print(" -", f)


ONE Parameter files location: /home/ubuntu/.one


HTTPError: [Errno 400] https://openalyx.internationalbrainlab.org/auth-token: 'Alyx authentication failed with credentials: user = intbrainlab, password = None'