<a href="https://colab.research.google.com/github/IanQS/neuromatch_project/blob/main/NMA_2023_IBL_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

First, install the IBL pipeline package, which NMA curated to contain only the behavioral data; after that we will have to import the database.

In [None]:
# install IBL pipeline package to access and navigate the pipeline
!pip install --quiet nma-ibl

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.0/99.0 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.8/44.8 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m29.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.4/77.4 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.3/4.3 MB[0m [31m46.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m45.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m82.1/82.1 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[?25h

Configure datajoint to link up with the NMA IBL database

In [None]:
import datajoint as dj
# Connection settings for the public IBL DataJoint server, applied one key at
# a time. The user/password pair is the 'ibl-public' login, not a personal
# secret. NOTE(review): presumably these must be set before the schema
# modules are imported, since the recorded output shows the connection being
# opened when this cell runs -- keep this ahead of the `nma_ibl` import.
for _setting, _value in (
    ('database.host', 'datajoint-public.internationalbrainlab.org'),
    ('database.user', 'ibl-public'),
    ('database.password', 'ibl-public'),
):
    dj.config[_setting] = _value

from nma_ibl import reference, subject, action, acquisition, data, behavior, behavior_analyses

[2023-07-14 14:34:36,576][INFO]: Connecting ibl-public@datajoint-public.internationalbrainlab.org:3306
INFO:datajoint:Connecting ibl-public@datajoint-public.internationalbrainlab.org:3306
[2023-07-14 14:34:36,979][INFO]: Connected ibl-public@datajoint-public.internationalbrainlab.org:3306
INFO:datajoint:Connected ibl-public@datajoint-public.internationalbrainlab.org:3306


In [None]:
#imports here
import numpy as np
import matplotlib.pyplot as plt

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

Below we pip-install the libraries we need for working with the IBL ephys data (`ONE-api` and `ibllib`).

In [None]:
!pip install -q ONE-api ibllib

import logging

# Raise the threshold of the 'ibllib' logger to CRITICAL so that lower-severity
# records from that logger are dropped and do not clutter the notebook output.
logging.getLogger('ibllib').setLevel(logging.CRITICAL)
# getLogger returns the same object for the same name; keep it bound to
# `logger` in case other cells refer to it.
logger = logging.getLogger('ibllib')

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m104.3/104.3 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.7/8.7 MB[0m [31m63.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.8/57.8 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m135.7/135.7 kB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m237.9/237.9 kB[0m [31m17.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m176.4/176.4 kB[0m [31m13.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

We should load the data from NMA (IBL Behavior) and the data from IBL themselves (the cleaned up spikes) below in a way that makes sense

In [None]:
#here we run imports for the IBL ephys data
from one.api import ONE
from brainbox.io.one import SpikeSortingLoader
from ibllib.atlas import AllenAtlas

Now we set up the interface with the ephys database: first we authenticate with the public password, then we make a list of **probe IDs** in ```pids```, and finally we load a particular **pid** with ```SpikeSortingLoader()```.

In [None]:
# Point ONE at the Open Alyx server and log in with the public password.
# The recorded output reports the connection as user "intbrainlab".
# NOTE(review): silent=True presumably suppresses the interactive set-up
# prompts -- confirm against the ONE documentation.
ONE.setup(base_url='https://openalyx.internationalbrainlab.org', silent=True)
one = ONE(password='international')
# Alternative left by the author (no explicit password); currently disabled.
# one = ONE()
# Atlas object handed to the spike-sorting loader below. On a fresh machine the
# recorded cell output shows Allen atlas .nrrd files being downloaded into the
# ONE cache directory.
ba = AllenAtlas()
# Probe IDs (pids) of the recordings of interest; only the first is used below.
pids = [
   '1a276285-8b0e-4cc9-9f0a-a3a002978724',
   '1e104bf4-7a24-4624-a5b2-c2c8289c0de7',
   '5d570bf6-a4c6-4bf1-a14b-2c878c84ef0e',
   '5f7766ce-8e2e-410c-9195-6bf089fea4fd',
   '6638cfb3-3831-4fc2-9327-194b76cf22e1',
   '749cb2b7-e57e-4453-a794-f6230e4d0226',
   'd7ec0892-0a6c-4f4f-9d8f-72083692af5c',
   'da8dfec1-d265-44e8-84ce-6ae9c109b8bd',
   'dab512bd-a02d-4c1f-8dbc-9155a163efc0',
   'dc7e9403-19f7-409f-9240-05ee57cb7aea',
   'e8f9fba4-d151-4b00-bee7-447f0f3e752c',
   'eebcaf65-7fa4-4118-869d-a084e84530e2',
   'fe380793-8035-414e-b000-09bfe5ece92a',
]
# Work with a single probe for now: the first entry of the list.
pid = pids[0]
# pid2eid returns a pair, unpacked here as `eid` and `name`.
# NOTE(review): presumably the session (experiment) ID and the probe label -- verify.
eid, name = one.pid2eid(pid)

# Build a loader for this probe and fetch its spike-sorting output. The recorded
# output shows spikes.* and clusters.* .npy files being downloaded.
sl = SpikeSortingLoader(pid=pid, one=one, atlas=ba)
spikes, clusters, channels = sl.load_spike_sorting()
# `clusters` is rebound to the result of merge_clusters, replacing the object
# returned by load_spike_sorting above.
# NOTE(review): presumably this adds per-cluster channel/location information --
# confirm against the brainbox documentation.
clusters = sl.merge_clusters(spikes, clusters, channels)

Connected to https://openalyx.internationalbrainlab.org as user "intbrainlab"
Downloading: /root/Downloads/ONE/openalyx.internationalbrainlab.org/tmpu2ormlbt/cache.zip Bytes: 11816964


100%|██████████| 11.269535064697266/11.269535064697266 [00:00<00:00, 52.05it/s]


Downloading: /root/Downloads/ONE/openalyx.internationalbrainlab.org/histology/ATLAS/Needles/Allen/average_template_25.nrrd Bytes: 32998960


100%|██████████| 31.470260620117188/31.470260620117188 [00:06<00:00,  4.63it/s]


Downloading: /root/Downloads/ONE/openalyx.internationalbrainlab.org/histology/ATLAS/Needles/Allen/annotation_25.nrrd Bytes: 4035363


100%|██████████| 3.848422050476074/3.848422050476074 [00:04<00:00,  1.05s/it]
/root/Downloads/ONE/openalyx.internationalbrainlab.org/cortexlab/Subjects/KS046/2020-12-03/001/alf/probe00/pykilosort/spikes.times.npy: 100%|██████████| 94.3M/94.3M [00:01<00:00, 49.6MB/s]
/root/Downloads/ONE/openalyx.internationalbrainlab.org/cortexlab/Subjects/KS046/2020-12-03/001/alf/probe00/pykilosort/spikes.amps.npy: 100%|██████████| 94.3M/94.3M [00:00<00:00, 110MB/s]
/root/Downloads/ONE/openalyx.internationalbrainlab.org/cortexlab/Subjects/KS046/2020-12-03/001/alf/probe00/pykilosort/spikes.depths.npy: 100%|██████████| 94.3M/94.3M [00:00<00:00, 128MB/s]
/root/Downloads/ONE/openalyx.internationalbrainlab.org/cortexlab/Subjects/KS046/2020-12-03/001/alf/probe00/pykilosort/spikes.clusters.npy: 100%|██████████| 47.2M/47.2M [00:00<00:00, 103MB/s]
/root/Downloads/ONE/openalyx.internationalbrainlab.org/cortexlab/Subjects/KS046/2020-12-03/001/alf/probe00/pykilosort/clusters.depths.npy: 100%|██████████| 3.76k/3.76

After loading the data sets, we should extract the relevant details into a structure that is easier to work with in NumPy and scikit-learn.

The W1D3 notebook loads the Steinmetz data as a dictionary: ```spikes``` is an array of normalized spike rates with shape (n_trials, n_neurons), and ```choices``` is a vector of 0s and 1s, of length n_trials, indicating the animal's behavioural response.



In [None]:
# TODO: reshape the loaded data into something easy to work with; a dictionary of arrays works

Then we train a model; instead of just taking the vanilla accuracy, we can use cross-validation.

In [None]:
##   accuracies = cross_val_score(LogisticRegression(penalty=None), spikes, choices, cv=8)  # k=8 cross validation

I can imagine we're gonna have WAY more features than samples if we take each neuron to be a feature