Getting the subset of IBL data to test on.

In [2]:
import os
import subprocess 
import numpy as np
import pandas as pd
from one.api import ONE
import random

from one.api import ONE
ONE.setup(base_url='https://openalyx.internationalbrainlab.org', silent=True)
one = ONE(password='international')

In [3]:
#Searching for datasets
brain_acronym = 'CA1'
# query sessions endpoint
sessions, sess_details = one.search(atlas_acronym=brain_acronym, query_type='remote', details=True)
print(f'No. of detected sessions: {len(sessions)}')

# query insertions endpoint
insertions = one.search_insertions(atlas_acronym=brain_acronym)
print(f'No. of detected insertions: {len(insertions)}')

No. of detected sessions: 201
No. of detected insertions: 201


In [4]:
tags = {t['name']: t['description'] for t in one.alyx.rest('tags', 'list') if t['public']}
for key, value in tags.items():
    print(f"{key}\n{value}\n")

2021_Q1_IBL_et_al_Behaviour
https://doi.org/10.7554/eLife.63711

2021_Q2_PreRelease
https://figshare.com/articles/online_resource/Spike_sorting_pipeline_for_the_International_Brain_Laboratory/19705522/3

2021_Q2_Varol_et_al
https://doi.org/10.1109/ICASSP39728.2021.9414145

2021_Q3_Whiteway_et_al
https://doi.org/10.1371/journal.pcbi.1009439

2022_Q2_IBL_et_al_RepeatedSite
https://doi.org/10.1101/2022.05.09.491042

2022_Q3_IBL_et_al_DAWG
https://doi.org/10.1101/827873

2022_Q4_IBL_et_al_BWM
https://figshare.com/articles/preprint/Data_release_-_Brainwide_map_-_Q4_2022/21400815

2023_Q1_Biderman_Whiteway_et_al


2023_Q1_Mohammadi_et_al


2023_Q3_Findling_Hubert_et_al
https://doi.org/10.1101/2023.07.04.547684

2023_Q4_Bruijns_et_al


2023_Q4_IBL_et_al_BWM_2


2023_Q4_IBL_et_al_BWM_passive


Brainwidemap




In [5]:
# get all sessions from the paper with the adjusted priors
one.load_cache(tag = '2023_Q3_Findling_Hubert_et_al',)
sessions_adjusted_priors = one.search()  # All sessions used in the Findling paper

# To return to the full cache containing an index of all IBL experiments
ONE.cache_clear()
one = ONE(base_url='https://openalyx.internationalbrainlab.org')

Downloading: /space/scratch/IBL_data_cache/2023_Q3_Findling_Hubert_et_al/tmp8gso6ued/cache.zip Bytes: 63104


100%|██████████| 0.0601806640625/0.0601806640625 [00:00<00:00,  1.02it/s]


In [6]:
# get all sessions from the paper with the adjusted priors
one.load_cache(tag = '2022_Q2_IBL_et_al_RepeatedSite',)
sessions_rep_sites = one.search()  # All sessions used in the Findling paper

# To return to the full cache containing an index of all IBL experiments
ONE.cache_clear()
one = ONE(base_url='https://openalyx.internationalbrainlab.org')

Downloading: /space/scratch/IBL_data_cache/2022_Q2_IBL_et_al_RepeatedSite/tmpkl2gk84s/cache.zip Bytes: 339313


100%|██████████| 0.3235940933227539/0.3235940933227539 [00:00<00:00,  1.29it/s]


In [7]:
# here we querry the session with ca1 to see which ones are from the Findling et al.,(2023) paper with changing priors
all_sesh_with_ca1_eid = [eid for eid in sessions]


In [17]:
# How many sessions with CA1 recordings aref rom Fidling et al., (2023)?
print(np.sum(np.isin(all_sesh_with_ca1_eid, sessions_adjusted_priors)))
np.isin(all_sesh_with_ca1_eid, sessions_adjusted_priors)

13


array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False,  True, False, False, False, False, False,  True, False,
       False, False,  True, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
        True,  True, False, False, False,  True,  True, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False,  True, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False,  True, False, False, False, False,
       False, False,

In [16]:
# How many sessions with CA1 recordings are from the rep sites paper?
print(np.sum(np.isin(all_sesh_with_ca1_eid, sessions_rep_sites)))
np.isin(all_sesh_with_ca1_eid, sessions_rep_sites)

85


array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False,  True,  True, False, False,  True,  True,
        True, False,  True,  True,  True, False,  True,  True,  True,
        True,  True, False, False,  True,  True,  True,  True, False,
        True,  True,  True,  True,  True, False, False,  True,  True,
       False, False, False,  True,  True,  True,  True, False,  True,
        True,  True,  True,  True,  True, False,  True,  True, False,
       False, False,  True, False, False,  True,  True,  True, False,
        True, False,  True, False,  True, False, False,  True,  True,
        True, False,  True,  True,  True, False, False,  True, False,
       False, False, False,  True, False, False,  True,  True,  True,
       False,  True,

In [71]:
# hpw many are in both?
print(np.sum(np.isin(all_sesh_with_ca1_eid, sessions_rep_sites) & np.isin(all_sesh_with_ca1_eid, sessions_adjusted_priors)))
np.isin(all_sesh_with_ca1_eid, sessions_rep_sites) & np.isin(all_sesh_with_ca1_eid, sessions_adjusted_priors)

4


array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False,  True, False,
       False, False,  True, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False,  True,  True, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,

In [11]:
# how many from the Findling et al. (2023) collection have CA1 recordings?
np.isin( sessions_adjusted_priors, all_sesh_with_ca1_eid)

array([False, False, False, False,  True,  True, False, False,  True,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,  True, False, False, False, False, False, False,
        True, False, False, False, False, False, False, False,  True,
        True, False, False, False,  True,  True,  True, False, False])

In [20]:
# I'm going to pick ten random recordings from each to test my workflows on before running the whole dataset
sesh_list_full = np.array([x for x in sessions])

# get the subset of sessions from the Findling et al., (2023) paper
subset_ids = list(sesh_list_full[np.isin(all_sesh_with_ca1_eid, sessions_adjusted_priors)]) # list() is required for sample to work

# append 10 random sessions not from the Findling et al., (2023) paper
reps_not_in_priors_idx = np.isin(all_sesh_with_ca1_eid, sessions_adjusted_priors, invert = True) & np.isin(all_sesh_with_ca1_eid, sessions_rep_sites)
subset_ids = subset_ids + random.sample(list(sesh_list_full[reps_not_in_priors_idx]), 13) # list() is required for sample to work

In [21]:
subset_ids

['0c828385-6dd6-4842-a702-c5075f5f5e81',
 '111c1762-7908-47e0-9f40-2f2ee55b6505',
 '8a3a0197-b40a-449f-be55-c00b23253bbf',
 '1a507308-c63a-4e02-8f32-3239a07dc578',
 '1a507308-c63a-4e02-8f32-3239a07dc578',
 '73918ae1-e4fd-4c18-b132-00cb555b1ad2',
 '73918ae1-e4fd-4c18-b132-00cb555b1ad2',
 '09b2c4d1-058d-4c84-9fd4-97530f85baf6',
 '5339812f-8b91-40ba-9d8f-a559563cc46b',
 '034e726f-b35f-41e0-8d6c-a22cc32391fb',
 '83e77b4b-dfa0-4af9-968b-7ea0c7a0c7e4',
 '83e77b4b-dfa0-4af9-968b-7ea0c7a0c7e4',
 '931a70ae-90ee-448e-bedb-9d41f3eda647',
 'd2832a38-27f6-452d-91d6-af72d794136c',
 'dda5fc59-f09a-4256-9fb5-66c67667a466',
 'e2b845a1-e313-4a08-bc61-a5f662ed295e',
 'a4a74102-2af5-45dc-9e41-ef7f5aed88be',
 '572a95d1-39ca-42e1-8424-5c9ffcb2df87',
 '781b35fd-e1f0-4d14-b2bb-95b7263082bb',
 'b01df337-2d31-4bcc-a1fe-7112afd50c50',
 'e535fb62-e245-4a48-b119-88ce62a6fe67',
 '614e1937-4b24-4ad3-9055-c8253d089919',
 '7f6b86f9-879a-4ea2-8531-294a221af5d0',
 '824cf03d-4012-4ab1-b499-c83a92c5589e',
 '4b00df29-3769-

In [22]:
#save to file
np.save('testing_ibl_list.npy', np.array(subset_ids))