In [1]:
from allensdk.brain_observatory.ecephys.ecephys_project_cache import EcephysProjectCache
import os
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def initialize_cache(data_dir: str) -> EcephysProjectCache:
    """
    Build the AllenSDK project cache rooted at ``data_dir``.

    The manifest file is expected at ``<data_dir>/manifest.json``; its path is
    handed to ``EcephysProjectCache.from_warehouse`` and the resulting cache
    object is returned.
    """
    manifest_file = os.path.join(data_dir, "manifest.json")
    project_cache = EcephysProjectCache.from_warehouse(manifest=manifest_file)
    return project_cache

In [3]:
def get_session_data_from_sessionid(session_id: int, cache: EcephysProjectCache):
    """
    Fetch the EcephysSession object for one session.

    Thin wrapper that forwards ``session_id`` to ``cache.get_session_data``
    and returns its result unchanged.
    """
    session_data = cache.get_session_data(session_id)
    return session_data

In [4]:
def stimulus_spike_table(stimuli_table, columns, session, units):
    """
    Create a table of spike rates for each stimulus presentation and unit.

    Parameters
    ----------
    stimuli_table : pandas.DataFrame
        Stimulus presentations. Its index supplies the presentation IDs that
        are passed to the session, ``stimulus_presentation_id`` must be usable
        as a merge key (index level name or column), and it must contain a
        ``duration`` column.
    columns : iterable of str
        Stimulus columns to carry over into the result. Any iterable is
        accepted (list, tuple, Index, ...).
    session : object
        Must provide ``presentationwise_spike_times(stimulus_presentation_ids=...)``
        returning one row per spike, indexed by ``spike_time`` and holding
        ``stimulus_presentation_id`` and ``unit_id`` columns.
    units : pandas.DataFrame
        Indexed by unit ID, with an ``ecephys_structure_acronym`` column.

    Returns
    -------
    pandas.DataFrame
        Columns ``stimulus_presentation_id``, ``unit_id``, the requested
        stimulus columns, ``spikes_per_second`` and ``area``.

    Notes
    -----
    Counts are derived from the rows of the spike table, so a
    (presentation, unit) pair without any spike does not appear in the
    result (there is no explicit zero-rate row).
    """
    # list(...) lets callers pass a tuple or Index instead of only a list.
    stimulus_columns = list(columns) + ['duration']

    # reset_index() returns a new frame, so the object handed back by the
    # session is left untouched.
    spikes_per_stimulus = session.presentationwise_spike_times(
        stimulus_presentation_ids=stimuli_table.index
    ).reset_index()

    spike_counts = (
        spikes_per_stimulus
        .groupby(['stimulus_presentation_id', 'unit_id'])['spike_time']
        .count()
        .rename('spike_count')
        .reset_index()
    )

    dataset = pd.merge(spike_counts, stimuli_table[stimulus_columns], on='stimulus_presentation_id')
    dataset['spikes_per_second'] = dataset['spike_count'] / dataset['duration']
    dataset = dataset.drop(columns=['duration', 'spike_count'])
    # .values: assign positionally; the lookup result is indexed by unit ID,
    # not by the row labels of `dataset`.
    dataset['area'] = units.loc[dataset['unit_id'], 'ecephys_structure_acronym'].values

    return dataset

In [22]:
def replace_not_recognized_nulls(dataset, drop_nan = False):
    """
    Convert placeholder strings that pandas does not treat as missing into np.nan.

    The placeholders are 'null', 'NULL', the empty string and a single space.
    When at least one is present, the number of matching cells is printed.
    With ``drop_nan=True`` every row that contains a NaN after the replacement
    is removed and a notice is printed. The input frame is not modified; a new
    frame is returned.
    """
    placeholders = ['null', 'NULL', '', ' ']

    # Count matching cells across the whole frame (per-column sums, then total).
    count = dataset.isin(placeholders).sum().sum()
    if count > 0:
        print(f"Replacing {count} unrecognized values with np.nan.")

    cleaned = dataset.replace(placeholders, np.nan)
    if not drop_nan:
        return cleaned

    cleaned = cleaned.dropna()
    print("Dropped rows containing NaN values.")
    return cleaned

We initialize a cache in our repo's data directory using the `initialize_cache` function defined above.

In [6]:
# Build the cache for the local data directory and display its session table.
data_dir = "./allendata"
cache = initialize_cache(data_dir)
session_table = cache.get_session_table()
session_table

Unnamed: 0_level_0,published_at,specimen_id,session_type,age_in_days,sex,full_genotype,unit_count,channel_count,probe_count,ecephys_structure_acronyms
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
715093703,2019-10-03T00:00:00Z,699733581,brain_observatory_1.1,118.0,M,Sst-IRES-Cre/wt;Ai32(RCL-ChR2(H134R)_EYFP)/wt,884,2219,6,"[CA1, VISrl, nan, PO, LP, LGd, CA3, DG, VISl, ..."
719161530,2019-10-03T00:00:00Z,703279284,brain_observatory_1.1,122.0,M,Sst-IRES-Cre/wt;Ai32(RCL-ChR2(H134R)_EYFP)/wt,755,2214,6,"[TH, Eth, APN, POL, LP, DG, CA1, VISpm, nan, N..."
721123822,2019-10-03T00:00:00Z,707296982,brain_observatory_1.1,125.0,M,Pvalb-IRES-Cre/wt;Ai32(RCL-ChR2(H134R)_EYFP)/wt,444,2229,6,"[MB, SCig, PPT, NOT, DG, CA1, VISam, nan, LP, ..."
732592105,2019-10-03T00:00:00Z,717038288,brain_observatory_1.1,100.0,M,wt/wt,824,1847,5,"[grey, VISpm, nan, VISp, VISl, VISal, VISrl]"
737581020,2019-10-03T00:00:00Z,718643567,brain_observatory_1.1,108.0,M,wt/wt,568,2218,6,"[grey, VISmma, nan, VISpm, VISp, VISl, VISrl]"
739448407,2019-10-03T00:00:00Z,716813543,brain_observatory_1.1,112.0,M,wt/wt,625,2221,6,"[grey, VISam, nan, VIS, VISp, VISl, VISrl]"
742951821,2019-10-03T00:00:00Z,723627604,brain_observatory_1.1,120.0,M,wt/wt,893,2219,6,"[VISal, nan, grey, VISl, VISrl, VISp, VISpm, VIS]"
743475441,2019-10-03T00:00:00Z,722882755,brain_observatory_1.1,121.0,M,wt/wt,553,2225,6,"[LP, LGd, HPF, DG, CA3, CA1, VISrl, nan, PP, P..."
744228101,2019-10-03T00:00:00Z,719817805,brain_observatory_1.1,122.0,M,wt/wt,659,2226,6,"[Eth, TH, LP, POL, APN, DG, CA1, VIS, nan, CA3..."
746083955,2019-10-03T00:00:00Z,726170935,brain_observatory_1.1,98.0,F,Pvalb-IRES-Cre/wt;Ai32(RCL-ChR2(H134R)_EYFP)/wt,582,2216,6,"[VPM, TH, LGd, CA3, CA2, CA1, VISal, nan, grey..."


From the `id` column of the table above, we select one session ID and load the data for that session.

In [7]:
# Session analysed in the rest of the notebook; `session` is the loaded session object.
session_id = 798911424
session = get_session_data_from_sessionid(session_id, cache)

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


In [8]:
# Unit table taken from the cache (not from `session`); it is passed to
# stimulus_spike_table below to look up each unit's structure acronym.
units = cache.get_units()
units

Unnamed: 0_level_0,waveform_PT_ratio,waveform_amplitude,amplitude_cutoff,cumulative_drift,d_prime,waveform_duration,ecephys_channel_id,firing_rate,waveform_halfwidth,isi_violations,...,phase,sampling_rate,has_lfp_data,date_of_acquisition,published_at,specimen_id,session_type,age_in_days,sex,genotype
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
915956282,0.611816,164.878740,0.072728,309.71,3.910873,0.535678,850229419,6.519432,0.164824,0.104910,...,3a,29999.915391,True,2019-01-09T00:26:20Z,2019-10-03T00:00:00Z,717038288,brain_observatory_1.1,100.0,M,wt/wt
915956340,0.439372,247.254345,0.000881,160.24,5.519024,0.563149,850229419,9.660554,0.206030,0.006825,...,3a,29999.915391,True,2019-01-09T00:26:20Z,2019-10-03T00:00:00Z,717038288,brain_observatory_1.1,100.0,M,wt/wt
915956345,0.500520,251.275830,0.001703,129.36,3.559911,0.521943,850229419,12.698430,0.192295,0.044936,...,3a,29999.915391,True,2019-01-09T00:26:20Z,2019-10-03T00:00:00Z,717038288,brain_observatory_1.1,100.0,M,wt/wt
915956349,0.424620,177.115380,0.096378,169.29,2.973959,0.508208,850229419,16.192413,0.192295,0.120715,...,3a,29999.915391,True,2019-01-09T00:26:20Z,2019-10-03T00:00:00Z,717038288,brain_observatory_1.1,100.0,M,wt/wt
915956356,0.512847,214.954545,0.054706,263.01,2.936851,0.549414,850229419,2.193113,0.233501,0.430427,...,3a,29999.915391,True,2019-01-09T00:26:20Z,2019-10-03T00:00:00Z,717038288,brain_observatory_1.1,100.0,M,wt/wt
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
951911942,0.368040,189.249060,0.001083,122.82,5.362617,0.631826,850103120,0.984659,0.192295,0.080230,...,3a,29999.958001,True,2018-10-31T17:39:23Z,2019-10-03T00:00:00Z,735109609,brain_observatory_1.1,102.0,M,Sst-IRES-Cre/wt;Ai32(RCL-ChR2(H134R)_EYFP)/wt
951911948,0.660350,80.706600,0.000416,108.17,3.507884,0.302178,850103122,10.327972,0.233501,0.000365,...,3a,29999.958001,True,2018-10-31T17:39:23Z,2019-10-03T00:00:00Z,735109609,brain_observatory_1.1,102.0,M,Sst-IRES-Cre/wt;Ai32(RCL-ChR2(H134R)_EYFP)/wt
951911955,0.357672,117.232440,0.023316,336.04,2.790057,0.714238,850103126,1.782535,0.192295,0.318256,...,3a,29999.958001,True,2018-10-31T17:39:23Z,2019-10-03T00:00:00Z,735109609,brain_observatory_1.1,102.0,M,Sst-IRES-Cre/wt;Ai32(RCL-ChR2(H134R)_EYFP)/wt
951911961,0.263957,82.125030,0.032783,361.88,2.954858,0.769179,850103134,3.211115,0.247236,0.497900,...,3a,29999.958001,True,2018-10-31T17:39:23Z,2019-10-03T00:00:00Z,735109609,brain_observatory_1.1,102.0,M,Sst-IRES-Cre/wt;Ai32(RCL-ChR2(H134R)_EYFP)/wt


In [9]:
# Preview the unit table attached to the selected session.
session.units.head()

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Unnamed: 0_level_0,waveform_PT_ratio,waveform_amplitude,amplitude_cutoff,cluster_id,cumulative_drift,d_prime,firing_rate,isi_violations,isolation_distance,L_ratio,...,ecephys_structure_id,ecephys_structure_acronym,anterior_posterior_ccf_coordinate,dorsal_ventral_ccf_coordinate,left_right_ccf_coordinate,probe_description,location,probe_sampling_rate,probe_lfp_sampling_rate,probe_has_lfp_data
unit_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
951088679,0.620607,82.147455,0.023654,1,480.97,2.575648,7.429131,0.05335,51.364291,0.028062,...,215.0,APN,8328.0,3018.0,7095.0,probeA,See electrode locations,29999.965974,1249.998582,True
951088664,0.587044,78.399165,0.001739,0,427.07,3.085334,6.843864,0.033732,40.358293,0.051513,...,215.0,APN,8328.0,3018.0,7095.0,probeA,See electrode locations,29999.965974,1249.998582,True
951088734,0.560996,187.504005,0.001249,5,311.01,5.152564,1.418476,0.356931,56.950961,0.001292,...,215.0,APN,8316.0,2988.0,7099.0,probeA,See electrode locations,29999.965974,1249.998582,True
951088721,0.474457,196.741545,5.9e-05,4,355.17,4.295014,11.517381,0.000271,58.358552,0.020218,...,215.0,APN,8316.0,2988.0,7099.0,probeA,See electrode locations,29999.965974,1249.998582,True
951088862,0.534674,92.12502,0.089901,16,239.5,4.043348,19.953126,0.013619,79.934237,0.011335,...,215.0,APN,8304.0,2958.0,7103.0,probeA,See electrode locations,29999.965974,1249.998582,True


In [10]:
# List the metadata fields available for the selected session.
session.metadata.keys()

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


dict_keys(['specimen_name', 'session_type', 'full_genotype', 'sex', 'age_in_days', 'rig_equipment_name', 'num_units', 'num_channels', 'num_probes', 'num_stimulus_presentations', 'session_start_time', 'ecephys_session_id', 'structure_acronyms', 'stimulus_names'])

In [11]:
# Names of the stimuli recorded in this session's metadata.
session.metadata['stimulus_names']

['spontaneous',
 'gabors',
 'flashes',
 'drifting_gratings',
 'natural_movie_three',
 'natural_movie_one',
 'static_gratings',
 'natural_scenes',
 'drifting_gratings_contrast']

In [12]:
# Presentation table for the 'drifting_gratings' stimulus.
drifting_stimuli = session.get_stimulus_table('drifting_gratings')
drifting_stimuli.head()

Unnamed: 0_level_0,stimulus_block,start_time,stop_time,contrast,temporal_frequency,stimulus_name,size,phase,orientation,spatial_frequency,duration,stimulus_condition_id
stimulus_presentation_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
3798,2.0,1586.180717,1588.182387,0.8,15.0,drifting_gratings,"[250.0, 250.0]","[5308.98333333, 5308.98333333]",0.0,0.04,2.00167,246
3799,2.0,1589.183244,1591.184924,0.8,2.0,drifting_gratings,"[250.0, 250.0]","[5308.98333333, 5308.98333333]",180.0,0.04,2.00168,247
3800,2.0,1592.185737,1594.187427,0.8,1.0,drifting_gratings,"[250.0, 250.0]","[5308.98333333, 5308.98333333]",45.0,0.04,2.00169,248
3801,2.0,1595.188274,1597.189944,0.8,8.0,drifting_gratings,"[250.0, 250.0]","[5308.98333333, 5308.98333333]",90.0,0.04,2.00167,249
3802,2.0,1598.190777,1600.192447,0.8,2.0,drifting_gratings,"[250.0, 250.0]","[5308.98333333, 5308.98333333]",270.0,0.04,2.00167,250


In [13]:
# Presentation table for the 'static_gratings' stimulus.
static_stimuli = session.get_stimulus_table('static_gratings')
static_stimuli.head()

Unnamed: 0_level_0,stimulus_block,start_time,stop_time,contrast,stimulus_name,size,phase,orientation,spatial_frequency,duration,stimulus_condition_id
stimulus_presentation_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
49434,8.0,5399.367847,5399.618056,0.8,static_gratings,"[250.0, 250.0]",0.5,150.0,0.02,0.250209,4787
49435,8.0,5399.618056,5399.868265,0.8,static_gratings,"[250.0, 250.0]",0.5,0.0,0.16,0.250209,4788
49436,8.0,5399.868265,5400.118474,0.8,static_gratings,"[250.0, 250.0]",0.25,30.0,0.04,0.250209,4789
49437,8.0,5400.118474,5400.368684,0.8,static_gratings,"[250.0, 250.0]",0.75,60.0,0.04,0.250209,4790
49438,8.0,5400.368684,5400.618889,0.8,static_gratings,"[250.0, 250.0]",0.0,60.0,0.08,0.250206,4791


In [23]:
# Spike rate per (presentation, unit) for drifting gratings, keeping the
# orientation and temporal_frequency columns of each presentation.
drifting_dataset = stimulus_spike_table(drifting_stimuli, ['orientation', 'temporal_frequency'], session, units)
drifting_dataset.head()

Unnamed: 0,stimulus_presentation_id,unit_id,orientation,temporal_frequency,spikes_per_second,area
0,3798,951088664,0.0,15.0,12.489571,APN
1,3798,951088679,0.0,15.0,11.989988,APN
2,3798,951088721,0.0,15.0,12.489571,APN
3,3798,951088823,0.0,15.0,0.999166,APN
4,3798,951088862,0.0,15.0,38.967462,APN


In [24]:
# Spike rate per (presentation, unit) for static gratings, keeping the
# orientation and spatial_frequency columns of each presentation.
static_dataset = stimulus_spike_table(static_stimuli, ['orientation', 'spatial_frequency'], session, units)
static_dataset.head()

Unnamed: 0,stimulus_presentation_id,unit_id,orientation,spatial_frequency,spikes_per_second,area
0,49434,951088664,150.0,0.02,11.989965,APN
1,49434,951088679,150.0,0.02,7.99331,APN
2,49434,951088721,150.0,0.02,31.973239,APN
3,49434,951088823,150.0,0.02,3.996655,APN
4,49434,951088862,150.0,0.02,19.983275,APN


In [25]:
# Convert placeholder strings (e.g. 'null') to NaN, then drop every row that contains a NaN.
drifting_dataset_clean = replace_not_recognized_nulls(drifting_dataset, drop_nan=True)
static_dataset_clean = replace_not_recognized_nulls(static_dataset, drop_nan=True)

Replacing 37284 unrecognized values with np.nan.
Dropped rows containing NaN values.
Replacing 165940 unrecognized values with np.nan.
Dropped rows containing NaN values.


In [26]:
# Preview the cleaned drifting-gratings table.
drifting_dataset_clean.head()

Unnamed: 0,stimulus_presentation_id,unit_id,orientation,temporal_frequency,spikes_per_second,area
0,3798,951088664,0.0,15.0,12.489571,APN
1,3798,951088679,0.0,15.0,11.989988,APN
2,3798,951088721,0.0,15.0,12.489571,APN
3,3798,951088823,0.0,15.0,0.999166,APN
4,3798,951088862,0.0,15.0,38.967462,APN


In [None]:
# Preview the cleaned static-gratings table.
static_dataset_clean.head()

Unnamed: 0,stimulus_presentation_id,unit_id,orientation,spatial_frequency,spikes_per_second,area
0,49434,951088664,150.0,0.02,11.989965,APN
1,49434,951088679,150.0,0.02,7.99331,APN
2,49434,951088721,150.0,0.02,31.973239,APN
3,49434,951088823,150.0,0.02,3.996655,APN
4,49434,951088862,150.0,0.02,19.983275,APN


In [None]:
# Persist the cleaned tables as parquet files, one pair per session.
# The file names use `session_id`: interpolating the `session` object itself
# would put that object's string representation into the path rather than
# the numeric session ID.
os.makedirs('ourdata', exist_ok=True)  # make sure the output directory exists before writing
drifting_dataset_clean.to_parquet(f'ourdata/drifting_dataset_session_{session_id}.parquet')
static_dataset_clean.to_parquet(f'ourdata/static_dataset_session_{session_id}.parquet')