#### Imports

In [1]:
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sys

import pickle

import allensdk
from allensdk.brain_observatory.behavior.behavior_project_cache import VisualBehaviorNeuropixelsProjectCache
from allensdk.brain_observatory.ecephys.ecephys_project_cache import EcephysProjectCache

from allensdk.brain_observatory.visualization import plot_running_speed

import warnings
from matplotlib.cbook import MatplotlibDeprecationWarning

warnings.filterwarnings("ignore", category=MatplotlibDeprecationWarning)\

# Confirming your allensdk version
print(f"Your allensdk version is: {allensdk.__version__}")

  from .autonotebook import tqdm as notebook_tqdm


Your allensdk version is: 2.15.2


In [2]:
# Update this to a valid directory in your filesystem. This is where the data will be stored.
DOWNLOAD_COMPLETE_DATASET = False
output_path = "/Users/rp/Desktop/Research/CN^3/Thesis Material/2-Region-Latent-Alignment/data/raw/allen-sdk-data"
	
# Example cache directory path, it determines where downloaded data will be stored
manifest_path = os.path.join(output_path, "manifest.json")

cache = EcephysProjectCache.from_warehouse(manifest=manifest_path)

# Access table of all session metadata
FCon_sessions_VISp = cache.get_session_table()

In [13]:
contains_CA1 = lambda arr: 'CA1' in arr
contains_VISp = lambda arr: 'VISp' in arr

# Looking for functional connectivity stimulus set
stim_set_name = "functional_connectivity"
FCon_sessions = session_table[session_table['session_type'] == stim_set_name]

# Filter for sessions that collect readings from CA1. (Can be changed)
FCon_sessions_CA1 = FCon_sessions[FCon_sessions.apply(lambda row: contains_CA1(row['ecephys_structure_acronyms']), axis=1)]
FCon_CA1_session_ids = FCon_sessions_CA1.index
# Filter for sessions that collect readings from VISp. (Can be changed)
FCon_sessions_VISp = FCon_sessions[FCon_sessions.apply(lambda row: contains_VISp(row['ecephys_structure_acronyms']), axis=1)]
FCon_VISp_session_ids = FCon_sessions_VISp.index


In [63]:
FCon_sessions_VISp.sort_values(by='unit_count', ascending=False)

# Fellow wild type: 771160300
# Female (Sst-IRES-Cre/wt;Ai32(RCL-ChR2(H134R)_EYFP)/wt) : 794812542

Unnamed: 0_level_0,published_at,specimen_id,session_type,age_in_days,sex,full_genotype,unit_count,channel_count,probe_count,ecephys_structure_acronyms
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
794812542,2019-10-03T00:00:00Z,774672366,functional_connectivity,120.0,F,Sst-IRES-Cre/wt;Ai32(RCL-ChR2(H134R)_EYFP)/wt,1005,2229,6,"[MB, APN, DG, CA1, VISrl, nan, PIL, MGm, MGv, ..."
771160300,2019-10-03T00:00:00Z,754488979,functional_connectivity,142.0,M,wt/wt,930,2230,6,"[SCig, PPT, NOT, SUB, ProS, CA1, VISam, nan, A..."
847657808,2019-10-03T00:00:00Z,827809884,functional_connectivity,126.0,F,wt/wt,874,2298,6,"[APN, NOT, DG, HPF, ProS, CA1, VISam, nan, MB,..."
779839471,2019-10-03T00:00:00Z,760960653,functional_connectivity,122.0,M,wt/wt,863,2220,6,"[APN, LP, MB, DG, CA1, VISrl, nan, TH, LGd, CA..."
766640955,2019-10-03T00:00:00Z,744912849,functional_connectivity,133.0,M,wt/wt,842,2233,6,"[MB, APN, NOT, DG, CA1, VISam, nan, PF, TH, LP..."
781842082,2019-10-03T00:00:00Z,760946813,functional_connectivity,126.0,M,wt/wt,833,2232,6,"[APN, LP, DG, CA1, VISrl, nan, MGm, TH, MGv, L..."
768515987,2019-10-03T00:00:00Z,754477358,functional_connectivity,136.0,M,wt/wt,802,2217,6,"[MB, APN, NOT, DG, CA1, VISam, nan, Eth, LP, P..."
778998620,2019-10-03T00:00:00Z,759674770,functional_connectivity,121.0,M,wt/wt,793,2229,6,"[APN, POL, LP, DG, CA3, CA1, VISrl, nan, ZI, M..."
778240327,2019-10-03T00:00:00Z,760938797,functional_connectivity,120.0,M,wt/wt,784,2234,6,"[APN, POL, LP, DG, CA1, VISrl, nan, ZI, IntG, ..."
793224716,2019-10-03T00:00:00Z,769319624,functional_connectivity,120.0,M,wt/wt,781,2229,6,"[APN, NOT, MB, DG, CA1, VIS, nan, Eth, LP, SGN..."


In [62]:
FCon_sessions_VISp.loc[794812542].ecephys_structure_acronyms

array(['MB', 'APN', 'DG', 'CA1', 'VISrl', nan, 'PIL', 'MGm', 'MGv', 'MGd',
       'CA3', 'VISal', 'CA2', 'VISl', 'LP', 'POST', 'SUB', 'VISp', 'SCig',
       'NOT', 'HPF', 'ProS', 'VISam', 'POL', 'VISpm'], dtype=object)

In [24]:
FCon_sessions_VISp.loc[771160300].ecephys_structure_acronyms

array(['SCig', 'PPT', 'NOT', 'SUB', 'ProS', 'CA1', 'VISam', nan, 'APN',
       'MB', 'DG', 'VISpm', 'LP', 'VISp', 'CA3', 'CA2', 'VISli', 'ZI',
       'IntG', 'IGL', 'LGd', 'VISal', 'OP', 'VIS'], dtype=object)

In [None]:
stimulus_name_of_interest = "natural_movie_one_more_repeats"

In [34]:
regions = FCon_sessions.loc[766640955]['ecephys_structure_acronyms']
regions

array(['MB', 'APN', 'NOT', 'DG', 'CA1', 'VISam', nan, 'PF', 'TH', 'LP',
       'VISmma', 'VISp', 'MGd', 'CA3', 'VISl', 'VPM', 'LGd', 'VISal',
       'VISrl'], dtype=object)

In [52]:
FCon_sessions.loc[766640955]

published_at                                               2019-10-03T00:00:00Z
specimen_id                                                           744912849
session_type                                            functional_connectivity
age_in_days                                                               133.0
sex                                                                           M
full_genotype                                                             wt/wt
unit_count                                                                  842
channel_count                                                              2233
probe_count                                                                   6
ecephys_structure_acronyms    [MB, APN, NOT, DG, CA1, VISam, nan, PF, TH, LP...
Name: 766640955, dtype: object

In [14]:
FCon_CA1_session_ids[0]

766640955

In [3]:
NATURAL_MOVIE_SESSION_0_ID = 766640955
NATURAL_MOVIE_SESSION_MALE_ID = 771160300
NATURAL_MOVIE_SESSION_FEMALE_ID = 794812542
# Download takes time
# FCon_session_1_id = FCon_CA1_session_ids[0] # 766640955 - 2.88GB
# NATURAL_MOVIE_SESSION_MALE_ID # 771160300 - 2.78GB := 930 units
# NATURAL_MOVIE_SESSION_FEMALE_ID # 794812542 -

session = cache.get_session_data(NATURAL_MOVIE_SESSION_0_ID)

In [19]:
stim_table = session.get_stimulus_table()
stim_table.loc[natural_movie_stimulus_presentation_ids]

Unnamed: 0_level_0,Dir,Speed,coherence,color,contrast,frame,orientation,phase,size,spatial_frequency,start_time,stimulus_block,stimulus_name,stop_time,temporal_frequency,x_position,y_position,duration,stimulus_condition_id
stimulus_presentation_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
4339,,,,,1.0,0.0,0.0,,"[1920.0, 1080.0]",,2297.212884,3.0,natural_movie_one_more_repeats,2297.246246,,,,0.033361,282
4340,,,,,1.0,1.0,0.0,,"[1920.0, 1080.0]",,2297.246246,3.0,natural_movie_one_more_repeats,2297.279607,,,,0.033361,283
4341,,,,,1.0,2.0,0.0,,"[1920.0, 1080.0]",,2297.279607,3.0,natural_movie_one_more_repeats,2297.312968,,,,0.033361,284
4342,,,,,1.0,3.0,0.0,,"[1920.0, 1080.0]",,2297.312968,3.0,natural_movie_one_more_repeats,2297.346330,,,,0.033361,285
4343,,,,,1.0,4.0,0.0,,"[1920.0, 1080.0]",,2297.346330,3.0,natural_movie_one_more_repeats,2297.379691,,,,0.033361,286
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76936,,,,,1.0,895.0,0.0,,"[1920.0, 1080.0]",,8302.063780,8.0,natural_movie_one_more_repeats,8302.097141,,,,0.033361,1177
76937,,,,,1.0,896.0,0.0,,"[1920.0, 1080.0]",,8302.097141,8.0,natural_movie_one_more_repeats,8302.130501,,,,0.033361,1178
76938,,,,,1.0,897.0,0.0,,"[1920.0, 1080.0]",,8302.130501,8.0,natural_movie_one_more_repeats,8302.163862,,,,0.033361,1179
76939,,,,,1.0,898.0,0.0,,"[1920.0, 1080.0]",,8302.163862,8.0,natural_movie_one_more_repeats,8302.197223,,,,0.033361,1180


In [23]:
session.stimulus_presentations

Unnamed: 0_level_0,Dir,Speed,coherence,color,contrast,frame,orientation,phase,size,spatial_frequency,start_time,stimulus_block,stimulus_name,stop_time,temporal_frequency,x_position,y_position,duration,stimulus_condition_id
stimulus_presentation_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
0,,,,,,,,,,,15.289644,,spontaneous,75.356384,,,,60.066740,0
1,,,,,0.8,,0.0,"[3644.93333333, 3644.93333333]","[20.0, 20.0]",0.08,75.356384,0.0,gabors,75.589910,4.0,20.0,30.0,0.233525,1
2,,,,,0.8,,45.0,"[3644.93333333, 3644.93333333]","[20.0, 20.0]",0.08,75.589910,0.0,gabors,75.840115,4.0,0.0,-20.0,0.250206,2
3,,,,,0.8,,0.0,"[3644.93333333, 3644.93333333]","[20.0, 20.0]",0.08,75.840115,0.0,gabors,76.090321,4.0,-20.0,20.0,0.250206,3
4,,,,,0.8,,0.0,"[3644.93333333, 3644.93333333]","[20.0, 20.0]",0.08,76.090321,0.0,gabors,76.340527,4.0,30.0,-20.0,0.250206,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77431,90.0,0.02,0.9,,1.0,,,,,,9283.050114,9.0,dot_motion,9284.050997,,,,1.000883,2108
77432,270.0,0.005,0.9,,1.0,,,,,,9285.051814,9.0,dot_motion,9286.052667,,,,1.000853,2104
77433,180.0,0.01,0.9,,1.0,,,,,,9287.053494,9.0,dot_motion,9288.054337,,,,1.000843,2110
77434,315.0,0.01,0.9,,1.0,,,,,,9289.055134,9.0,dot_motion,9290.056017,,,,1.000883,2093


In [12]:
for key, values in session.get_stimulus_parameter_values().items():
    print(f'{key}: {values}')

Dir: [90.0 45.0 225.0 315.0 180.0 0.0 135.0 270.0]
Speed: [0.001 0.005 0.01 0.02]
coherence: [0.9]
color: [1.0 -1.0]
contrast: [0.8 0.13 0.02 0.35 0.2 0.08 0.01 0.04 0.6 1.0 0.1]
frame: [0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0 10.0 11.0 12.0 13.0 14.0 15.0
 16.0 17.0 18.0 19.0 20.0 21.0 22.0 23.0 24.0 25.0 26.0 27.0 28.0 29.0
 30.0 31.0 32.0 33.0 34.0 35.0 36.0 37.0 38.0 39.0 40.0 41.0 42.0 43.0
 44.0 45.0 46.0 47.0 48.0 49.0 50.0 51.0 52.0 53.0 54.0 55.0 56.0 57.0
 58.0 59.0 60.0 61.0 62.0 63.0 64.0 65.0 66.0 67.0 68.0 69.0 70.0 71.0
 72.0 73.0 74.0 75.0 76.0 77.0 78.0 79.0 80.0 81.0 82.0 83.0 84.0 85.0
 86.0 87.0 88.0 89.0 90.0 91.0 92.0 93.0 94.0 95.0 96.0 97.0 98.0 99.0
 100.0 101.0 102.0 103.0 104.0 105.0 106.0 107.0 108.0 109.0 110.0 111.0
 112.0 113.0 114.0 115.0 116.0 117.0 118.0 119.0 120.0 121.0 122.0 123.0
 124.0 125.0 126.0 127.0 128.0 129.0 130.0 131.0 132.0 133.0 134.0 135.0
 136.0 137.0 138.0 139.0 140.0 141.0 142.0 143.0 144.0 145.0 146.0 147.0
 148.0 149.0 150.0 151.0 

In [10]:
stim_cond = session.stimulus_conditions
stim_cond[stim_cond['stimulus_name']==stimulus_name_of_interest]

Unnamed: 0_level_0,Dir,Speed,coherence,color,contrast,dotLife,dotSize,fieldPos,fieldShape,fieldSize,...,orientation,phase,size,spatial_frequency,stimulus_name,temporal_frequency,units,x_position,y_position,color_triplet
stimulus_condition_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
282,,,,,1.0,,,,,,...,0.0,,"[1920.0, 1080.0]",,natural_movie_one_more_repeats,,pix,,,"[1.0, 1.0, 1.0]"
283,,,,,1.0,,,,,,...,0.0,,"[1920.0, 1080.0]",,natural_movie_one_more_repeats,,pix,,,"[1.0, 1.0, 1.0]"
284,,,,,1.0,,,,,,...,0.0,,"[1920.0, 1080.0]",,natural_movie_one_more_repeats,,pix,,,"[1.0, 1.0, 1.0]"
285,,,,,1.0,,,,,,...,0.0,,"[1920.0, 1080.0]",,natural_movie_one_more_repeats,,pix,,,"[1.0, 1.0, 1.0]"
286,,,,,1.0,,,,,,...,0.0,,"[1920.0, 1080.0]",,natural_movie_one_more_repeats,,pix,,,"[1.0, 1.0, 1.0]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1177,,,,,1.0,,,,,,...,0.0,,"[1920.0, 1080.0]",,natural_movie_one_more_repeats,,pix,,,"[1.0, 1.0, 1.0]"
1178,,,,,1.0,,,,,,...,0.0,,"[1920.0, 1080.0]",,natural_movie_one_more_repeats,,pix,,,"[1.0, 1.0, 1.0]"
1179,,,,,1.0,,,,,,...,0.0,,"[1920.0, 1080.0]",,natural_movie_one_more_repeats,,pix,,,"[1.0, 1.0, 1.0]"
1180,,,,,1.0,,,,,,...,0.0,,"[1920.0, 1080.0]",,natural_movie_one_more_repeats,,pix,,,"[1.0, 1.0, 1.0]"


In [7]:
nm_1 = session.stimulus_conditions.loc[natural_movie_stimulus_presentation_ids]

KeyError: "None of [Int64Index([ 4339,  4340,  4341,  4342,  4343,  4344,  4345,  4346,  4347,\n             4348,\n            ...\n            76931, 76932, 76933, 76934, 76935, 76936, 76937, 76938, 76939,\n            76940],\n           dtype='int64', name='stimulus_condition_id', length=54000)] are in the [index]"

In [5]:
# From natural_movie_EDA.ipynb, but it has other noisey code.
# UNITS = session.units

stimulus_name_of_interest = "natural_movie_one_more_repeats"
# look at responses to the natural movie stimulus
natural_movie_stimulus_presentation_ids = session.stimulus_presentations[
    session.stimulus_presentations['stimulus_name'] == stimulus_name_of_interest
].index.values


In [36]:
len(natural_movie_stimulus_presentation_ids)

54000

## Spike Count

### CA1

In [20]:
units_of_interest_CA1 = UNITS[(UNITS.structure_acronym.str.find('CA1') > -1)]
len(units_of_interest_CA1)

decent_snr_CA1_unit_ids = units_of_interest_CA1[
    units_of_interest_CA1['snr'] >= 1.5
].index.values

In [23]:
CA1_spikes = session.presentationwise_spike_times(
    stimulus_presentation_ids=natural_movie_stimulus_presentation_ids,
    unit_ids=decent_snr_CA1_unit_ids
)

In [24]:
CA1_spikes

Unnamed: 0_level_0,stimulus_presentation_id,unit_id,time_since_stimulus_presentation_onset
spike_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2297.214321,4339,950928969,0.001437
2297.215488,4339,950928415,0.002604
2297.215788,4339,950927639,0.002904
2297.217588,4339,950928969,0.004704
2297.218184,4339,950919607,0.005300
...,...,...,...
8302.222046,76940,950934764,0.024823
8302.222708,76940,950919714,0.025484
8302.227808,76940,950919726,0.030584
8302.228020,76940,950928272,0.030797


In [25]:
CA1_spikes["count"] = np.zeros(CA1_spikes.shape[0])
spikes = CA1_spikes.groupby(["stimulus_presentation_id", "unit_id"]).count()

design = pd.pivot_table(
    spikes, 
    values="count", 
    index="stimulus_presentation_id", 
    columns="unit_id", 
    fill_value=0.0,
    aggfunc=np.sum
)

design

unit_id,950912608,950912626,950912880,950912902,950912954,950913038,950913047,950913145,950913226,950913235,...,950949699,950949789,950949803,950949863,950949879,950949893,950949950,950949963,950949979,950951255
stimulus_presentation_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
4339,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4340,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4341,0,1,0,1,0,0,0,0,0,0,...,1,1,0,0,0,0,0,0,0,0
4342,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4343,0,0,0,1,0,0,1,1,0,0,...,0,0,0,2,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76936,0,0,0,0,0,0,0,0,0,0,...,3,0,3,1,0,1,1,0,2,0
76937,0,0,0,0,0,0,0,0,0,0,...,0,0,2,3,1,1,1,2,0,0
76938,0,0,0,0,0,0,0,0,0,0,...,0,0,1,3,2,0,0,0,3,0
76939,0,0,0,0,0,0,0,0,0,0,...,0,0,2,0,1,0,0,1,2,0


In [None]:
# Fill in stimulus presentation ids for rows with 0 spike counts
missing_elements = np.setdiff1d(natural_movie_stimulus_presentation_ids, design.index)

print(f"Missing {len(missing_elements)} rows: \n", missing_elements)

for element in missing_elements:
    design.loc[element] = 0

CA1_spike_count_piv = design.sort_index()

### VISp

In [48]:
'''
VISp only
'''
units_of_interest_VISp = UNITS[(UNITS.structure_acronym.str.find('VISp') > -1)]
# and get a set of units with only decent snr
decent_snr_VISp_unit_ids = units_of_interest_VISp[
    units_of_interest_VISp['snr'] >= 1.5
].index.values

print(len(decent_snr_VISp_unit_ids))

spikes_visp_v1 = session.presentationwise_spike_times(
    stimulus_presentation_ids=natural_movie_stimulus_presentation_ids,
    unit_ids=decent_snr_VISp_unit_ids
)

49


In [51]:
session.structurewise_unit_counts

CA1       163
LP        101
LGd        76
VISal      65
APN        64
DG         59
VISmma     59
VISam      56
VISp       52
VISrl      37
VISl       34
VPM        25
MB         21
CA3        16
TH         13
NOT         1
Name: ecephys_structure_acronym, dtype: int64

In [50]:
decent_snr_VISp_unit_ids

array([950929283, 950929267, 950929299, 950931534, 950929433, 950929417,
       950929451, 950929536, 950929599, 950929583, 950929626, 950929614,
       950929641, 950929723, 950929680, 950929775, 950929763, 950929833,
       950929874, 950929988, 950929846, 950929943, 950929932, 950929916,
       950929889, 950930055, 950930042, 950930027, 950931566, 950931592,
       950931577, 950930110, 950930096, 950930071, 950931606, 950930198,
       950930170, 950930154, 950930227, 950930303, 950930398, 950930360,
       950930793, 950930846, 950930902, 950930889, 950930977, 950930945,
       950931035])

In [None]:
decent_snr_VIS_unit_ids - decent_snr_VISp_unit_ids

In [42]:
# Filter for units in all key visual cortex regions
units_of_interest_VIS = UNITS[UNITS['structure_acronym'].isin(['VISp', 'VISam', 'VISal', 'VISrl', 'VISl'])]

# and get a set of units with only decent snr
decent_snr_VIS_unit_ids = units_of_interest_VIS[
    units_of_interest_VIS['snr'] >= 1.5
].index.values

print(len(decent_snr_VIS_unit_ids))

234


In [43]:
spikes_vis_general = session.presentationwise_spike_times(
    stimulus_presentation_ids=natural_movie_stimulus_presentation_ids,
    unit_ids=decent_snr_VIS_unit_ids
)

In [44]:
spikes_visp = spikes_vis_general

In [45]:
spikes_visp["count"] = np.zeros(spikes_visp.shape[0])
spikes_visp = spikes_visp.groupby(["stimulus_presentation_id", "unit_id"]).count()

design_visp = pd.pivot_table(
    spikes_visp, 
    values="count", 
    index="stimulus_presentation_id", 
    columns="unit_id", 
    fill_value=0.0,
    aggfunc=np.sum
)

In [46]:
missing_elements = np.setdiff1d(natural_movie_stimulus_presentation_ids, design_visp.index)

print(f"Missing {len(missing_elements)} rows: \n", missing_elements)

for element in missing_elements:
    design_visp.loc[element] = 0

VISp_spike_count_piv = design_visp.sort_index()

Missing 46 rows: 
 [ 5648  5662  5663  5678  6374  6829  7785  7786  9182  9189  9196 10177
 13209 14088 14089 14110 14111 14112 14264 14274 14280 14281 15542 15543
 17664 17665 17672 17673 17718 17967 18356 21167 23287 23288 23295 23296
 24086 24130 25964 25972 26036 26937 26938 27769 27797 27860]


In [47]:
VISp_spike_count_piv

unit_id,950913540,950913721,950913754,950913798,950913842,950913849,950913864,950913895,950913904,950913913,...,950950754,950950767,950950778,950950813,950950827,950950840,950950901,950950915,950950928,950950976
stimulus_presentation_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
4339,0,1,0,0,0,0,0,0,1,0,...,1,1,0,0,0,0,0,0,0,0
4340,0,0,0,0,0,0,0,0,0,0,...,0,1,0,1,0,0,0,0,2,1
4341,0,0,0,0,0,0,0,0,1,0,...,1,0,0,2,0,0,0,0,0,2
4342,0,0,0,0,0,0,0,0,0,0,...,2,0,0,1,0,1,0,0,0,0
4343,0,0,0,0,0,0,0,0,0,0,...,6,0,0,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76936,0,0,0,0,2,0,0,0,0,0,...,1,0,0,0,0,0,1,0,0,0
76937,1,0,0,0,1,0,0,0,0,0,...,1,1,0,0,1,0,0,0,0,2
76938,0,0,1,0,1,0,0,0,0,1,...,0,0,0,2,0,1,1,0,0,0
76939,0,0,0,0,2,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0


In [None]:
data_path = "/Users/rp/Desktop/Research/CN^3/Thesis Material/2-Region-Latent-Alignment/data/processed/"
session_path = f"session_{FCon_session_1_id}/"
directory_path = data_path+session_path

if not os.path.exists(directory_path):
    os.makedirs(directory_path)

# CA1_spike_count_piv.to_hdf(directory_path+f'CA1_spike_count_{FCon_session_1_id}_pivot.h5', key='df', mode='w')
VISp_spike_count_piv.to_hdf(directory_path+f'VISp_spike_count_{FCon_session_1_id}_pivot.h5', key='df', mode='w')

#### Check unit ids

In [14]:
ecephys_session_id = "766640955"

# Load CA1 IDs from file
with open(f'CA1_ids_{ecephys_session_id}.pkl', 'rb') as f:
    CA1_ids_loaded = pickle.load(f)

# Load VISp IDs from file
with open(f'VISp_ids_{ecephys_session_id}.pkl', 'rb') as f:
    VISp_ids_loaded = pickle.load(f)

In [39]:
print(sum(VISp_spike_count_piv.columns.values == VISp_ids_loaded) == len(VISp_ids_loaded),
sum(CA1_spike_count_piv.columns.values == CA1_ids_loaded) == len(CA1_ids_loaded))

True True


## Behavior Variables

### Load stimuli tables

In [59]:
data_path = "../data/"
session_number = NATURAL_MOVIE_SESSION_0_ID

# Load in neccesary tables
RUN_SPEED = session.running_speed
PUPIL_DATA = session.get_pupil_data()
STIMULUS_DATA = session.get_stimulus_table([stimulus_name_of_interest])

RUN_SPEED_TRUNC = RUN_SPEED[(RUN_SPEED['start_time'] >= min(STIMULUS_DATA.start_time)) & (RUN_SPEED['end_time'] <= max(STIMULUS_DATA.stop_time))]

PUPIL_DATA_TRUNC = PUPIL_DATA[(PUPIL_DATA.index >  min(STIMULUS_DATA.start_time)) & (PUPIL_DATA.index < max(STIMULUS_DATA.stop_time))]

### Functions

In [76]:
def run_speed_for_stim(row):
    stim_speed_df = RUN_SPEED_TRUNC[(RUN_SPEED_TRUNC['start_time'] <= row['stop_time']) & (RUN_SPEED_TRUNC['end_time'] >= row['start_time'])]
    avg_speed = np.nan if stim_speed_df.empty else np.mean(stim_speed_df['velocity'])
    return avg_speed

In [78]:
def pupil_size_for_stim(row):
    df = PUPIL_DATA_TRUNC[(PUPIL_DATA_TRUNC.index <= row['stop_time'])].iloc[-1]
    # Assuming df is your original DataFrame with the given columns
    # Create new features
    pupil_size = np.pi * df['pupil_width'] * df['pupil_height']
    pupil_location_x = df['pupil_center_x'] / df['eye_width']  # Normalize by eye width
    pupil_location_y = df['pupil_center_y'] / df['eye_height']  # Normalize by eye height

    return pupil_size, pupil_location_x, pupil_location_y

In [79]:
def chunk_movies(movies_df):
    chunk_size = 900
    num_rows = len(movies_df)

    # Calculate the number of chunks needed
    num_chunks = int(np.ceil(num_rows / chunk_size))

    # Create a list to store the chunks
    movie_chunks = [movies_df.iloc[i * chunk_size:(i + 1) * chunk_size] for i in range(num_chunks)]

    return movie_chunks

In [80]:
def compute_relative_positions(movie_chunks):
    chunk_positions = []
    for movie_chunk in movie_chunks:
        movie_chunk_positions = movie_chunk['velocity'].cumsum()
        adj_rat_positions = movie_chunk_positions - movie_chunk_positions.iloc[0]
        chunk_positions.append(adj_rat_positions)
    return pd.concat(chunk_positions, axis=0)

In [81]:
def compute_position(movies_df):
    return compute_relative_positions(chunk_movies(movies_df))

### Frame

In [82]:
filtered_movie_df = STIMULUS_DATA[['stimulus_block', "duration", "start_time", "stop_time", "frame"]]

### Distance

In [None]:
filtered_movie_df['velocity'] = filtered_movie_df.apply(run_speed_for_stim, axis=1)

In [None]:
filtered_movie_df['position'] = compute_position(filtered_movie_df)

### Pupil Size

In [87]:
pupil_temp_df = filtered_movie_df.apply(pupil_size_for_stim, axis=1)
pupil_df = pd.DataFrame(pupil_temp_df.tolist(), columns=['pupil_size', 'pupil_center_x_normalized', 'pupil_center_y_normalized'])
pupil_df = pupil_df.set_index(pupil_temp_df.index)

In [88]:
behavior_df = pd.concat([filtered_movie_df, pupil_df], axis=1)

#### Check behavior data

In [45]:
data_path = "../data/"
session_number = NATURAL_MOVIE_SESSION_0_ID

behvior_variables_path = "NaturalMovie_Behvaior_" + str(session_number)+ ".h5"
behavior_data_df = pd.read_hdf(data_path+behvior_variables_path, key="w") # Behavior

In [46]:
behavior_data_df

Unnamed: 0_level_0,stimulus_block,duration,start_time,stop_time,frame,velocity,position,pupil_size,pupil_center_x_normalized,pupil_center_y_normalized,frame_norm
stimulus_presentation_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
4339,3.0,0.033361,2297.212884,2297.246246,0.0,-0.829370,0.000000,3788.108714,0.961539,0.800420,0.000000
4340,3.0,0.033361,2297.246246,2297.279607,1.0,0.032865,0.032865,3829.684818,0.960886,0.799850,0.001112
4341,3.0,0.033361,2297.279607,2297.312968,2.0,0.453049,0.485914,3823.329682,0.960392,0.799987,0.002225
4342,3.0,0.033361,2297.312968,2297.346330,3.0,-0.425225,0.060689,3716.425171,0.959373,0.801302,0.003337
4343,3.0,0.033361,2297.346330,2297.379691,4.0,-0.398370,-0.337681,3774.721457,0.958592,0.801187,0.004449
...,...,...,...,...,...,...,...,...,...,...,...
76936,8.0,0.033361,8302.063780,8302.097141,895.0,1.360972,7.022430,6719.909899,1.053908,0.705983,0.995551
76937,8.0,0.033361,8302.097141,8302.130501,896.0,-1.424704,5.597727,6719.909899,1.053908,0.705983,0.996663
76938,8.0,0.033361,8302.130501,8302.163862,897.0,0.408880,6.006607,6317.194557,1.056777,0.712099,0.997775
76939,8.0,0.033361,8302.163862,8302.197223,898.0,0.129173,6.135780,6308.315794,1.059431,0.705928,0.998888


In [89]:
behavior_df

Unnamed: 0_level_0,stimulus_block,duration,start_time,stop_time,frame,velocity,position,pupil_size,pupil_center_x_normalized,pupil_center_y_normalized
stimulus_presentation_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
4339,3.0,0.033361,2297.212884,2297.246246,0.0,-0.829370,0.000000,3788.108714,0.961539,0.800420
4340,3.0,0.033361,2297.246246,2297.279607,1.0,0.032865,0.032865,3829.684818,0.960886,0.799850
4341,3.0,0.033361,2297.279607,2297.312968,2.0,0.453049,0.485914,3823.329682,0.960392,0.799987
4342,3.0,0.033361,2297.312968,2297.346330,3.0,-0.425225,0.060689,3716.425171,0.959373,0.801302
4343,3.0,0.033361,2297.346330,2297.379691,4.0,-0.398370,-0.337681,3774.721457,0.958592,0.801187
...,...,...,...,...,...,...,...,...,...,...
76936,8.0,0.033361,8302.063780,8302.097141,895.0,1.360972,7.022430,6719.909899,1.053908,0.705983
76937,8.0,0.033361,8302.097141,8302.130501,896.0,-1.424704,5.597727,6719.909899,1.053908,0.705983
76938,8.0,0.033361,8302.130501,8302.163862,897.0,0.408880,6.006607,6317.194557,1.056777,0.712099
76939,8.0,0.033361,8302.163862,8302.197223,898.0,0.129173,6.135780,6308.315794,1.059431,0.705928


## Save Data

In [90]:
data_path = "../data/"
session_path = f"session_{FCon_session_1_id}/"
directory_path = data_path+session_path

if not os.path.exists(directory_path):
    os.makedirs(directory_path)

CA1_spike_count_piv.to_hdf(directory_path+f'CA1_spike_count_{FCon_session_1_id}_pivot.h5', key='df', mode='w')
VISp_spike_count_piv.to_hdf(directory_path+f'VISp_spike_count_{FCon_session_1_id}_pivot.h5', key='df', mode='w')

behavior_df.to_hdf(directory_path+f'NaturalMovie_Behavior_{FCon_session_1_id}_df.h5', key='df', mode='w')

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->floating,key->block1_values] [items->Index(['stimulus_block', 'frame'], dtype='object')]

  behavior_df.to_hdf(directory_path+f'NaturalMovie_Behavior_{FCon_session_1_id}_df.h5', key='df', mode='w')


## Clean Data + Normalize

In [74]:
def normalize_column(df, column_name):
    """
    Normalize a specified column in a DataFrame.

    Parameters:
    - df: DataFrame
        The DataFrame containing the column to be normalized.
    - column_name: str
        The name of the column to be normalized.
    """
    # Check if the specified column exists in the DataFrame
    if column_name not in df.columns:
        raise ValueError(f"Column '{column_name}' not found in the DataFrame.")

    # Normalize the specified column
    # if "position" in column_name:
    #     normalized_column = df[column_name] / df[column_name].max()

    normalized_column = (df[column_name] - df[column_name].min()) / (df[column_name].max() - df[column_name].min())
    

    # Create a new DataFrame with the normalized column
    df[column_name+"_norm"] = normalized_column


In [71]:
# Read Data back in
FCon_session_1_id = 771160300

data_path = "/Users/rp/Desktop/Research/CN^3/Thesis Material/2-Region-Latent-Alignment/data/processed/"
session_path = f"session_{FCon_session_1_id}/"
directory_path = data_path+session_path

behavior_df=pd.read_hdf(directory_path+f'NaturalMovie_Behavior_{FCon_session_1_id}_df.h5', key='df')

### Handle NA values
- Position/velocity
- Pupil size

In [72]:
behavior_df.isna().sum()

stimulus_block    0
duration          0
start_time        0
stop_time         0
trial             0
frame             0
velocity          0
position          0
total_distance    0
pupil_size        0
dtype: int64

In [69]:
# Interpolate missing values for 'velocity' column
behavior_df['velocity_clean'] = behavior_df['velocity'].interpolate(limit_area='inside')

# Interpolate missing values for 'position_clean' column
behavior_df['position_clean'] = behavior_df['position'].interpolate(limit_area='inside')

# Interpolate missing values for 'pupil_size_clean' column
behavior_df['pupil_size_clean'] = behavior_df['pupil_size'].interpolate(limit_area='inside')

In [70]:
# Define the chunk size (number of rows per trial)
chunk_size = 900

# Calculate the total number of trials
num_trials = len(behavior_df) // chunk_size

# Create a 'trial' column with unique identifiers for each trial
behavior_df['trial'] = sum([[i] * chunk_size for i in range(num_trials)], [])

# Define a function to compute total distance for each trial
def compute_total_distance(chunk):
    chunk['total_distance'] = chunk['position_clean'].diff().abs().cumsum()
    chunk['total_distance'].iloc[0] = 0  # Set the first value to 0
    return chunk

# Apply the function to each trial separately using groupby
behavior_df['total_distance'] = behavior_df.groupby('trial').apply(compute_total_distance)['total_distance']

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  behavior_df['total_distance'] = behavior_df.groupby('trial').apply(compute_total_distance)['total_distance']


In [71]:
behavior_df

Unnamed: 0_level_0,stimulus_block,duration,start_time,stop_time,frame,velocity,position,pupil_size,pupil_center_x_normalized,pupil_center_y_normalized,velocity_clean,position_clean,pupil_size_clean,trial,total_distance
stimulus_presentation_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
4339,3.0,0.033361,2297.212884,2297.246246,0.0,-0.829370,0.000000,3788.108714,0.961539,0.800420,-0.829370,0.000000,3788.108714,0,0.000000
4340,3.0,0.033361,2297.246246,2297.279607,1.0,0.032865,0.032865,3829.684818,0.960886,0.799850,0.032865,0.032865,3829.684818,0,0.032865
4341,3.0,0.033361,2297.279607,2297.312968,2.0,0.453049,0.485914,3823.329682,0.960392,0.799987,0.453049,0.485914,3823.329682,0,0.485914
4342,3.0,0.033361,2297.312968,2297.346330,3.0,-0.425225,0.060689,3716.425171,0.959373,0.801302,-0.425225,0.060689,3716.425171,0,0.911139
4343,3.0,0.033361,2297.346330,2297.379691,4.0,-0.398370,-0.337681,3774.721457,0.958592,0.801187,-0.398370,-0.337681,3774.721457,0,1.309509
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76936,8.0,0.033361,8302.063780,8302.097141,895.0,1.360972,7.022430,6719.909899,1.053908,0.705983,1.360972,7.022430,6719.909899,59,645.608423
76937,8.0,0.033361,8302.097141,8302.130501,896.0,-1.424704,5.597727,6719.909899,1.053908,0.705983,-1.424704,5.597727,6719.909899,59,647.033126
76938,8.0,0.033361,8302.130501,8302.163862,897.0,0.408880,6.006607,6317.194557,1.056777,0.712099,0.408880,6.006607,6317.194557,59,647.442007
76939,8.0,0.033361,8302.163862,8302.197223,898.0,0.129173,6.135780,6308.315794,1.059431,0.705928,0.129173,6.135780,6308.315794,59,647.571180


### Normalize columns

In [75]:
cols_to_normalize = ['frame', 'position_clean', 'pupil_size_clean', 'total_distance']
for col in cols_to_normalize:
    normalize_column(behavior_df, col)

In [76]:
behavior_df

Unnamed: 0_level_0,stimulus_block,duration,start_time,stop_time,trial,frame,velocity,position,total_distance,pupil_size,velocity_norm
stimulus_presentation_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
4339,3.0,0.033360,2297.386085,2297.419445,0,0.000000,-1.189573,0.002976,0.000000,0.372302,0.185017
4340,3.0,0.033360,2297.419445,2297.452805,0,0.001112,0.509527,0.003016,0.000039,0.369977,0.215832
4341,3.0,0.033360,2297.452805,2297.486166,0,0.002225,-1.181293,0.002924,0.000131,0.369158,0.185167
4342,3.0,0.033360,2297.486166,2297.519526,0,0.003337,-0.232152,0.002906,0.000149,0.375585,0.202381
4343,3.0,0.033360,2297.519526,2297.552887,0,0.004449,1.558697,0.003027,0.000270,0.363722,0.234859
...,...,...,...,...,...,...,...,...,...,...,...
76936,8.0,0.033361,8302.237438,8302.270799,59,0.995551,1.896312,0.003106,0.076379,0.231060,0.240982
76937,8.0,0.033361,8302.270799,8302.304161,59,0.996663,-1.112173,0.003020,0.076465,0.228712,0.186421
76938,8.0,0.033361,8302.304161,8302.337522,59,0.997775,0.034616,0.003022,0.076467,0.236061,0.207219
76939,8.0,0.033361,8302.337522,8302.370883,59,0.998888,2.341220,0.003205,0.076649,0.242728,0.249051


In [73]:
behavior_to_save = ['trial', 'frame_norm', 'velocity_clean','position_clean_norm', 'total_distance_norm','pupil_size_clean_norm']
aux_cols = list(behavior_df.columns.values[:4])

df = behavior_df[aux_cols + behavior_to_save]

#### Prepare and package data for analysis

In [74]:
# Define a mapping dictionary for renaming columns
mapping = {
    'frame_norm': 'frame',
    'velocity_clean': 'velocity',
    'position_clean_norm': 'position',
    'pupil_size_clean_norm': 'pupil_size',
    'total_distance_norm' : 'total_distance'
}

# Rename the columns using the mapping dictionary
df = df.rename(columns=lambda x: mapping.get(x, x))

### Unit Testing

In [75]:
class TestDataframe:
    def __init__(self, df):
        self.df = df
        self.failed_tests = []
        self.behavior_variables = ['frame', 'position', 'pupil_size', 'total_distance']

    def test_no_na_values(self):
        # Check for no NA values in the specified columns
        print("\n---- NA Test ----")
        for col in self.behavior_variables:
            if self.df[col].isna().any():
                self.failed_tests.append(f"No NA values found in column '{col}'.")
            else:
                print(f"Test passed: No NA values found in column '{col}'.")

    def test_values_in_range(self):
        # Check if all values in specified columns lie within the range [0, 1]
        print("\n---- Normalized test ----")
        for col in self.behavior_variables:
            if not all(0 <= self.df[col]) or not all(self.df[col] <= 1):
                self.failed_tests.append(f"Not all values in column '{col}' are normalized.")
            else:
                print(f"Test passed: All values in column '{col}' are normalized.")

    def run_all_tests(self):
        self.test_no_na_values()
        self.test_values_in_range()

        print("\n---- Test Summary ----")
        if not self.failed_tests:
            print("All tests passed!")
        else:
            print("Failed tests:")
            for test in self.failed_tests:
                print(test)
                
        num_tests = 2
        print(f"\nTotal passed tests: {len(self.behavior_variables) * num_tests - len(self.failed_tests)}")
        print(f"Total failed tests: {len(self.failed_tests)}")

In [76]:
unitTest = TestDataframe(df)
unitTest.run_all_tests()


---- NA Test ----
Test passed: No NA values found in column 'frame'.
Test passed: No NA values found in column 'position'.
Test passed: No NA values found in column 'pupil_size'.
Test passed: No NA values found in column 'total_distance'.

---- Normalized test ----
Test passed: All values in column 'frame' are normalized.
Test passed: All values in column 'position' are normalized.
Test passed: All values in column 'pupil_size' are normalized.
Test passed: All values in column 'total_distance' are normalized.

---- Test Summary ----
All tests passed!

Total passed tests: 8
Total failed tests: 0


### Store data

In [77]:
df['frame'] = df['frame'].astype('float64')

In [78]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 54000 entries, 4339 to 76940
Data columns (total 10 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   stimulus_block  54000 non-null  object 
 1   duration        54000 non-null  float64
 2   start_time      54000 non-null  float64
 3   stop_time       54000 non-null  float64
 4   trial           54000 non-null  int64  
 5   frame           54000 non-null  float64
 6   velocity        54000 non-null  float64
 7   position        54000 non-null  float64
 8   total_distance  54000 non-null  float64
 9   pupil_size      54000 non-null  float64
dtypes: float64(8), int64(1), object(1)
memory usage: 4.5+ MB


In [79]:
FCon_session_1_id = 766640955

data_path = "../data/"
session_path = f"session_{FCon_session_1_id}/"
directory_path = data_path+session_path

if not os.path.exists(directory_path):
    os.makedirs(directory_path)

df.to_hdf(directory_path+f'NaturalMovie_Behavior_{FCon_session_1_id}_normalized.h5', key='df', mode='w')

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->floating,key->block2_values] [items->Index(['stimulus_block'], dtype='object')]

  df.to_hdf(directory_path+f'NaturalMovie_Behavior_{FCon_session_1_id}_normalized.h5', key='df', mode='w')


### Re-pickle data

In [1]:
import os
import pandas as pd

# FCon_session_1_id = 766640955
ecephys_session_id = 771160300

data_path = "/Users/rp/Desktop/Research/CN^3/Thesis Material/2-Region-Latent-Alignment/data/processed/"
session_path = f"session_{ecephys_session_id}/"
directory_path = data_path+session_path

if not os.path.exists(directory_path):
    os.makedirs(directory_path)

# df.to_hdf(directory_path+f'NaturalMovie_Behavior_{FCon_session_1_id}_normalized.h5', key='df', mode='w')
brain_region = 'CA1'

# Read spike count data pivoted by neurons and time bins
spike_data_file = f'{brain_region}_spike_count_{ecephys_session_id}_pivot.h5'
spike_file_path = os.path.join(directory_path, spike_data_file)
if not os.path.exists(spike_file_path):
    raise FileNotFoundError(f"File {spike_file_path} does not exist")
spike_count_pivot = pd.read_hdf(spike_file_path, key='df')

# Read behavior data
behavior_data_file = f'NaturalMovie_Behavior_{ecephys_session_id}_normalized.h5'
behavior_file_path = os.path.join(directory_path, behavior_data_file)
if not os.path.exists(behavior_file_path):
    raise FileNotFoundError(f"File {behavior_file_path} does not exist")
behavior_data_df = pd.read_hdf(behavior_file_path, key='df')


In [6]:
#, key='data', format='table', pickle_protocol=4)

spike_data_file = f'{brain_region}_spike_count_{ecephys_session_id}_pivot_p4.h5'
spike_file_path = os.path.join(directory_path, spike_data_file)
# if not os.path.exists(spike_file_path):
#     raise FileNotFoundError(f"File {spike_file_path} does not exist")
spike_count_pivot.to_hdf(spike_file_path, key='data', format='table', pickle_protocol=4)

# Read behavior data
behavior_data_file = f'NaturalMovie_Behavior_{ecephys_session_id}_normalized_p4.h5'
behavior_file_path = os.path.join(directory_path, behavior_data_file)
# if not os.path.exists(behavior_file_path):
#     raise FileNotFoundError(f"File {behavior_file_path} does not exist")
behavior_data_df.to_hdf(behavior_file_path, key='data', format='table', pickle_protocol=4)

TypeError: NDFrame.to_hdf() got an unexpected keyword argument 'pickle_protocol'

In [2]:
spike_data_file = f'{brain_region}_spike_count_{ecephys_session_id}_pivot_p4.csv'
spike_file_path = os.path.join(directory_path, spike_data_file)
# if not os.path.exists(spike_file_path):
#     raise FileNotFoundError(f"File {spike_file_path} does not exist")
spike_count_pivot.to_csv(spike_file_path, index=False)

# Read behavior data
behavior_data_file = f'NaturalMovie_Behavior_{ecephys_session_id}_normalized_p4.csv'
behavior_file_path = os.path.join(directory_path, behavior_data_file)
# if not os.path.exists(behavior_file_path):
#     raise FileNotFoundError(f"File {behavior_file_path} does not exist")
behavior_data_df.to_csv(behavior_file_path, index=False)

In [9]:
spike_count_pivot.values

AttributeError: 'numpy.ndarray' object has no attribute 'nunique'

In [10]:
import numpy as np

np.unique(spike_count_pivot.values)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 13])

In [11]:
spike_count_pivot.shape

(54000, 82)