In [2]:
## NB: YOU MUST DOWNGRADE PANDAS (this note originally said "1.23", presumably 1.2.3 -- pandas has no 1.23 release; for Python <= 3.8) OR PICKLED DFs WILL BE UNREADABLE BY COLAB

# Import packages
import os
import requests
import numpy as np
import pandas as pd
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.manifold import TSNE

import mindscope_utilities
import mindscope_utilities.visual_behavior_ophys as ophys

from allensdk.brain_observatory.behavior.behavior_project_cache import VisualBehaviorOphysProjectCache

# Show up to 500 columns when displaying wide data frames
pd.set_option('display.max_columns', 500)

# Detect whether we are running inside Google Colab.
# Only a failed import means "not in Colab"; any other error (e.g. a failed
# Drive mount) is deliberately NOT swallowed, so that results are never
# silently written to a local directory when Drive was expected.
try:
  import google.colab
except ImportError:
  IN_COLAB = False
else:
  IN_COLAB = True

if IN_COLAB:
  # Mount Google Drive so that saved data persist outside the Colab session
  google.colab.drive.mount('/content/drive')
  cache_directory = "/temp/"  # Note: this path must exist on your local drive
  save_data_directory = '/content/drive/MyDrive/'
else:
  cache_directory = "./cache/"  # Note: this path must exist on your local drive
  save_data_directory = './'

In [3]:
# Get preprocessed dataset
# fname = "allen_visual_behavior_2p_change_detection_familiar_novel_image_sets.parquet"
# url = "https://ndownloader.figshare.com/files/28470255"
# if not os.path.isfile(fname):
#   try:
#     r = requests.get(url)
#   except requests.ConnectionError:
#     print("!!! Failed to download data !!!")
#   else:
#     if r.status_code != requests.codes.ok:
#       print("!!! Failed to download data !!!")
#     else:
#       with open(fname, "wb") as fid:
#         fid.write(r.content)

# filename = "allen_visual_behavior_2p_change_detection_familiar_novel_image_sets.parquet"
# preprocessed_data = pd.read_parquet(filename)
# preprocessed_data.head(5)

# print(preprocessed_data.session_type.unique())
# print(np.sort(preprocessed_data.query('cre_line == "Vip-IRES-Cre" and session_type == "OPHYS_3_images_B"')['mouse_id'].unique()))
# print(np.sort(preprocessed_data.query('cre_line == "Vip-IRES-Cre" and session_type == "OPHYS_4_images_A"')['mouse_id'].unique()))

In [4]:
# Get high-level csv files from S3 storage
# `cache_directory` is set in the first cell (Colab vs. local) and is where the
# S3-backed project cache keeps its files; `cache` is used by later cells to load
# the session table and the individual experiments.
cache = VisualBehaviorOphysProjectCache.from_s3_cache(cache_dir=cache_directory)

In [5]:
# Fetch the session-level table from the project cache as a data frame
# (the experiment-level table can be fetched the same way if needed)
session_table = cache.get_ophys_session_table()
#ophys_session_ids = session_table.index
#experiment_table = cache.get_ophys_experiment_table()

# List the distinct session types and cre lines present in the table
for column_name in ('session_type', 'cre_line'):
  print(session_table[column_name].unique())

['OPHYS_1_images_A' 'OPHYS_2_images_A_passive' 'OPHYS_3_images_A'
 'OPHYS_4_images_B' 'OPHYS_5_images_B_passive' 'OPHYS_6_images_B'
 'OPHYS_1_images_B' 'OPHYS_2_images_B_passive' 'OPHYS_3_images_B'
 'OPHYS_4_images_A' 'OPHYS_6_images_A' 'OPHYS_5_images_A_passive'
 'OPHYS_1_images_G' 'OPHYS_3_images_G' 'OPHYS_2_images_G_passive'
 'OPHYS_4_images_H' 'OPHYS_5_images_H_passive' 'OPHYS_6_images_H']
['Sst-IRES-Cre' 'Vip-IRES-Cre' 'Slc17a7-IRES2-Cre']


In [6]:
# Select a subset of mouse cre lines and sessions
target_cre_lines = ["Slc17a7-IRES2-Cre"]
target_session_types = ["OPHYS_1_images_B", "OPHYS_3_images_B", "OPHYS_4_images_A", "OPHYS_6_images_A"]
session_table_subset = session_table.query('cre_line in @target_cre_lines and session_type in @target_session_types')

# Determine which mice in this subset have data from ALL target session types.
# `mouse_sessions` has one row per mouse; 'session_type' holds the array of
# distinct session types recorded for that mouse within the subset.
#session_table_subset.groupby(['mouse_id','session_type'])['age_in_days'].mean()
mouse_sessions = pd.DataFrame(session_table_subset.groupby('mouse_id')['session_type'].unique())
# Compare against the length of the target list (not a hard-coded 4) so the
# filter stays correct when `target_session_types` is edited.
mouse_sessions['has_all_sessions'] = mouse_sessions['session_type'].map(len) == len(target_session_types)
target_mouse_ids = mouse_sessions.query('has_all_sessions').index
print(target_mouse_ids)

Int64Index([461946, 462468, 462544, 464204, 464207, 476067, 476631, 479458,
            491060, 512458, 513626, 533161, 539517, 539518, 548950],
           dtype='int64', name='mouse_id')


In [7]:
# Build one table of per-cell, per-stimulus responses for every session of the
# selected mice and (when `save_session` is True) pickle each session separately.
# note that there may be different subsets of cells in different sessions
# some sessions will have multiple experiments (imaging planes), some may only have one
#all_data = []
#for mouse_id in target_mouse_ids[:3]:
save_session = True

# Experiment-level metadata copied onto every row of the neural data
metadata_keys = [
    'ophys_experiment_id',
    'ophys_session_id',
    'targeted_structure',
    'imaging_depth',
    'equipment_name',
    'cre_line',
    'mouse_id',
    'sex',
]

for mouse_id in target_mouse_ids[2:4]:
  print('--------------')
  # NOTE(review): this iterates over ALL sessions of the mouse in `session_table`,
  # not only the session types kept in `session_table_subset` -- confirm this is intended.
  ophys_session_ids = session_table.query('mouse_id == @mouse_id').index
  print(f'Mouse ID {mouse_id} includes {len(ophys_session_ids)} sessions')
  mouse_data = []
  for ophys_session_id in ophys_session_ids:
    # Output file for this session; built once so the "already processed" check
    # and the final save can never disagree about the path.
    session_pickle_path = f'{save_data_directory}{target_cre_lines[0][:3]}_mouse{mouse_id}_sess{ophys_session_id}.pkl'
    if save_session:
      if os.path.isfile(session_pickle_path):
        # this session was already processed and saved on a previous run
        continue
      # when saving per session, every file holds exactly one session
      mouse_data = []
    # Get session metadata
    session_metadata = session_table.loc[ophys_session_id]
    # Download experiments for each session
    experiments = {} # clear experiments dictionary
    ophys_experiment_ids = session_metadata['ophys_experiment_id']
    print(f'Session ID {ophys_session_id} includes {len(ophys_experiment_ids)} experiments: {np.array(ophys_experiment_ids)}')
    for ophys_experiment_id in ophys_experiment_ids:
      experiments[ophys_experiment_id] = cache.get_behavior_ophys_experiment(ophys_experiment_id)
    # stimulus table is shared for all experiments in a session
    stimulus_table = experiments[ophys_experiment_ids[0]].stimulus_presentations.drop(columns = ['image_set'])  # 'image_set' is unnecessary
    # Download neural data for each experiment
    neural_data = []
    for ophys_experiment_id, this_experiment in experiments.items():
      this_experiment_neural_data = ophys.build_tidy_cell_df(this_experiment)
      # add some columns with metadata for the experiment
      for metadata_key in metadata_keys:
        this_experiment_neural_data[metadata_key] = this_experiment.metadata[metadata_key]

      # append the data for this experiment to a list
      neural_data.append(this_experiment_neural_data)
      print(f'Experiment {ophys_experiment_id} includes {len(this_experiment_neural_data["cell_specimen_id"].unique())} cells.')
      #: {np.array(this_experiment_neural_data["cell_specimen_id"].unique())}')

    # concatenate the list of dataframes into a single dataframe
    neural_data = pd.concat(neural_data)

    full_etr_l = []
    # Get the event triggered response for each cell.
    # A single groupby pass replaces one full-table query per cell;
    # sort=False keeps the cells in order of first appearance.
    for cell_specimen_id, cell_neural_data in tqdm(neural_data.groupby('cell_specimen_id', sort=False)):
      # calculate the event triggered response for this cell to every stimulus
      full_etr_this_cell = mindscope_utilities.event_triggered_response(
          data=cell_neural_data,
          t='timestamps',
          y='dff', #'dff', 'events', 'filtered_events'
          event_times=stimulus_table['start_time'],
          t_before=0.5,
          t_after=0.75,
          output_sampling_rate=30, # Hz (so 30 = every 33 ms)
      )
      # add a column identifying the cell_specimen_id
      full_etr_this_cell['cell_specimen_id'] = cell_specimen_id
      # append to our list
      full_etr_l.append(full_etr_this_cell)

    # concatenate our list of dataframes into a single dataframe
    full_etr = pd.concat(full_etr_l)

    # cast these numeric columns to int and float, respectively
    full_etr['event_number'] = full_etr['event_number'].astype(int)
    # (fixed: 'event_time' used to be overwritten with the event NUMBER cast to float)
    full_etr['event_time'] = full_etr['event_time'].astype(float)

    # Compute the average response per (cell, stimulus presentation)
    # Default is the average from 0 to 500 ms
    # We will also compute a baseline-corrected version 'dff_bc', where the
    # baseline is the mean over the samples with -150 ms < time < 0
    response_keys = ['cell_specimen_id', 'stimulus_presentations_id']
    average_0_to_500 = full_etr.query('time >= 0 and time <= .5').groupby(response_keys)[['dff']].mean().reset_index()
    average_n133_to_n33 = full_etr.query('time > -.15 and time < 0').groupby(response_keys)[['dff']].mean().reset_index()
    # both frames come from the same sorted groupby keys, so rows line up by position
    dff_bc = average_0_to_500['dff']-average_n133_to_n33['dff']

    average_responses = average_0_to_500.merge(
      stimulus_table,
      on='stimulus_presentations_id',
      how='left'
    )
    average_responses['dff_stim500'] = average_0_to_500['dff']
    average_responses['dff_bc'] = dff_bc
    # full per-presentation traces, in the same sorted (cell, stimulus) order as above
    average_responses['trace'] = full_etr.groupby(response_keys)['dff'].apply(list).values
    average_responses['trace_timestamps'] = full_etr.groupby(response_keys)['time'].apply(list).values
    additional_metadata = neural_data.groupby('cell_specimen_id')[metadata_keys].max().reset_index()
    average_responses = average_responses.merge(additional_metadata,on='cell_specimen_id',how='left')
    mouse_data.append(average_responses)

    if save_session:
      # from here on `mouse_data` is a DataFrame holding this one session
      mouse_data = pd.concat(mouse_data)
      # Add some additional metadata: session number and familiar/novel label
      session_info = session_table.query('mouse_id == @mouse_id')
      session_info = session_info.groupby(['ophys_session_id']).max().reset_index()
      # sessions numbered above 3 are labelled 'novel', the others 'familiar'
      session_info['exposure_level'] = session_info['session_number']>3
      session_info = session_info.replace({'exposure_level': {False:'familiar',True:'novel'}})
      mouse_data = mouse_data.merge(
          session_info[['mouse_id','ophys_session_id','session_number','exposure_level']],
          on=['mouse_id','ophys_session_id'],
          how='left'
      )

      # remove some unnecessary data
      unneeded = ['start_time','stop_time','duration','image_index','start_frame','end_frame','equipment_name','trace_timestamps']
      mouse_data = mouse_data.drop(unneeded,axis=1)

      # change data types for memory efficiency
      change_type_dict = {'cell_specimen_id': 'category',
                          'stimulus_presentations_id':'int16',
                          'dff': 'float16',
                          'dff_stim500': 'float16',
                          'image_name': 'category',
                          'dff_bc': 'float16',
                          'ophys_experiment_id': 'category',
                          'ophys_session_id': 'category',
                          'targeted_structure': 'category',
                          'imaging_depth': 'int16',
                          'cre_line': 'category',
                          'mouse_id': 'category',
                          'sex': 'category',
                          'session_number': 'int8',
                          'exposure_level': 'category'
                          }
      mouse_data = mouse_data.astype(change_type_dict)

      mouse_data.to_pickle(session_pickle_path)

  # # Finished a mouse!
  # mouse_data = pd.concat(mouse_data)

  # # Add some additional metadata
  # lol = session_table.query('mouse_id == @mouse_id')
  # lol = lol.groupby(['ophys_session_id']).max().reset_index()
  # lol['exposure_level'] = lol['session_number']>3
  # lol = lol.replace({'exposure_level': {False:'familiar',True:'novel'}})
  # mouse_data = mouse_data.merge(
  #     lol[['mouse_id','ophys_session_id','session_number','exposure_level']],
  #     on=['mouse_id','ophys_session_id'],
  #     how='left'
  # )

  # # remove some unnecessary data
  # unneeded = ['start_time','stop_time','duration','image_index','start_frame','end_frame','equipment_name','trace_timestamps']
  # mouse_data = mouse_data.drop(unneeded,axis=1)

  # # change data types for memory efficiency
  # change_type_dict = {'cell_specimen_id': 'category',
  #                     'stimulus_presentations_id':'int16',
  #                     'dff': 'float16',
  #                     'dff_stim500': 'float16',
  #                     'image_name': 'category',
  #                     'dff_bc': 'float16',
  #                     'ophys_experiment_id': 'category',
  #                     'ophys_session_id': 'category',
  #                     'targeted_structure': 'category',
  #                     'imaging_depth': 'int16',
  #                     'cre_line': 'category',
  #                     'mouse_id': 'category',
  #                     'sex': 'category',
  #                     'session_number': 'int8',
  #                     'exposure_level': 'category'
  #                     }
  # mouse_data = mouse_data.astype(change_type_dict)

  # mouse_data.to_pickle(f'{save_data_directory}{target_cre_lines[0][:3]}_mouse{mouse_id}.pkl')
  # #all_data.append(mouse_data)

# Finished all mice!
#all_data = pd.concat(all_data)

--------------
Mouse ID 462544 includes 7 sessions
Session ID 914306708 includes 1 experiments: [914536934]


behavior_ophys_experiment_914536934.nwb: 100%|██████████| 1.07G/1.07G [00:43<00:00, 24.9MMB/s]   


Experiment 914536934 includes 181 cells.


100%|██████████| 181/181 [02:45<00:00,  1.10it/s]


Session ID 914797752 includes 1 experiments: [915150256]


behavior_ophys_experiment_915150256.nwb: 100%|██████████| 1.11G/1.11G [00:50<00:00, 22.1MMB/s]   


Experiment 915150256 includes 194 cells.


100%|██████████| 194/194 [03:00<00:00,  1.07it/s]


Session ID 915587736 includes 1 experiments: [916093599]


behavior_ophys_experiment_916093599.nwb: 100%|██████████| 1.06G/1.06G [00:41<00:00, 25.4MMB/s]   


Experiment 916093599 includes 172 cells.


100%|██████████| 172/172 [02:38<00:00,  1.08it/s]


Session ID 922743776 includes 1 experiments: [923100548]


behavior_ophys_experiment_923100548.nwb: 100%|██████████| 1.07G/1.07G [00:42<00:00, 25.2MMB/s]   


Experiment 923100548 includes 181 cells.


100%|██████████| 181/181 [02:49<00:00,  1.07it/s]


--------------
Mouse ID 464204 includes 6 sessions
Session ID 914163299 includes 1 experiments: [915136302]


behavior_ophys_experiment_915136302.nwb: 100%|██████████| 1.13G/1.13G [00:44<00:00, 25.2MMB/s]   


Experiment 915136302 includes 184 cells.


100%|██████████| 184/184 [02:41<00:00,  1.14it/s]


Session ID 918116930 includes 1 experiments: [918566488]


behavior_ophys_experiment_918566488.nwb: 100%|██████████| 872M/872M [00:34<00:00, 25.5MMB/s]    


Experiment 918566488 includes 152 cells.


100%|██████████| 152/152 [02:20<00:00,  1.08it/s]


Session ID 918718550 includes 1 experiments: [919325924]


behavior_ophys_experiment_919325924.nwb: 100%|██████████| 1.02G/1.02G [01:30<00:00, 11.3MMB/s]   


Experiment 919325924 includes 190 cells.


100%|██████████| 190/190 [02:57<00:00,  1.07it/s]


Session ID 928146339 includes 1 experiments: [929136322]


behavior_ophys_experiment_929136322.nwb: 100%|██████████| 1.01G/1.01G [00:39<00:00, 25.3MMB/s]   


Experiment 929136322 includes 190 cells.


100%|██████████| 190/190 [02:59<00:00,  1.06it/s]


Session ID 929255931 includes 1 experiments: [929591721]


behavior_ophys_experiment_929591721.nwb: 100%|██████████| 947M/947M [00:39<00:00, 24.0MMB/s]    


Experiment 929591721 includes 153 cells.


100%|██████████| 153/153 [02:26<00:00,  1.05it/s]


Session ID 929688369 includes 1 experiments: [930785836]


behavior_ophys_experiment_930785836.nwb: 100%|██████████| 877M/877M [00:35<00:00, 24.8MMB/s]    


Experiment 930785836 includes 141 cells.


100%|██████████| 141/141 [02:15<00:00,  1.04it/s]


In [8]:
# Display the installed pandas version. The note at the top of the notebook warns
# that data frames pickled with a too-recent pandas cannot be read back in Colab,
# so check this value before relying on the saved .pkl files.
pd.__version__

'1.4.3'

In [10]:
# To access categorical information
#all_data1['image_name'].cat.categories
#all_data1['image_name'].cat.codes

# The trace timestamps
#trace_timestamps = np.linspace(-0.5,0.7333333333333334,38)

# Load one previously saved data frame back from disk.
# NOTE(review): the path is relative to the current working directory rather than
# `save_data_directory`, and the name has no `_sess<id>` suffix, unlike the
# per-session files written by the processing loop -- presumably it comes from an
# earlier per-mouse save; confirm the file exists before running.
# NOTE(review): unpickling can execute arbitrary code; only load files you created yourself.
obj = pd.read_pickle('Sst_mouse470784.pkl')