#### Using master branch on AllenSDK and master branch on VBA (current as of 5/18/20)

In [2]:
import os
import numpy as np
import pandas as pd

import seaborn as sns
# plotting defaults for the whole notebook: seaborn 'notebook' context,
# font scale 1.5, marker edge width 2
sns.set_context('notebook', font_scale=1.5, rc={'lines.markeredgewidth': 2})

In [3]:
# autoreload mode 2: re-import modules before each cell runs, so edits to
# library code (e.g. VBA) are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2

# draw matplotlib figures inline in the notebook output
%matplotlib inline

### get ophys experiments for analysis

The VBA <code>data_access</code> module provides useful functions for identifying and loading experiments to analyze. 

In [4]:
import visual_behavior.data_access.loading as loading 

The <code>get_filtered_ophys_experiment_table()</code> function returns a version of the SDK experiments table (aka manifest) with unwanted project codes and failed experiments filtered out, and with additional useful columns added.

In [26]:
# print the docstring of the experiment-table loader
# NOTE(review): this cell's execution count (26) is out of sequence with the
# cells around it, and its saved output ends in an autoreload traceback —
# re-run on a fresh kernel (or drop this cell) before sharing
help(loading.get_filtered_ophys_experiment_table)

Help on function get_filtered_ophys_experiment_table in module visual_behavior.data_access.loading:

get_filtered_ophys_experiment_table(include_failed_data=False)
    get ophys experiments table from cache, filters based on a number of criteria
        and adds additional useful columns to the table
        Saves a reformatted version of the table with additional columns
        added for future loading speed.
            filtering criteria:
                 project codes: VisualBehavior, VisualBehaviorTask1B,
                                visualBheaviorMultiscope, VisualBheaviorMultiscope4areasx2d
                experiment_workflow_state: "passed"
                "session_type": OPHYS_1_images_A', 'OPHYS_1_images_B',  'OPHYS_1_images_G',
                            'OPHYS_2_images_A_passive',  'OPHYS_2_images_B_passive',  'OPHYS_2_images_G_passive'
                            'OPHYS_3_images_A',  'OPHYS_3_images_B', 'OPHYS_3_images_G',
                            'OPHYS_4_images_A

[autoreload of visual_behavior.data_access.loading failed: Traceback (most recent call last):
  File "C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\IPython\extensions\autoreload.py", line 245, in check
    superreload(m, reload, self.old_objects)
  File "C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\IPython\extensions\autoreload.py", line 434, in superreload
    module = reload(module)
  File "C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\imp.py", line 314, in reload
    return importlib.reload(module)
  File "C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\importlib\__init__.py", line 169, in reload
    _bootstrap._exec(spec, module)
  File "<frozen importlib._bootstrap>", line 630, in _exec
  File "<frozen importlib._bootstrap_external>", line 724, in exec_module
  File "<frozen importlib._bootstrap_external>", line 860, in get_code
  File "<frozen importlib._bootstrap_external>", line 791, in source_to_code
  File "<f

In [5]:
# load the filtered experiments table; include_failed_data is left at its
# default (False, per the help output above)
experiments_table = loading.get_filtered_ophys_experiment_table()

In [6]:
# sanity check: only the project codes selected for analysis should be present
experiments_table['project_code'].unique()

array(['VisualBehaviorMultiscope', 'VisualBehaviorTask1B',
       'VisualBehavior', 'VisualBehaviorMultiscope4areasx2d'],
      dtype=object)

In [7]:
# sanity check: this table only includes experiments that passed QC
experiments_table['experiment_workflow_state'].unique()

array(['passed'], dtype=object)

In [8]:
# columns (metadata fields) available in the experiments table
experiments_table.columns

Index(['ophys_experiment_id', 'ophys_session_id', 'behavior_session_id',
       'container_id', 'project_code', 'container_workflow_state',
       'experiment_workflow_state', 'session_name', 'session_type',
       'equipment_name', 'date_of_acquisition', 'isi_experiment_id',
       'specimen_id', 'sex', 'age_in_days', 'full_genotype', 'reporter_line',
       'driver_line', 'imaging_depth', 'targeted_structure', 'published_at',
       'super_container_id', 'cre_line', 'session_tags', 'failure_tags',
       'exposure_number', 'location'],
      dtype='object')

In [9]:
# preview the first few rows of the experiments table
experiments_table.head()

Unnamed: 0,ophys_experiment_id,ophys_session_id,behavior_session_id,container_id,project_code,container_workflow_state,experiment_workflow_state,session_name,session_type,equipment_name,...,driver_line,imaging_depth,targeted_structure,published_at,super_container_id,cre_line,session_tags,failure_tags,exposure_number,location
0,953659743,952430817,952554548,1018028345,VisualBehaviorMultiscope,holding,passed,20190923_457841_2imagesA,OPHYS_2_images_A_passive,MESO.1,...,['Sst-IRES-Cre'],225,VISp,,850862430,Sst-IRES-Cre,,"z_drift_corr_um_diff,z_drift_corr_um_diff,z_dr...",0,Sst_VISp_225
3,953659749,952430817,952554548,1018028354,VisualBehaviorMultiscope,holding,passed,20190923_457841_2imagesA,OPHYS_2_images_A_passive,MESO.1,...,['Sst-IRES-Cre'],150,VISl,,850862430,Sst-IRES-Cre,,"z_drift_corr_um_diff,z_drift_corr_um_diff,z_dr...",0,Sst_VISl_150
4,953659752,952430817,952554548,1018028357,VisualBehaviorMultiscope,holding,passed,20190923_457841_2imagesA,OPHYS_2_images_A_passive,MESO.1,...,['Sst-IRES-Cre'],225,VISl,,850862430,Sst-IRES-Cre,,"z_drift_corr_um_diff,z_drift_corr_um_diff,z_dr...",0,Sst_VISl_225
6,953659745,952430817,952554548,1018028339,VisualBehaviorMultiscope,holding,passed,20190923_457841_2imagesA,OPHYS_2_images_A_passive,MESO.1,...,['Sst-IRES-Cre'],75,VISp,,850862430,Sst-IRES-Cre,,"z_drift_corr_um_diff,z_drift_corr_um_diff,z_dr...",0,Sst_VISp_75
8,958527481,954954402,953982960,1018028357,VisualBehaviorMultiscope,holding,passed,20190924_457841_3imagesA,OPHYS_3_images_A,MESO.1,...,['Sst-IRES-Cre'],225,VISl,,850862430,Sst-IRES-Cre,,,0,Sst_VISl_225


### describe the dataset - how many containers or mice per cre line and area?

VBA also provides a function to get a description of the dataset organized by container. This function relies on <code>get_filtered_ophys_experiment_table()</code>, so it includes all the experiments described in the <code>experiments_table</code> above.

#### get all containers, including ones in 'holding' (no final container QC, but individual experiments have passed QC)

In [19]:
# handy function in VBA to get a container summary dataframe;
# the cells below group it by project code, cre line, targeted structure and specimen
container_df = loading.build_container_df()

In [21]:
# production project codes, in the preferred display order
project_codes = [
    'VisualBehavior',
    'VisualBehaviorTask1B',
    'VisualBehaviorMultiscope',
    'VisualBehaviorMultiscope4areasx2d',
]

# number of containers for each (project code, cre line, targeted structure),
# with the project-code index level put in the preferred order
df = (
    container_df
    .groupby(['project_code', 'cre_line', 'targeted_structure'], sort=True)[['container_id']]
    .count()
    .rename(columns={'container_id': 'n_containers'})
    .reindex(project_codes, level=0)
)

df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,n_containers
project_code,cre_line,targeted_structure,Unnamed: 3_level_1
VisualBehavior,Slc17a7-IRES2-Cre,VISp,25
VisualBehavior,Sst-IRES-Cre,VISp,2
VisualBehavior,Vip-IRES-Cre,VISp,4
VisualBehaviorTask1B,Slc17a7-IRES2-Cre,VISp,9
VisualBehaviorTask1B,Sst-IRES-Cre,VISp,7
VisualBehaviorTask1B,Vip-IRES-Cre,VISp,6
VisualBehaviorMultiscope,Slc17a7-IRES2-Cre,VISl,35
VisualBehaviorMultiscope,Slc17a7-IRES2-Cre,VISp,35
VisualBehaviorMultiscope,Sst-IRES-Cre,VISl,20
VisualBehaviorMultiscope,Sst-IRES-Cre,VISp,20


#### how many mice per condition? 

In [22]:
# collapse containers to one row per (project code, cre line, specimen)
mouse_summary = (
    container_df
    .groupby(['project_code', 'cre_line', 'specimen_id'], sort=True)
    .count()
    .reset_index()
)

# number of specimens (mice) for each (project code, cre line);
# project_codes is the ordered list defined in an earlier cell
df = (
    mouse_summary
    .groupby(['project_code', 'cre_line'])[['specimen_id']]
    .count()
    .rename(columns={'specimen_id': 'n_mice'})
    .reindex(project_codes, level=0)
)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,n_mice
project_code,cre_line,Unnamed: 2_level_1
VisualBehavior,Slc17a7-IRES2-Cre,25
VisualBehavior,Sst-IRES-Cre,2
VisualBehavior,Vip-IRES-Cre,4
VisualBehaviorTask1B,Slc17a7-IRES2-Cre,9
VisualBehaviorTask1B,Sst-IRES-Cre,7
VisualBehaviorTask1B,Vip-IRES-Cre,6
VisualBehaviorMultiscope,Slc17a7-IRES2-Cre,9
VisualBehaviorMultiscope,Sst-IRES-Cre,5
VisualBehaviorMultiscope,Vip-IRES-Cre,9
VisualBehaviorMultiscope4areasx2d,Slc17a7-IRES2-Cre,5


### how many containers & mice have had final container QC completed? 

In [23]:
# keep only containers that have been through final QC, i.e. whose
# container_workflow_state is 'completed' or 'container_qc'
# NOTE(review): this rebinds container_df, so re-running the earlier summary
# cells after this one would report the filtered counts — a separate name
# for the filtered frame would be safer
passed_container_qc = container_df['container_workflow_state'].isin(['completed', 'container_qc'])
container_df = container_df[passed_container_qc]

In [24]:
# number of containers for each (project code, cre line, targeted structure);
# container_df holds only the QC'd containers at this point, and
# project_codes is the ordered list defined in an earlier cell
df = (
    container_df
    .groupby(['project_code', 'cre_line', 'targeted_structure'], sort=True)[['container_id']]
    .count()
    .rename(columns={'container_id': 'n_containers'})
    .reindex(project_codes, level=0)
)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,n_containers
project_code,cre_line,targeted_structure,Unnamed: 3_level_1
VisualBehavior,Slc17a7-IRES2-Cre,VISp,25
VisualBehavior,Sst-IRES-Cre,VISp,1
VisualBehavior,Vip-IRES-Cre,VISp,4
VisualBehaviorTask1B,Slc17a7-IRES2-Cre,VISp,8
VisualBehaviorTask1B,Sst-IRES-Cre,VISp,4
VisualBehaviorTask1B,Vip-IRES-Cre,VISp,3


#### how many mice per condition?

In [25]:
# collapse the QC'd containers to one row per (project code, cre line, specimen)
mouse_summary = (
    container_df
    .groupby(['project_code', 'cre_line', 'specimen_id'], sort=True)
    .count()
    .reset_index()
)

# number of specimens (mice) for each (project code, cre line);
# project_codes is the ordered list defined in an earlier cell
df = (
    mouse_summary
    .groupby(['project_code', 'cre_line'])[['specimen_id']]
    .count()
    .rename(columns={'specimen_id': 'n_mice'})
    .reindex(project_codes, level=0)
)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,n_mice
project_code,cre_line,Unnamed: 2_level_1
VisualBehavior,Slc17a7-IRES2-Cre,25
VisualBehavior,Sst-IRES-Cre,1
VisualBehavior,Vip-IRES-Cre,4
VisualBehaviorTask1B,Slc17a7-IRES2-Cre,8
VisualBehaviorTask1B,Sst-IRES-Cre,4
VisualBehaviorTask1B,Vip-IRES-Cre,3
