In [46]:
# general imports
import os
from pathlib import Path
import numpy as np
import pandas as pd 

# nwb specific imports 
import pynwb
from nwbwidgets import nwb2widget
from hdmf_zarr import NWBZarrIO 

In [106]:
# Read the V1DD session metadata table and preview its first rows
METADATA_CSV = '/data/metadata/V1DD_metadata.csv'  # V1DD

metadata = pd.read_csv(METADATA_CSV)
metadata.head()

Unnamed: 0,project_name,_id,name,subject_id,golden_mouse,genotype,date_of_birth,sex,modality,session_date,age,session_time,column,volume
0,V1 Deep Dive,3b85c659-20c8-438f-ab58-de1aac3b81cf,416296_2018-11-29_12-08-40_nwb_2025-08-08_16-2...,416296,False,Camk2a-tTA/wt;tetO-GCaMP6s/wt,2018-08-06,Female,"['Planar optical physiology', 'Behavior videos']",2018-11-29,115,12:08:40.014190,2,5
1,V1 Deep Dive,b98ce4a9-a66b-4b70-baa6-95ef451ae087,427836_2019-04-25_12-16-58_nwb_2025-08-08_16-2...,427836,False,Slc17a7-IRES2-Cre/wt;Camk2a-tTA/wt;Ai94(TITL-G...,2018-10-08,Female,"['Planar optical physiology', 'Behavior videos']",2019-04-25,199,12:16:58.240890,5,3
2,V1 Deep Dive,5cccd09c-4ae8-4d8e-ae92-23099d22bbe2,427836_2019-04-24_13-06-45_nwb_2025-08-08_16-2...,427836,False,Slc17a7-IRES2-Cre/wt;Camk2a-tTA/wt;Ai94(TITL-G...,2018-10-08,Female,"['Planar optical physiology', 'Behavior videos']",2019-04-24,198,13:06:45.257460,4,3
3,V1 Deep Dive,911d215f-dafa-4b39-86f7-b36336974da1,427836_2019-04-25_13-49-39_nwb_2025-08-08_16-2...,427836,False,Slc17a7-IRES2-Cre/wt;Camk2a-tTA/wt;Ai94(TITL-G...,2018-10-08,Female,"['Planar optical physiology', 'Behavior videos']",2019-04-25,199,13:49:39.163550,5,4
4,V1 Deep Dive,c918f874-d534-4341-b0dc-f1aba388af66,427836_2019-04-26_12-54-40_nwb_2025-08-08_16-2...,427836,False,Slc17a7-IRES2-Cre/wt;Camk2a-tTA/wt;Ai94(TITL-G...,2018-10-08,Female,"['Planar optical physiology', 'Behavior videos']",2019-04-26,200,12:54:40.332660,2,4


In [9]:
# List the column names of the metadata table
metadata.columns

Index(['project_name', '_id', 'name', 'subject_id', 'golden_mouse', 'genotype',
       'date_of_birth', 'sex', 'modality', 'session_date', 'age',
       'session_time', 'column', 'volume'],
      dtype='object')

<div style="background: #DFF0D8; border-radius: 3px; padding: 10px;">

**Exercise:** What modality was used?
</div>

In [3]:
# Distinct values found in the 'modality' column
metadata['modality'].unique()

array(["['Planar optical physiology', 'Behavior videos']"], dtype=object)

<div style="background: #DFF0D8; border-radius: 3px; padding: 10px;">

**Exercise:** How many mice were used in this dataset?
</div>

In [6]:
# Number of distinct subject IDs (dropna=False keeps parity with len(unique()))
metadata['subject_id'].nunique(dropna=False)

4

<div style="background: #DFF0D8; border-radius: 3px; padding: 10px;">

**Exercise:** How many sessions? How many sessions per mouse? 
</div>

In [7]:
# Number of distinct session names (dropna=False keeps parity with len(unique()))
metadata['name'].nunique(dropna=False)

100

In [8]:
# Count how many rows (sessions) each subject ID has
sessions_per_mouse = metadata['subject_id'].value_counts()
sessions_per_mouse

subject_id
416296    25
427836    25
438833    25
409828    25
Name: count, dtype: int64

<div style="background: #DFF0D8; border-radius: 3px; padding: 10px;">

**Exercise:** What transgenic lines were used in this dataset? 
    
What cell types do these transgenic lines target? Refer to the [Data Book](https://allenswdb.github.io/background/transgenic-tools.html) for this. 
</div>

In [12]:
# Distinct genotype strings present in the metadata table
metadata['genotype'].unique()

array(['Camk2a-tTA/wt;tetO-GCaMP6s/wt',
       'Slc17a7-IRES2-Cre/wt;Camk2a-tTA/wt;Ai94(TITL-GCaMP6s)/wt'],
      dtype=object)

In [31]:
# Load session data

# Get the asset name of the session to load (first row of the metadata table)
session_name = metadata['name'].iloc[0]

# Find the filepath.
# NOTE: the parent directory is hard-coded to mouse 416296, so this only
# resolves for sessions stored under that directory.
data_dir = os.path.join(r'/data/416296_V1DD/', session_name)

# Sort the matches so the chosen file is deterministic (glob order is not
# guaranteed), and fail with a clear message instead of an opaque IndexError
# when the directory holds no Zarr-backed NWB file.
nwb_candidates = sorted(Path(data_dir).glob("*.nwb.zarr"))
if not nwb_candidates:
    raise FileNotFoundError(f"No '*.nwb.zarr' file found in {data_dir}")
nwb_path_zarr = nwb_candidates[0]

print('Found filepath:', nwb_path_zarr)

Found filepath: /data/416296_V1DD/416296_2018-11-29_12-08-40_nwb_2025-08-08_16-27-42/416296_2018-11-29_12-08-40_nwb_2025-08-08_16-27-42.nwb.zarr


In [32]:
# Open the Zarr-backed NWB file in read-only mode and read its contents.
# The `io` handle is kept in its own variable and is not closed in this cell.
io = NWBZarrIO(nwb_path_zarr, mode = 'r') 
nwbfile_zarr = io.read()

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


In [33]:
# Display the loaded NWB file with nwbwidgets' nwb2widget viewer
nwb2widget(nwbfile_zarr)

VBox(children=(HBox(children=(Label(value='session_description:', layout=Layout(max_height='40px', max_width='…

<div style="background: #DFF0D8; border-radius: 3px; padding: 10px;">

**Exercise:** What units/neurons are available? How many neurons were recorded in this session? Has the data been pre-filtered based on quality control metrics?
</div>

In [108]:
# Look at all extracted ROIs

def load_roi_table(nwbfile, plane_index):
    """Return the ROI table of one imaging plane as a DataFrame.

    Parameters
    ----------
    nwbfile
        Loaded NWB file object exposing a ``processing`` mapping.
    plane_index : int
        Plane number; the processing module looked up is ``'plane-<plane_index>'``.

    Returns
    -------
    The result of ``to_dataframe()`` on that plane's
    ``image_segmentation`` -> ``roi_table`` plane segmentation.
    """
    plane_module = nwbfile.processing[f'plane-{plane_index}']
    segmentation = plane_module.data_interfaces['image_segmentation']
    return segmentation.plane_segmentations['roi_table'].to_dataframe()


# One ROI table per imaging plane (plane-0 ... plane-5). The individual
# variable names are kept because other cells refer to them directly.
(
    image_segmentation_plane0,
    image_segmentation_plane1,
    image_segmentation_plane2,
    image_segmentation_plane3,
    image_segmentation_plane4,
    image_segmentation_plane5,
) = [load_roi_table(nwbfile_zarr, plane_index) for plane_index in range(6)]

In [109]:
# Are all neurons available or is it pre-filtered?
# Check which values the is_soma flag takes among the ROIs of plane 0.
image_segmentation_plane0['is_soma'].unique()

array([ True, False])

In [113]:
# Calculate total valid neurons (somas) across imaging planes

roi_tables = (
    image_segmentation_plane0,
    image_segmentation_plane1,
    image_segmentation_plane2,
    image_segmentation_plane3,
    image_segmentation_plane4,
    image_segmentation_plane5,
)

# is_soma holds True/False flags, so summing the column counts the True rows
# directly -- no `== True` comparison or filtered copy of each table is needed.
total_somas = sum(int(table['is_soma'].sum()) for table in roi_tables)
print(total_somas)

535


<div style="background: #DFF0D8; border-radius: 3px; padding: 10px;">

**Exercise:** What stimuli, behaviors, and conditions were applied or recorded in this experiment?
</div>

In [85]:
# Convert the 'epochs' interval table to a DataFrame, then list the distinct
# stimulus names it contains
epochs = nwbfile_zarr.intervals['epochs'].to_dataframe()
epochs['stim_name'].unique()

array(['drifting_gratings_full', 'drifting_gratings_windowed',
       'locally_sparse_noise', 'spontaneous', 'natural_images_12',
       'natural_movie', 'natural_images'], dtype=object)

In [83]:
# Inspect the data interfaces stored in the 'behavior' processing module
behavior_module = nwbfile_zarr.processing['behavior']
behavior_module.data_interfaces

{'running_speed': running_speed pynwb.base.TimeSeries at 0x139917829182640
 Fields:
   comments: no comments
   conversion: 1.0
   data: <zarr.core.Array '/processing/behavior/running_speed/data' (209040,) float32 read-only>
   description: Running speed of animal on wheel
   interval: 1
   offset: 0.0
   resolution: -1.0
   timestamps: <zarr.core.Array '/processing/behavior/running_speed/timestamps' (209040,) float64 read-only>
   timestamps_unit: seconds
   unit: cm/s,
 'corneal_reflection': corneal_reflection hdmf.common.table.DynamicTable at 0x139917829184752
 Fields:
   colnames: ['timestamps' 'ellipse_long_axis' 'ellipse_short_axis' 'area']
   columns: (
     timestamps <class 'hdmf.common.table.VectorData'>,
     ellipse_long_axis <class 'hdmf.common.table.VectorData'>,
     ellipse_short_axis <class 'hdmf.common.table.VectorData'>,
     area <class 'hdmf.common.table.VectorData'>
   )
   description: DLC corneal tracking data
   id: id <class 'hdmf.common.table.ElementIdentifie

<div style="background: #DFF0D8; border-radius: 3px; padding: 10px;">

**Exercise:** How long is a session?
</div>

In [137]:
# Session length: span from the earliest epoch start to the latest epoch stop,
# divided by 60 (seconds -> minutes, assuming the epoch times are in seconds).
# min()/max() replace the hard-coded row index of the last epoch, so this also
# works for sessions whose epoch table has a different number of rows.
(epochs['stop_time'].max() - epochs['start_time'].min()) / 60

np.float64(58.04888973236084)

<div style="background: #DFF0D8; border-radius: 3px; padding: 10px;">

**Exercise:** What is the sampling rate?
</div>

See the Data Book: https://allenswdb.github.io/physiology/ophys/V1DD/V1DD-overview.html

The sampling rate is 6 Hz per imaging plane.

<div style="background: #DFF0D8; border-radius: 3px; padding: 10px;">

**Exercise:** Did the mice receive behavior training before recording sessions? Are the recording sessions longitudinal?
</div>

Judging by the session names, each mouse was recorded on several different days. **TODO:** check the Data Book to confirm whether the same area was imaged across those days (presumably yes).