In [4]:
from dandi.dandiapi import DandiAPIClient
from tqdm.notebook import tqdm

In [5]:
# Create a DANDI API client using its default configuration (no arguments passed).
client = DandiAPIClient()

# Materialize the dandiset iterator into a list so later cells can loop over it
# repeatedly and tqdm can report a total.
dandisets = [*client.get_dandisets()]

# Identify NWB dandisets
Most dandisets hold NWB-formatted data, but DANDI also holds data in other formats. Let's start by filtering down to only the dandisets that contain at least one NWB file. We can do this by querying the metadata of each dandiset, which lists the data formats it contains in `raw_metadata["assetsSummary"]["dataStandard"]`. We'll use the RRID of NWB, which is `"RRID:SCR_015242"`, and return all dandisets where this RRID is in the list of data standards. If no data has been uploaded to a dandiset, the `"dataStandard"` field is not present. We handle this by using the `.get` method with an empty list as the default, so that the loop simply iterates over nothing.

In [8]:
# Collect every dandiset whose asset summary lists NWB among its data standards.
nwb_dandisets = []

for dandiset in tqdm(dandisets):
    raw_metadata = dandiset.get_raw_metadata()
    # "dataStandard" is absent when nothing has been uploaded; fall back to an empty list.
    data_standards = raw_metadata['assetsSummary'].get('dataStandard', [])
    contains_nwb = any(
        standard['identifier'] == 'RRID:SCR_015242'  # this is the RRID for NWB
        for standard in data_standards
    )
    if contains_nwb:
        nwb_dandisets.append(dandiset)

# Last expression of the cell: display how many NWB dandisets were found.
len(nwb_dandisets)

  0%|          | 0/210 [00:00<?, ?it/s]

119

# Filtering dandisets: species
Let's use the `nwb_dandisets` list from the previous recipe and filter down to only the NWB dandisets that have mouse data. You can find this information in `raw_metadata["assetsSummary"]["species"]`. We'll use the same `.get` trick as above to handle the case where no data has been uploaded. This code will return the dandisets that contain mouse data.

In [9]:
# Keep only the NWB dandisets whose species summary mentions "mouse".
mouse_nwb_dandisets = []

for dandiset in tqdm(nwb_dandisets):
    raw_metadata = dandiset.get_raw_metadata()
    # "species" is absent when no data has been uploaded, so default to an empty list.
    species_entries = raw_metadata["assetsSummary"].get("species", [])
    if any(
        # Match case-insensitively (as the asset-level species check in this notebook
        # does) and tolerate entries whose "name" is missing or None.
        "mouse" in (x.get("name") or "").lower()
        for x in species_entries
    ):
        mouse_nwb_dandisets.append(dandiset)

# Last expression of the cell: display how many mouse NWB dandisets were found.
len(mouse_nwb_dandisets)

  0%|          | 0/119 [00:00<?, ?it/s]

58

# Filtering by session: species and sex
Let's say you have identified a dandiset of interest, "000005", and you want to identify all of the sessions on female mice. You can do this by querying asset-level metadata. Assets correspond to individual NWB files, and contain metadata extracted from those files. The metadata of each asset contains a `.wasAttributedTo` attribute, which is a list of `Participant` objects corresponding to the subjects for that session. Usually, there is only one subject, but we can iterate over the list in case there are more. Next we want to check the species and sex for each subject. We do that by first testing that the attribute exists (is not `None`) and then checking the value of its `name` attribute. This code returns a list of all assets (NWB files) for which there is at least one subject that is a female mouse.

In [26]:
# Find every asset (NWB file) in dandiset "000005" that has at least one
# subject that is both a mouse and female.
dandiset = client.get_dandiset("000005")
female_mouse_nwb_sessions = []
assets = list(dandiset.get_assets())
for asset in tqdm(assets):
    asset_metadata = asset.get_metadata()
    # `wasAttributedTo` holds the subjects of the session; treat a missing
    # (None) value as "no subjects" instead of failing on iteration.
    subjects = asset_metadata.wasAttributedTo or []
    if any(
        # Check that each optional attribute is set before reading its name.
        subject.species and "mouse" in subject.species.name.lower()
        and subject.sex and subject.sex.name == "Female"
        for subject in subjects
    ):
        female_mouse_nwb_sessions.append(asset)

# Report matching sessions out of the total number of assets.
# (Previously referenced the undefined name `male_mouse_nwb_sessions`.)
print(f"{len(female_mouse_nwb_sessions)}/{len(assets)}")

  0%|          | 0/148 [00:00<?, ?it/s]

69/148


# Going beyond
These examples show a few common queries, but there is much more. The metadata structures are quite rich on both the dandiset and asset level, and they enable many complex queries beyond the examples here. The `.get_raw_metadata` method of each provides a nice view into the available fields. For any attribute, it is recommended to first check that it is not `None`, and then check its value.

In [27]:
# Display the raw metadata of `dandiset` as the cell output, to browse the
# dandiset-level fields available for queries.
# NOTE(review): `dandiset` is whatever the name was last bound to; in the cell
# order shown, that is the result of client.get_dandiset("000005").
dandiset.get_raw_metadata()

{'id': 'DANDI:000005/0.220126.1853',
 'doi': '10.48324/dandi.000005/0.220126.1853',
 'url': 'https://dandiarchive.org/dandiset/000005/0.220126.1853',
 'name': 'Electrophysiology data from thalamic and cortical neurons during somatosensation',
 'about': [{'name': 'dorsal plus ventral thalamus',
   'schemaKey': 'Anatomy',
   'identifier': 'UBERON:0001897'}],
 'access': [{'status': 'dandi:OpenAccess',
   'schemaKey': 'AccessRequirements',
   'contactPoint': {'schemaKey': 'ContactPoint'}}],
 'license': ['spdx:CC-BY-4.0'],
 'version': '0.220126.1853',
 '@context': 'https://raw.githubusercontent.com/dandi/schema/master/releases/0.6.0/context.json',
 'citation': 'Yu, Jianing; Gutnisky, Diego A; Hires, S Andrew; Svoboda, Karel (2022) Electrophysiology data from thalamic and cortical neurons during somatosensation (Version 0.220126.1853) [Data set]. DANDI archive. https://doi.org/10.48324/dandi.000005/0.220126.1853',
 'keywords': [],
 'protocol': [],
 'schemaKey': 'Dandiset',
 'identifier': 'DA

In [31]:
# Display the raw metadata of a single asset, to browse the asset-level fields
# available for queries.
# NOTE(review): `asset` is not assigned in this cell; it is the loop variable
# left over from the `for asset in tqdm(assets)` loop, presumably the last
# asset iterated — confirm, or bind an explicit asset here.
asset.get_raw_metadata()

{'id': 'dandiasset:0d621aa7-077d-4977-acc9-f23e36d8fb9f',
 'path': 'sub-anm337496/sub-anm337496_ses-20160524_behavior+icephys+ogen.nwb',
 'access': [{'status': 'dandi:OpenAccess', 'schemaKey': 'AccessRequirements'}],
 'digest': {'dandi:sha2-256': 'ba86d2fb8333ea8dabeb4c13c1524f6d37f89b296abd2481a62b76491aca8000',
  'dandi:dandi-etag': '12fd426232fe7fc6001b9163e51b3707-17'},
 '@context': 'https://raw.githubusercontent.com/dandi/schema/master/releases/0.4.4/context.json',
 'approach': [{'name': 'electrophysiological approach',
   'schemaKey': 'ApproachType'}],
 'keywords': ['barrel cortex',
  'thalamus',
  'whiskers',
  'extracellular electrophysiology',
  'intracellular electrophysiology',
  'optogenetic perturbations'],
 'schemaKey': 'Asset',
 'contentUrl': ['https://api.dandiarchive.org/api/assets/0d621aa7-077d-4977-acc9-f23e36d8fb9f/download/',
  'https://dandiarchive.s3.amazonaws.com/blobs/406/fec/406feca8-9859-49c0-85dd-530896438179'],
 'identifier': '0d621aa7-077d-4977-acc9-f23e36