In [1]:
import pandas as pd 
from datetime import datetime, date

In [2]:
from aind_data_access_api.document_db import MetadataDbClient

# Connection settings handed to MetadataDbClient below.
API_GATEWAY_HOST = "api.allenneuraldynamics.org"
DATABASE = "metadata_index"
COLLECTION = "data_assets"

# Client bound to one host / database / collection triple.
docdb_api_client = MetadataDbClient(host=API_GATEWAY_HOST, database=DATABASE, collection=COLLECTION)

# Sanity check: show the endpoint the client resolved (reads a private attribute).
print(docdb_api_client._base_url)

https://api.allenneuraldynamics.org/v1/metadata_index/data_assets


In [3]:
# Aggregation pipeline for derived "BCI single neuron stim" assets.
# Stage 1 filters assets, stage 2 flattens the nested metadata into top-level
# fields, stage 3 reduces session_number to a single value, and the remaining
# stages unwind the array-valued fields.

# Asset filter: session type, data level, and processing start date lower bound.
match_filter = {
    "session.session_type": "BCI single neuron stim",
    "data_description.data_level": "derived",
    "processing.processing_pipeline.data_processes.start_date_time": {"$gte": "2025-08-03"},
}

# Keep only the stimulus epochs named "single neuron BCI conditioning".
conditioning_epochs = {
    "$filter": {
        "input": "$session.stimulus_epochs",
        "as": "epoch",
        "cond": {"$eq": ["$$epoch.stimulus_name", "single neuron BCI conditioning"]},
    }
}

# For every data stream, collect the notes of each of its ophys FOVs
# (yields an array of arrays, hence the double unwind further down).
fov_notes_per_stream = {
    "$map": {
        "input": "$session.data_streams",
        "as": "stream",
        "in": {
            "$map": {
                "input": "$$stream.ophys_fovs",
                "as": "fov",
                "in": "$$fov.notes",
            }
        },
    }
}

# First projection: lift nested metadata into flat, top-level fields.
flattened_fields = {
    "name": 1,
    "subject_id": "$data_description.subject_id",
    "genotype": "$subject.genotype",
    "virus": "$procedures.subject_procedures.procedures.injection_materials.name",
    "date_of_birth": "$subject.date_of_birth",
    "sex": "$subject.sex",
    "session_type": "$session.session_type",
    "session_time": "$session.session_start_time",
    "stimulus_epochs": "$session.stimulus_epochs.stimulus_name",
    "project_name": "$data_description.project_name",
    "modality": "$data_description.modality.name",
    "targeted_structure": "$session.data_streams.stack_parameters.targeted_structure",
    "session_number": conditioning_epochs,
    "ophys_fov": fov_notes_per_stream,
    "magnification": "$session.data_streams.stack_parameters.magnification",
}

# Second projection: pass every field through unchanged, except session_number,
# which becomes the session_number of the first matching conditioning epoch.
passthrough_fields = {field: 1 for field in flattened_fields}
passthrough_fields["session_number"] = {"$arrayElemAt": ["$session_number.session_number", 0]}

# (field, number of consecutive $unwind stages) in the order they are applied.
unwind_plan = [
    ("ophys_fov", 2),
    ("virus", 3),
    ("modality", 1),
    ("targeted_structure", 1),
    ("magnification", 1),
]
unwind_stages = [
    {"$unwind": {"path": f"${field}", "preserveNullAndEmptyArrays": False}}
    for field, depth in unwind_plan
    for _ in range(depth)
]

aggregate = [
    {"$match": match_filter},
    {"$project": flattened_fields},
    {"$project": passthrough_fields},
    *unwind_stages,
]

# Run the aggregation pipeline against the document database and keep the result.
records = docdb_api_client.aggregate_docdb_records(pipeline=aggregate)

In [53]:
# Build the session metadata table from the aggregation results.
# The query can return several rows per asset; keep the first row for each asset name.
metadata = pd.DataFrame(records).drop_duplicates(subset="name")

# Parse every session start timestamp exactly once, then split it into a
# calendar date and a time of day. Parsing once removes the original's hidden
# dependency on computing session_date before session_time is overwritten.
session_starts = [datetime.fromisoformat(stamp) for stamp in metadata["session_time"]]
metadata["session_date"] = [start.date() for start in session_starts]
metadata["session_time"] = [start.time() for start in session_starts]
metadata["date_of_birth"] = [
    datetime.strptime(dob, "%Y-%m-%d").date() for dob in metadata["date_of_birth"]
]
# Age in whole days on the day of the session.
metadata["age"] = [
    (session_day - birth_day).days
    for session_day, birth_day in zip(metadata["session_date"], metadata["date_of_birth"])
]

# Column order of the final table.
order = [
    "project_name", "session_type", "_id", "name", "subject_id", "genotype", "virus", "date_of_birth",
    "sex", "modality", "session_date", "age", "session_time", "targeted_structure", "ophys_fov", "session_number",
]
metadata = metadata[order]

# 08/24/25 fix - Remove problem metadata rows
problem_assets = [
    "single-plane-ophys_731015_2025-01-28_17-40-57_processed_2025-08-04_04-38-08",
    "single-plane-ophys_731015_2025-01-28_17-40-57_processed_2025-08-08_15-30-22",
    "single-plane-ophys_772414_2025-02-04_13-21-29_processed_2025-08-12_06-14-42",
    "single-plane-ophys_740369_2025-01-30_18-44-54_processed_2025-08-04_13-31-14",
]

# .copy() makes the filtered table an independent frame, so the in-place
# correction below is not a write into a filtered selection (the pattern that
# triggers pandas' SettingWithCopyWarning).
metadata = metadata.loc[~metadata["name"].isin(problem_assets)].copy()

# Fix problem with 2 values in ophys_fov: pin subject 731015, session 18 to FOV_04.
needs_fov_fix = (metadata["subject_id"] == "731015") & (metadata["session_number"] == 18.0)
metadata.loc[needs_fov_fix, "ophys_fov"] = "FOV_04"

# Preview the cleaned table (a bare expression only renders as the last line of a cell).
metadata.head()

In [54]:
# Export the table ordered by subject and session number; the index is not written.
metadata_sorted = metadata.sort_values(by=["subject_id", "session_number"])
metadata_sorted.to_csv("/data/metadata/bci_metadata.csv", index=False)