In [1]:
from aind_data_access_api.document_db import MetadataDbClient

# Connection settings for the metadata document-database client.
API_GATEWAY_HOST = "api.allenneuraldynamics.org"
DATABASE = "metadata_index"
COLLECTION = "data_assets"

# One client instance is shared by every query cell below.
docdb_api_client = MetadataDbClient(
    host=API_GATEWAY_HOST, database=DATABASE, collection=COLLECTION
)

# Echo the endpoint the client resolved from the settings above as a quick sanity check.
print(docdb_api_client._base_url)

https://api.allenneuraldynamics.org/v1/metadata_index/data_assets


In [2]:
import pandas as pd
import numpy as np

In [3]:
def flatten_list(matrix):
    """Flatten one level of nesting by concatenating the inner iterables.

    Args:
        matrix: An iterable of iterables (e.g. a list of lists).

    Returns:
        A new list holding every inner item in its original order; ``[]``
        when ``matrix`` is empty.
    """
    # A single comprehension is linear in the total number of items, whereas
    # ``sum(matrix, [])`` re-copies the accumulated list on every addition
    # and only accepts inner values that are lists.
    return [item for row in matrix for item in row]

In [4]:

# Stage 1: filter on project name, procedure type and data level.
match_stage = {
    '$match': {
        'data_description.project_name': 'Thalamus in the middle',
        'procedures.subject_procedures.procedures.procedure_type': 'Nanoject injection',
        'data_description.data_level': 'raw',
    }
}

# Stage 2: keep only the fields used downstream, lifted to short top-level names.
project_stage = {
    '$project': {
        'name': '$name',
        'data_level': '$data_description.data_level',
        'subject_id': '$data_description.subject_id',
        'genotype': '$subject.genotype',
        'date_of_birth': '$subject.date_of_birth',
        'sex': '$subject.sex',
        'procedures': '$procedures.subject_procedures.procedures',
    }
}

aggregate = [match_stage, project_stage]

# Run the pipeline through the shared client; the result is iterated record by record below.
records = docdb_api_client.aggregate_docdb_records(pipeline=aggregate)

In [9]:
# Column order of the injection table (one row per injected material).
INJECTION_COLUMNS = ['name', 'subject_id', 'genotype', 'sex', 'virus', 'titer', 'ap', 'ml', 'dv', 'volume']


def _injection_rows(record):
    """Yield one row dict per injected material of each Nanoject injection in ``record``.

    Args:
        record: A projected query result with the keys ``name``, ``subject_id``,
            ``genotype``, ``sex`` and ``procedures`` (a list of procedure lists).

    Yields:
        Dicts keyed by ``INJECTION_COLUMNS``. ``titer`` is the material's titer
        when its ``material_type`` is ``'Virus'`` and NaN otherwise.

    Raises:
        KeyError: If a record, procedure or material lacks an expected key.
        TypeError, ValueError: If a coordinate or volume cannot be converted to float.
    """
    for proc in flatten_list(record['procedures']):
        if proc['procedure_type'] != "Nanoject injection":
            continue
        materials = proc['injection_materials']
        if not materials:
            # Nothing injected (empty or None): no rows, and the coordinates are not read.
            continue
        # Coordinates and volume belong to the injection, not to each material,
        # so convert them once per procedure instead of once per material.
        ap = float(proc['injection_coordinate_ap'])
        ml = float(proc['injection_coordinate_ml'])
        # NOTE(review): only the first depth / volume entry is used -- confirm
        # that injections in this project never carry more than one.
        dv = float(proc['injection_coordinate_depth'][0])
        volume = float(proc['injection_volume'][0])
        for material in materials:
            yield {
                'name': record['name'],
                'subject_id': record['subject_id'],
                'genotype': record['genotype'],
                'sex': record['sex'],
                'virus': material['name'],
                'titer': material['titer'] if material['material_type'] == 'Virus' else np.nan,
                'ap': ap,
                'ml': ml,
                'dv': dv,
                'volume': volume,
            }


# Collect every row first and build the frame in one call: appending with
# ``df.loc[len(df)] = ...`` copies the frame on each new row. Column dtypes
# are inferred by the constructor from the collected values.
df = pd.DataFrame(
    [row for record in records for row in _injection_rows(record)],
    columns=INJECTION_COLUMNS,
)
                

In [10]:
# Show the assembled injection table; the rendered output elides the middle rows of a long frame.
df

Unnamed: 0,name,subject_id,genotype,sex,virus,titer,ap,ml,dv,volume
0,SmartSPIM_678704_2023-06-20_20-49-52_stitched_...,678704,wt/wt,Male,AAVrg-Syn-H2B-Turquoise,48000000000000,2.80,1.80,1.00,50.0
1,SmartSPIM_678704_2023-06-20_20-49-52_stitched_...,678704,wt/wt,Male,AAVrg-Syn-H2B-tdTomato,51000000000000,2.40,1.80,0.80,50.0
2,SmartSPIM_678703_2023-06-20_17-18-27_stitched_...,678703,wt/wt,Male,AAVrg-Syn-H2B-Turquoise,48000000000000,1.60,0.20,2.00,50.0
3,SmartSPIM_678703_2023-06-20_17-18-27_stitched_...,678703,wt/wt,Male,AAVrg-Syn-H2B-tdTomato,51000000000000,2.00,0.60,0.60,50.0
4,SmartSPIM_678706_2023-06-28_16-43-04_stitched_...,678706,wt/wt,Female,AAVrg-Syn-H2B-Turquoise,48000000000000,2.80,1.00,1.20,50.0
...,...,...,...,...,...,...,...,...,...,...
625,SmartSPIM_665261_2023-03-31_15-34-58_stitched_...,665261,wt/wt,Male,EnvA CVS-N2C-histone-GFP,10700000000,-1.06,1.15,3.35,200.0
626,SmartSPIM_652506_2023-01-09_10-18-12,652506,wt/wt,Male,SL1-hSyn-Cre,28700000000002,2.20,0.35,2.10,200.0
627,SmartSPIM_652506_2023-01-09_10-18-12,652506,wt/wt,Male,AAV1-CAG-H2B-mTurquoise2-WPRE,5040000000002,2.20,0.35,2.10,200.0
628,SmartSPIM_652506_2023-01-09_10-18-12,652506,wt/wt,Male,AAV-Syn-DIO-TVA66T-dTomato-CVS N2cG,,-0.60,2.90,3.60,200.0


In [6]:
# Distinct genotype values present in the injection table.
df['genotype'].unique()

array(['wt/wt', 'Ai224(TICL-NLS-EGFP-ICF-NLS-dT)-hyg/wt'], dtype=object)

In [7]:
# Distinct injected material names present in the injection table.
df['virus'].unique()

array(['AAVrg-Syn-H2B-Turquoise', 'AAVrg-Syn-H2B-tdTomato',
       'AAVrg-Syn-iCre', 'AAVrg-Syn-Flpo', 'AAVrg-Syn-H2B-EGFP',
       'SL1-hSyn-Cre', 'AAV1-CAG-H2B-mTurquoise2-WPRE',
       'AAV-Syn-DIO-TVA66T-dTomato-CVS N2cG', 'EnvA CVS-N2C-histone-GFP',
       'CVS N2cdG-H2B-GFP', 'CVS N2cdG-H2B-tdTomato', 'CTB-647',
       'FluoSphere (Dark Red)'], dtype=object)

In [8]:
# Rows whose genotype string contains 'Ai224'.
# `regex=False` matches the text literally; `na=False` counts a missing
# genotype as "no match", so the boolean mask never holds NaN (which would
# make the row selection raise).
df[df['genotype'].str.contains('Ai224', regex=False, na=False)]

Unnamed: 0,name,subject_id,genotype,sex,virus,titer,ap,ml,dv,volume
6,SmartSPIM_679518_2023-08-25_12-08-11_stitched_...,679518,Ai224(TICL-NLS-EGFP-ICF-NLS-dT)-hyg/wt,Male,AAVrg-Syn-iCre,75000000000000,1.6,0.2,1.0,50.0
7,SmartSPIM_679518_2023-08-25_12-08-11_stitched_...,679518,Ai224(TICL-NLS-EGFP-ICF-NLS-dT)-hyg/wt,Male,AAVrg-Syn-Flpo,102000000000000,1.6,0.6,0.8,50.0
8,SmartSPIM_679519_2023-08-15_11-43-09_stitched_...,679519,Ai224(TICL-NLS-EGFP-ICF-NLS-dT)-hyg/wt,Male,AAVrg-Syn-iCre,75000000000000,2.8,0.2,0.6,100.0
9,SmartSPIM_679519_2023-08-15_11-43-09_stitched_...,679519,Ai224(TICL-NLS-EGFP-ICF-NLS-dT)-hyg/wt,Male,AAVrg-Syn-Flpo,102000000000000,2.8,1.0,0.6,100.0
15,SmartSPIM_679521_2023-08-15_17-08-13_stitched_...,679521,Ai224(TICL-NLS-EGFP-ICF-NLS-dT)-hyg/wt,Female,AAVrg-Syn-iCre,75000000000000,2.8,0.2,0.6,200.0
16,SmartSPIM_679521_2023-08-15_17-08-13_stitched_...,679521,Ai224(TICL-NLS-EGFP-ICF-NLS-dT)-hyg/wt,Female,AAVrg-Syn-Flpo,102000000000000,2.8,1.0,0.6,200.0
144,SmartSPIM_692907_2023-10-11_13-47-37_stitched_...,692907,Ai224(TICL-NLS-EGFP-ICF-NLS-dT)-hyg/wt,Male,AAVrg-Syn-iCre,75000000000000,2.4,1.8,0.8,50.0
145,SmartSPIM_692907_2023-10-11_13-47-37_stitched_...,692907,Ai224(TICL-NLS-EGFP-ICF-NLS-dT)-hyg/wt,Male,AAVrg-Syn-Flpo,102000000000000,2.4,0.2,1.0,50.0
148,SmartSPIM_692907_2023-10-11_13-47-37,692907,Ai224(TICL-NLS-EGFP-ICF-NLS-dT)-hyg/wt,Male,AAVrg-Syn-iCre,75000000000000,2.4,1.8,0.8,50.0
149,SmartSPIM_692907_2023-10-11_13-47-37,692907,Ai224(TICL-NLS-EGFP-ICF-NLS-dT)-hyg/wt,Male,AAVrg-Syn-Flpo,102000000000000,2.4,0.2,1.0,50.0
