In [1]:
from aind_data_access_api.document_db import MetadataDbClient

# Connection settings for the metadata document store queried in this notebook.
API_GATEWAY_HOST = 'api.allenneuraldynamics.org'
DATABASE = "metadata_index"
COLLECTION = "data_assets"

# Gather the constructor arguments in one place, then build the client from them.
_client_settings = {
    'host': API_GATEWAY_HOST,
    'database': DATABASE,
    'collection': COLLECTION,
}
docdb_api_client = MetadataDbClient(**_client_settings)

# Print the endpoint the client resolved to. `_base_url` is underscore-prefixed,
# i.e. private by convention, so it is read here for inspection only.
print(docdb_api_client._base_url)

https://api.allenneuraldynamics.org/v1/metadata_index/data_assets


In [2]:
import pandas as pd
import numpy as np
from datetime import datetime

In [3]:
def flatten_list(matrix):
    """Flatten exactly one level of nesting into a new list.

    Parameters
    ----------
    matrix : iterable of iterables
        For example a list of lists. Inner items may be any iterable
        (lists, tuples, generators, ...).

    Returns
    -------
    list
        A new list holding the items of every inner iterable, in order.
        An empty ``matrix`` yields ``[]``.

    Notes
    -----
    ``sum(matrix, [])`` re-copies the accumulated list on every addition, so its
    cost grows quadratically with the number of items, and it raises ``TypeError``
    for inner iterables that are not lists. ``chain.from_iterable`` walks every
    item once and accepts any inner iterable.
    """
    # Local import keeps this cell self-contained without touching the import cell.
    from itertools import chain

    return list(chain.from_iterable(matrix))

In [4]:
# Stage 1: restrict to assets of one project that have a Nanoject injection
# procedure and an 'Overall tissue quality' QC evaluation.
_match_stage = {
    '$match': {
        'data_description.project_name': 'Thalamus in the middle',
        'procedures.subject_procedures.procedures.procedure_type': 'Nanoject injection',
        'quality_control.evaluations.name': 'Overall tissue quality',
    }
}

# Condition applied to each evaluation by the `$filter` below.
# NOTE(review): later cells index `metrics` and `status_history` as lists, so this
# `$eq` against the plain string 'Pass' may never match - confirm against the schema.
# The `qc_evaluations` field it produces is not read by the cells visible in this notebook.
_tissue_quality_passed = {
    '$and': [
        {'$eq': ['$$evaluation.name', 'Overall tissue quality']},
        {'$eq': ['$$evaluation.metrics.status_history.status', 'Pass']},
    ]
}

# Stage 2: project the fields used to build the metadata table.
_project_stage = {
    '$project': {
        'qc_evaluations': {
            '$filter': {
                'input': '$quality_control.evaluations',
                'as': 'evaluation',
                'cond': _tissue_quality_passed,
            }
        },
        'evaluations': '$quality_control.evaluations',
        'name': '$name',
        'data_level': '$data_description.data_level',
        'subject_id': '$data_description.subject_id',
        'genotype': '$subject.genotype',
        'sex': '$subject.sex',
        'date_of_birth': '$subject.date_of_birth',
        'date_of_surgery': '$procedures.subject_procedures.start_date',
        'procedures': '$procedures.subject_procedures',
    }
}

aggregate = [_match_stage, _project_stage]

# Run the aggregation pipeline through the client; the result is iterated
# record-by-record when the metadata table is built.
records = docdb_api_client.aggregate_docdb_records(pipeline=aggregate)

In [6]:
# Lookup from injected material name to a channel label (kept as a string,
# because it is compared against '445' / '488' / '561' when rows are built).
virus_dict = {
    'AAVrg-Syn-H2B-Turquoise': '445',
    'AAVrg-Syn-H2B-EGFP': '488',
    'AAVrg-Syn-H2B-tdTomato': '561',
    'AAVrg-Syn-iCre': '488',
    'AAVrg-Syn-Flpo': '561',
    'CVS N2cdG-H2B-GFP': '488',
    'CVS N2cdG-H2B-tdTomato': '561',
}

In [7]:
# Column order of the metadata table; each row built below follows this order.
_DF_COLUMNS = [
    'name', 'subject_id', 'genotype', 'sex', 'virus', 'titer', 'ap', 'ml', 'dv', 'volume',
    'age_days', 'days_to_perfusion', 'qc_tissue_quality', 'qc_channel', 'ng_link', 'channel',
]


def _parse_iso_date(text):
    """Convert a 'YYYY-MM-DD' string into a ``datetime.date``."""
    return datetime.strptime(text, '%Y-%m-%d').date()


def _latest_status(evaluation):
    """Return the last ``status_history`` entry's status for the evaluation's first metric."""
    return evaluation['metrics'][0]['status_history'][-1]['status']


def _first_procedure_type(surgery):
    """Return the ``procedure_type`` of a surgery's first procedure, or None if it lists none."""
    procedures = surgery.get('procedures') or []
    return procedures[0].get('procedure_type') if procedures else None


def _injection_rows(record, channel_by_virus):
    """Build one table row per injected material for a single projected record.

    Parameters
    ----------
    record : dict
        One result of the aggregation pipeline; reads the keys ``name``, ``subject_id``,
        ``genotype``, ``sex``, ``date_of_birth``, ``evaluations`` and ``procedures``.
    channel_by_virus : dict
        Maps a material name to its channel label (e.g. ``'488'``).

    Returns
    -------
    list of list
        Rows ordered like ``_DF_COLUMNS``. Empty when the record has no surgery whose
        first procedure is a 'Nanoject injection'.

    Raises
    ------
    KeyError
        If a material name is missing from ``channel_by_virus`` or an expected key is absent.
    """
    name = record['name']
    subject_id = record['subject_id']
    genotype = record['genotype']
    sex = record['sex']
    dob = _parse_iso_date(record['date_of_birth'])

    # QC state is rebuilt for every record. A missing evaluation therefore shows up as
    # NaN instead of silently inheriting the value left over from the previous record.
    qc_tissue = np.nan
    ng_link = np.nan
    channel_for_eval = {
        'Cell detection in channel: ' + channel: channel
        for channel in set(channel_by_virus.values())
    }
    qc_by_channel = {}
    for evaluation in record['evaluations']:
        eval_name = evaluation['name']
        if eval_name == 'Overall tissue quality':
            qc_tissue = _latest_status(evaluation)
            ng_link = evaluation['metrics'][0]['reference']
        elif eval_name in channel_for_eval:
            qc_by_channel[channel_for_eval[eval_name]] = _latest_status(evaluation)

    surgeries = record['procedures']

    # First pass: find the perfusion date up front, so the result does not depend on
    # whether the perfusion is listed before or after the injection surgery.
    perfusion_date = None
    for surgery in surgeries:
        if _first_procedure_type(surgery) == 'Perfusion':
            perfusion_date = _parse_iso_date(surgery['start_date'])

    # Second pass: emit a row for every material of every injection.
    rows = []
    for surgery in surgeries:
        if _first_procedure_type(surgery) != 'Nanoject injection':
            continue
        surgery_date = _parse_iso_date(surgery['start_date'])
        age = (surgery_date - dob).days
        if perfusion_date is None:
            days_to_perfusion = np.nan  # no perfusion recorded for this subject
        else:
            days_to_perfusion = (perfusion_date - surgery_date).days
        for inj in surgery['procedures']:
            for material in inj['injection_materials']:
                virus = material['name']
                channel = channel_by_virus[virus]
                qc_channel = qc_by_channel.get(channel, np.nan)
                ap = float(inj['injection_coordinate_ap'])
                ml = float(inj['injection_coordinate_ml'])
                # Only the first depth / volume entry is recorded.
                dv = float(inj['injection_coordinate_depth'][0])
                volume = float(inj['injection_volume'][0])
                # Titer is only read for materials typed as 'Virus'.
                titer = material['titer'] if material['material_type'] == 'Virus' else np.nan
                rows.append([
                    name, subject_id, genotype, sex, virus, titer, ap, ml, dv, volume,
                    age, days_to_perfusion, qc_tissue, qc_channel, ng_link, channel,
                ])
    return rows


# Collect all rows first and construct the frame once, instead of growing it with
# `df.loc[len(df)] = ...` on every iteration.
_rows = []
for record in records:
    _rows.extend(_injection_rows(record, virus_dict))

# dtype=object keeps each value as stored, so integer columns that also contain NaN
# (e.g. titer) are not upcast to float.
df = pd.DataFrame(_rows, columns=_DF_COLUMNS, dtype=object)


In [10]:
# Rows whose overall tissue-quality QC status is 'Pass'.
df.loc[df['qc_tissue_quality'] == 'Pass']

Unnamed: 0,name,subject_id,genotype,sex,virus,titer,ap,ml,dv,volume,age_days,days_to_perfusion,qc_tissue_quality,qc_channel,ng_link,channel
6,SmartSPIM_679518_2023-08-25_12-08-11_stitched_...,679518,Ai224(TICL-NLS-EGFP-ICF-NLS-dT)-hyg/wt,Male,AAVrg-Syn-iCre,75000000000000,1.6,0.2,1.0,50.0,60,28,Pass,Fail,https://aind-neuroglancer-sauujisjxq-uw.a.run....,488
7,SmartSPIM_679518_2023-08-25_12-08-11_stitched_...,679518,Ai224(TICL-NLS-EGFP-ICF-NLS-dT)-hyg/wt,Male,AAVrg-Syn-Flpo,102000000000000,1.6,0.6,0.8,50.0,60,28,Pass,Fail,https://aind-neuroglancer-sauujisjxq-uw.a.run....,561
8,SmartSPIM_679519_2023-08-15_11-43-09_stitched_...,679519,Ai224(TICL-NLS-EGFP-ICF-NLS-dT)-hyg/wt,Male,AAVrg-Syn-iCre,75000000000000,2.8,0.2,0.6,100.0,53,31,Pass,Fail,https://aind-neuroglancer-sauujisjxq-uw.a.run....,488
9,SmartSPIM_679519_2023-08-15_11-43-09_stitched_...,679519,Ai224(TICL-NLS-EGFP-ICF-NLS-dT)-hyg/wt,Male,AAVrg-Syn-Flpo,102000000000000,2.8,1.0,0.6,100.0,53,31,Pass,Pass,https://aind-neuroglancer-sauujisjxq-uw.a.run....,561
10,SmartSPIM_689237_2023-08-30_19-04-37_stitched_...,689237,wt/wt,Female,AAVrg-Syn-H2B-Turquoise,296000000000000,1.6,0.2,1.0,50.0,52,21,Pass,Pass,https://aind-neuroglancer-sauujisjxq-uw.a.run....,445
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
146,SmartSPIM_687221_2023-08-29_00-00-20_stitched_...,687221,wt/wt,Female,AAVrg-Syn-H2B-tdTomato,51000000000000,2.8,0.2,1.0,50.0,59,28,Pass,Fail,https://aind-neuroglancer-sauujisjxq-uw.a.run....,561
147,SmartSPIM_687221_2023-08-29_00-00-20_stitched_...,687221,wt/wt,Female,AAVrg-Syn-H2B-EGFP,136000000000000,2.4,1.4,1.0,50.0,59,28,Pass,Fail,https://aind-neuroglancer-sauujisjxq-uw.a.run....,488
150,SmartSPIM_691387_2023-09-15_20-47-51_stitched_...,691387,wt/wt,Female,AAVrg-Syn-H2B-Turquoise,296000000000000,2.8,1.0,1.2,50.0,45,28,Pass,Pass,https://aind-neuroglancer-sauujisjxq-uw.a.run....,445
151,SmartSPIM_691387_2023-09-15_20-47-51_stitched_...,691387,wt/wt,Female,AAVrg-Syn-H2B-tdTomato,51000000000000,2.8,1.8,1.0,50.0,45,28,Pass,Pass,https://aind-neuroglancer-sauujisjxq-uw.a.run....,561


In [11]:
# Distinct genotype values present in the table.
df['genotype'].unique()

array(['wt/wt', 'Ai224(TICL-NLS-EGFP-ICF-NLS-dT)-hyg/wt'], dtype=object)

In [12]:
# Distinct injected material names present in the table.
df['virus'].unique()

array(['AAVrg-Syn-H2B-Turquoise', 'AAVrg-Syn-H2B-tdTomato',
       'AAVrg-Syn-iCre', 'AAVrg-Syn-Flpo', 'AAVrg-Syn-H2B-EGFP',
       'CVS N2cdG-H2B-GFP', 'CVS N2cdG-H2B-tdTomato'], dtype=object)

In [14]:
# First rows whose genotype string contains 'Ai224'.
df.loc[df['genotype'].str.contains('Ai224')].head()

Unnamed: 0,name,subject_id,genotype,sex,virus,titer,ap,ml,dv,volume,age_days,days_to_perfusion,qc_tissue_quality,qc_channel,ng_link,channel
6,SmartSPIM_679518_2023-08-25_12-08-11_stitched_...,679518,Ai224(TICL-NLS-EGFP-ICF-NLS-dT)-hyg/wt,Male,AAVrg-Syn-iCre,75000000000000,1.6,0.2,1.0,50.0,60,28,Pass,Fail,https://aind-neuroglancer-sauujisjxq-uw.a.run....,488
7,SmartSPIM_679518_2023-08-25_12-08-11_stitched_...,679518,Ai224(TICL-NLS-EGFP-ICF-NLS-dT)-hyg/wt,Male,AAVrg-Syn-Flpo,102000000000000,1.6,0.6,0.8,50.0,60,28,Pass,Fail,https://aind-neuroglancer-sauujisjxq-uw.a.run....,561
8,SmartSPIM_679519_2023-08-15_11-43-09_stitched_...,679519,Ai224(TICL-NLS-EGFP-ICF-NLS-dT)-hyg/wt,Male,AAVrg-Syn-iCre,75000000000000,2.8,0.2,0.6,100.0,53,31,Pass,Fail,https://aind-neuroglancer-sauujisjxq-uw.a.run....,488
9,SmartSPIM_679519_2023-08-15_11-43-09_stitched_...,679519,Ai224(TICL-NLS-EGFP-ICF-NLS-dT)-hyg/wt,Male,AAVrg-Syn-Flpo,102000000000000,2.8,1.0,0.6,100.0,53,31,Pass,Pass,https://aind-neuroglancer-sauujisjxq-uw.a.run....,561
15,SmartSPIM_679521_2023-08-15_17-08-13_stitched_...,679521,Ai224(TICL-NLS-EGFP-ICF-NLS-dT)-hyg/wt,Female,AAVrg-Syn-iCre,75000000000000,2.8,0.2,0.6,200.0,53,31,Pass,Fail,https://aind-neuroglancer-sauujisjxq-uw.a.run....,488


In [15]:
# Export the metadata table as CSV. The row index is written too (no `index=`
# argument is passed), so it appears as an unnamed first column in the file.
df.to_csv(path_or_buf='/data/metadata.csv')

In [None]:
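# Old logic: earlier versions of the DataFrame-building cell, kept commented out for reference only.
# They are superseded by the cell above and rely on `flatten_list`.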

In [6]:
# df = pd.DataFrame(columns=('name','subject_id','genotype','sex','virus','titer','ap','ml','dv','volume','qc_tissue_quality','qc_channel', 'ng_link'))
# for record in records:
#     name = record['name']
#     subject_id = record['subject_id']
#     genotype = record['genotype']
#     sex = record['sex']
#     #qc
#     for eval in record['evaluations']:
#         if eval['name']=='Overall tissue quality':
#             qc_tissue = eval['metrics'][0]['status_history'][-1]['status']
#             ng_link = eval['metrics'][0]['reference']
#         elif eval['name']=='Cell detection in channel: 445':
#             qc_445 = eval['metrics'][0]['status_history'][-1]['status']
#         elif eval['name']=='Cell detection in channel: 488':
#             qc_488 = eval['metrics'][0]['status_history'][-1]['status'] 
#         elif eval['name']=='Cell detection in channel: 561':
#             qc_561 = eval['metrics'][0]['status_history'][-1]['status'] 
#     #injections
#     for proc in flatten_list(record['procedures']):
#         if proc['procedure_type']=="Nanoject injection":
#             for i in range(len(proc['injection_materials'])):
#                 virus = proc['injection_materials'][i]['name']
#                 ap = float(proc['injection_coordinate_ap'])
#                 ml = float(proc['injection_coordinate_ml'])
#                 dv = float(proc['injection_coordinate_depth'][0])
#                 volume = float(proc['injection_volume'][0])
#                 if proc['injection_materials'][i]['material_type']=='Virus':
#                     titer = proc['injection_materials'][i]['titer']
#                 else:
#                     titer = np.nan
#                 df.loc[len(df)] = [name, subject_id, genotype, sex, virus, titer, ap, ml, dv, volume, qc_tissue, None, ng_link]
                
# df["channel"] = df["virus"].map(virus_dict)  
# for index, row in df.iterrows():
#     if row.channel=='445':
#         df.loc[index, "qc_channel"] = qc_445
#     elif row.channel=='488':
#         df.loc[index, "qc_channel"] = qc_488
#     elif row.channel=='561':
#         df.loc[index, "qc_channel"] = qc_561

In [6]:
# df = pd.DataFrame(columns=('name','subject_id','genotype','sex','virus','titer','ap','ml','dv','volume'))
# for record in records:
#     name = record['name']
#     subject_id = record['subject_id']
#     genotype = record['genotype']
#     sex = record['sex']
#     for proc in flatten_list(record['procedures']):
#         if proc['procedure_type']=="Nanoject injection":
#             for i in range(len(proc['injection_materials'])):
#                 virus = proc['injection_materials'][i]['name']
#                 ap = float(proc['injection_coordinate_ap'])
#                 ml = float(proc['injection_coordinate_ml'])
#                 dv = float(proc['injection_coordinate_depth'][0])
#                 volume = float(proc['injection_volume'][0])
#                 if proc['injection_materials'][i]['material_type']=='Virus':
#                     titer = proc['injection_materials'][i]['titer']
#                 else:
#                     titer = np.nan
#                 df.loc[len(df)] = [name, subject_id, genotype, sex, virus, titer, ap, ml, dv, volume]
# df["channel"] = df["virus"].map(virus_dict)