In [1]:
import csv
import logging
import os
import time
import re
from dataclasses import dataclass
from typing import Union
import numpy as np
import pandas as pd
from pathlib import Path
import json

from aind_codeocean_pipeline_monitor.models import (CaptureSettings,
                                                    PipelineMonitorSettings)
from aind_data_access_api.document_db import MetadataDbClient
from codeocean import CodeOcean
from codeocean.computation import (ComputationState, DataAssetsRunParam,
                                   RunParams)
from dataclasses_json import dataclass_json

from lamf_analysis.code_ocean import docdb_utils
from lamf_analysis.code_ocean import capsule_data_utils as cdu

# Reload imported project modules automatically so edits are picked up live.
%load_ext autoreload
%autoreload 2

# Send INFO-and-above log records to the file "batch.log" (relative path),
# each line prefixed with a timestamp, logger name and level.
logging.basicConfig(
    filename="batch.log",
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
# DocDB connection settings (plain constants, not environment variables)
API_GATEWAY_HOST = "api.allenneuraldynamics.org"
DATABASE = "metadata_index"
COLLECTION = "data_assets"
# Metadata client shared by the cells below; it is passed to the docdb_utils helpers.
docdb_api_client = MetadataDbClient(
    host=API_GATEWAY_HOST,
    database=DATABASE,
    collection=COLLECTION,
)

# Code Ocean client. The domain is hard-coded; the commented line is the
# environment-variable alternative.
# domain = os.getenv("CODEOCEAN_DOMAIN")
domain = "https://codeocean.allenneuraldynamics.org/"
# API token comes from the API_SECRET environment variable; os.getenv returns
# None when the variable is unset, and that None is passed on unchecked.
token = os.getenv("API_SECRET")
co_client = CodeOcean(domain=domain, token=token)


# Capsule id read from CO_CAPSULE_ID (CO_MONITOR_PIPELINE is the commented alternative).
# NOTE(review): not referenced in the visible cells — confirm which variable is intended.
# monitor_pipeline_capsule_id = os.getenv("CO_MONITOR_PIPELINE")
monitor_pipeline_capsule_id = os.getenv("CO_CAPSULE_ID")

# Regex fragments made of fixed-width digit groups. They match the pieces of asset
# names such as "multiplane-ophys_755252_2024-11-12_09-43-51"; presumably date,
# time and subject id, as the names suggest.
DATE_FORMAT = '[0-9]{4}-[0-9]{2}-[0-9]{2}'
TIME_FORMAT = '[0-9]{2}-[0-9]{2}-[0-9]{2}'
SUBJECT_ID_FORMAT = '[0-9]{6}'

# NOTE(review): not referenced in the visible cells; presumably commit hashes of
# processing runs worth keeping — confirm against the code that consumes it.
COMMIT_IDS_TO_KEEP = ['e61e887fbe5491035b35f8fd223101ad276622f1']

def _first_code_ocean_id(external_links):
    """Reduce one record's ``external_links`` value to a single Code Ocean asset ID.

    Handles the layouts the lookup has to cope with:

    * a JSON-encoded string wrapping any of the layouts below,
    * a dict ``{"Code Ocean": [id, ...]}`` or ``{"Code Ocean": id}``,
    * a list whose first entry is such a dict, or is already an ID.

    Returns
    -------
    str or None
        The first asset ID; the string ``"None"`` when a list of links/IDs is
        empty; ``None`` when there are no external links or no "Code Ocean"
        entry.
    """
    # Some records store external_links as a JSON string: decode it first.
    if isinstance(external_links, str):
        external_links = json.loads(external_links)
    # List layout: only the first entry is considered.
    if isinstance(external_links, list):
        if not external_links:
            return "None"
        first_entry = external_links[0]
        if not isinstance(first_entry, dict):
            return first_entry
        external_links = first_entry
    if isinstance(external_links, dict):
        external_links = external_links.get("Code Ocean", None)
    # The "Code Ocean" value may itself be a list of IDs: keep the first one.
    if isinstance(external_links, list):
        return external_links[0] if external_links else "None"
    return external_links


def get_asset_ids(docdb_api_client, asset_name, max_retry=100) -> list:
    """Get the asset ID from the data access api
    Parameters
    ----------
    docdb_api_client : MetadataDbClient
        The data access api client
    asset_name : str
        The asset name
    max_retry : int
        Maximum number of attempts for querying the data access api
    Returns
    -------
    list
        List of the asset IDs, one entry per matching record
        Rarely, there are multiple assets with the same name.
        An entry is ``None`` when the record has no Code Ocean link and the
        string ``"None"`` when its list of links is empty.
    Raises
    ------
    RuntimeError
        If the query did not succeed within ``max_retry`` attempts
        (including ``max_retry <= 0``); the last error is chained as the cause.
    """
    query = {"name": asset_name}
    projection = {"external_links": 1}
    last_error = None
    for attempt in range(1, max_retry + 1):
        try:
            response = docdb_api_client.retrieve_docdb_records(
                filter_query=query, projection=projection
            )
            break
        except Exception as e:
            last_error = e
            logging.warning(f"Retry {attempt}/{max_retry} after error: {e}")
            # No point in waiting once the final attempt has failed.
            if attempt < max_retry:
                time.sleep(1)
    else:
        # Reached only when no attempt succeeded; previously this fell through
        # and failed later on an unbound `response`.
        raise RuntimeError(
            f"Could not query asset '{asset_name}' after {max_retry} attempts"
        ) from last_error
    return [
        _first_code_ocean_id(res.get("external_links", None)) for res in response
    ]

In [13]:
mouse_id = 755252
target_long_window = 60

# Per-session records for this mouse.
session_infos = docdb_utils.get_session_infos_from_docdb(mouse_id, docdb_api_client=docdb_api_client)

# Processed-asset info, passed through the date filter and then the long-window filter.
processed_infos = docdb_utils.filter_data_asset_info_by_long_window(
    docdb_utils.filter_data_asset_info_by_date(
        docdb_utils.get_processed_data_info(mouse_id, docdb_api_client=docdb_api_client)
    ),
    target_long_window,
)

# DLC eye-tracking asset info.
dlc_infos = docdb_utils.get_dlc_eye_data_info(mouse_id, docdb_api_client=docdb_api_client)

# Left-join both asset tables onto the sessions by raw asset name, then discard the
# duplicated right-hand join keys ('raw_name_proc' / 'raw_name_dlc') that the
# suffixes produce.
session_infos = (
    session_infos
    .merge(processed_infos, how='left', left_on='raw_asset_name', right_on='raw_name')
    .merge(
        dlc_infos,
        how='left',
        left_on='raw_asset_name',
        right_on='raw_name',
        suffixes=('_proc', '_dlc'),
    )
    .pipe(
        lambda merged: merged.drop(
            columns=[col for col in merged.columns if col.startswith('raw_name_')]
        )
    )
)

In [11]:
# Show the merged per-session table (sessions joined with processed and DLC asset info).
# NOTE(review): the saved output (In [11]) still lists raw_name_proc / raw_name_dlc,
# which the preceding cell (In [13]) drops — the output looks stale; re-run to refresh.
session_infos

Unnamed: 0,acquisition_date,session_type,reward_consumed,rig_id,session_name,raw_asset_name,session_type_exposures,raw_name_proc,long_window,processed_asset_id,processed_date,processed_name,raw_name_dlc,dlc_asset_id,dlc_date,dlc_name
0,2024-11-12,TRAINING_0_gratings_autorewards_15min,,MESO.1,755252_2024-11-12,multiplane-ophys_755252_2024-11-12_09-43-51,1,multiplane-ophys_755252_2024-11-12_09-43-51,60.0,6747f968-d6dc-4db6-b6a6-941aac40425e,2025-09-05,multiplane-ophys_755252_2024-11-12_09-43-51_pr...,multiplane-ophys_755252_2024-11-12_09-43-51,1b773aac-a8c3-4748-99fd-e4ad2d136155,2025-08-29,multiplane-ophys_755252_2024-11-12_09-43-51_dl...
1,2024-11-13,TRAINING_1_gratings,,MESO.1,755252_2024-11-13,multiplane-ophys_755252_2024-11-13_09-16-29,1,multiplane-ophys_755252_2024-11-13_09-16-29,60.0,41d3bfb3-f969-4442-936d-58aaf02720d6,2025-09-05,multiplane-ophys_755252_2024-11-13_09-16-29_pr...,multiplane-ophys_755252_2024-11-13_09-16-29,13f82ed0-dc79-44b6-bb80-bdac7ebd6a28,2025-08-29,multiplane-ophys_755252_2024-11-13_09-16-29_dl...
2,2024-11-14,TRAINING_1_gratings,,MESO.1,755252_2024-11-14,multiplane-ophys_755252_2024-11-14_11-28-00,2,multiplane-ophys_755252_2024-11-14_11-28-00,60.0,15f5d46e-80d5-422f-8608-461484b4c86b,2025-09-05,multiplane-ophys_755252_2024-11-14_11-28-00_pr...,multiplane-ophys_755252_2024-11-14_11-28-00,0d39fe75-6cb5-4391-acb8-4964f0b6b106,2025-08-29,multiplane-ophys_755252_2024-11-14_11-28-00_dl...
3,2024-11-15,TRAINING_1_gratings,,MESO.1,755252_2024-11-15,multiplane-ophys_755252_2024-11-15_10-49-40,3,multiplane-ophys_755252_2024-11-15_10-49-40,60.0,f0c72bdf-844e-4d27-bb71-c6d9e879788c,2025-09-05,multiplane-ophys_755252_2024-11-15_10-49-40_pr...,multiplane-ophys_755252_2024-11-15_10-49-40,6f4d7d64-00f0-43cc-9797-0e43f5936588,2025-08-29,multiplane-ophys_755252_2024-11-15_10-49-40_dl...
4,2024-11-18,TRAINING_1_gratings,,MESO.1,755252_2024-11-18,multiplane-ophys_755252_2024-11-18_08-01-08,4,multiplane-ophys_755252_2024-11-18_08-01-08,60.0,bccb963f-a624-4ce9-8f67-7faafeb0846e,2025-09-05,multiplane-ophys_755252_2024-11-18_08-01-08_pr...,multiplane-ophys_755252_2024-11-18_08-01-08,9ca0b44a-c8b3-4cbc-95f9-e5a4cef2115b,2025-08-29,multiplane-ophys_755252_2024-11-18_08-01-08_dl...
5,2024-11-19,TRAINING_1_gratings,,MESO.1,755252_2024-11-19,multiplane-ophys_755252_2024-11-19_09-05-21,5,multiplane-ophys_755252_2024-11-19_09-05-21,60.0,5a6b9423-0dfc-4526-8480-4cea8a5745f3,2025-09-05,multiplane-ophys_755252_2024-11-19_09-05-21_pr...,multiplane-ophys_755252_2024-11-19_09-05-21,60ce72c2-020f-48ae-913d-98128870ea2e,2025-08-29,multiplane-ophys_755252_2024-11-19_09-05-21_dl...
6,2024-11-21,TRAINING_1_gratings,,MESO.1,755252_2024-11-21,multiplane-ophys_755252_2024-11-21_11-18-50,6,multiplane-ophys_755252_2024-11-21_11-18-50,60.0,f2b9b2c1-76ad-4ed7-9367-5e5aaf09bc93,2025-09-05,multiplane-ophys_755252_2024-11-21_11-18-50_pr...,multiplane-ophys_755252_2024-11-21_11-18-50,aa136aef-dd18-4555-a528-18a56705f522,2025-08-29,multiplane-ophys_755252_2024-11-21_11-18-50_dl...
7,2024-11-22,TRAINING_1_gratings,,MESO.1,755252_2024-11-22,multiplane-ophys_755252_2024-11-22_12-00-43,7,multiplane-ophys_755252_2024-11-22_12-00-43,60.0,6d4741b9-330a-473f-9efb-9d52736633fc,2025-09-05,multiplane-ophys_755252_2024-11-22_12-00-43_pr...,multiplane-ophys_755252_2024-11-22_12-00-43,a2aa468c-4d66-457b-af3c-af1af201a31f,2025-08-29,multiplane-ophys_755252_2024-11-22_12-00-43_dl...
8,2024-12-03,TRAINING_2_gratings_flashed,,MESO.1,755252_2024-12-03,multiplane-ophys_755252_2024-12-03_09-14-20,1,multiplane-ophys_755252_2024-12-03_09-14-20,60.0,d8f75106-9b98-4174-b69a-c982f309c1b1,2025-09-05,multiplane-ophys_755252_2024-12-03_09-14-20_pr...,multiplane-ophys_755252_2024-12-03_09-14-20,ad5867d8-ce00-42d9-9777-58fb4e3646f1,2025-08-29,multiplane-ophys_755252_2024-12-03_09-14-20_dl...
9,2024-12-04,TRAINING_3_images_A_10uL_reward,,MESO.1,755252_2024-12-04,multiplane-ophys_755252_2024-12-04_12-06-54,1,multiplane-ophys_755252_2024-12-04_12-06-54,60.0,842af35b-feda-4c71-ab30-b7f87dad63c5,2025-09-05,multiplane-ophys_755252_2024-12-04_12-06-54_pr...,multiplane-ophys_755252_2024-12-04_12-06-54,4260e571-3248-4768-ab41-f097d5028465,2025-08-29,multiplane-ophys_755252_2024-12-04_12-06-54_dl...


In [18]:
# Count the data-process entries recorded in processing.json.
# NOTE(review): in the visible cells `processing_json` is first assigned in the cell
# below (In [15]), while this cell was executed later (In [18]). On Restart & Run All
# this line raises NameError — move it after the cell that loads processing.json.
len(processing_json['processing_pipeline']['data_processes'])

48

In [15]:
# Read processing.json from the attached processed data asset.
processed_dir = Path('/root/capsule/data/multiplane-ophys_755252_2024-11-12_09-43-51_processed_2025-09-05_06-06-29')
processed_processing_json_fn = processed_dir / 'processing.json'
with open(processed_processing_json_fn) as f:
    processing_json = json.load(f)

# Take the first pipeline step named "dF/F estimation". A missing step now fails
# with a readable message instead of a bare IndexError from `[...][0]`.
dff_process = next(
    (
        dp
        for dp in processing_json['processing_pipeline']['data_processes']
        if dp['name'] == "dF/F estimation"
    ),
    None,
)
assert dff_process is not None, (
    f'no "dF/F estimation" step found in {processed_processing_json_fn} - use another processed data asset'
)
dff_params = dff_process['parameters']
assert "long_window" in dff_params, '"long_window" is not in dF/F estimation parameter - use another processed data asset'
dff_long_window = dff_params['long_window']

In [16]:
# Display the long_window value read from the "dF/F estimation" parameters.
dff_long_window

60