# Purpose:
- The session.json format changed for a brief period.
    - This prevents ophys data processing.
- Check how many sessions are affected.

# Result:
- Among the 11 mice checked, only one session appears to be affected:
    - 767018_2025-01-31

In [1]:
import csv
import logging
import os
import time
import re
from dataclasses import dataclass
from typing import Union
import numpy as np
import pandas as pd
from pathlib import Path
import json

from aind_codeocean_pipeline_monitor.models import (CaptureSettings,
                                                    PipelineMonitorSettings)
from aind_data_access_api.document_db import MetadataDbClient
from codeocean import CodeOcean
from codeocean.computation import (ComputationState, DataAssetsRunParam,
                                   RunParams)
from dataclasses_json import dataclass_json

from lamf_analysis.code_ocean import docdb_utils

%load_ext autoreload
%autoreload 2

# Write log records at INFO level and above to "batch.log", each prefixed
# with a timestamp, the logger name and the level.
logging.basicConfig(
    filename="batch.log",
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
# Connection settings for the metadata document database (these are plain
# module constants, not environment variables).
API_GATEWAY_HOST = "api.allenneuraldynamics.org"
DATABASE = "metadata_index"
COLLECTION = "data_assets"
# Shared client used by the queries below.
docdb_api_client = MetadataDbClient(
    host=API_GATEWAY_HOST,
    database=DATABASE,
    collection=COLLECTION,
)

# The Code Ocean domain is hard-coded; the environment-variable lookup is
# kept here, disabled, for reference.
# domain = os.getenv("CODEOCEAN_DOMAIN")
domain = "https://codeocean.allenneuraldynamics.org/"
# The API token is read from the API_SECRET environment variable;
# os.getenv returns None when the variable is not set.
token = os.getenv("API_SECRET")
co_client = CodeOcean(domain=domain, token=token)


# Capsule ID is read from CO_CAPSULE_ID (the CO_MONITOR_PIPELINE lookup is
# kept, disabled, for reference). Not used in the visible part of this file.
# monitor_pipeline_capsule_id = os.getenv("CO_MONITOR_PIPELINE")
monitor_pipeline_capsule_id = os.getenv("CO_CAPSULE_ID")

# Regular-expression fragments; presumably for parsing asset names of the
# form <subject>_<date>_<time> -- not used in the visible code, TODO confirm.
DATE_FORMAT = '[0-9]{4}-[0-9]{2}-[0-9]{2}'
TIME_FORMAT = '[0-9]{2}-[0-9]{2}-[0-9]{2}'
SUBJECT_ID_FORMAT = '[0-9]{6}'

# NOTE(review): looks like an allow-list of pipeline commit hashes; it is
# not referenced in the visible code -- confirm before relying on it.
COMMIT_IDS_TO_KEEP = ['e61e887fbe5491035b35f8fd223101ad276622f1']

def _extract_code_ocean_id(external_links):
    """Return the first Code Ocean asset ID held in an ``external_links`` value.

    Parameters
    ----------
    external_links : str, list, dict or None
        The ``external_links`` field of one record. Handled shapes:
        a JSON string encoding any of the shapes below, a list of link
        dicts (the first entry is used), a dict with a "Code Ocean" key,
        or a plain list of IDs.

    Returns
    -------
    object
        The first ID when one is present, the string "None" when the ID
        list is empty (kept for backward compatibility), or None when the
        field or its "Code Ocean" entry is missing.
    """
    if isinstance(external_links, str):
        external_links = json.loads(external_links)
    # List of link dicts: only the first entry is consulted.
    if (
        isinstance(external_links, list)
        and external_links
        and isinstance(external_links[0], dict)
    ):
        external_links = external_links[0]
    if isinstance(external_links, dict):
        external_links = external_links.get("Code Ocean")
    if isinstance(external_links, list):
        return external_links[0] if external_links else "None"
    return external_links


def get_asset_ids(docdb_api_client, asset_name, max_retry=100) -> list:
    """Get the asset IDs from the data access api
    Parameters
    ----------
    docdb_api_client : MetadataDbClient
        The data access api client
    asset_name : str
        The asset name
    max_retry : int
        Maximum number of attempts for querying the data access api
    Returns
    -------
    list
        One entry per matching record: the first Code Ocean asset ID,
        the string "None" when the record's ID list is empty, or None when
        the record has no Code Ocean link.
        Rarely, there are multiple assets with the same name
    Raises
    ------
    RuntimeError
        If the query did not succeed within ``max_retry`` attempts.
    """
    query = {"name": asset_name}
    projection = {"external_links": 1}
    last_error = None
    for attempt in range(1, max_retry + 1):
        try:
            response = docdb_api_client.retrieve_docdb_records(
                filter_query=query, projection=projection
            )
            break
        # The client's exception types are not known here, so any failure
        # is treated as retryable.
        except Exception as e:
            last_error = e
            logging.warning(f"Retry {attempt}/{max_retry} after error: {e}")
            # No point in waiting after the final attempt.
            if attempt < max_retry:
                time.sleep(1)
    else:
        # Every attempt failed (or max_retry < 1): fail with a clear error
        # instead of tripping over an unbound ``response`` below.
        raise RuntimeError(
            f"Could not query asset '{asset_name}' after {max_retry} attempts"
        ) from last_error
    return [_extract_code_ocean_id(res.get("external_links", None)) for res in response]

In [25]:

# For every raw multiplane-ophys asset of the listed mice, record which Python
# type the first FOV's `targeted_structure` has in the session metadata.
mouse_ids = [755252, 767018, 767022, 775682, 779891, 782149, 783551, 785054, 788406, 790322, 797371]

# The projection is identical for every mouse, so it is built once.
projection = {"external_links": 1,
              "name": 1,
              "session.data_streams": 1,
              "subject.subject_id": 1,
              "data_description.modality": 1}
max_retry = 5

all_tuples = []
for mouse_id in mouse_ids:

    query = {'subject.subject_id': str(mouse_id),
             'data_description.data_level': 'raw',
             }

    for retry_count in range(max_retry):
        try:
            response = docdb_api_client.retrieve_docdb_records(
                filter_query=query, projection=projection
            )
            break
        except Exception as e:
            logging.warning(f"Retry {retry_count+1}/{max_retry} after error: {e}")
            time.sleep(1)
    else:
        # Without this guard a failed mouse would silently reuse the previous
        # mouse's `response` and corrupt the audit.
        raise RuntimeError(
            f"Could not query records for mouse {mouse_id} after {max_retry} attempts"
        )

    mouse_temp = []
    for r in response:
        if not r['name'].startswith('multiplane-ophys'):
            continue
        # Fields 1 and 2 of the underscore-split name; in the recorded output
        # these read as "<subject_id>_<date>".
        session_key = '_'.join(r['name'].split('_')[1:3])
        targeted_structure = r['session']['data_streams'][0]['ophys_fovs'][0]['targeted_structure']
        mouse_temp.append((session_key, type(targeted_structure)))
    all_tuples.extend(mouse_temp)

In [26]:
all_tuples

[('755252_2024-12-05', str),
 ('755252_2024-11-15', str),
 ('755252_2024-12-04', str),
 ('755252_2024-11-21', str),
 ('755252_2024-11-12', str),
 ('755252_2024-12-19', str),
 ('755252_2025-01-14', str),
 ('755252_2025-01-13', str),
 ('755252_2024-12-11', str),
 ('755252_2024-12-20', str),
 ('755252_2024-12-12', str),
 ('755252_2024-11-19', str),
 ('755252_2024-12-13', str),
 ('755252_2024-11-13', str),
 ('755252_2024-12-03', str),
 ('755252_2024-12-09', str),
 ('755252_2024-12-10', str),
 ('755252_2024-11-14', str),
 ('755252_2024-11-22', str),
 ('755252_2024-11-18', str),
 ('755252_2024-12-06', str),
 ('755252_2025-01-08', str),
 ('755252_2025-01-07', str),
 ('755252_2025-01-03', str),
 ('755252_2024-12-17', str),
 ('755252_2024-12-18', str),
 ('755252_2024-12-16', str),
 ('767018_2025-02-18', str),
 ('767018_2025-02-17', str),
 ('767018_2025-01-28', str),
 ('767018_2025-02-19', str),
 ('767018_2025-01-24', str),
 ('767018_2025-02-10', str),
 ('767018_2025-01-31', dict),
 ('767018_202

In [28]:
# Session keys whose recorded type in `all_tuples` is dict rather than str.
[session_key for session_key, value_type in all_tuples if value_type is dict]

['767018_2025-01-31']