In [102]:
import os
from datetime import datetime
import zoneinfo
import numpy as np
import pprint

from codeocean import CodeOcean
from codeocean.computation import (ComputationState, DataAssetsRunParam,
                                   RunParams)
from codeocean.data_asset import (DataAsset, DataAssetSearchParams)

In [160]:
domain = "https://codeocean.allenneuraldynamics.org/"
token = os.getenv("API_SECRET")

client = CodeOcean(domain=domain, token=token)


pipeline_id = "4cbb3e55-83df-40ad-83ff-6d65839dc9c0" # Jinho - aind-multiplane-ophys-pipeline (https://codeocean.allenneuraldynamics.org/capsule/4412807/tree)


computations = client.capsules.list_computations(capsule_id=pipeline_id)


In [161]:
completed_computations = [c for c in computations if c.state == ComputationState.Completed]
error_computations = [] # based on json file check
success_computations = [] # based on json file check
for computation in completed_computations:
    try:
        result_paths = client.computations.list_computation_results(
            computation_id=computation.id,
        )
        folder_contents = [item.name for item in result_paths.items]
        json_files_list = ['data_description.json', 'procedures.json', 'processing.json', 'quality_control.json', 'rig.json', 'session.json', 'subject.json']
        if not np.all([jf in folder_contents for jf in json_files_list]):
            error_computations.append(computation)
        else:
            success_computations.append(computation)
    except:
        error_computations.append(computation)


exit0_computations = [c for c in completed_computations if c.exit_code == 0]  # supposed to be successful
exit1_computations = [c for c in completed_computations if c.exit_code == 1]  # indicating any error


In [5]:
# computation IDs indicating success but not having all json files
set([computation.id for computation in exit0_computations]) - set([computation.id for computation in success_computations])

{'07eb0b28-d3b8-40e9-b59a-f29ee0a4746d',
 '60810c42-ac7a-425e-80d7-ab3123279dc8',
 '6d40f1b3-5cfd-4877-8ac1-57cf35a92881',
 '84d00ed8-1303-4447-950f-9b278660b14e',
 '85922ef8-777d-4858-a09d-bfad61249979',
 '90a7c6c6-3f3f-4ea8-bff2-126608b47412',
 'a7fe4d13-2e4e-46f2-97c7-0a1b31498f9a',
 'b7bee4d1-03d7-44c8-9c90-8231b689771b',
 'bfb4cb5a-6ef1-4a7d-8205-71123e06f6ef',
 'f2fbb534-5627-4a9e-8fda-c6af5441ead5',
 'f74759fc-b924-4004-b595-aeba72241192'}

In [6]:
# computation IDs having all json files but not exit code 0
set([computation.id for computation in success_computations]) - set([computation.id for computation in exit0_computations])

{'de041fc9-bfeb-4eac-9489-a8122ce8275c'}

# This one was a test run by Arielle
- Not converted to data asset
- 'de041fc9-bfeb-4eac-9489-a8122ce8275c'

In [57]:
result_paths = client.computations.list_computation_results(
            # computation_id='f74759fc-b924-4004-b595-aeba72241192',
            computation_id='de041fc9-bfeb-4eac-9489-a8122ce8275c'
            # computation_id='85227710-e8b0-495e-8751-649a60a1c9c9'
        )
pprint.pprint([item.name for item in result_paths.items])

['VISp_0',
 'VISp_1',
 'VISp_2',
 'VISp_3',
 'VISp_4',
 'VISp_5',
 'VISp_6',
 'VISp_7',
 'data_description.json',
 'output',
 'pair0.txt',
 'pair1.txt',
 'pair2.txt',
 'pair3.txt',
 'procedures.json',
 'processing.json',
 'quality_control.json',
 'rig.json',
 'session.json',
 'subject.json']


In [68]:
pprint.pprint(computation)

Computation(id='de041fc9-bfeb-4eac-9489-a8122ce8275c',
            created=1754929717,
            name='Run With Parameters 4929717',
            run_time=684,
            state=<ComputationState.Completed: 'completed'>,
            cloud_workstation=None,
            data_assets=[InputDataAsset(id='35d1284e-4dfa-4ac3-9ba8-5ea1ae2fdaeb',
                                        mount='2p_roi_classifier'),
                         InputDataAsset(id='a77a2368-0077-4083-8eed-2eede398f560',
                                        mount='ophys_mount'),
                         InputDataAsset(id='fb4b5cef-4505-4145-b8bd-e41d6863d7a9',
                                        mount='schemas')],
            parameters=None,
            nextflow_profile=None,
            processes=[PipelineProcess(name='capsule_aind_ophys_decrosstalk_split_session_json_2',
                                       capsule_id='fc1b1e9a-fb4b-47e8-a223-b06d8eeb1462',
                                       version=1,
 

In [78]:
computation_id = '07eb0b28-d3b8-40e9-b59a-f29ee0a4746d'
# computation_id = '4988e2bc-f672-4994-b6b5-89340219994d'
computation = [c for c in computations if c.id == computation_id][0]
input_ophys_data_assest_id = [da for da in computation.data_assets if da.mount=='ophys_mount'][0].id
input_data_asset = client.data_assets.get_data_asset(data_asset_id=input_ophys_data_assest_id)
print(input_data_asset.name)
# created_datetime = datetime.fromtimestamp(computation.created).astimezone(zoneinfo.ZoneInfo("America/Los_Angeles")).strftime("%Y-%m-%d_%H-%M-%S")
created_datetime = datetime.fromtimestamp(computation.created).astimezone().strftime("%Y-%m-%d_%H-%M-%S")
print(created_datetime)


multiplane-ophys_767018_2025-02-10_13-04-43
2025-07-02_21-23-01


In [63]:
data_asset_id = '1b64db3e-c419-4c96-b099-bc8d17883da0'
pprint.pprint(client.data_assets.get_data_asset(data_asset_id))

DataAsset(id='1b64db3e-c419-4c96-b099-bc8d17883da0',
          created=1759274140,
          name='multiplane-ophys_790322_2025-08-05_08-54-42_processed_2025-09-30_22-51-59',
          mount='multiplane-ophys_790322_2025-08-05_08-54-42_processed_2025-09-30_22-51-59',
          state=<DataAssetState.Ready: 'ready'>,
          type=<DataAssetType.Result: 'result'>,
          last_used=1759274403,
          files=3782,
          size=345394286043,
          description='',
          tags=['derived', 'multiplane-ophys', '790322'],
          provenance=Provenance(commit=None,
                                run_script='code/run',
                                docker_image=None,
                                capsule='4cbb3e55-83df-40ad-83ff-6d65839dc9c0',
                                data_assets=['35d1284e-4dfa-4ac3-9ba8-5ea1ae2fdaeb',
                                             'a77a2368-0077-4083-8eed-2eede398f560',
                                             'fb4b5cef-4505-4145-b

In [83]:
# check date computed for those successful but not having json files
success_nojson_computations = list(set([computation.id for computation in exit0_computations]) - set([computation.id for computation in success_computations]))
created_datetime_list = []
for computation_id in success_nojson_computations:
    computation = [c for c in computations if c.id == computation_id][0]
    created_datetime = datetime.fromtimestamp(computation.created).astimezone().strftime("%Y-%m-%d_%H-%M-%S")
    created_datetime_list.append((computation_id, created_datetime))
# sort the list based on date
created_datetime_list = sorted(created_datetime_list, key=lambda x: x[1])
pprint.pprint(created_datetime_list)


[('f2fbb534-5627-4a9e-8fda-c6af5441ead5', '2025-06-12_20-07-51'),
 ('60810c42-ac7a-425e-80d7-ab3123279dc8', '2025-06-18_18-08-12'),
 ('f74759fc-b924-4004-b595-aeba72241192', '2025-06-25_02-29-15'),
 ('90a7c6c6-3f3f-4ea8-bff2-126608b47412', '2025-06-30_15-46-16'),
 ('84d00ed8-1303-4447-950f-9b278660b14e', '2025-07-01_22-13-28'),
 ('07eb0b28-d3b8-40e9-b59a-f29ee0a4746d', '2025-07-02_21-23-01'),
 ('6d40f1b3-5cfd-4877-8ac1-57cf35a92881', '2025-07-07_17-11-23'),
 ('b7bee4d1-03d7-44c8-9c90-8231b689771b', '2025-07-08_00-39-44'),
 ('85922ef8-777d-4858-a09d-bfad61249979', '2025-08-31_03-29-32'),
 ('a7fe4d13-2e4e-46f2-97c7-0a1b31498f9a', '2025-08-31_03-29-32'),
 ('bfb4cb5a-6ef1-4a7d-8205-71123e06f6ef', '2025-09-07_17-05-24')]


In [92]:
# check the ones on or after 2025-08-31

computation_id = 'f2fbb534-5627-4a9e-8fda-c6af5441ead5'
# computation_id = '4988e2bc-f672-4994-b6b5-89340219994d'
computation = [c for c in computations if c.id == computation_id][0]
input_ophys_data_assest_id = [da for da in computation.data_assets if da.mount=='ophys_mount'][0].id
input_data_asset = client.data_assets.get_data_asset(data_asset_id=input_ophys_data_assest_id)
print(input_data_asset.name)
# created_datetime = datetime.fromtimestamp(computation.created).astimezone(zoneinfo.ZoneInfo("America/Los_Angeles")).strftime("%Y-%m-%d_%H-%M-%S")
created_datetime = datetime.fromtimestamp(computation.created).astimezone().strftime("%Y-%m-%d_%H-%M-%S")
print(created_datetime)

multiplane-ophys_767018_2025-02-10_13-04-43
2025-06-12_20-07-51


# I archived all data assets from no json computations

# Now I need to go through all no json computations
- there are 151
- may need more automatic way

In [162]:
len(error_computations)

151

In [156]:
def get_input_raw_asset_name(computation):
    input_ophys_data_assest_id = [da for da in computation.data_assets if da.mount=='ophys_mount'][0].id
    input_data_asset = client.data_assets.get_data_asset(data_asset_id=input_ophys_data_assest_id)
    return input_data_asset.name


def get_computation_id_from_asset_id(asset_id):
    data_asset = client.data_assets.get_data_asset(asset_id)
    computation_id = data_asset.provenance.computation
    return computation_id

In [None]:
# removing all "processed" data assets associated with error_computations
for computation in error_computations:
    computation_id = computation.id
    input_data_asset_name = get_input_raw_asset_name(computation)

    search_name = '_'.join(input_data_asset_name.split('_')[:2])
    subject_id = input_data_asset_name.split('_')[1]
    query = f'name: {search_name}'
    search_param = DataAssetSearchParams(query=query, limit=1000, filters={'tags': ['derived']})

    data_asset_params = DataAssetSearchParams(
        offset=0,
        limit=1000,
        sort_order="desc",
        sort_field="name",
        archived=False,
        favorite=False,
        query="",
        filters=[
            SearchFilter(
                key="tags",
                value="derived"
            ), 
            SearchFilter(
                key="tags",
                value=subject_id
            )
        ]
    )

    data_asset_search_results = client.data_assets.search_data_assets(data_asset_params)
    print(len(data_asset_search_results.results))

    processed_data_asset_names = [da.name for da in data_asset_search_results.results if da.name.startswith(input_data_asset_name) and \
    ('processed' in da.name) and ('dlc-eye' not in da.name)]
    processed_data_asset_ids = [da.id for da in data_asset_search_results.results if da.name.startswith(input_data_asset_name) and \
        ('processed' in da.name) and ('dlc-eye' not in da.name)]
    computation_ids = [get_computation_id_from_asset_id(id) for id in processed_data_asset_ids]
    try:
        matched_computation_ind = computation_ids.index(computation_id)
        print(len(processed_data_asset_name))
        print(matched_computation_ind)

        matched_da_id = processed_data_asset_ids[matched_computation_ind]
        # matched_da = client.data_assets.get_data_asset(data_asset_id=matched_da_id)

        client.data_assets.archive_data_asset(
            data_asset_id=matched_da_id,
            archive=True,
        )
    except:
        continue

41
73
73
67
73
67
65
73
41
41
41
41
41
41
41
65
65
65
2
0
64
2
0
63
2
0
67
2
0
66
2
0
65
2
0
64
2
0
63
2
0
73
2
0
72
2
0
71
2
0
70
2
0
69
2
0
63
2
0
62
2
0
158
158
102
2
0
105
105
2
0
104
2
0
103
2
0
102
2
0
101
2
0
100
2
0
99
2
0
98
2
0
97
2
0
96
2
0
95
2
0
61
2
0
60
2
0
59
2
0
58
2
0
57
2
0
56
2
0
55
2
0
54
2
0
53
2
0
52
2
0
51
2
0
50
2
0
49
2
0
48
2
0
47
2
0
46
2
0
79
79
79
8
2
0
68
101
68
2
1
67
2
2
66
66
66
66
66
66
66
66
66
66
11
11
11
11
11
11
11
11
11
11
11
11
11
11
11
11
11
11
11
41
41
41
41
41
41
41
41
41
41
41
41
41
41
41
41
11
11
79
79
79
79
79
79
79
79
79
79
79
79
79
79
79
79
79
79
79
79
79
79
79
79
79
79
79
79
79
79
79
79


In [163]:
# after removing those data assets, check again
remaining_error_computations = []
remaining_error_data_asset_name = []
remaining_error_data_asset_id = []
for computation in error_computations:
    computation_id = computation.id
    input_data_asset_name = get_input_raw_asset_name(computation)

    search_name = '_'.join(input_data_asset_name.split('_')[:2])
    subject_id = input_data_asset_name.split('_')[1]
    query = f'name: {search_name}'
    search_param = DataAssetSearchParams(query=query, limit=1000, filters={'tags': ['derived']})

    data_asset_params = DataAssetSearchParams(
        offset=0,
        limit=1000,
        sort_order="desc",
        sort_field="name",
        archived=False,
        favorite=False,
        query="",
        filters=[
            SearchFilter(
                key="tags",
                value="derived"
            ), 
            SearchFilter(
                key="tags",
                value=subject_id
            )
        ]
    )

    data_asset_search_results = client.data_assets.search_data_assets(data_asset_params)

    processed_data_asset_names = [da.name for da in data_asset_search_results.results if da.name.startswith(input_data_asset_name) and \
    ('processed' in da.name) and ('dlc-eye' not in da.name)]
    processed_data_asset_ids = [da.id for da in data_asset_search_results.results if da.name.startswith(input_data_asset_name) and \
        ('processed' in da.name) and ('dlc-eye' not in da.name)]
    computation_ids = [get_computation_id_from_asset_id(id) for id in processed_data_asset_ids]

    # check if there is matched computation id
    try:
        matched_computation_ind = computation_ids.index(computation_id)
        remaining_error_computations.append(computation)
        remaining_error_data_asset_name.append(processed_data_asset_names[matched_computation_ind])
        remaining_error_data_asset_id.append(processed_data_asset_ids[matched_computation_ind])
    except:
        print("no matched computation id")
        continue


79
no matched computation id
41
no matched computation id
66
no matched computation id
66
no matched computation id
62
no matched computation id
66
no matched computation id
62
no matched computation id
62
no matched computation id
66
no matched computation id
41
no matched computation id
41
no matched computation id
41
no matched computation id
41
no matched computation id
41
no matched computation id
41
no matched computation id
41
no matched computation id
62
no matched computation id
62
no matched computation id
62
no matched computation id
62
no matched computation id
62
no matched computation id
62
no matched computation id
62
no matched computation id
62
no matched computation id
62
no matched computation id
62
no matched computation id
66
no matched computation id
66
no matched computation id
66
no matched computation id
66
no matched computation id
66
no matched computation id
45
no matched computation id
45
no matched computation id
158
no matched computation id
158
no matche

In [164]:
print(len(remaining_error_computations))

0
