# Prerequisites:
- Code Ocean APIs (co_test kernel)
- Environment variables `CODEOCEAN_TOKEN` and `CODEOCEAN_DOMAIN`

In [2]:
# from aind_codeocean_api.codeocean import CodeOceanClient
# from aind_codeocean_api.credentials import CodeOceanCredentials
# from aind_data_access_api.document_db import MetadataDbClient
from lamf_analysis.code_ocean.code_ocean_data_explorer import CodeOceanDataExplorer

In [7]:
import os
from pathlib import Path
import pandas as pd
import yaml
import numpy as np

In [4]:
####
## SECURITY: the Code Ocean API token must never be hard-coded in the notebook.
## The token that used to be written here in plain text has been exposed and
## should be revoked / rotated in Code Ocean.
##
## Credentials are read from the environment. If the kernel was started without
## them (the problem this cell originally worked around), the token is asked for
## interactively once, so that the secret is never stored in source.
####
from getpass import getpass

if not os.environ.get('CODEOCEAN_TOKEN'):
    os.environ['CODEOCEAN_TOKEN'] = getpass('Code Ocean API token: ')

# The domain is not a secret: keep the previous value as a fallback, but do not
# overwrite a domain that is already configured in the environment.
os.environ.setdefault('CODEOCEAN_DOMAIN', 'https://codeocean.allenneuraldynamics.org/')


In [5]:

def load_capsule_name_map(yaml_path=None):
    """Build a ``{capsule_id: capsule_name}`` lookup from ``useful_capsules.yml``.

    The YAML document is read as a mapping of capsule name -> capsule entry;
    only the ``capsule_id`` key of each entry is used.

    Parameters
    ----------
    yaml_path : str or Path, optional
        YAML file to read. Defaults to ``useful_capsules.yml`` in the
        learning workgroup share.

    Returns
    -------
    dict
        Capsule ID -> capsule name. Empty when the YAML file has no content.

    Raises
    ------
    OSError
        If the file cannot be opened.
    yaml.YAMLError
        If the file is not valid YAML. The error is propagated instead of
        being printed, because continuing without the parsed content would
        only fail later with an unrelated ``NameError``.
    KeyError
        If a capsule entry has no ``capsule_id`` key.
    """
    if yaml_path is None:
        yaml_path = Path(r'\\allen\programs\mindscope\workgroups\learning') / 'useful_capsules.yml'

    with open(yaml_path, 'r') as stream:
        capsule_yaml = yaml.safe_load(stream)

    # safe_load returns None for an empty document
    if capsule_yaml is None:
        return {}

    return {capsule['capsule_id']: name for name, capsule in capsule_yaml.items()}



def _resolve_raw_asset_id(input_assets, mounted_ids):
    """Return the single real input asset ID, or None when it cannot be determined.

    A one-element list is taken as is. For longer lists the IDs in
    ``mounted_ids`` are removed first, and the result is accepted only if
    exactly one ID remains. Non-list values (e.g. NaN) and empty lists
    resolve to None.
    """
    if not isinstance(input_assets, list) or not input_assets:
        return None
    if len(input_assets) == 1:
        return input_assets[0]
    true_assets = [asset_id for asset_id in input_assets if asset_id not in mounted_ids]
    if len(true_assets) == 1:
        return true_assets[0]
    return None


def derived_assets_table(v4_sessions_only = False, parse_input_assets_col=True):
    """Build a table of the derived assets returned by the "multiplane" query.

    The ``derived_assets`` of a ``CodeOceanDataExplorer`` are loaded into a
    DataFrame, unused columns are dropped, the ``provenance`` column is
    expanded into separate columns, and columns are renamed
    (``id`` -> ``data_asset_id``, ``name`` -> ``data_asset_name``,
    ``capsule`` -> ``capsule_id``, ``data_assets`` -> ``input_data_assets``).
    A ``capsule_name`` column is mapped from ``load_capsule_name_map()`` and a
    ``prefix`` column holds the first four ``_``-separated fields of
    ``data_asset_name``. Rows are sorted by ``capsule_name``.

    Parameters
    ----------
    v4_sessions_only : bool, default False
        Keep only rows whose ``capsule_name`` is ``'multiplane-ophys-v4'``
        (the index is reset in that case).
    parse_input_assets_col : bool, default True
        Collapse each ``input_data_assets`` list to a single asset ID, ignoring
        the known dummy-mount assets, and rename the column to
        ``provenance_raw_data_asset_id``. Values that cannot be reduced to
        exactly one ID are left unchanged (a message is printed for lists).

    Returns
    -------
    pandas.DataFrame
        One row per derived asset.
    """
    code = CodeOceanDataExplorer(query="multiplane", verbose=True)
    df = pd.DataFrame(code.derived_assets)
    df = df.drop(columns=['description', 'files', 'last_used', 'mount', 'size', 'state',
                          'custom_metadata', 'type', 'created', 'source_bucket'])
    # NOTE: provenance has NaNs in response from API, investigate MJD 07/2024
    df = pd.concat([df.drop(['provenance'], axis=1), df['provenance'].apply(pd.Series)], axis=1)

    df = df.drop(columns=['run_script', 'docker_image'])
    # Column 0 is presumably the leftover of expanding NaN provenance entries
    # (pd.Series(nan) has the single label 0); it is absent when every asset
    # has provenance, so its absence must not raise.
    df = df.drop(columns=[0], errors='ignore')

    df = df.rename(columns={'id': 'data_asset_id', 'name': 'data_asset_name',
                            'capsule': 'capsule_id', 'data_assets': 'input_data_assets'})
    capsule_name_map = load_capsule_name_map()
    df['capsule_name'] = df['capsule_id'].map(capsule_name_map)
    # 'input_data_assets' is kept; 'commit' and 'tags' could be useful but are dropped for now
    df = df.drop(columns=['commit', 'tags'])
    df = df.sort_values(by=['capsule_name'])

    df['prefix'] = df['data_asset_name'].apply(lambda x: '_'.join(x.split('_')[:4]))

    if v4_sessions_only:
        df = df[df.capsule_name == 'multiplane-ophys-v4'].reset_index(drop=True)

    # These data_assets were used as dummy mount points, and therefore
    # show up in the provenance column. We can filter them out of the
    # input_data_assets col to get the true data_asset of interest.
    # these are just for v4 mounted data
    mounted_data_asset_ids = {
        '58452d49-05de-4b14-a9d6-c6915fe7bf6c',  # multiplane-ophys_692478_2023-10-02_09-01-32
        'f6019c2b-feb4-43d0-becd-2818db251d63',  # cant find
        '05529cfc-23fe-4ead-9490-71763e9f7c01',  # 'universal_eye_tracking-peterl-2019-07-10'
        '350148c1-ccfc-4e9c-96a8-e43b8a3a2fc9',  # v5 mount: multiplane-ophys_726433_2024-05-14_08-13-02
    }
    if parse_input_assets_col:
        # Materialize the items first: the column is modified inside the loop.
        for idx, input_assets in list(df['input_data_assets'].items()):
            resolved = _resolve_raw_asset_id(input_assets, mounted_data_asset_ids)
            if resolved is not None:
                # Store the asset that survived the mount filter, not simply
                # the first entry of the unfiltered list.
                df.at[idx, 'input_data_assets'] = resolved
            elif isinstance(input_assets, list):
                print(f"Could not resolve a single data asset in input_data_assets: {input_assets}")
            # non-list values (NaN provenance) are left untouched

        # rename input_data_assets to raw_data_asset_id
        df = df.rename(columns={'input_data_assets': 'provenance_raw_data_asset_id'})

    print(f"Number of derived ophys assets: {len(df)}")

    return df


def raw_assets_table():
    """Return the raw assets of the "multiplane" query as a trimmed DataFrame.

    Columns that are not needed downstream are removed, and ``id`` / ``name``
    are renamed to ``raw_data_asset_id`` / ``raw_data_asset_name``.
    """
    unused_columns = [
        'description', 'files', 'last_used', 'mount', 'size', 'state',
        'custom_metadata', 'type', 'created',
        'tags', 'source_bucket',
    ]
    new_names = {'id': 'raw_data_asset_id', 'name': 'raw_data_asset_name'}

    explorer = CodeOceanDataExplorer(query="multiplane", verbose=False)
    raw_df = pd.DataFrame(explorer.raw_assets).drop(unused_columns, axis=1)
    return raw_df.rename(columns=new_names)

def get_asset_session_table(v4_sessions_only = True):
    """Return derived assets whose raw source asset could be confirmed.

    Derived assets are left-joined to raw assets on the raw asset ID taken
    from the provenance. A row counts as validated when the joined
    ``raw_data_asset_name`` equals the derived asset's ``prefix``; every other
    row (including rows without a matching raw asset) is ambiguous and is
    excluded from the result. The counts of both groups are printed.

    Parameters
    ----------
    v4_sessions_only : bool, default True
        Forwarded to ``derived_assets_table``.

    Returns
    -------
    pandas.DataFrame
        Validated rows with a fresh index, without the helper columns
        ``provenance_raw_data_asset_id``, ``prefix`` and ``prefix_match``.
    """
    raw_df = raw_assets_table()
    derived_df = derived_assets_table(v4_sessions_only=v4_sessions_only)
    print(f"Number of raw ophys assets: {len(raw_df)}")

    # attach the raw asset named in the provenance to each derived asset
    joined = derived_df.merge(
        raw_df,
        how='left',
        left_on='provenance_raw_data_asset_id',
        right_on='raw_data_asset_id',
        suffixes=('_derived', '_raw'),
    )

    # the derived name prefix must be exactly the raw asset name
    joined['prefix_match'] = joined['raw_data_asset_name'] == joined['prefix']
    is_validated = joined['prefix_match']

    # validated: clear provenance link between derived and raw; ambiguous: none found
    validated = joined[is_validated]
    ambiguous = joined[~is_validated]
    print(f"Number of validated ophys assets: {len(validated)} (multiplane_session_table)")
    print(f"Number of ambiguous ophys assets: {len(ambiguous)} (multiplane_session_table)")

    helper_columns = ['provenance_raw_data_asset_id', 'prefix', 'prefix_match']
    return validated.drop(columns=helper_columns).reset_index(drop=True)

In [9]:
# Validated derived/raw asset table, restricted to the 'multiplane-ophys-v4' capsule.
asset_table = get_asset_session_table(v4_sessions_only = True)

CodeOceanDataExplorer initialized
---------------------------------
Query: multiplane
Number of assets: 1027 

Number of derived ophys assets: 128
Number of raw ophys assets: 656
Number of validated ophys assets: 122 (multiplane_session_table)
Number of ambiguous ophys assets: 6 (multiplane_session_table)


In [6]:
# GCaMP8 pilot data table, read from the csv on the learning workgroup share
load_dir = Path(r'\\allen\programs\mindscope\workgroups\learning\pilots\GCaMP8')
load_fn = load_dir.joinpath('gcamp_pilot_data_240822.csv')
gcamp_info = pd.read_csv(filepath_or_buffer=load_fn)

In [8]:
def _co_session_name(row):
    """Session key 'multiplane-ophys_<mouse_id>_<date>' built from a csv row."""
    full_name = f'multiplane-ophys_{row.mouse_id}_{row.date_of_acquisition}'
    return '_'.join(full_name.split('_')[:3])


def _truncated_raw_name(row):
    """First three '_'-separated fields of the row's raw data asset name."""
    return '_'.join(row.raw_data_asset_name.split('_')[:3])


# Data assets are per session, so keep one csv row per ophys_session_id
multiplane_session_table = gcamp_info.drop_duplicates("ophys_session_id").copy()

# All validated data assets from CodeOcean
asset_session_table = get_asset_session_table()

# Same session key on both tables
multiplane_session_table['co_session_name'] = multiplane_session_table.apply(_co_session_name, axis=1)
asset_session_table['raw_data_asset_name_truncated'] = asset_session_table.apply(_truncated_raw_name, axis=1)

# Sort by derived asset name (descending) and keep the first row per session key
asset_session_table = asset_session_table.sort_values('data_asset_name', ascending=False)
asset_session_table_no_dup = asset_session_table.drop_duplicates(subset='raw_data_asset_name_truncated')

# NOTE: the inner merge on raw_data_asset_name_truncated / co_session_name
# is not run in this cell.

CodeOceanDataExplorer initialized
---------------------------------
Query: multiplane
Number of assets: 1031 

Number of derived ophys assets: 128
Number of raw ophys assets: 658
Number of validated ophys assets: 122 (multiplane_session_table)
Number of ambiguous ophys assets: 6 (multiplane_session_table)


In [9]:
# Sessions from the csv whose session key has no counterpart among the
# validated Code Ocean assets
uploaded_session_names = asset_session_table_no_dup['raw_data_asset_name_truncated']
is_uploaded = multiplane_session_table['co_session_name'].isin(uploaded_session_names)
not_uploaded_table = multiplane_session_table[~is_uploaded]
not_uploaded_table

Unnamed: 0,ophys_experiment_id,ophys_session_id,gcamp,mouse_id,date_of_acquisition,session_storage_directory,target_depth,target_depth_pair,session_type,full_genotype,zdrift,abs_zdrift,monitor_sync,equipment_name,specimen_storage_directory,co_session_name
0,1385868351,1385674656,oi4_homo,741863,2024-08-09_16-23-17,\\allen\programs\mindscope\production\learning...,275,175_275,STAGE_1,Oi4(TIT2L-jGCaMP8s-RiboL1-WPRE-ICL-IRES-tTA2-W...,12.75,12.75,0.036348,MESO.1,\\allen\programs\mindscope\production\learning...,multiplane-ophys_741863_2024-08-09
1,1388325284,1388162048,oi4_homo,741865,2024-08-20_16-27-14,\\allen\programs\mindscope\production\learning...,75,75_375,STAGE_1,Oi4(TIT2L-jGCaMP8s-RiboL1-WPRE-ICL-IRES-tTA2-W...,8.25,8.25,0.036329,MESO.1,\\allen\programs\mindscope\production\learning...,multiplane-ophys_741865_2024-08-20
3,1386662705,1386569652,oi4_homo,741863,2024-08-13_16-26-46,\\allen\programs\mindscope\production\learning...,375,75_375,STAGE_1,Oi4(TIT2L-jGCaMP8s-RiboL1-WPRE-ICL-IRES-tTA2-W...,7.5,7.5,0.036341,MESO.1,\\allen\programs\mindscope\production\learning...,multiplane-ophys_741863_2024-08-13
5,1386881926,1386727055,oi4_homo,741865,2024-08-14_16-40-58,\\allen\programs\mindscope\production\learning...,275,175_275,STAGE_1,Oi4(TIT2L-jGCaMP8s-RiboL1-WPRE-ICL-IRES-tTA2-W...,19.5,19.5,0.036324,MESO.1,\\allen\programs\mindscope\production\learning...,multiplane-ophys_741865_2024-08-14
8,1385884086,1385695661,ribo_aav_icv,738332,2024-08-09_18-28-22,\\allen\programs\mindscope\production\learning...,275,175_275,STAGE_1,wt/wt,7.5,7.5,1.036851,MESO.2,\\allen\programs\mindscope\production\learning...,multiplane-ophys_738332_2024-08-09
9,1385134367,1385027556,ribo_aav_icv,738332,2024-08-06_18-42-08,\\allen\programs\mindscope\production\learning...,75,75_375,STAGE_1,wt/wt,12.0,12.0,1.036841,MESO.2,\\allen\programs\mindscope\production\learning...,multiplane-ophys_738332_2024-08-06
10,1385592902,1385475019,ribo_aav_icv,738332,2024-08-08_18-11-55,\\allen\programs\mindscope\production\learning...,75,75_375,OPHYS_2_images_A_passive,wt/wt,12.75,12.75,1.036874,MESO.2,\\allen\programs\mindscope\production\learning...,multiplane-ophys_738332_2024-08-08
11,1381145892,1380878733,ribo_aav_icv,738331,2024-07-18_17-40-56,\\allen\programs\mindscope\production\learning...,275,175_275,STAGE_1,wt/wt,4.5,4.5,1.036921,MESO.2,\\allen\programs\mindscope\production\learning...,multiplane-ophys_738331_2024-07-18
12,1385127581,1383667067,ribo_aav_icv,738332,2024-07-31_17-52-29,\\allen\programs\mindscope\production\learning...,275,175_275,STAGE_1,wt/wt,15.75,15.75,1.036867,MESO.2,\\allen\programs\mindscope\production\learning...,multiplane-ophys_738332_2024-07-31
13,1382143753,1382053206,ribo_aav_icv,738331,2024-07-23_18-44-17,\\allen\programs\mindscope\production\learning...,275,175_275,OPHYS_2_images_A_passive,wt/wt,8.25,8.25,1.036913,MESO.2,\\allen\programs\mindscope\production\learning...,multiplane-ophys_738331_2024-07-23


In [15]:
# Write the list of sessions still to be uploaded next to the pilot data csv
save_dir = Path(r'\\allen\programs\mindscope\workgroups\learning\pilots\GCaMP8')
save_fn = save_dir.joinpath('uploading_list_gcamp_pilot_data_240822.csv')
not_uploaded_table.to_csv(path_or_buf=save_fn)

# previous results


In [10]:
# Load a previously saved results table from a local pickle file.
# NOTE(review): unpickling can execute arbitrary code - only load pickle files
# that come from a trusted source.
results_fn = 'D:/Downloads_chrome/results_table_20240822.pkl'
results_table = pd.read_pickle(results_fn)

In [14]:
# Session IDs present in both the previous results and the not-uploaded list
# (the recorded output is an empty array, i.e. no overlap).
np.intersect1d(results_table.ophys_session_id.values, not_uploaded_table.ophys_session_id.values)

array([], dtype=int64)

In [13]:
# Distinct mouse IDs in the previous results table.
results_table.mouse_id.unique()

array([687001, 726433, 729088, 730932, 726087, 719363, 692478, 693996,
       724567, 687000, 730929], dtype=int64)

In [16]:
def get_merged_data_table(gcamp_data_table):
    """Match the sessions of a GCaMP data table to their Code Ocean assets.

    Sessions and assets are matched on a session key made of the first three
    ``_``-separated fields of the asset name, i.e.
    ``multiplane-ophys_<mouse_id>_<date>`` (time of day is dropped). When
    several assets share a key, the one whose ``data_asset_name`` sorts last
    is kept.

    The session keys are built by iterating the relevant columns instead of a
    row-wise ``DataFrame.apply``: on an empty table ``apply(axis=1)`` returns
    a DataFrame rather than a Series, which breaks the column assignment.

    Parameters
    ----------
    gcamp_data_table : pandas.DataFrame
        Plane-level table; the columns ``ophys_session_id``, ``mouse_id``,
        ``date_of_acquisition`` and ``session_type`` are read here.

    Returns
    -------
    merged_data_table : pandas.DataFrame
        One row per session that has a matching validated asset (inner merge),
        with asset and session columns combined.
    plane_asset_table : pandas.DataFrame
        Rows of ``gcamp_data_table`` that belong to those sessions and have
        ``session_type == 'STAGE_1'``, with the Code Ocean columns attached.
    """
    def _session_key(name):
        # keep 'multiplane-ophys', mouse ID and date; drop everything after
        return '_'.join(name.split('_')[:3])

    # Select unique sessions for session table, because data assets are per session
    multiplane_session_table = gcamp_data_table.drop_duplicates(subset="ophys_session_id", keep="first").copy()

    # Getting all data assets from CodeOcean
    asset_session_table = get_asset_session_table()

    # Merge csv and CodeOcean, by the session name
    # First, assign session name using mouse ID and date of acquisition
    multiplane_session_table['co_session_name'] = [
        _session_key(f'multiplane-ophys_{mouse_id}_{acquisition_date}')
        for mouse_id, acquisition_date in zip(multiplane_session_table['mouse_id'],
                                              multiplane_session_table['date_of_acquisition'])
    ]
    asset_session_table['raw_data_asset_name_truncated'] = [
        _session_key(raw_name) for raw_name in asset_session_table['raw_data_asset_name']
    ]
    # Sort descending so that drop_duplicates keeps the last-sorting derived asset per session
    asset_session_table = asset_session_table.sort_values(by='data_asset_name', ascending=False)
    asset_session_table_no_dup = asset_session_table.drop_duplicates(subset='raw_data_asset_name_truncated', keep='first')

    # Then merge
    merged_data_table = asset_session_table_no_dup.merge(
        multiplane_session_table, how='inner',
        left_on='raw_data_asset_name_truncated', right_on='co_session_name')

    # Attach the Code Ocean columns (those not already in the plane table) to every plane
    plane_asset_table = gcamp_data_table[gcamp_data_table.ophys_session_id.isin(merged_data_table.ophys_session_id)]
    co_info_columns = np.setdiff1d(merged_data_table.columns, plane_asset_table.columns)
    co_info_columns = np.append(co_info_columns, 'ophys_session_id')
    plane_asset_table = plane_asset_table.merge(merged_data_table[co_info_columns], how='left', on='ophys_session_id')

    # Keep only STAGE_1 planes (noted by the original author as the drifting grating planes)
    plane_asset_table = plane_asset_table[plane_asset_table.session_type == 'STAGE_1']

    return merged_data_table, plane_asset_table

In [17]:
# Session-level and plane-level tables for the GCaMP pilot csv
# (this queries Code Ocean again through get_asset_session_table).
merged_data_table, plane_asset_table = get_merged_data_table(gcamp_info)

CodeOceanDataExplorer initialized
---------------------------------
Query: multiplane
Number of assets: 1029 

Number of derived ophys assets: 128
Number of raw ophys assets: 658
Number of validated ophys assets: 122 (multiplane_session_table)
Number of ambiguous ophys assets: 6 (multiplane_session_table)


In [18]:
# Display the merged session/asset table.
merged_data_table

Unnamed: 0,data_asset_id,data_asset_name,capsule_id,capsule_name,raw_data_asset_id,raw_data_asset_name,raw_data_asset_name_truncated,ophys_experiment_id,ophys_session_id,gcamp,...,target_depth,target_depth_pair,session_type,full_genotype,zdrift,abs_zdrift,monitor_sync,equipment_name,specimen_storage_directory,co_session_name
0,5a1debb1-dd36-4efc-980a-1be9276d5835,multiplane-ophys_730932_2024-07-08_11-40-23_pr...,cd3897a2-bc35-4d9a-9bf2-d86fd9c850ab,multiplane-ophys-v4,e0456142-c399-4921-aa21-64f8cccf43c4,multiplane-ophys_730932_2024-07-08_11-40-23,multiplane-ophys_730932_2024-07-08,1378882390,1378705369,ribo_aav_ro,...,275,175_275,STAGE_1,wt/wt,3.00,3.00,1.036859,MESO.2,\\allen\programs\mindscope\production\learning...,multiplane-ophys_730932_2024-07-08
1,003f6e7b-e309-4f1f-8565-6bb1f64cfaa2,multiplane-ophys_730929_2024-06-20_13-28-01_pr...,cd3897a2-bc35-4d9a-9bf2-d86fd9c850ab,multiplane-ophys-v4,cd38c2a7-5ed7-4bf7-a138-e0f3243ff394,multiplane-ophys_730929_2024-06-20_13-28-01,multiplane-ophys_730929_2024-06-20,1374732829,1374600699,ribo_aav_ro,...,375,75_375,STAGE_1,wt/wt,16.50,16.50,1.036813,MESO.2,\\allen\programs\mindscope\production\learning...,multiplane-ophys_730929_2024-06-20
2,9a91c530-dcee-4151-a6ac-374c35923e9f,multiplane-ophys_730929_2024-06-14_12-23-57_pr...,cd3897a2-bc35-4d9a-9bf2-d86fd9c850ab,multiplane-ophys-v4,44f03c06-e660-4747-9c2c-cdd5b7521d8b,multiplane-ophys_730929_2024-06-14_12-23-57,multiplane-ophys_730929_2024-06-14,1373430357,1373231491,ribo_aav_ro,...,275,175_275,STAGE_1,wt/wt,13.50,13.50,1.036823,MESO.2,\\allen\programs\mindscope\production\learning...,multiplane-ophys_730929_2024-06-14
3,1d53f2c7-9c3c-455c-9975-ed36e18af6d5,multiplane-ophys_729088_2024-06-11_08-34-49_pr...,cd3897a2-bc35-4d9a-9bf2-d86fd9c850ab,multiplane-ophys-v4,607bc4cc-1018-4753-a32c-0b2ce7cf6bf9,multiplane-ophys_729088_2024-06-11_08-34-49,multiplane-ophys_729088_2024-06-11,1372472098,1372240290,slc32a1_oi4,...,375,75_375,STAGE_1,Slc32a1-IRES-Cre/wt;Oi4(TIT2L-jGCaMP8s-RiboL1-...,9.00,9.00,1.036863,MESO.2,\\allen\programs\mindscope\production\learning...,multiplane-ophys_729088_2024-06-11
4,c9257f7c-2306-47a3-9bc6-1258ab761084,multiplane-ophys_729088_2024-06-10_09-47-15_pr...,cd3897a2-bc35-4d9a-9bf2-d86fd9c850ab,multiplane-ophys-v4,741137b9-b5d2-4f96-bd07-e4dda4c6e78d,multiplane-ophys_729088_2024-06-10_09-47-15,multiplane-ophys_729088_2024-06-10,1372159256,1371955818,slc32a1_oi4,...,375,75_375,OPHYS_2_images_A_passive,Slc32a1-IRES-Cre/wt;Oi4(TIT2L-jGCaMP8s-RiboL1-...,7.50,7.50,1.036847,MESO.2,\\allen\programs\mindscope\production\learning...,multiplane-ophys_729088_2024-06-10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57,034cb223-63f3-43f5-b1ef-dbc36fb7cb02,multiplane-ophys_687000_2023-10-18_10-28-37_pr...,cd3897a2-bc35-4d9a-9bf2-d86fd9c850ab,multiplane-ophys-v4,a9e65454-f9ff-410f-96cb-b0b3b2f2ce7c,multiplane-ophys_687000_2023-10-18_10-28-37,multiplane-ophys_687000_2023-10-18,1304736265,1304601605,slc32a1_oi1,...,75,75_375,STAGE_1,Slc32a1-IRES-Cre/wt;Oi1(TIT2L-jGCaMP8s-WPRE-IC...,14.25,14.25,0.036498,MESO.1,\\allen\programs\braintv\production\neuralcodi...,multiplane-ophys_687000_2023-10-18
58,7f708e45-b7f8-4022-97ef-026a7ea06ccb,multiplane-ophys_687000_2023-09-28_13-53-05_pr...,cd3897a2-bc35-4d9a-9bf2-d86fd9c850ab,multiplane-ophys-v4,eed1d09a-17d6-4c9a-a1c6-e028bf987012,multiplane-ophys_687000_2023-09-28_13-53-05,multiplane-ophys_687000_2023-09-28,1300330368,1300230526,slc32a1_oi1,...,75,75_375,OPHYS_2_images_A_passive,Slc32a1-IRES-Cre/wt;Oi1(TIT2L-jGCaMP8s-WPRE-IC...,15.00,15.00,0.035941,MESO.2,\\allen\programs\braintv\production\neuralcodi...,multiplane-ophys_687000_2023-09-28
59,e7e08e19-344d-4437-b7dc-21374a1c29d3,multiplane-ophys_687000_2023-09-27_13-29-18_pr...,cd3897a2-bc35-4d9a-9bf2-d86fd9c850ab,multiplane-ophys-v4,2dd386e0-35fe-4d0e-be7c-c6b64a3e7fef,multiplane-ophys_687000_2023-09-27_13-29-18,multiplane-ophys_687000_2023-09-27,1300119997,1300000845,slc32a1_oi1,...,275,175_275,OPHYS_2_images_A_passive,Slc32a1-IRES-Cre/wt;Oi1(TIT2L-jGCaMP8s-WPRE-IC...,12.00,12.00,0.035941,MESO.2,\\allen\programs\braintv\production\neuralcodi...,multiplane-ophys_687000_2023-09-27
60,9b78c4de-9982-479d-a718-30c91e29e413,multiplane-ophys_687000_2023-09-26_08-56-32_pr...,cd3897a2-bc35-4d9a-9bf2-d86fd9c850ab,multiplane-ophys-v4,166c01a3-7e31-41c4-b761-a21a222f16fd,multiplane-ophys_687000_2023-09-26_08-56-32,multiplane-ophys_687000_2023-09-26,1299885636,1299688535,slc32a1_oi1,...,75,75_375,STAGE_1,Slc32a1-IRES-Cre/wt;Oi1(TIT2L-jGCaMP8s-WPRE-IC...,18.00,18.00,0.035916,MESO.2,\\allen\programs\braintv\production\neuralcodi...,multiplane-ophys_687000_2023-09-26
