# Purpose:
- To query LIMS data.
- A stopgap until CO docDB search and metadata are well established (e.g., for answering which sessions are drifting gratings).

# Pre-requisites:
- brain_observatory_qc - updated version (7/10/2024; from_lims_update branch)
- For comparison: AllenSDK
# Misc:
- Copied from jinho_data_analysis/pilots/231106_GCaMP8s_vs_8m_from_lims.ipynb

In [4]:
from pathlib import Path
import numpy as np
import pandas as pd
import h5py
from brain_observatory_qc.data_access import from_lims

from allensdk.brain_observatory.behavior.behavior_project_cache import VisualBehaviorOphysProjectCache as bpc


In [None]:
# helper function
def get_session_info_per_group_of_mice(mouse_ids, gcamp_type):
    """Collect LIMS experiment info for a group of mice sharing one GCaMP label.

    Parameters
    ----------
    mouse_ids : iterable
        Mouse IDs, each passed to ``from_lims.get_imaging_ids_for_mouse_id``.
    gcamp_type : str
        Label written to the ``gcamp`` column of every returned row.

    Returns
    -------
    experiment_table : pandas.DataFrame
        Concatenation of the tables returned by
        ``from_lims.get_general_info_for_ophys_experiment_id``, with
        ``mouse_id``, ``full_genotype`` and ``gcamp`` columns added. When no
        experiment is found it has no rows and only the ``gcamp`` column.
    problem_experiments : list
        ophys_experiment_ids for which any of the LIMS calls (or the column
        insertion) raised an exception.
    """
    expt_frames = []
    problem_experiments = []
    for mouse_id in mouse_ids:
        mouse_data = from_lims.get_imaging_ids_for_mouse_id(mouse_id)
        if len(mouse_data) == 0:
            continue
        for ophys_experiment_id in mouse_data.ophys_experiment_id.values:
            try:
                expt_info = from_lims.get_general_info_for_ophys_experiment_id(ophys_experiment_id)
                if len(expt_info) > 0:
                    genotype = from_lims.get_genotype_for_ophys_experiment_id(ophys_experiment_id)
                    # expt_info only contains `donor_id` and `specimen_id`, so add mouse_id here
                    expt_info.insert(0, "mouse_id", mouse_id)
                    # the base function does not return the genotype, so add it
                    expt_info.insert(1, "full_genotype", genotype)
                    expt_frames.append(expt_info)
            except Exception:
                # Best effort: record the failing experiment and keep going.
                # `Exception` (not a bare except) lets Ctrl-C / SystemExit through.
                problem_experiments.append(ophys_experiment_id)
    # Concatenate once at the end instead of re-copying the table on every iteration.
    experiment_table = pd.concat(expt_frames) if expt_frames else pd.DataFrame()
    experiment_table['gcamp'] = gcamp_type
    return experiment_table, problem_experiments


# Manual log of mouse IDs

In [167]:
# Mouse IDs per GCaMP group. Each list is later passed to
# get_session_info_per_group_of_mice together with a label equal to the
# variable name without the `mids_` prefix. Empty lists contribute no rows.
mids_ribo_aav_local = [719364, 726087, 719363]
mids_ribo_aav_ro = [730929, 730932, 730933]
mids_ribo_aav_icv = []
mids_snap25_oi4_dox = [726433]
mids_slc32a1_oi4 = [724567, 729088]
mids_slc17a7_oi4 = []
mids_slc32a1_oi1 = [687000, 693996, 692478, 687001]  # need to exclude ophys session ID 1303235340 (listed in remove_session_ids)
mids_slc17a7_oi1_dox = [733794]
mids_cux2_oi1 = []

# ophys_session_ids dropped from the final per-session table during post-processing
remove_session_ids = [1303235340]

# Using from_lims_utilities
- updated version (currently in from_lims_update branch; 07/10/2024)

In [105]:
# Build one experiment table across all GCaMP groups.
# Maps the label written to the `gcamp` column -> mouse IDs of that group.
mouse_groups = {
    'ribo_aav_local': mids_ribo_aav_local,
    'ribo_aav_ro': mids_ribo_aav_ro,
    'ribo_aav_icv': mids_ribo_aav_icv,
    'snap25_oi4_dox': mids_snap25_oi4_dox,
    'slc32a1_oi4': mids_slc32a1_oi4,
    'slc17a7_oi4': mids_slc17a7_oi4,
    'slc32a1_oi1': mids_slc32a1_oi1,
    'slc17a7_oi1_dox': mids_slc17a7_oi1_dox,
    'cux2_oi1': mids_cux2_oi1,
}

group_tables = []
problems_table = []  # ophys_experiment_ids whose LIMS queries failed
for gcamp_type, group_mouse_ids in mouse_groups.items():
    temp_table, temp_problems = get_session_info_per_group_of_mice(group_mouse_ids, gcamp_type)
    group_tables.append(temp_table)
    problems_table.extend(temp_problems)

# Single concat over all groups (same group order as the dict above)
gcamp_table = pd.concat(group_tables)



In [110]:
# Sanity checks before post-processing: every row must carry a session ID and
# no experiment may have failed during the LIMS queries.
n_missing_session_ids = gcamp_table.ophys_session_id.isna().sum()
assert n_missing_session_ids == 0, f'{n_missing_session_ids} rows have no ophys_session_id'
assert len(problems_table) == 0, f'LIMS queries failed for experiment IDs: {problems_table}'

In [70]:
# Show the column names available in the combined experiment table
gcamp_table.columns

Index(['mouse_id', 'full_genotype', 'ophys_experiment_id', 'ophys_session_id',
       'behavior_session_id', 'foraging_id', 'ophys_container_id',
       'supercontainer_id', 'experiment_workflow_state',
       'session_workflow_state', 'container_workflow_state', 'specimen_id',
       'donor_id', 'specimen_name', 'date_of_acquisition', 'session_type',
       'targeted_structure', 'depth', 'equipment_name', 'project',
       'experiment_storage_directory', 'behavior_storage_directory',
       'session_storage_directory', 'container_storage_directory',
       'supercontainer_storage_directory', 'specimen_storage_directory',
       'gcamp'],
      dtype='object')

In [168]:
# Post-processing: turn the per-experiment table into one row per session.

# Cast ID and depth columns to integers
column_names_to_int = ['ophys_session_id', 'ophys_experiment_id', 'mouse_id', 'depth']
gcamp_table = gcamp_table.astype({cn: int for cn in column_names_to_int})

# Keep sessions with exactly 2 planes only (others are errors or tests).
# The original computed `correct_session_ids` but never applied it.
plane_counts = gcamp_table.groupby('ophys_session_id').ophys_experiment_id.count()
correct_session_ids = plane_counts[plane_counts == 2].index.values
# Use a plain boolean array so the filter does not depend on index alignment
# (the concatenated table can carry repeated index labels).
is_correct_session = gcamp_table.ophys_session_id.isin(correct_session_ids).to_numpy()
gcamp_table = gcamp_table.loc[is_correct_session].copy()

# Add target depths and target depth pairs.
# With right=False the bins are [0, 125), [125, 225), [225, 325), [325, 500);
# depths outside these bins get NaN.
depth_divider = [0, 125, 225, 325, 500]
depth_labels = [75, 175, 275, 375]
gcamp_table['target_depth'] = pd.cut(gcamp_table.depth, bins=depth_divider, labels=depth_labels, right=False)
# Join the target depths of a session's planes in row order, e.g. '275_175'
gcamp_table['target_depth_pair'] = gcamp_table.groupby('ophys_session_id').target_depth.transform(lambda x: '_'.join(x.astype(str)))

# Select columns to save
session_info_columns = ['ophys_session_id', 'gcamp', 'mouse_id', 'date_of_acquisition', 'session_storage_directory',
                        'target_depth_pair', 'session_type', 'full_genotype',
                        'equipment_name', 'session_workflow_state', 'specimen_storage_directory']
# De-duplicate BEFORE set_index: drop_duplicates ignores the index, so doing it
# afterwards would compare rows without their session ID.
gcamp_session_info = gcamp_table[session_info_columns].drop_duplicates().set_index('ophys_session_id')

# Format date_of_acquisition as a string so it shows up correctly in the csv
gcamp_session_info['date_of_acquisition'] = gcamp_session_info['date_of_acquisition'].dt.strftime('%Y-%m-%d_%H-%M-%S')

# Manual removal of erroneous sessions (IDs that are absent are skipped)
gcamp_session_info = gcamp_session_info.drop(remove_session_ids, errors='ignore')

In [164]:
# Preview the first rows of the per-session table
gcamp_session_info.head()

Unnamed: 0_level_0,gcamp,mouse_id,date_of_acquisition,session_storage_directory,target_depth_pair,session_type,full_genotype,equipment_name,session_workflow_state,specimen_storage_directory
ophys_session_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1363258688,ribo_aav_local,719364,2024-04-30_18-18-26,\\allen\programs\mindscope\production\learning...,275_175,STAGE_1,wt/wt,MESO.2,uploaded,\\allen\programs\mindscope\production\learning...
1346243082,ribo_aav_local,719364,2024-04-18_20-01-34,\\allen\programs\mindscope\production\learning...,375_75,OPHYS_2_images_A_passive,wt/wt,MESO.2,uploaded,\\allen\programs\mindscope\production\learning...
1345660871,ribo_aav_local,719364,2024-04-16_16-34-01,\\allen\programs\mindscope\production\learning...,175_275,OPHYS_2_images_A_passive,wt/wt,MESO.2,uploaded,\\allen\programs\mindscope\production\learning...
1345413618,ribo_aav_local,719364,2024-04-15_16-36-50,\\allen\programs\mindscope\production\learning...,375_75,STAGE_1,wt/wt,MESO.2,uploaded,\\allen\programs\mindscope\production\learning...
1344701509,ribo_aav_local,719364,2024-04-12_16-34-45,\\allen\programs\mindscope\production\learning...,175_275,STAGE_1,wt/wt,MESO.2,uploaded,\\allen\programs\mindscope\production\learning...


## Saving the result

In [170]:
# Write the per-session table as CSV into the GCaMP8 pilot directory
save_dir = Path(r'\\allen\programs\mindscope\workgroups\learning\pilots\GCaMP8')
save_fn = save_dir.joinpath('gcamp_pilot_data_240710.csv')
gcamp_session_info.to_csv(save_fn)

### Comparing with the sessions in the behaviorProjectCache tables

In [None]:
# Getting results from allensdk
cache = bpc.from_lims()
table = cache.get_ophys_experiment_table(passed_only=False)

# Label written to the `gcamp` column -> mouse IDs of that group
stage1_groups = {
    'ribo_aav_local': mids_ribo_aav_local,
    'ribo_aav_ro': mids_ribo_aav_ro,
    'snap25_oi4_dox': mids_snap25_oi4_dox,
    'slc32a1_oi4': mids_slc32a1_oi4,
    'slc17a7_oi1_dox': mids_slc17a7_oi1_dox,
    'slc32a1_oi1': mids_slc32a1_oi1,
}

group_frames = []
for gcamp_type, group_mouse_ids in stage1_groups.items():
    # NOTE: only STAGE_1 sessions are selected here, so sessions of any other
    # session_type never appear in lims_path_info.
    stage1_rows = table.query('mouse_id in @group_mouse_ids and session_type=="STAGE_1"')
    # De-duplicate while the session ID is still a column (drop_duplicates
    # ignores the index), then index by session.
    temp = (
        stage1_rows[['ophys_session_id', 'mouse_id', 'date_of_acquisition']]
        .drop_duplicates()
        .set_index('ophys_session_id')
    )
    # Directory containing the session h5 file reported by from_lims
    temp['lims_path'] = temp.index.map(lambda session_id: from_lims.get_session_h5_filepath(session_id).parent)
    temp['gcamp'] = gcamp_type
    group_frames.append(temp)

# Drop the manually excluded sessions; errors='ignore' avoids a KeyError when an
# excluded ID is not among the selected sessions.
lims_path_info = pd.concat(group_frames).drop(remove_session_ids, errors='ignore')

In [165]:
# Preview the first rows of the AllenSDK-derived table
lims_path_info.head()

Unnamed: 0_level_0,mouse_id,date_of_acquisition,lims_path,gcamp
ophys_session_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1363436007,726087,2024-05-01 16:00:28.374,\\allen\programs\mindscope\production\learning...,ribo_aav_local
1363660075,726087,2024-05-02 17:40:50.123,\\allen\programs\mindscope\production\learning...,ribo_aav_local
1373890769,726087,2024-06-17 16:01:47.183,\\allen\programs\mindscope\production\learning...,ribo_aav_local
1374103167,726087,2024-06-18 17:34:38.386,\\allen\programs\mindscope\production\learning...,ribo_aav_local
1347962182,719363,2024-04-26 16:40:02.748,\\allen\programs\mindscope\production\learning...,ribo_aav_local


In [158]:
# Number of sessions in the AllenSDK-derived table
len(lims_path_info)

38

In [160]:
# Count session IDs present in gcamp_session_info but absent from lims_path_info
only_in_lims = np.setdiff1d(gcamp_session_info.index.values, lims_path_info.index.values)
len(only_in_lims)

38

In [161]:
# Show the sessions from gcamp_session_info whose ID is not in lims_path_info
found_in_allensdk = gcamp_session_info.index.isin(lims_path_info.index.values)
gcamp_session_info.loc[~found_in_allensdk]

Unnamed: 0_level_0,gcamp,mouse_id,date_of_acquisition,session_storage_directory,target_depth_pair,session_type,full_genotype,equipment_name,session_workflow_state,specimen_storage_directory
ophys_session_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1346243082,ribo_aav_local,719364,2024-04-18 20:01:34.113,\\allen\programs\mindscope\production\learning...,375_75,OPHYS_2_images_A_passive,wt/wt,MESO.2,uploaded,\\allen\programs\mindscope\production\learning...
1345660871,ribo_aav_local,719364,2024-04-16 16:34:01.738,\\allen\programs\mindscope\production\learning...,175_275,OPHYS_2_images_A_passive,wt/wt,MESO.2,uploaded,\\allen\programs\mindscope\production\learning...
1373167919,ribo_aav_local,726087,2024-06-14 15:32:17.526,\\allen\programs\mindscope\production\learning...,75_375,OPHYS_2_images_A_passive,wt/wt,MESO.2,uploaded,\\allen\programs\mindscope\production\learning...
1372856980,ribo_aav_local,726087,2024-06-13 17:47:50.392,\\allen\programs\mindscope\production\learning...,175_275,OPHYS_2_images_A_passive,wt/wt,MESO.2,uploaded,\\allen\programs\mindscope\production\learning...
1364932026,ribo_aav_local,726087,2024-05-08 17:06:03.137,\\allen\programs\mindscope\production\learning...,375_75,OPHYS_2_images_A_passive,wt/wt,MESO.2,uploaded,\\allen\programs\mindscope\production\learning...
1364667229,ribo_aav_local,726087,2024-05-07 17:49:47.971,\\allen\programs\mindscope\production\learning...,275_175,OPHYS_2_images_A_passive,wt/wt,MESO.2,uploaded,\\allen\programs\mindscope\production\learning...
1370881864,ribo_aav_local,719363,2024-06-05 15:40:39.832,\\allen\programs\mindscope\production\learning...,375_75,OPHYS_2_images_A_passive,wt/wt,MESO.2,uploaded,\\allen\programs\mindscope\production\learning...
1370649434,ribo_aav_local,719363,2024-06-04 15:41:08.712,\\allen\programs\mindscope\production\learning...,275_175,OPHYS_2_images_A_passive,wt/wt,MESO.2,uploaded,\\allen\programs\mindscope\production\learning...
1365176380,ribo_aav_local,719363,2024-05-09 15:53:25.895,\\allen\programs\mindscope\production\learning...,175_275,OPHYS_2_images_A_passive,wt/wt,MESO.2,uploaded,\\allen\programs\mindscope\production\learning...
1366195973,ribo_aav_local,719363,2024-05-13 18:55:45.091,\\allen\programs\mindscope\production\learning...,375_75,OPHYS_2_images_A_passive,wt/wt,MESO.2,uploaded,\\allen\programs\mindscope\production\learning...


## No noticeable difference between those included in allensdk and those not included in allensdk