In [1]:
import visual_behavior_glm.GLM_analysis_tools as gat
import pandas as pd
from multiprocessing import Pool
import numpy as np
from visual_behavior.data_access import loading
import os
# Let pandas show up to 500 columns when a wide DataFrame is displayed.
pd.set_option('display.max_columns', 500)

In [2]:
# Import from the public IPython.display module; importing `display` from
# IPython.core.display is deprecated.
from IPython.display import display, HTML
# Inject CSS that stretches the notebook page container to the full browser width.
# NOTE(review): the `.container` selector presumably targets the classic Notebook
# layout - confirm it has an effect in the frontend actually in use.
display(HTML("<style>.container { width:100% !important; }</style>"))

In [3]:
# The three GLM version names differ only in their middle token
# (engaged / disengaged / all), so build them from a single template.
versions = [
    '16_events_{}_L2_optimize_by_session'.format(subset)
    for subset in ('engaged', 'disengaged', 'all')
]

In [4]:
# Retrieve the results table for each GLM version, then stack them into one
# DataFrame (sort=True orders the columns of the concatenated frame).
per_version_results = []
for version in versions:
    print('loading results for version {}'.format(version))
    version_results = gat.retrieve_results({'glm_version': version})
    per_version_results.append(version_results)
results = pd.concat(per_version_results, sort=True)

loading results for version 16_events_engaged_L2_optimize_by_session
loading results for version 16_events_disengaged_L2_optimize_by_session
loading results for version 16_events_all_L2_optimize_by_session


In [5]:
def get_fraction_engaged(bsid):
    '''
    compute the mean of the 'engaged' column of the behavior model outputs for one session

    bsid: behavior_session_id, passed to loading.load_behavior_model_outputs

    returns a dict with keys:
        'behavior_session_id': the bsid that was passed in
        'fraction_engaged': mean of model_outputs['engaged'], or np.nan if the
                            model outputs could not be loaded or processed
    '''
    try:
        model_outputs = loading.load_behavior_model_outputs(bsid)
        fraction_engaged = model_outputs['engaged'].mean()
    except Exception:
        # Best effort: a session without usable model outputs is recorded as NaN.
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit still propagate.
        fraction_engaged = np.nan
    return {'behavior_session_id': bsid, 'fraction_engaged': fraction_engaged}

In [6]:
%%time
with Pool(32) as pool:
    engagement_df = pd.DataFrame(pool.map(get_fraction_engaged, results['behavior_session_id'].drop_duplicates()))



CPU times: user 56.7 ms, sys: 205 ms, total: 262 ms
Wall time: 373 ms


In [15]:
indices = [
    'ophys_experiment_id',
    'behavior_session_id',
    'session_type',
    'fraction_engaged',
    'glm_version',
]
experiment_keys = ['ophys_experiment_id', 'glm_version']

# One row per (experiment, glm_version): mean training variance explained,
# annotated with session metadata and the session's fraction_engaged.
# The metadata is reduced to one row per key *before* merging, so the left
# merge stays one-to-one instead of re-expanding to every row of `results`
# and being de-duplicated afterwards.
(
    results
    .groupby(experiment_keys)[['Full__avg_cv_var_train']]
    .mean()
    .reset_index()
    .merge(
        results[experiment_keys + ['behavior_session_id', 'session_type']]
        .drop_duplicates(subset=experiment_keys),
        on=experiment_keys,
        how='left',
    )
    .merge(engagement_df, on='behavior_session_id')
    .sort_values(by=['fraction_engaged', 'ophys_experiment_id', 'glm_version'])
    .set_index(indices)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Full__avg_cv_var_train
ophys_experiment_id,behavior_session_id,session_type,fraction_engaged,glm_version,Unnamed: 5_level_1
833631914,832981154,OPHYS_6_images_B,0.0,16_events_all_L2_optimize_by_session,0.016006
833631914,832981154,OPHYS_6_images_B,0.0,16_events_disengaged_L2_optimize_by_session,0.016403
833631914,832981154,OPHYS_6_images_B,0.0,16_events_engaged_L2_optimize_by_session,
836258957,835820832,OPHYS_3_images_A,0.022056,16_events_all_L2_optimize_by_session,0.012166
836258957,835820832,OPHYS_3_images_A,0.022056,16_events_disengaged_L2_optimize_by_session,0.012079
836258957,835820832,OPHYS_3_images_A,0.022056,16_events_engaged_L2_optimize_by_session,
993369861,993267224,OPHYS_3_images_A,0.441226,16_events_all_L2_optimize_by_session,0.052366
993369861,993267224,OPHYS_3_images_A,0.441226,16_events_disengaged_L2_optimize_by_session,0.057196
993369861,993267224,OPHYS_3_images_A,0.441226,16_events_engaged_L2_optimize_by_session,0.055538
957759564,957032492,OPHYS_4_images_B,0.586617,16_events_all_L2_optimize_by_session,0.047866


In [8]:
# Look up which kind of ID the integer below is; get_id_type returns the ID type as a string.
# NOTE(review): this import sits mid-notebook - consider moving it to the first (imports) cell.
from visual_behavior.data_access.from_lims import get_id_type
get_id_type(914639324)

'ophys_session_id'

In [9]:
def search_file(filepath,search_phrase):
    '''
    search a file, line by line, for a search_phrase

    filepath: path of the text file to read
    search_phrase: substring to look for within each line

    return True if search_phrase is found in any line of the file, False otherwise
    (scanning stops at the first matching line)
    '''
    # the context manager closes the file on every exit path, including when
    # reading raises (e.g. a decoding error part-way through the file)
    with open(filepath, "r") as searchfile:
        return any(search_phrase in line for line in searchfile)

In [10]:
def find_filename_containing_string(search_phrase, search_path, filename_extension, verbose=False):
    '''
    search all files in a directory for a given string

    search_phrase: substring to look for inside each candidate file
    search_path: directory whose entries are listed (not searched recursively)
    filename_extension: only filenames ending with this string are searched
    verbose: if True, print a progress line for each file as it is searched

    files are visited in reverse-sorted filename order and the search stops at
    the first match

    returns the filename (a plain str, without the directory) if found, None otherwise
    '''
    files_to_search = [f for f in os.listdir(search_path) if f.endswith(filename_extension)]
    # the built-in sort keeps the names as plain str (np.sort would yield numpy.str_)
    for ii, filename in enumerate(sorted(files_to_search, reverse=True)):
        if verbose:
            # '\r' makes each progress message overwrite the previous one
            print('searching file #{}, name: {}'.format(ii, filename), end='\r')
        if search_file(os.path.join(search_path, filename), search_phrase):
            return filename
    return None

In [11]:
def search_for_oeid(oeid, glm_version,search_path='/allen/programs/braintv/workgroups/nc-ophys/visual_behavior/cluster_jobs/ophys_glm', verbose=False):
    '''
    search all .out files in a directory for a given oeid/glm_version STDOUT file

    oeid: ophys_experiment_id, formatted into the search phrase
    glm_version: GLM version string, formatted into the search phrase
    search_path: directory containing the .out files to search
    verbose: if True, print what is being searched for and per-file progress

    a file matches if it contains the phrase 'oeid_<oeid>_fit_glm_v_<glm_version>'

    returns the full path of the first matching file, or None if no file matches
    '''
    if verbose:
        print('searching for oeid = {}, glm_version = {}'.format(oeid, glm_version))
    search_phrase = 'oeid_{}_fit_glm_v_{}'.format(oeid, glm_version)
    filename = find_filename_containing_string(search_phrase, search_path, filename_extension='.out', verbose=verbose)
    if filename is None:
        # no match: os.path.join(search_path, None) would raise a TypeError
        return None
    return os.path.join(search_path, filename)

In [12]:
# Locate the cluster STDOUT (.out) file for one experiment / GLM version.
# NOTE(review): search_path points at 'cluster_records_v12' while glm_version starts
# with '16' - confirm this is the intended directory.
search_for_oeid(
    oeid=833631914, 
    glm_version='16_events_disengaged_L2_optimize_by_session',
    search_path='/allen/programs/braintv/workgroups/nc-ophys/visual_behavior/ophys_glm/cluster_records_v12'
)

'/allen/programs/braintv/workgroups/nc-ophys/visual_behavior/ophys_glm/cluster_records_v12/29588847.qmaster2.corp.alleninstitute.org.out'