In [14]:
%load_ext autoreload
%autoreload 2

import os
import numpy as np
import pandas as pd 
import sys
import pickle
import itertools
import seaborn as sns
import matplotlib.pyplot as plt

# Resolve the root of the enclosing git checkout, put it on sys.path so that
# modules under it can be imported, and make it the working directory so that
# relative paths used later resolve from the repository root.
with os.popen('git rev-parse --show-toplevel') as git_toplevel:
    # Strip only the trailing line terminator; the previous `[:-1]` removed the
    # last character unconditionally, even when git printed nothing.
    workingdirectory = git_toplevel.read().rstrip('\r\n')
if not workingdirectory:
    # git printed no path (not inside a checkout, or git is unavailable).
    raise RuntimeError(
        "could not determine the repository root with "
        "'git rev-parse --show-toplevel'; start the notebook inside the git checkout"
    )
# Re-running this cell must not keep appending the same entry.
if workingdirectory not in sys.path:
    sys.path.append(workingdirectory)
os.chdir(workingdirectory)

import allensdk.core.json_utilities as ju
from allensdk.core.mouse_connectivity_cache import MouseConnectivityCache

from mcmodels.core import Mask,ModelData,VoxelModelCache
from mcmodels.core.utils import get_structure_id, get_ordered_summary_structures,get_minorstructures,get_loss_paper
from mcmodels.utils import nonzero_unique, unionize
from mcmodels.core.experiment import get_voxeldata_msvd
from mcmodels.models.crossvalidation import get_best_hyperparameters,get_loss_best_hyp,get_loocv_predictions,get_loss
from mcmodels.core.utils import get_cre_status,get_minorstructure_dictionary,get_leaves_ontologicalorder
from mcmodels.core.utils import get_regionalized_normalized_data
from mcmodels.core.utils import get_connectivity
from mcmodels.core.utils import get_ontological_order_leaf
from mcmodels.core.utils import get_nw_loocv,get_wt_inds
from mcmodels.core.utils import get_countvec, get_twoormore

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


[autoreload of mcmodels.core.utils failed: Traceback (most recent call last):
  File "/Users/samsonkoelle/anaconda3/envs/allen_010719_5/lib/python3.7/site-packages/IPython/extensions/autoreload.py", line 245, in check
    superreload(m, reload, self.old_objects)
  File "/Users/samsonkoelle/anaconda3/envs/allen_010719_5/lib/python3.7/site-packages/IPython/extensions/autoreload.py", line 394, in superreload
    module = reload(module)
  File "/Users/samsonkoelle/anaconda3/envs/allen_010719_5/lib/python3.7/imp.py", line 314, in reload
    return importlib.reload(module)
  File "/Users/samsonkoelle/anaconda3/envs/allen_010719_5/lib/python3.7/importlib/__init__.py", line 169, in reload
    _bootstrap._exec(spec, module)
  File "<frozen importlib._bootstrap>", line 630, in _exec
  File "<frozen importlib._bootstrap_external>", line 728, in exec_module
  File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
  File "/Users/samsonkoelle/alleninstitute/sambranch/mouse_conn

ImportError: cannot import name 'get_twoormore' from 'mcmodels.core.utils' (/Users/samsonkoelle/alleninstitute/sambranch/mouse_connectivity_models/mcmodels/core/utils.py)

In [2]:
# Read configuration and reference data.
# Locations of the model inputs and outputs on the local machine.
TOP_DIR = '/Users/samsonkoelle/alleninstitute/mcm_2020/mcm_updated/'
FILE_DIR = '/Users/samsonkoelle/alleninstitute/mcm_2020/mcm_updated/'
INPUT_JSON = os.path.join(TOP_DIR, 'input_011520.json')
EXPERIMENTS_EXCLUDE_JSON = os.path.join(TOP_DIR, 'experiments_exclude.json')
OUTPUT_DIR = os.path.join(FILE_DIR, 'output')

# The input JSON names the manifest file (relative to TOP_DIR) and the major structures.
input_data = ju.read(INPUT_JSON)
manifest_file = os.path.join(TOP_DIR, input_data.get('manifest_file'))
experiments_exclude = ju.read(EXPERIMENTS_EXCLUDE_JSON)

# NOTE: it is unclear why the hyperparameters are loaded from the output directory.
cache = VoxelModelCache(manifest_file=manifest_file)
major_structures = input_data.get('structures')
major_structure_ids = [
    get_structure_id(cache, structure_name) for structure_name in major_structures
]

# Curated experiment metadata, indexed by the "id" column.
data_info = pd.read_excel(
    '/Users/samsonkoelle/alleninstitute/Whole Brain Cre Image Series_curation only.xlsx',
    'all datasets curated_070919pull',
).set_index("id")
ontological_order = get_ordered_summary_structures(cache)

# Acronym -> id map from the AllenSDK structure tree, plus its inverse (id -> acronym).
mcc = MouseConnectivityCache(manifest_file = '../connectivity/mouse_connectivity_manifest.json')
st = mcc.get_structure_tree()
ai_map = st.get_id_acronym_map()
ia_map = dict(zip(ai_map.values(), ai_map.keys()))

# Regionalize voxel model: compare with regional model.
# Regional parameters.
cre = None
eid_set = None
high_res = False
threshold_injection = False

# Default structures are those in structure set 2, with structure id 934 left out.
COARSE_STRUCTURE_SET_ID = 2
DEFAULT_STRUCTURE_SET_IDS = (COARSE_STRUCTURE_SET_ID,)
tree = cache.get_structure_tree()
default_structures = tree.get_structures_by_set_id(DEFAULT_STRUCTURE_SET_IDS)
default_structure_ids = [
    structure['id'] for structure in default_structures if structure['id'] != 934
]

In [3]:
# Load the voxel-level data for every major structure, keyed by structure id.
msvds = {}
for sid in major_structure_ids:
    print(sid)
    voxel_data = ModelData(cache, sid)
    # Experiment ids for this structure after exclusions; not used again in this cell.
    experiment_ids = np.asarray(
        list(voxel_data.get_experiment_ids(experiments_exclude=experiments_exclude, cre=cre))
    )
    # Get injections and projections on the voxel level.
    # Note that a preprocessing screen is applied in AllenSDK to mask at projection
    # and injection boundaries: voxel intensities outside these regions are 0 in the
    # corresponding projection vector.
    msvd = get_voxeldata_msvd(cache, sid, experiments_exclude, default_structure_ids, cre)
    msvds[sid] = msvd

512
None
703
None
1089
None
1097
None
315
None
313
None
354
None
698
None
771
None
803
None
477
None
549
None


In [5]:
# Dictionary of Cre line by experiment.
creline = get_cre_status(data_info, msvds)

# Dictionary of minor structures for each experiment in each major division.
# Segregating by major division is legacy code, but convenient for fast
# cross-validation in the major division model.
experiments_minor_structures = get_minorstructure_dictionary(msvds, data_info)

# Leaves in ontological order; where leaves don't exist, the summary structure is used.
# `msvd` is the object left over from the last iteration of the loading loop.
ontological_order_leaves = get_leaves_ontologicalorder(msvd, ontological_order)

# The key isn't affected by which experiment we choose, so take the first one.
# This allows default masking to be inherited from the AllenSDK.
key = list(msvd.experiments.keys())[0]
reference_mask = msvd.experiments[key].projection_mask

# Keys denoting which voxels correspond to which structure in the contra and ipsi targets.
contra_targetkey = reference_mask.get_key(structure_ids=ontological_order_leaves, hemisphere_id=1)
ipsi_targetkey = reference_mask.get_key(structure_ids=ontological_order_leaves, hemisphere_id=2)

# Average intensities of projection structures given the ipsi and contra keys.
# source_key is only relevant when the injection needs to be unionized, but it is
# currently a required argument.
source_key = ontological_order
msvds = get_regionalized_normalized_data(msvds, cache, source_key, ipsi_targetkey, contra_targetkey)

In [13]:
# Display the number of entries in `leavves`.
# NOTE(review): `leavves` is not assigned in any cell visible here, so this line
# depends on kernel state defined elsewhere and would raise NameError on a fresh
# kernel unless it is defined in a cell outside this view. Possibly
# `ontological_order_leaves` was intended — TODO confirm.
len(leavves)

541

In [None]:
# Gamma values tried during cross-validation.
gammas = np.asarray([0.1,.5,1,2,10])

# Indices of two or more wild type in the same leaf.
wt_2ormore = get_wt_inds(creline)

# Leave-one-out predictions per major structure, computed four ways: without
# codes, coded by leaf, coded by Cre line, and coded by leaf and Cre line together.
# NOTE(review): neither `leafs` nor `get_loocv_predictions_code` is defined or
# imported in the cells visible here — confirm where they come from.
for sid in major_structure_ids:
    print(sid)
    structure_data = msvds[sid]
    # Arguments common to all four calls.
    shared_args = dict(
        projections=structure_data.reg_proj_vcount_norm_renorm,
        centroids=structure_data.centroids,
        gammas=gammas,
    )

    structure_data.loocv_predictions_all = get_loocv_predictions_code(**shared_args)

    # Leaf labels with an extra axis inserted at position 1.
    leaf_codes = np.expand_dims(leafs[sid], axis=1)
    structure_data.loocv_predictions_leaf = get_loocv_predictions_code(
        codes=leaf_codes, **shared_args
    )

    # Cre labels cast to str, with an extra axis inserted at position 1.
    cre_codes = np.expand_dims(np.asarray(creline[sid], dtype=str), axis=1)
    structure_data.loocv_predictions_cre = get_loocv_predictions_code(
        codes=cre_codes, **shared_args
    )

    # Leaf and Cre labels stacked, cast to str, then transposed.
    creleaf_codes = np.asarray(np.vstack([leafs[sid], creline[sid]]), dtype=str).transpose()
    structure_data.loocv_predictions_creleaf = get_loocv_predictions_code(
        codes=creleaf_codes, **shared_args
    )
    