In [2]:
%load_ext autoreload
%autoreload 2

import os
import numpy as np
import pandas as pd 
import sys
import pickle
import itertools
import seaborn as sns
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import random
from sklearn.metrics import pairwise_distances
from sklearn.kernel_ridge import KernelRidge
import math

workingdirectory = os.popen('git rev-parse --show-toplevel').read()[:-1]
sys.path.append(workingdirectory)
os.chdir(workingdirectory)

import allensdk.core.json_utilities as ju
from allensdk.core.mouse_connectivity_cache import MouseConnectivityCache

from mcmodels.core import VoxelModelCache
from mcmodels.core.utils import get_structure_id,get_ordered_summary_structures
from mcmodels.core.connectivity_data import get_connectivity_data
from mcmodels.models.crossvalidation import get_nwloocv_predictions_multimodel_merge_dists
from mcmodels.utils import nonzero_unique #, unionize
from mcmodels.models.crossvalidation import get_best_hyperparameters,get_loss_best_hyp,get_loss#get_loocv_predictions,get_loss#get_best_hyperparameters,get_loss_best_hyp,get_loocv_predictions,get_loss
from mcmodels.core.utils import get_leaves_ontologicalorder, get_indices, get_indices2,get_eval_indices,screen_index_matrices,screen_index_matrices2,screen_index_matrices3#get_cre_status,get_minorstructure_dictionary,get_leaves_ontologicalorder
from mcmodels.core.utils import get_indices_2ormore
from mcmodels.regressors import NadarayaWatson
from mcmodels.core.plotting import plot_loss_surface,plot_loss_scatter



In [3]:
#read data
TOP_DIR = '/Users/samsonkoelle/alleninstitute/mcm_2020/mcm_updated/'
INPUT_JSON = os.path.join(TOP_DIR, 'input_011520.json')
EXPERIMENTS_EXCLUDE_JSON = os.path.join(TOP_DIR, 'experiments_exclude.json')
FILE_DIR = '/Users/samsonkoelle/alleninstitute/mcm_2020/mcm_updated/'
OUTPUT_DIR = os.path.join(FILE_DIR, 'output')

input_data = ju.read(INPUT_JSON)
manifest_file = input_data.get('manifest_file')
manifest_file = os.path.join(TOP_DIR, manifest_file)
experiments_exclude = ju.read(EXPERIMENTS_EXCLUDE_JSON)

#its unclear why the hyperparameters are loaded from the output directory
cache = VoxelModelCache(manifest_file=manifest_file)
major_structures = input_data.get('structures')
major_structure_ids = [get_structure_id(cache, s) for s in major_structures]
data_info = pd.read_excel('/Users/samsonkoelle/alleninstitute/Whole Brain Cre Image Series_curation only.xlsx', 'all datasets curated_070919pull')
data_info.set_index("id", inplace=True)
ontological_order = get_ordered_summary_structures(cache)

mcc = MouseConnectivityCache(manifest_file = '../connectivity/mouse_connectivity_manifest.json')
st = mcc.get_structure_tree()
ai_map = st.get_id_acronym_map()
ia_map = {value: key for key, value in ai_map.items()}

#regionalize voxel model: compare with regional model
#regional parameters
cre = None
eid_set=None
high_res=False
threshold_injection = False

COARSE_STRUCTURE_SET_ID = 2#3#2#167587189#3#2(old)
DEFAULT_STRUCTURE_SET_IDS = tuple([COARSE_STRUCTURE_SET_ID])
tree = cache.get_structure_tree()
default_structures = tree.get_structures_by_set_id(DEFAULT_STRUCTURE_SET_IDS)
default_structure_ids = [st['id'] for st in default_structures if st['id'] != 934]
#cre= True

In [4]:
connectivity_data = get_connectivity_data(cache, major_structure_ids, experiments_exclude, remove_injection = False,structure_set_id = 167587189)

connectivity_data.get_injection_hemisphere_ids()
connectivity_data.align()
connectivity_data.get_centroids()
connectivity_data.get_data_matrices(default_structure_ids)
connectivity_data.get_crelines(data_info)
with open('data/info/leafs.pickle', 'rb') as handle:
    leafs = pickle.load(handle)
    
connectivity_data.ai_map = ai_map
connectivity_data.get_summarystructures(data_info)
connectivity_data.leafs = leafs

512
703
1089
1097
315
313
354
698
771
803
477
549


In [16]:
#major division segregation is legacy code but convenient for fast cross validation in major division model
#experiments_minor_structures = get_summarystructure_dictionary(connectivity_data, data_info)
#get leaves in ontological order.  Where leafs don't exist, uses summary structure
ontological_order_leaves = get_leaves_ontologicalorder(connectivity_data, ontological_order)
#Key isn't affected by which experiment we choose. This allows default masking to be inherited from the AllenSDK.
sid0 = list(connectivity_data.structure_datas.keys())[0]
eid0 = list(connectivity_data.structure_datas[sid0].experiment_datas.keys())[0]
#Identify keys denoting which voxels correspond to which structure in the ipsi and contra targets.
contra_targetkey = connectivity_data.structure_datas[sid0].projection_mask.get_key(structure_ids=ontological_order_leaves, hemisphere_id=1)
ipsi_targetkey = connectivity_data.structure_datas[sid0].projection_mask.get_key(structure_ids=ontological_order_leaves, hemisphere_id=2)

#contra_targetkey = connectivity_data.structure_datas[sid0].projection_mask.get_key(structure_ids=ontological_order, hemisphere_id=1)
#ipsi_targetkey = connectivity_data.structure_datas[sid0].projection_mask.get_key(structure_ids=ontological_order, hemisphere_id=2)
#get average intensities of projection structures given ipsi and contra keys
#source_key = ontological_order #only relevant here when injection needs to be unionized, but currently a required argument
ipsi_target_regions, ipsi_target_counts = nonzero_unique(ipsi_targetkey, return_counts=True)
contra_target_regions, contra_target_counts = nonzero_unique(contra_targetkey, return_counts=True)

target_order = lambda x: np.array(ontological_order_leaves)[np.isin(ontological_order_leaves, x)]
permutation = lambda x: np.argsort(np.argsort(target_order(x)))
targ_ids = np.concatenate([ipsi_target_regions[permutation(ipsi_target_regions)],
                           contra_target_regions[permutation(contra_target_regions)]])

In [None]:
import matplotlib.patches as mpatches
import seaborn as sns

reo = [4,7,2,1,10,9,11,3,5,8,6,0]
handles = [mpatches.Patch(color =  cs_col_ipsi_dict[major_structures[i]], label = major_structures[i]) for i in reo]

In [6]:
ontological_order.shape #2

(291,)

In [12]:
ontological_order.shape #3

(291,)

In [17]:
ontological_order.shape #3

(291,)

In [19]:
len(default_structures)

316

In [21]:
len(default_structures)

12

In [23]:
len(default_structures)

51

In [68]:
COARSE_STRUCTURE_SET_ID = 2#3#2#167587189#3#2(old)
DEFAULT_STRUCTURE_SET_IDS = tuple([COARSE_STRUCTURE_SET_ID])
tree = cache.get_structure_tree()
default_structures = tree.get_structures_by_set_id(DEFAULT_STRUCTURE_SET_IDS)
print(len(default_structures)), default_structures

12


(None,
 [{'acronym': 'Isocortex',
   'graph_id': 1,
   'graph_order': 5,
   'id': 315,
   'name': 'Isocortex',
   'structure_id_path': [997, 8, 567, 688, 695, 315],
   'structure_set_ids': [2,
    112905828,
    691663206,
    12,
    184527634,
    112905813,
    687527670,
    114512891,
    114512892],
   'rgb_triplet': [112, 255, 113]},
  {'acronym': 'OLF',
   'graph_id': 1,
   'graph_order': 379,
   'id': 698,
   'name': 'Olfactory areas',
   'structure_id_path': [997, 8, 567, 688, 695, 698],
   'structure_set_ids': [2,
    3,
    112905828,
    691663206,
    12,
    184527634,
    112905813,
    687527670,
    114512891,
    114512892],
   'rgb_triplet': [154, 210, 189]},
  {'acronym': 'HPF',
   'graph_id': 1,
   'graph_order': 454,
   'id': 1089,
   'name': 'Hippocampal formation',
   'structure_id_path': [997, 8, 567, 688, 695, 1089],
   'structure_set_ids': [2,
    112905828,
    691663206,
    12,
    184527634,
    112905813,
    687527670,
    114512891,
    114512892],
  

In [70]:
help(tree.get_structures_by_set_id)

Help on method get_structures_by_set_id in module allensdk.core.structure_tree:

get_structures_by_set_id(structure_set_ids) method of allensdk.core.structure_tree.StructureTree instance
    Obtain a list of brain structures from by the sets that contain 
    them.
    
    Parameters
    ----------
    structure_set_ids : list of int
        Get structures belonging to these structure sets.
        
    Returns
    -------
    list of dict : 
        Each item describes a structure.



In [74]:
COARSE_STRUCTURE_SET_ID = 687527945#2#167587189#3#2(old)
DEFAULT_STRUCTURE_SET_IDS = tuple([COARSE_STRUCTURE_SET_ID])
tree = cache.get_structure_tree()
default_structures = tree.get_structures_by_set_id(DEFAULT_STRUCTURE_SET_IDS)
print(len(default_structures)), default_structures

293


(None,
 [{'acronym': 'FRP',
   'graph_id': 1,
   'graph_order': 6,
   'id': 184,
   'name': 'Frontal pole, cerebral cortex',
   'structure_id_path': [997, 8, 567, 688, 695, 315, 184],
   'structure_set_ids': [3,
    112905828,
    688152357,
    691663206,
    687527945,
    12,
    184527634,
    167587189,
    112905813,
    114512891],
   'rgb_triplet': [38, 143, 69]},
  {'acronym': 'MOp',
   'graph_id': 1,
   'graph_order': 18,
   'id': 985,
   'name': 'Primary motor area',
   'structure_id_path': [997, 8, 567, 688, 695, 315, 500, 985],
   'structure_set_ids': [112905828,
    688152357,
    691663206,
    687527945,
    12,
    184527634,
    167587189,
    112905813,
    114512891,
    114512892],
   'rgb_triplet': [31, 157, 90]},
  {'acronym': 'MOs',
   'graph_id': 1,
   'graph_order': 24,
   'id': 993,
   'name': 'Secondary motor area',
   'structure_id_path': [997, 8, 567, 688, 695, 315, 500, 993],
   'structure_set_ids': [112905828,
    688152357,
    691663206,
    687527945,

In [75]:
default_structures_old = default_structures.copy()

In [71]:
COARSE_STRUCTURE_SET_ID = 687527670#2#167587189#3#2(old)
DEFAULT_STRUCTURE_SET_IDS = tuple([COARSE_STRUCTURE_SET_ID])
tree = cache.get_structure_tree()
default_structures = tree.get_structures_by_set_id(DEFAULT_STRUCTURE_SET_IDS)
print(len(default_structures)), default_structures

12


(None,
 [{'acronym': 'Isocortex',
   'graph_id': 1,
   'graph_order': 5,
   'id': 315,
   'name': 'Isocortex',
   'structure_id_path': [997, 8, 567, 688, 695, 315],
   'structure_set_ids': [2,
    112905828,
    691663206,
    12,
    184527634,
    112905813,
    687527670,
    114512891,
    114512892],
   'rgb_triplet': [112, 255, 113]},
  {'acronym': 'OLF',
   'graph_id': 1,
   'graph_order': 379,
   'id': 698,
   'name': 'Olfactory areas',
   'structure_id_path': [997, 8, 567, 688, 695, 698],
   'structure_set_ids': [2,
    3,
    112905828,
    691663206,
    12,
    184527634,
    112905813,
    687527670,
    114512891,
    114512892],
   'rgb_triplet': [154, 210, 189]},
  {'acronym': 'HPF',
   'graph_id': 1,
   'graph_order': 454,
   'id': 1089,
   'name': 'Hippocampal formation',
   'structure_id_path': [997, 8, 567, 688, 695, 1089],
   'structure_set_ids': [2,
    112905828,
    691663206,
    12,
    184527634,
    112905813,
    687527670,
    114512891,
    114512892],
  

In [76]:
COARSE_STRUCTURE_SET_ID = 167587189#2#167587189#3#2(old)
DEFAULT_STRUCTURE_SET_IDS = tuple([COARSE_STRUCTURE_SET_ID])
tree = cache.get_structure_tree()
default_structures = tree.get_structures_by_set_id(DEFAULT_STRUCTURE_SET_IDS)
print(len(default_structures)), default_structures

316


(None,
 [{'acronym': 'FRP',
   'graph_id': 1,
   'graph_order': 6,
   'id': 184,
   'name': 'Frontal pole, cerebral cortex',
   'structure_id_path': [997, 8, 567, 688, 695, 315, 184],
   'structure_set_ids': [3,
    112905828,
    688152357,
    691663206,
    687527945,
    12,
    184527634,
    167587189,
    112905813,
    114512891],
   'rgb_triplet': [38, 143, 69]},
  {'acronym': 'MOp',
   'graph_id': 1,
   'graph_order': 18,
   'id': 985,
   'name': 'Primary motor area',
   'structure_id_path': [997, 8, 567, 688, 695, 315, 500, 985],
   'structure_set_ids': [112905828,
    688152357,
    691663206,
    687527945,
    12,
    184527634,
    167587189,
    112905813,
    114512891,
    114512892],
   'rgb_triplet': [31, 157, 90]},
  {'acronym': 'MOs',
   'graph_id': 1,
   'graph_order': 24,
   'id': 993,
   'name': 'Secondary motor area',
   'structure_id_path': [997, 8, 567, 688, 695, 315, 500, 993],
   'structure_set_ids': [112905828,
    688152357,
    691663206,
    687527945,

In [77]:
default_structures_new = default_structures.copy()


In [82]:
ds_id_new = [default_structures_new[i]['id'] for i in range(len(default_structures_new))]

ds_id_old = [default_structures_old[i]['id'] for i in range(len(default_structures_old))]

In [84]:
len(np.intersect1d(ds_id_new, ds_id_old))

292

In [87]:
new_id = np.setdiff1d(ds_id_new,ds_id_old)
old_id = np.setdiff1d(ds_id_old,ds_id_new)

In [88]:
old_id

array([934])

In [89]:
ia_map[934]

'ENTmv'

In [91]:
[ia_map[i] for i in new_id],new_id

(['MT',
  'DT',
  'MDRN',
  'ME',
  'ProS',
  'APr',
  'MA3',
  'P5',
  'Acs5',
  'PC5',
  'I5',
  'Xi',
  'PIL',
  'PoT',
  'IntG',
  'VMPO',
  'PeF',
  'HATA',
  'Pa5',
  'VeCB',
  'SCO',
  'PDTg',
  'Pa4',
  'PN'],
 array([       58,        75,       395,     10671, 484682470, 484682508,
        549009211, 549009215, 549009219, 549009223, 549009227, 560581559,
        560581563, 563807435, 563807439, 576073699, 576073704, 589508447,
        589508451, 589508455, 599626923, 599626927, 606826663, 607344830]))

In [69]:
COARSE_STRUCTURE_SET_ID = 3#2#167587189#3#2(old)
DEFAULT_STRUCTURE_SET_IDS = tuple([COARSE_STRUCTURE_SET_ID])
tree = cache.get_structure_tree()
default_structures = tree.get_structures_by_set_id(DEFAULT_STRUCTURE_SET_IDS)
print(len(default_structures)), default_structures

51


(None,
 [{'acronym': 'FRP',
   'graph_id': 1,
   'graph_order': 6,
   'id': 184,
   'name': 'Frontal pole, cerebral cortex',
   'structure_id_path': [997, 8, 567, 688, 695, 315, 184],
   'structure_set_ids': [3,
    112905828,
    688152357,
    691663206,
    687527945,
    12,
    184527634,
    167587189,
    112905813,
    114512891],
   'rgb_triplet': [38, 143, 69]},
  {'acronym': 'MO',
   'graph_id': 1,
   'graph_order': 12,
   'id': 500,
   'name': 'Somatomotor areas',
   'structure_id_path': [997, 8, 567, 688, 695, 315, 500],
   'structure_set_ids': [3,
    112905828,
    691663206,
    12,
    184527634,
    112905813,
    114512891,
    114512892],
   'rgb_triplet': [31, 157, 90]},
  {'acronym': 'SS',
   'graph_id': 1,
   'graph_order': 30,
   'id': 453,
   'name': 'Somatosensory areas',
   'structure_id_path': [997, 8, 567, 688, 695, 315, 453],
   'structure_set_ids': [3,
    112905828,
    691663206,
    10,
    12,
    184527634,
    112905813,
    114512891,
    114512892

In [104]:
data_info['primary-injection-structure']

(2875,)

In [95]:
np.isin(ds_id_old , info['primary_injection_structure'])[0]

True

In [101]:
len(np.where(np.isin(info['primary_injection_structure'],ds_id_new ))[0])

1751

In [100]:
len(np.where(np.isin(info['primary_injection_structure'],ds_id_old ))[0])

1745

In [106]:
#.shape
len(np.where(np.isin(info['primary_injection_structure'],ontological_order ))[0])

1745

In [None]:
#experiments_minor_structures_sid = experiments_minor_structures[sid]
def get_summary_structure(structure_data):
    
    structure_data
    #nexp = len(experiments_minor_structures_sid)
    minors = [ai_map[ms] for ms in experiments_minor_structures_sid]
    leaves = [st.child_ids([minors[i]]) for i in range(len(minors))]
    minor_structures = np.zeros(nexp)
    for i in range(nexp):
        print(i)
        exp = msvd.experiments[list(msvd.experiments.keys())[i]]
        if len(leaves[i][0]) > 0:
            #print(i)
            nebdist = np.zeros(len(leaves[i][0]))
            for j in range(len(leaves[i][0])):
                #print(j)
                #print(leaves[i][0][j])
                m = Mask.from_cache(cache,structure_ids=[leaves[i][0][j]],hemisphere_id=3)
                try:
                    nebdist[j] = np.linalg.norm(exp.centroid - m.coordinates, axis = 1).min()
                except Exception:
                    pass
                #print(m.coordinates)
                #print(nebdist)
                #nebdist[j] = np.linalg.norm(exp.centroid - m.coordinates, axis = 1).min()
            nebdist[np.where(nebdist == 0.)[0]] = 1000
            nearestneb = nebdist.argmin()
            if nebdist.min() != 1000:
                minor_structures[i] = leaves[i][0][nearestneb]
            else: 
                minor_structures[i] = minors[i]
        else:
            minor_structures[i] = minors[i]
    return(np.asarray(minor_structures, dtype = int))

In [None]:
experiments_minor_structures_sid = experiments_minor_structures[sid]
def get_leaf_structure(msvd, experiments_minor_structures_sid):
    
    nexp = len(experiments_minor_structures_sid)
    minors = [ai_map[ms] for ms in experiments_minor_structures_sid]
    leaves = [st.child_ids([minors[i]]) for i in range(len(minors))]
    minor_structures = np.zeros(nexp)
    for i in range(nexp):
        print(i)
        exp = msvd.experiments[list(msvd.experiments.keys())[i]]
        if len(leaves[i][0]) > 0:
            #print(i)
            nebdist = np.zeros(len(leaves[i][0]))
            for j in range(len(leaves[i][0])):
                #print(j)
                #print(leaves[i][0][j])
                m = Mask.from_cache(cache,structure_ids=[leaves[i][0][j]],hemisphere_id=3)
                try:
                    nebdist[j] = np.linalg.norm(exp.centroid - m.coordinates, axis = 1).min()
                except Exception:
                    pass
                #print(m.coordinates)
                #print(nebdist)
                #nebdist[j] = np.linalg.norm(exp.centroid - m.coordinates, axis = 1).min()
            nebdist[np.where(nebdist == 0.)[0]] = 1000
            nearestneb = nebdist.argmin()
            if nebdist.min() != 1000:
                minor_structures[i] = leaves[i][0][nearestneb]
            else: 
                minor_structures[i] = minors[i]
        else:
            minor_structures[i] = minors[i]
    return(np.asarray(minor_structures, dtype = int))

In [29]:
#connectivity_data.structure_datas[315].eids

In [64]:
cache.get_experiment_structure_unionizes(159832064).shape

(2548, 19)

In [39]:
cache.get_experiment_structure_unionizes(159832064).iloc[:5]

Unnamed: 0,hemisphere_id,id,is_injection,max_voxel_density,max_voxel_x,max_voxel_y,max_voxel_z,normalized_projection_volume,projection_density,projection_energy,projection_intensity,projection_volume,experiment_id,structure_id,sum_pixel_intensity,sum_pixels,sum_projection_pixel_intensity,sum_projection_pixels,volume
0,3,628243119,False,0.0,0,0,0,0.0,0.0,0.0,0.0,0.0,159832064,187,123017200000.0,490786200.0,0.0,0.0,0.601213
1,1,628239030,False,0.0,0,0,0,0.0,0.0,0.0,0.0,0.0,159832064,903,11777890000.0,92125800.0,0.0,0.0,0.112854
2,3,628243224,False,0.697898,5710,6220,7170,0.012065,0.003072,3.014907,981.354163,0.001897,159832064,226,98192680000.0,503978400.0,1519448000.0,1548318.0,0.617374
3,3,628244185,False,0.625754,6460,5410,8120,0.004168,0.000451,0.530254,1175.085737,0.000655,159832064,536,274510200000.0,1185300000.0,628510500.0,534863.6,1.451992
4,2,628236463,False,0.984967,6850,4530,8110,0.983053,0.075864,137.78743,1816.247925,0.154537,159832064,784,383421600000.0,1662886000.0,229124800000.0,126152800.0,2.037035


In [44]:
cache.get_experiments(dataframe= [628243119]).shape

(1751, 15)

In [92]:
info = cache.get_experiments(dataframe= [628236463])

In [57]:
cache = VoxelModelCache(manifest_file=manifest_file,ccf_version  = 167587189)
a= cache.get_experiments(520336173)['primary_injection_structure']

In [58]:
cache = VoxelModelCache(manifest_file=manifest_file,ccf_version  = 687527945)
b= cache.get_experiments(520336173)['primary_injection_structure']

In [62]:
(a - b).min()

0

In [63]:
cache.get_experiments(520336173)['primary_injection_structure']

id
301875966     574
520336173       1
177904363     136
602828622     993
178489574     475
             ... 
126860974     731
114400640    1039
112791318     337
156545918     385
308027576     879
Name: primary_injection_structure, Length: 1751, dtype: int64

In [36]:
help(cache.get_experiment_structure_unionizes)

Help on method get_experiment_structure_unionizes in module allensdk.core.mouse_connectivity_cache:

get_experiment_structure_unionizes(experiment_id, file_name=None, is_injection=None, structure_ids=None, include_descendants=False, hemisphere_ids=None) method of mcmodels.core.voxel_model_cache.VoxelModelCache instance
    Retrieve the structure unionize data for a specific experiment.  Filter by
    structure, injection status, and hemisphere.
    
    Parameters
    ----------
    
    experiment_id: int
        ID of the experiment of interest.  Corresponds to section_data_set_id in the API.
    
    file_name: string
        File name to save/read the experiments list.  If file_name is None,
        the file_name will be pulled out of the manifest.  If caching
        is disabled, no file will be saved. Default is None.
    
    is_injection: boolean
        If True, only return unionize records that disregard non-injection pixels.
        If False, only return unionize records tha