In [1]:
%load_ext autoreload
%autoreload 2

import os
import numpy as np
import pandas as pd 
import sys
import pickle
import itertools
import seaborn as sns
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import random
from sklearn.metrics import pairwise_distances
from sklearn.kernel_ridge import KernelRidge
import math

workingdirectory = os.popen('git rev-parse --show-toplevel').read()[:-1]
sys.path.append(workingdirectory)
os.chdir(workingdirectory)

import allensdk.core.json_utilities as ju
from allensdk.core.mouse_connectivity_cache import MouseConnectivityCache

from mcmodels.core import VoxelModelCache
from mcmodels.core.utils import get_structure_id,get_ordered_summary_structures
from mcmodels.core.connectivity_data import get_connectivity_data
#from mcmodels.models.crossvalidation import get_nwloocv_predictions_multimodel_merge_dists
from mcmodels.utils import nonzero_unique #, unionize
from mcmodels.models.crossvalidation import get_best_hyperparameters,get_loss_best_hyp,get_loss#get_loocv_predictions,get_loss#get_best_hyperparameters,get_loss_best_hyp,get_loocv_predictions,get_loss
from mcmodels.core.utils import get_leaves_ontologicalorder, get_indices, get_indices2,get_eval_indices,screen_index_matrices,screen_index_matrices2,screen_index_matrices3#get_cre_status,get_minorstructure_dictionary,get_leaves_ontologicalorder
from mcmodels.core.utils import get_indices_2ormore
from mcmodels.connectivity.creation import get_connectivity_matrices3
from mcmodels.utils import get_aligned_ids

from mcmodels.regressors import NadarayaWatson
from mcmodels.core.plotting import plot_loss_surface,plot_loss_scatter
from mcmodels.core import Mask



In [2]:
#read data
TOP_DIR = '/Users/samsonkoelle/alleninstitute/mcm_2020/mcm_updated/'
INPUT_JSON = os.path.join(TOP_DIR, 'input_011520.json')
EXPERIMENTS_EXCLUDE_JSON = os.path.join(TOP_DIR, 'experiments_exclude.json')
FILE_DIR = '/Users/samsonkoelle/alleninstitute/mcm_2020/mcm_updated/'
OUTPUT_DIR = os.path.join(FILE_DIR, 'output')

input_data = ju.read(INPUT_JSON)
manifest_file = input_data.get('manifest_file')
manifest_file = os.path.join(TOP_DIR, manifest_file)
experiments_exclude = ju.read(EXPERIMENTS_EXCLUDE_JSON)

#its unclear why the hyperparameters are loaded from the output directory
cache = VoxelModelCache(manifest_file=manifest_file)
major_structures = input_data.get('structures')
major_structure_ids = [get_structure_id(cache, s) for s in major_structures]
data_info = pd.read_excel('/Users/samsonkoelle/alleninstitute/Whole Brain Cre Image Series_curation only.xlsx', 'all datasets curated_070919pull')
data_info.set_index("id", inplace=True)

#switch from v2 structures
#ontological_order = get_ordered_summary_structures(cache)

#to v3
ontological_order = get_ordered_summary_structures(cache,167587189)
#due to redundancy in ccfv3, remove 'MDRNv', 'MDRNd' from summary structures (include as leafs of MDRN)
#retain ordering....
ontological_order = ontological_order[~np.in1d(ontological_order,[1098, 1107])]#np.setdiff1d(ontological_order, [1098, 1107])

#ontological_order[~np.in1d(ontological_order,[1098, 1107])]


mcc = MouseConnectivityCache(manifest_file = '../connectivity/mouse_connectivity_manifest.json')
st = mcc.get_structure_tree()
ai_map = st.get_id_acronym_map()
ia_map = {value: key for key, value in ai_map.items()}

#regionalize voxel model: compare with regional model
#regional parameters
cre = None
eid_set=None
high_res=False
threshold_injection = False

COARSE_STRUCTURE_SET_ID = 2
DEFAULT_STRUCTURE_SET_IDS = tuple([COARSE_STRUCTURE_SET_ID])
tree = cache.get_structure_tree()
default_structures = tree.get_structures_by_set_id(DEFAULT_STRUCTURE_SET_IDS)
default_structure_ids = [st['id'] for st in default_structures if st['id'] != 934]
#cre= True

annotation/ccf_2017


In [6]:
#ontological_order = get_ordered_summary_structures(cache,167587189)
#ontological_order
#ontological_order[~np.in1d(ontological_order,[1098, 1107])]

In [7]:
connectivity_data = get_connectivity_data(cache, major_structure_ids, experiments_exclude, remove_injection = False)

512
703
1089
1097
315
313
354
698
771
803
477
549


In [8]:
connectivity_data.get_injection_hemisphere_ids()
connectivity_data.align()
connectivity_data.get_centroids()
connectivity_data.get_data_matrices(default_structure_ids)
connectivity_data.get_crelines(data_info)

In [9]:
#major division segregation is legacy code but convenient for fast cross validation in major division model
#experiments_minor_structures = get_summarystructure_dictionary(connectivity_data, data_info)
#get leaves in ontological order.  Where leafs don't exist, uses summary structure
ontological_order_leaves = get_leaves_ontologicalorder(connectivity_data, ontological_order)
#Key isn't affected by which experiment we choose. This allows default masking to be inherited from the AllenSDK.
sid0 = list(connectivity_data.structure_datas.keys())[0]
eid0 = list(connectivity_data.structure_datas[sid0].experiment_datas.keys())[0]
#Identify keys denoting which voxels correspond to which structure in the ipsi and contra targets.
#contra_targetkey = msvd.experiments[list(msvd.experiments.keys())[0]].projection_mask.get_key(structure_ids=ontological_order_leaves, hemisphere_id=1)
#ipsi_targetkey = msvd.experiments[list(msvd.experiments.keys())[0]].projection_mask.get_key(structure_ids=ontological_order_leaves, hemisphere_id=2)
contra_targetkey = connectivity_data.structure_datas[sid0].projection_mask.get_key(structure_ids=ontological_order_leaves, hemisphere_id=1)
ipsi_targetkey = connectivity_data.structure_datas[sid0].projection_mask.get_key(structure_ids=ontological_order_leaves, hemisphere_id=2)
#get average intensities of projection structures given ipsi and contra keys
#source_key = ontological_order #only relevant here when injection needs to be unionized, but currently a required argument
ipsi_target_regions, ipsi_target_counts = nonzero_unique(ipsi_targetkey, return_counts=True)
contra_target_regions, contra_target_counts = nonzero_unique(contra_targetkey, return_counts=True)

target_order = lambda x: np.array(ontological_order_leaves)[np.isin(ontological_order_leaves, x)]
permutation = lambda x: np.argsort(np.argsort(target_order(x)))
targ_ids = np.concatenate([ipsi_target_regions[permutation(ipsi_target_regions)],
                           contra_target_regions[permutation(contra_target_regions)]])

IndexError: index 565 is out of bounds for axis 0 with size 564

In [None]:
connectivity_data.ai_map = ai_map
connectivity_data.get_summarystructures(data_info)



In [None]:
with open('data/info/leafs.pickle', 'rb') as handle:
    leafs = pickle.load(handle)
    
connectivity_data.leafs = leafs
summary_structures = {}
for sid in major_structure_ids:
    summary_structures[sid] = connectivity_data.structure_datas[sid].summary_structures
connectivity_data.summary_structures = summary_structures#get_indices_2ormore(connectivity_data.leafs)

connectivity_data.get_regionalized_normalized_data(ontological_order_leaves, ipsi_targetkey, contra_targetkey)
connectivity_data.get_creleaf_combos()
connectivity_data.leaf2_index_matrices = get_indices_2ormore(connectivity_data.leafs)
connectivity_data.creleaf2_index_matrices = get_indices_2ormore(connectivity_data.creleaf_combos)

connectivity_data.get_regionalized_normalized_data(ontological_order_leaves, ipsi_targetkey, contra_targetkey)



In [None]:

connectivity_data.sum2_index_matrices = get_indices_2ormore(connectivity_data.summary_structures)


In [None]:




connectivity_data.get_cresum_combos()
connectivity_data.cresum2_index_matrices = get_indices_2ormore(connectivity_data.cresum_combos)
connectivity_data.cresum2_evalindices = get_eval_indices(connectivity_data.cresum2_index_matrices)

summary_structures = {}

for sid in major_structure_ids:
    summary_structures[sid] = connectivity_data.structure_datas[sid].summary_structures
    
connectivity_data.summary_structures = summary_structures#get_indices_2ormore(connectivity_data.leafs)


In [None]:
import numpy as np
import dill as pickle

#gamma = np.load('/Users/samsonkoelle/alleninstitute/sambranch/mouse_connectivity_models/paper/trainedmodels/ELsummary_surface_gamma_0218.npy')
with open('/Users/samsonkoelle/alleninstitute/sambranch/mouse_connectivity_models/paper/trainedmodels/ELsummary_surface_0401_leafleaf.pickle', 'rb') as handle:
    surfaces = pickle.load(handle)

In [None]:
crelist = ['C57BL/6J']#, 'Cux2-IRES-Cre','Ntsr1-Cre_GN220','Rbp4-Cre_KL100','Tlx3-Cre_PL56']
eval_cres = ['C57BL/6J']#, 'Cux2-IRES-Cre','Ntsr1-Cre_GN220','Rbp4-Cre_KL100','Tlx3-Cre_PL56']

eval_cres = ['C57BL/6J', 'Cux2-IRES-Cre','Ntsr1-Cre_GN220','Rbp4-Cre_KL100','Tlx3-Cre_PL56']
ontological_order_leaves_majors = get_aligned_ids(st,ontological_order_leaves,major_structure_ids)
ontological_order_leaves_summary = get_aligned_ids(st,ontological_order_leaves,ontological_order)


# model_ordering_leaf = np.asarray([ontological_order_leaves_majors,ontological_order_leaves]).transpose()
# model_ordering_summary = np.asarray([ontological_order_leaves_majors,ontological_order_leaves_summary]).transpose()
model_ordering_leafs = np.asarray([ontological_order_leaves_majors,ontological_order_leaves]).transpose()
model_ordering_summaries = np.asarray([ontological_order_leaves_majors,ontological_order_leaves_summary]).transpose()


In [None]:
with open('data/info/leafs.pickle', 'rb') as handle:
    leafs = pickle.load(handle)
   

In [6]:
connectivity_data = connectivity_data
surfaces = surfaces
cres = crelist
experiment_sids_surfaces = summary_structures
experiment_sids_nws = leafs#[315][210:215]
model_ordering = ontological_order_leaves_majors#[:5]#[210:215]
source_ordering_surface = ontological_order_leaves_summary#[:5]#[210:215]
source_ordering_nw = ontological_order_leaves#[:5]#[210:215]
source_ordering = ontological_order_leaves#[:5]#[210:215]
target_ordering = ontological_order_leaves
#eval_cres = crelist
#crelist = ['Cux2-IRES-Cre','Ntsr1-Cre_GN220','Rbp4-Cre_KL100','Tlx3-Cre_PL56']#['C57BL/6J']#, 'Cux2-IRES-Cre','Ntsr1-Cre_GN220','Rbp4-Cre_KL100','Tlx3-Cre_PL56']
crelist= ['C57BL/6J']
#eval_cres = ['C57BL/6J']#, 'Cux2-IRES-Cre','Ntsr1-Cre_GN220','Rbp4-Cre_KL100','Tlx3-Cre_PL56']
eval_cres = crelist

NameError: name 'surfaces' is not defined

In [18]:
# connectivity_data.get_crelines(data_info)
# for sid in major_structure_ids:
#     connectivity_data.structure_datas[sid].crelines = connectivity_data.creline[sid]

# connectivity_data.ipsi_target_regions = ipsi_target_regions
# connectivity_data.contra_target_regions = contra_target_regions

In [19]:
conn_v3 = get_connectivity_matrices3(connectivity_data, surfaces, experiment_sids_surfaces,experiment_sids_nws, model_ordering, source_ordering_surface, source_ordering_nw, source_ordering, target_ordering, eval_cres)

0 1.0
1 811.0
2 820.0
3 828.0
4 7.0
5 12.0
6 15.0
7 19.0
8 23.0
9 27.0
10 30.0
11 35.0
12 38.0
13 935.0
14 211.0
15 1015.0
16 919.0
17 927.0
18 707.0
19 556.0
20 827.0
21 1054.0
22 1081.0
23 588.0
24 296.0
25 772.0
26 810.0
27 819.0
28 56.0
29 58.0
30 59.0
31 63.0
32 64.0
33 66.0
34 72.0
35 75.0
36 83.0
37 88.0
38 91.0
39 96.0
40 607344834.0
41 607344838.0
42 607344842.0
43 607344846.0
44 607344850.0
45 607344854.0
46 607344858.0
47 607344862.0
48 101.0
49 996.0
50 328.0
51 1101.0
52 783.0
53 831.0
54 106.0
55 120.0
56 163.0
57 344.0
58 314.0
59 355.0
60 115.0
61 118.0
62 704.0
63 694.0
64 800.0
65 675.0
66 699.0
67 126.0
68 1096.0
69 1104.0
70 128.0
71 131.0
72 133.0
73 939.0
74 143.0
75 136.0
76 146.0
77 147.0
78 149.0
79 188.0
80 196.0
81 204.0
82 155.0
83 159.0
84 162.0
85 169.0
86 496345664.0
87 496345668.0
88 496345672.0
89 173.0
90 177.0
91 178.0
92 181.0
93 68.0
94 667.0
95 526157192.0
96 526157196.0
97 526322264.0
98 186.0
99 189.0
100 194.0
101 197.0
102 202.0
103 203.0
104 2

In [22]:
connectivity_matrices = pd.DataFrame(conn_v3[0], columns = cnam_multi, index=rnames)

In [25]:
#connectivity_matrices = pd.DataFrame(conn_v3[0], columns = cnam_multi, index=rnames)
connectivity_matrices.to_csv('/Users/samsonkoelle/alleninstitute/sambranch/mouse_connectivity_models/analyses/paper/connectivities/elsummarymodel_leafsource_leaftarget_wt_v3_00401_l1norm.csv')

In [9]:
#load here for reordering


  interactivity=interactivity, compiler=compiler, result=result)


In [11]:
loaded_csv = pd.read_csv('/Users/samsonkoelle/alleninstitute/sambranch/mouse_connectivity_models/analyses/paper/connectivities/elsummarymodel_leafsource_leaftarget_wt_v3_00401_l1norm.csv',  header=[0,1])

def fix_pdcsv(csv):
    
    #wt_conn = pd.read_csv('/Users/samsonkoelle/wt_leafsmooth_leafsurface_sumtarget.csv',  header=[0,1])
    csv_rownames = np.asarray(csv.iloc[:,0])
    csv = csv.iloc[:,1:]
    csv.index = csv_rownames
    return(csv)

loaded_fixed_csv = fix_pdcsv(loaded_csv)

In [25]:
#major division segregation is legacy code but convenient for fast cross validation in major division model
#experiments_minor_structures = get_summarystructure_dictionary(connectivity_data, data_info)
#get leaves in ontological order.  Where leafs don't exist, uses summary structure
ontological_order_leaves = get_leaves_ontologicalorder(connectivity_data, ontological_order)
#Key isn't affected by which experiment we choose. This allows default masking to be inherited from the AllenSDK.
sid0 = list(connectivity_data.structure_datas.keys())[0]
eid0 = list(connectivity_data.structure_datas[sid0].experiment_datas.keys())[0]
#Identify keys denoting which voxels correspond to which structure in the ipsi and contra targets.
#contra_targetkey = msvd.experiments[list(msvd.experiments.keys())[0]].projection_mask.get_key(structure_ids=ontological_order_leaves, hemisphere_id=1)
#ipsi_targetkey = msvd.experiments[list(msvd.experiments.keys())[0]].projection_mask.get_key(structure_ids=ontological_order_leaves, hemisphere_id=2)
contra_targetkey = connectivity_data.structure_datas[sid0].projection_mask.get_key(structure_ids=ontological_order_leaves, hemisphere_id=1)
ipsi_targetkey = connectivity_data.structure_datas[sid0].projection_mask.get_key(structure_ids=ontological_order_leaves, hemisphere_id=2)
#get average intensities of projection structures given ipsi and contra keys
#source_key = ontological_order #only relevant here when injection needs to be unionized, but currently a required argument
ipsi_target_regions, ipsi_target_counts = nonzero_unique(ipsi_targetkey, return_counts=True)
contra_target_regions, contra_target_counts = nonzero_unique(contra_targetkey, return_counts=True)

target_order = lambda x: np.array(ontological_order_leaves)[np.isin(ontological_order_leaves, x)]
permutation = lambda x: np.argsort(np.argsort(target_order(x)))
targ_ids = np.concatenate([ipsi_target_regions[permutation(ipsi_target_regions)],
                           contra_target_regions[permutation(contra_target_regions)]])

In [26]:
# connectivity_data.get_injection_hemisphere_ids()
# connectivity_data.align()
# connectivity_data.get_centroids()
# connectivity_data.get_data_matrices(default_structure_ids)
# connectivity_data.get_crelines(data_info)

In [27]:
connectivity_data.get_crelines(data_info)
for sid in major_structure_ids:
    connectivity_data.structure_datas[sid].crelines = connectivity_data.creline[sid]

connectivity_data.ipsi_target_regions = ipsi_target_regions
connectivity_data.contra_target_regions = contra_target_regions

In [28]:
contra_target_regions.shape

(559,)

In [32]:
ia_map[ontological_order_leaves[0]]

'TMv'

In [33]:
ontological_order_names = np.asarray([ia_map[ontological_order[i]] for i in range(len(ontological_order))])
ontological_order_leaves_names = np.asarray([ia_map[ontological_order_leaves[i]] for i in range(len(ontological_order_leaves))])


In [29]:
target_ordering= ontological_order_leaves

rnames = np.asarray([ia_map[ontological_order_leaves[i]] for i in range(len(ontological_order_leaves))])
ipsi_target_regions = connectivity_data.ipsi_target_regions
contra_target_regions = connectivity_data.contra_target_regions                               
ipsi_indices= np.asarray([])
contra_indices = np.asarray([])
for iy in target_ordering: 
    ipsi_indices = np.concatenate([ipsi_indices, np.where(ipsi_target_regions==iy)[0]] )
    contra_indices = np.concatenate([contra_indices, np.where(contra_target_regions==iy)[0]] )
ipsi_indices = np.asarray(ipsi_indices, dtype = int)   
contra_indices = np.asarray(contra_indices, dtype = int)    
reorder = np.concatenate([ipsi_indices, len(ipsi_indices) + contra_indices])  
ntarget = len(reorder)

colids = np.concatenate([ipsi_target_regions, contra_target_regions])[reorder]
cnames = np.asarray([ia_map[colids[i]] for i in range(len(colids))])


ccomb = np.vstack([np.concatenate([np.repeat('ipsi',connectivity_data.ipsi_target_regions.shape[0]),
                                   np.repeat('contra',connectivity_data.contra_target_regions.shape)]), cnames])
ccomb = np.asarray(ccomb)
tuples2 = list(zip(*ccomb))
cnam_multi = pd.MultiIndex.from_tuples(tuples2, names=['first', 'second'])

In [30]:
cnam_multi

MultiIndex([(  'ipsi',  'TMv'),
            (  'ipsi',  'ICc'),
            (  'ipsi',  'ICd'),
            (  'ipsi',  'ICe'),
            (  'ipsi',  'PSV'),
            (  'ipsi',   'IF'),
            (  'ipsi',   'PT'),
            (  'ipsi',   'IG'),
            (  'ipsi',  'AAA'),
            (  'ipsi',  'IGL'),
            ...
            ('contra', 'IntG'),
            ('contra', 'VMPO'),
            ('contra',  'PeF'),
            ('contra', 'HATA'),
            ('contra',  'Pa5'),
            ('contra', 'VeCB'),
            ('contra',  'SCO'),
            ('contra', 'PDTg'),
            ('contra',  'Pa4'),
            ('contra',   'PN')],
           names=['first', 'second'], length=1123)

In [23]:
connectivity_data.contra_target_regions.shape

(308,)