In [1]:
%load_ext autoreload
%autoreload 2

import os
import numpy as np
import pandas as pd 
import sys
import pickle
import itertools
import seaborn as sns
import matplotlib.pyplot as plt

workingdirectory = os.popen('git rev-parse --show-toplevel').read()[:-1]
sys.path.append(workingdirectory)
os.chdir(workingdirectory)

import allensdk.core.json_utilities as ju
from allensdk.core.mouse_connectivity_cache import MouseConnectivityCache

from mcmodels.core import Mask,ModelData,VoxelModelCache
from mcmodels.core.utils import get_structure_id, get_ordered_summary_structures,get_minorstructures,get_loss_paper
from mcmodels.utils import nonzero_unique, unionize
from mcmodels.core.experiment import get_voxeldata_msvd
from mcmodels.models.crossvalidation import get_best_hyperparameters,get_loss_best_hyp,get_loss, get_loocv_predictions_code
from mcmodels.core.utils import get_cre_status,get_minorstructure_dictionary,get_leaves_ontologicalorder
from mcmodels.core.utils import get_regionalized_normalized_data
from mcmodels.core.utils import get_connectivity
from mcmodels.core.utils import get_ontological_order_leaf
from mcmodels.core.utils import get_nw_loocv,get_wt_inds
from mcmodels.core.utils import get_countvec, get_twoormore



In [2]:
#read data
TOP_DIR = '/Users/samsonkoelle/alleninstitute/mcm_2020/mcm_updated/'
INPUT_JSON = os.path.join(TOP_DIR, 'input_011520.json')
EXPERIMENTS_EXCLUDE_JSON = os.path.join(TOP_DIR, 'experiments_exclude.json')
FILE_DIR = '/Users/samsonkoelle/alleninstitute/mcm_2020/mcm_updated/'
OUTPUT_DIR = os.path.join(FILE_DIR, 'output')

input_data = ju.read(INPUT_JSON)
manifest_file = input_data.get('manifest_file')
manifest_file = os.path.join(TOP_DIR, manifest_file)
experiments_exclude = ju.read(EXPERIMENTS_EXCLUDE_JSON)

#its unclear why the hyperparameters are loaded from the output directory
cache = VoxelModelCache(manifest_file=manifest_file)
major_structures = input_data.get('structures')
major_structure_ids = [get_structure_id(cache, s) for s in major_structures]
data_info = pd.read_excel('/Users/samsonkoelle/alleninstitute/Whole Brain Cre Image Series_curation only.xlsx', 'all datasets curated_070919pull')
data_info.set_index("id", inplace=True)
ontological_order = get_ordered_summary_structures(cache)

mcc = MouseConnectivityCache(manifest_file = '../connectivity/mouse_connectivity_manifest.json')
st = mcc.get_structure_tree()
ai_map = st.get_id_acronym_map()
ia_map = {value: key for key, value in ai_map.items()}

#regionalize voxel model: compare with regional model
#regional parameters
cre = None
eid_set=None
high_res=False
threshold_injection = False

COARSE_STRUCTURE_SET_ID = 2
DEFAULT_STRUCTURE_SET_IDS = tuple([COARSE_STRUCTURE_SET_ID])
tree = cache.get_structure_tree()
default_structures = tree.get_structures_by_set_id(DEFAULT_STRUCTURE_SET_IDS)
default_structure_ids = [st['id'] for st in default_structures if st['id'] != 934]
#cre= True

In [3]:
#load data
msvds = {}
for sid in major_structure_ids:
    print(sid)
    voxel_data = ModelData(cache, sid)
    experiment_ids = voxel_data.get_experiment_ids(experiments_exclude=experiments_exclude, cre=cre)
    experiment_ids = np.asarray(list(experiment_ids))    
    #get injections and projections on the voxel level.
    #Note that a preprocessing screen is applied in AllenSDK to mask at projection and injection boundaries
    #Voxels intensity not in these regions in the corresponding projection vector will be 0.
    msvd = get_voxeldata_msvd(cache, sid,experiments_exclude,default_structure_ids,cre)
    msvds[sid]  = msvd

512
703
1089
1097
315
313
354
698
771
803
477
549


In [4]:
#get dictionaries of creline and leaf by experiment
creline = get_cre_status(data_info, msvds)
with open('data/info/leafs.pickle', 'rb') as handle:
    leafs = pickle.load(handle)
    
#get dictionary of minor structures for each experiment in each major division
#major division segregation is legacy code but convenient for fast cross validation in major division model
experiments_minor_structures = get_minorstructure_dictionary(msvds, data_info)

#get leaves in ontological order.  Where leafs don't exist, uses summary structure
ontological_order_leaves = get_leaves_ontologicalorder(msvd, ontological_order)

#Key isn't affected by which experiment we choose. This allows default masking to be inherited from the AllenSDK.
key = list(msvd.experiments.keys())[0]

#Identify keys denoting which voxels correspond to which structure in the ipsi and contra targets.
contra_targetkey = msvd.experiments[list(msvd.experiments.keys())[0]].projection_mask.get_key(structure_ids=ontological_order_leaves, hemisphere_id=1)
ipsi_targetkey = msvd.experiments[list(msvd.experiments.keys())[0]].projection_mask.get_key(structure_ids=ontological_order_leaves, hemisphere_id=2)
#contra_key = msvd.experiments[key].projection_mask.get_key(structure_ids=ontological_order, hemisphere_id=1)
#ipsi_key = msvd.experiments[key].projection_mask.get_key(structure_ids=ontological_order, hemisphere_id=2)

#get average intensities of projection structures given ipsi and contra keys
source_key = ontological_order #only relevant here when injection needs to be unionized, but currently a required argument
msvds = get_regionalized_normalized_data(msvds,cache, source_key,ipsi_targetkey,contra_targetkey)

#wt_2ormore = get_wt_inds(creline)

In [5]:
#Set gammas for crossvalidation
gammas = np.asarray([0.1,.5,1,2,10])

for sid in major_structure_ids:
    print(sid)
    msvds[sid].loocv_predictions_all = get_loocv_predictions_code(projections = msvds[sid].regional_projection_vcount_norm_renorm, 
                                                                 centroids = msvds[sid].centroids,
                                                                 gammas = gammas)    
    msvds[sid].loocv_predictions_leaf = get_loocv_predictions_code(projections = msvds[sid].reg_proj_vcount_norm_renorm, 
                                                                 centroids = msvds[sid].centroids,
                                                                 gammas = gammas,
                                                                 codes = np.expand_dims(leafs[sid], axis = 1))
    msvds[sid].loocv_predictions_cre = get_loocv_predictions_code(projections = msvds[sid].reg_proj_vcount_norm_renorm, 
                                                                 centroids = msvds[sid].centroids,
                                                                 gammas = gammas,
                                                                 codes=np.expand_dims(np.asarray(creline[sid], dtype = str), axis = 1))
    msvds[sid].loocv_predictions_creleaf = get_loocv_predictions_code(projections = msvds[sid].reg_proj_vcount_norm_renorm, 
                                                                 centroids = msvds[sid].centroids,
                                                                 gammas = gammas,
                                                                 codes=np.asarray(np.vstack([leafs[sid], creline[sid]]), dtype = str).transpose())
    
#this function is not suitable for generating predictions based off of a class that shouldn't itself be used for the prediction
#the codes are used to segreate
#we need to have two codes.
#the first code denotes the different levels of model
#the second should be a dictionary with key of the first code saying whos model we should use

512


AttributeError: 'VoxelDataset' object has no attribute 'regional_projection_vcount_norm_renorm'

In [None]:
#every experiment can be specified by an include 1 0 in model and include in evaluation 1 0 

In [None]:

def get_loocv_predictions_code(projections, centroids, gammas, codes=None):

    ngam = len(gammas)
    if codes is None:
        # print('yehh')
        codes = np.zeros((projections.shape[0], 1))

    # print(codes)
    unique_codes = np.unique(codes, axis=1)
    predictions = np.empty(np.append(ngam, projections.shape))

    for c in range(len(unique_codes)):
        print(unique_codes[c])
        code_ind = np.where(codes == unique_codes[c])[0]
        if len(code_ind) > 1:
            predictions[:, code_ind] = np.asarray(
                [get_loocv_predictions(projections[code_ind], centroids[code_ind], gammas[g]) for g in range(ngam)])

    return (predictions)



In [19]:
#codes are the levels for models and evals
#modelcodes are to be included in the model
#evalcodes are to be included in evalcodes
def generate_model_eval_indices(codes, modelcodes = None, evalcodes = None):
    
    if modelcodes is None:
        modelcodes = np.unique(np.asarray(list(codes.values())))
    if evalcodes is None:
        evalcodes = np.unique(np.asarray(list(codes.values())))
        
    for sid in np.asarray(list(codes.keys())):
        nexp = len(codes)
        #nmodels = len(modelcodes[sid])
        model_indices = np.zeros((nexp)) #np.asarray((nmodels, nexp))
        eval_indices = np.zeros((nexp)) #np.asarray((nmodels, nexp))
        #for c in range(nmodels):
        model_indices[np.where(np.isin(codes, modelcodes))[0]] = 1
        eval_indices[np.where(np.isin(codes, evalcodes))[0]] = 1
    
    return(model_indices, eval_indices)

In [None]:
def get_indices(codelist, testcode):
    
    output = np.zeros(len(codelist))
    output[np.where(np.isin(codelist, testcode))[0]] = 1
    
    return(output)

In [None]:
# def get_indices(codelist, testcode, codelist2, testcode2):
    
#     output = np.zeros(len(codelist))
#     output[np.where(np.isin(codelist, testcode))[0]] = 1
    
#     return(output)

In [None]:
for sid in major_structure_ids

    creline_options = np.unique(creline[sid])c
    for creline_option in creline_options:
        model_indices = get_indices(creline[sid], creline_option)
        eval_indices = get_indices(creline[sid], creline[sid][i])

In [None]:
#can

In [None]:
model_indices is matrix of dimension nmodels x nexperiments
eval_indices is matrix of dimension nmodels x nexperiments
each leaf gets its own model

In [None]:
#get the indices of different leafs
eval_indices = get_indices(leafs[sid])
#get the indices of the wts in that leaf
model_indices = get_indices2(creline[sid], np.asarray(['C57BL/6J']),leafs[sid])

In [41]:
creleafs = {}
creleafs_merged = {}
for sid in major_structure_ids:
    creleafs[sid] = np.asarray(np.vstack([leafs[sid], creline[sid]]), dtype = str).transpose()
    creleafs_merged[sid] = [creleafs[sid][:,0][i]  + creleafs[sid][:,1][i] for i in range(creleafs[sid].shape[0])]

In [287]:
indices_leaf = {}
indices_wtinleaf = {}
indices_wtleaf = {}
indices_summary = {}
indices_summaryinleaf = {}
indices_major = {}
indices_majorinleaf = {}
indices_leaf2ormore = {}
indices_wtinleaf2ormore = {}
for sid in major_structure_ids:
    
    #wt_leaf on leaf

    #get the indices of experiments sharing leafs (nmodels is number of leafs)
    indices_leaf[sid] = get_indices(leafs[sid]) #eval_indices
    indices_creleaf = get_indices(leafs[sid])
    
    #get the indices of the wts in the leaf (nmodels is number of leafs)
    indices_wtinleaf[sid] = get_indices2(creline[sid], np.asarray(['C57BL/6J']),leafs[sid]) #model_indices
    
    #get indices of experiments sharing summary structure x cre combination (nmodel is number of cre x leaf combinations)
    #indices_wtleaf[sid] = get_indices(creleafs_merged[sid])
    
    #get indices of experiments sharing summary structure(nmodel is number of summary structures)
    indices_summary[sid] = get_indices(experiments_minor_structures[sid])
    
    #get indices of experiments sharing major structure(nmodel is number of summary structures)
    indices_major[sid] = np.ones((experiments_minor_structures[sid].shape[0],1))
    
    #get indices of experiments sharing same major structure as a leaf (nmodel is number of leafs)
    indices_majorinleaf[sid] = get_indices2(np.ones(len(leafs[sid])), np.asarray([1]),leafs[sid]) #model_indices
    
    #get indices of experiments in same summary structure as a leaf (nmodel is number of leafs)
    indices_summaryinleaf[sid] = get_indices_summaryinleaf(experiments_minor_structures[sid], leafs[sid])
    
    #evaluate models on leafs
    #model_indices, eval_indices = indices_majorinleaf, indices_leaf
    #model_indices, eval_indices = indices_summaryinleaf, indices_leaf
    #this is the most restrictive of these 3, so eval_indices_leaf2ormore is the smallest eval set
    indices_leaf2ormore[sid] = screen_index_matrices(indices_leaf[sid], indices_leaf[sid])
    
    indices_wtinleaf2ormore[sid] = screen_index_matrices(indices_wtinleaf[sid], indices_wtinleaf[sid])
    #need to find explicitly so can be used in other experiments
    #how do we line up with leaf model...
    #reduced modelset.  also
    #indices_wtinleaf_reduced, indices_leaf_reduced = screen_index_matrices2(indices_wtinleaf[sid], indices_leaf[sid])
    
    #if i want to only use indices_leaf_reduced as target in a different experiment, do i need to eliminate  
    #
    
    
    #leaf on leaf
    #eval_indices = indices_leaf
    #model_indices = indices_leaf

    #leaf on wt_leaf
    #model_indices = indices_leaf
    #eval_indices = indices_wtinleaf

    #summary on summary
    #model_indices = get_indices(experiments_minor_structures[sid])
    #eval_indices = get_indices(experiments_minor_structures[sid])

    #creleaf precise (nmodels is number of creleaf combinations)
    
    #indices_wtleaf = get_indices(creleafs[sid])

In [288]:
for sid in major_structure_ids:
    print(sid)
    msvds[sid].loocv_predictions_major_leaf2 = get_nwloocv_predictions_multimodel_merge(msvds[sid].reg_proj_vcount_norm_renorm,
                                                                                        msvds[sid].centroids,
                                                                                        gammas, 
                                                                                        indices_majorinleaf[sid], 
                                                                                        indices_leaf2ormore[sid])
    msvds[sid].loocv_predictions_summary_leaf2 = get_nwloocv_predictions_multimodel_merge(msvds[sid].reg_proj_vcount_norm_renorm, 
                                                                                          msvds[sid].centroids, 
                                                                                          gammas, 
                                                                                          indices_summaryinleaf[sid], 
                                                                                          indices_leaf2ormore[sid])
    msvds[sid].loocv_predictions_leaf_leaf2 = get_nwloocv_predictions_multimodel_merge(msvds[sid].reg_proj_vcount_norm_renorm, 
                                                                                       msvds[sid].centroids, 
                                                                                       gammas, 
                                                                                       indices_leaf[sid], 
                                                                                       indices_leaf2ormore[sid])
    msvds[sid].loocv_predictions_creleaf_leaf2 = get_nwloocv_predictions_multimodel_merge(msvds[sid].reg_proj_vcount_norm_renorm, 
                                                                                       msvds[sid].centroids, 
                                                                                       gammas, 
                                                                                       indices_wtinleaf[sid], 
                                                                                       indices_leaf2ormore[sid])
    #say we wish to predict wild types
    msvds[sid].loocv_predictions_creleaf_creleaf2 = get_nwloocv_predictions_multimodel_merge(msvds[sid].reg_proj_vcount_norm_renorm, 
                                                                                       msvds[sid].centroids, 
                                                                                       gammas, 
                                                                                       indices_wtinleaf2ormore[sid], 
                                                                                        indices_wtinleaf2ormore[sid])
                                                                                          
    msvds[sid].loocv_predictions_leaf_creleaf2 = get_nwloocv_predictions_multimodel_merge(msvds[sid].reg_proj_vcount_norm_renorm, 
                                                                                       msvds[sid].centroids, 
                                                                                       gammas, 
                                                                                       indices_leaf[sid], 
                                                                                        indices_wtinleaf2ormore[sid])
    #for comparison with prev exp
    msvds[sid].loocv_predictions_leaf_creleaf = get_nwloocv_predictions_multimodel_merge(msvds[sid].reg_proj_vcount_norm_renorm, 
                                                                                       msvds[sid].centroids, 
                                                                                       gammas, 
                                                                                       indices_leaf[sid], 
                                                                                        indices_wtinleaf[sid])
    #predict all leafs using cre in that leaf 
    #indices_leaf2ormore is a reduced set of indices_leaf so that we dont have the evaluator be the only predictor
    #if we are evaluating leaf2ormore with cres we wont have this issue any more than already.
    #however, we have to not evaluate when there are no cres in the leaf

512
703






1089






1097




315


































313




354




698




771
803




477




549




In [292]:
nwloocv_leaf_leaf2 = {}
nwloocv_summary_leaf2 = {}
nwloocv_major_leaf2 = {}
nwloocv_wtleaf_wtleaf2 = {}
nwloocv_leaf_wtleaf2 = {}
nwloocv_leaf_wtleaf = {}
reg_proj_vcount_norm_renorms= {}

for sid in major_structure_ids:
    nwloocv_leaf_leaf2[sid] = msvds[sid].loocv_predictions_leafleaf2
    reg_proj_vcount_norm_renorms[sid ] = msvds[sid].reg_proj_vcount_norm_renorm
    nwloocv_summary_leaf2[sid]= msvds[sid].loocv_predictions_summaryleaf2
    nwloocv_major_leaf2[sid] = msvds[sid].loocv_predictions_majorleaf2
    nwloocv_wtleaf_wtleaf2 = msvds[sid].loocv_predictions_wtleaf_wtleaf2
    nwloocv_leaf_wtleaf2 = msvds[sid].loocv_predictions_leaf_wtleaf2
    nwloocv_leaf_wtleaf = msvds[sid].loocv_predictions_leaf_wtleaf
    
inds_good = {}
for sid in major_structure_ids:
    inds_good[sid] = np.asarray(list(range(msvds[sid].injections.shape[0]))) 

a= [list(range(5))]
keys = np.asarray(list(itertools.product(*a)))

AttributeError: 'VoxelDataset' object has no attribute 'loocv_predictions_wtleaf_wtleaf2'

In [265]:
#get where we actually modelled
def get_eval_indices(eval_index_matrices):
    eval_indices = {}
    major_structure_ids = np.asarray(list(eval_index_matrices.keys()))
    for sid in major_structure_ids:
        eval_indices[sid] = np.where(eval_index_matrices[sid].sum(axis = 0) > 0)[0]
    return(eval_indices)    

In [290]:
eval_indices_leaf2ormore = get_eval_indices(indices_leaf2ormore)
eval_indices_wtinleaf2ormore = get_eval_indices(indices_wtinleaf2ormore)
eval_indices_wtinleaf = get_eval_indices(indices_wtinleaf)

In [271]:
losses_finest_finest2 = get_loss(reg_proj_vcount_norm_renorms, nwloocv_leaf_leaf2,pred_ind = eval_indices_leaf2ormore, true_ind = eval_indices_leaf2ormore,keys = keys)
losses_summary_finest2 = get_loss(reg_proj_vcount_norm_renorms, nwloocv_leaf_leaf2,pred_ind = eval_indices_leaf2ormore, true_ind = eval_indices_leaf2ormore,keys = keys)
losses_major_finest2 = get_loss(reg_proj_vcount_norm_renorms, nwloocv_leaf_leaf2,pred_ind = eval_indices_leaf2ormore, true_ind = eval_indices_leaf2ormore,keys = keys)

losses_wtleaf_wtleaf2 = get_loss(reg_proj_vcount_norm_renorms, nwloocv_wtleaf_wtleaf2,pred_ind = eval_indices_wtinleaf2ormore, true_ind = eval_indices_wtinleaf2ormore,keys = keys)
losses_leaf_wtleaf2 = get_loss(reg_proj_vcount_norm_renorms, nwloocv_leaf_wtleaf2,pred_ind = eval_indices_wtinleaf2ormore, true_ind = eval_indices_wtinleaf2ormore,keys = keys)
losses_leaf_wtleaf = get_loss(reg_proj_vcount_norm_renorms, nwloocv_leaf_wtleaf,pred_ind = eval_indices_wtinleaf, true_ind = eval_indices_wtinleaf,keys = keys)




In [274]:
best_gamma_finest_finest2 = get_best_hyperparameters(losses_finest_finest2,keys)
best_gamma_summary_finest2 = get_best_hyperparameters(losses_summary_finest2,keys)
best_gamma_major_finest2 = get_best_hyperparameters(losses_major_finest2,keys)
best_gamma_wtleaf_wtleaf2 = get_best_hyperparameters(losses_wtleaf_wtleaf2,keys)
best_gamma_leaf_wtleaf2 = get_best_hyperparameters(losses_leaf_wtleaf2,keys)
best_gamma_leaf_wtleaf = get_best_hyperparameters(losses_leaf_wtleaf,keys)

meanloss_nw_finest_finest2 = get_loss_best_hyp(losses_finest_finest2, best_gamma_finest_finest2)
meanloss_nw_summary_finest2 = get_loss_best_hyp(losses_summary_finest2, best_gamma_summary_finest2)
meanloss_nw_major_finest2 = get_loss_best_hyp(losses_major_finest2, best_gamma_major_finest2)
meanloss_nw_wtleaf_wtleaf2 = get_loss_best_hyp(losses_wtleaf_wtleaf2, best_gamma_wtleaf_wtleaf2)
meanloss_nw_leaf_wtleaf2 = get_loss_best_hyp(losses_leaf_wtleaf2, best_gamma_leaf_wtleaf2)
meanloss_nw_leaf_wtleaf = get_loss_best_hyp(losses_leaf_wtleaf, best_gamma_leaf_wtleaf)


0
1
2
3
4
5
6
7
8
9
10
11


In [275]:
mean_nw_all

array([0.78692754, 0.29068181, 0.35803372, 0.27034826, 0.44147834,
       0.49263346, 0.31888494, 0.3827807 , 0.34821835, 0.49121184,
       0.64899939, 0.3360901 ])

In [125]:
from sklearn.metrics.pairwise import pairwise_kernels

def get_weights(eval_centroids, model_centroids, gamma):
    weights = pairwise_kernels(X=eval_centroids, Y=model_centroids, metric='rbf', gamma=gamma, filter_params=True)
    return (weights)

In [282]:
from mcmodels.regressors.nonparametric.nadaraya_watson import get_weights

def get_indices(ids):

    ids_unique = np.unique(ids)
    output = np.zeros((len(ids_unique), len(ids)), dtype = int)
    for i in range(len(ids_unique)):
        output[i,np.where(ids == ids_unique[i])[0] ] = 1
    return(output)

#get indices of firstlist in firstlisttest in categories defined by secondlist
def get_indices2(firstlist, firstlisttest, secondlist):
    
    sl_unique = np.unique(secondlist)
    output = np.zeros((len(sl_unique), len(secondlist)), dtype = int)
    for i in range(len(sl_unique)):
        output[i,np.intersect1d(np.where(np.isin(firstlist,firstlisttest))[0], np.where(secondlist == secondlist[i])[0])] = 1
    return(output)

#nmodels = nleafs
#populate each with experiments that share summary structure
def get_indices_summaryinleaf(summarylist , leaflist):
    
    nexp = len(leaflist)
    leaf_unique = np.unique(leaflist)
    output = np.zeros((len(leaf_unique), nexp), dtype = int)
    
    for i in range(len(leaf_unique)):
        
        summary = summarylist[np.where(leaflist == leaf_unique[i])[0]][0]
        output[i,np.where(summarylist == summary)[0]] = 1
        
        
    return(output)

#get predictions at all eval_indices using model_indices
#if an eval_indices is also a model indice, leave it out of the model
#if a model index is not an eval index, it never gets left out
def get_nwloocv_predictions_singlemodel(projections, centroids, gamma, model_indices, eval_indices):
    
#def get_loocv_predictions(projections, centroids, gamma):
    
#     projections = np.asarray(projections, dtype=np.float32)
#     neval = len(eval_indices)
#     #nexp = centroids.shape[0]
#     predictions = np.empty(projections.shape)
#     weights = get_weights(centroids, gamma)
    
#     for i in range(neval):
#         otherindices = np.setdiff1d(model_indices, eval_indices[i])
#         #this order of operations is the fastest I found
#         weights_i = weights[eval_indices[i]][model_indices] / weights[model_indices[i]][otherindices].sum()
#         weights_i[i] = 0
#         weights_i = np.asarray(weights_i, dtype=np.float32)
#         pred = np.dot(weights_i, projections[model_indices])
#         predictions[i] = pred

    eval_index_val = np.where(eval_indices == 1)[0]
    model_index_val = np.where(model_indices == 1)[0]
    
    projections = np.asarray(projections, dtype=np.float32)
    
    nmod_ind = len(model_index_val)
    neval = len(eval_index_val)
    #nexp = centroids.shape[0]
    predictions = np.empty(projections.shape)
    #print(model_index_val.shape, eval_index_val.shape)

    if len(model_index_val) > 0 and  len(eval_index_val) > 0:
        weights = pairwise_kernels(centroids[model_index_val], centroids[eval_index_val], metric='rbf', gamma=gamma, filter_params=True) #get_weights(centroids, gamma)
        for i in range(neval):
            matchindex = np.where(model_index_val == eval_index_val[i])[0]
            otherindices = np.setdiff1d(np.asarray(list(range(nmod_ind))), matchindex)         
            #this order of operations is the fastest I found
            weights_i = weights[:,i] / weights[:,i][otherindices].sum()
            weights_i[matchindex] = 0
            weights_i = np.asarray(weights_i, dtype=np.float32)
            pred = np.dot(weights_i, projections[model_index_val])
            predictions[eval_index_val[i]] = pred

        
    return(predictions)    

def get_nwloocv_predictions_multimodel(projections, centroids, gammas, model_index_matrix, eval_index_matrix):
    

    
    ntargets = projections.shape[1]
    nexp = projections.shape[0]
    nmodels = model_index_matrix.shape[0]
    ngammas = len(gammas)
    
    projections = np.asarray(projections, dtype=np.float32)
    predictions = np.empty((nmodels, ngammas, nexp, ntargets))
    
    
    for m in range(nmodels):
        #print('m', m, len(np.where(model_index_matrix[m] ==1)[0]))
        predictions[m] = np.asarray([get_nwloocv_predictions_singlemodel(projections, centroids, gammas[g], model_index_matrix[m], eval_index_matrix[m]) for g in range(ngammas)])
    
    return(predictions)  

def get_nwloocv_predictions_multimodel_merge(projections, centroids, gammas, model_index_matrix, eval_index_matrix):
    
    predictions_unmerged = get_nwloocv_predictions_multimodel(projections, centroids, gammas, model_index_matrix, eval_index_matrix)
    predictions_merged = combine_predictions(predictions_unmerged, eval_index_matrix)
    
    return(predictions_merged)
#we should not pass model_index_matrices that are identical to eval_index_matrices and have only 1 element per model
#can do automatically in the cross validation code but would rther do it explicitly to ensure identical indexing b/w experiments
#1 model leads to removing the model index from eval indices
#should never have no elements in model_indices
def screen_indices(model_indices, eval_indices):
    
    eval_indices2 = eval_indices.copy()
    mod_loc = np.where(model_indices == 1)[0]
    if len(mod_loc) == 1:
        eval_indices2[mod_loc] = 0
    return(eval_indices2)

#this will not result in certain models having no indices, but could result in an empty eval index.  cactch later
#this will result in certain indices having no prediction.  this is fine.
#can merge (sum) the index matrix to see where predictions are actually generated
def screen_index_matrices(model_index_matrices, eval_index_matrices):
    
    nmodels = model_index_matrices.shape[0]
    eval_index_matrices2 = eval_index_matrices.copy()
    for m in range(nmodels):
        eval_index_matrices2[m] = screen_indices(model_index_matrices[m], eval_index_matrices[m])
    
    return(eval_index_matrices2)

#need code for removing experiments that have no model
#this can happen when the model set is a subset of the evaluation set.
#we will therefore generate predictions for a subset
#given a leaf is included, the eval set is the same
#however, we want to remove evals in leaves we don't have a wt for... of course one could say we are doing worse...
#but we also have a fewer number of models
def screen_index_matrices2(model_index_matrices, eval_index_matrices):
    
    nmodels = model_index_matrices.shape[0]
    include_per_model = model_index_matrices.sum(axis= 1)
    to_include = np.where(include_per_model > 0)[0]
    
    model_index_matrices2 = model_index_matrices
    eval_index_matrices2 = eval_index_matrices[to_include]
    model_index_matrices2 = model_index_matrices[to_include]
    
    return(model_index_matrices2, eval_index_matrices2)

In [188]:
a= np.zeros(2)
b = a.copy()
b[1] = 1.
print(a)

[0. 0.]


In [None]:
msvds[sid].loocv_predictions_model = get_nwloocv_predictions_multimodel(projections = msvds[sid].reg_proj_vcount_norm_renorm, 
                                                                 centroids = msvds[sid].centroids,
                                                                 gammas = gammas,
                                                                model_indices = np.where(model_indices[c] == 1)[0]
                                                                eval_indices = np.where(eval_indices[c] == 1)[0])
                                                                 #codes=np.asarray(np.vstack([leafs[sid], creline[sid]]), dtype = str).transpose())


In [98]:
def combine_predictions(predictions, eval_index_matrix):
    
    nmodels, ngammas, nexp, ntargets = predictions.shape
    combined_predictions = np.empty((ngammas, nexp, ntargets))
    for m in range(nmodels):
        combined_predictions[:,np.where(eval_index_matrix[m] == 1)[0]] = predictions[m][:,np.where(eval_index_matrix[m] == 1)[0]]
        
    return(combined_predictions)

In [None]:
#get indices where creline is wt
get_indices(creleafs, creline, wt)

In [None]:
nmodels = number of leafs

In [None]:
eval_indicator = leaf
model_indicator = creleafs[leaf][wt]

In [None]:
codes = leafs
for leaf in leafs[sid]:
    modelcodes = np.asarray([leaf])
    evalcodes = np.asarray([leaf])
    generate_model_eval_indices(codes, modelcodes = None, evalcodes = None)

In [35]:
creleaf = np.asarray(np.vstack([leafs[sid], creline[sid]]), dtype = str).transpose()

In [None]:
#generate_model_eval_indices(leafs[sid], leafs[sid][i], leafs[sid][i])

for i in range(nexp):
    #get the model and evaluation indices for the model of the ith creleaf (i.e. sharing its code)
    #and evaluate on the ith leaf
    generate_model_eval_indices(creleafs[sid], creleafs[sid][i], leafs[sid], leafs[sid][i])

In [34]:
with open('data/info/leafs.pickle', 'rb') as handle:
    leafs = pickle.load(handle)
    

In [None]:
def merge_models(modelcodes):
    
    
    generate_model_eval_indices(codes, modelcodes = None, evalcodes = leafs[sid])

In [20]:
wt = np.asarray(['C57BL/6J'])
leafs
wt_wt_inds = generate_model_eval_indices(creline[sid], wt, wt)
#leafs wil
wt_wt_inds_leafindices = generate_model_eval_indices(creline, wt, wt)

all_all_inds = generate_model_eval_indices(codes)
all_wt_inds = generate_model_eval_indices(codes, evalcodes = wt)
wt_all_inds = generate_model_eval_indices(codes, modelcodes = wt)

NameError: name 'codes' is not defined

In [None]:
def generate_model_eval_indices(codes, codescompare):
    
    nmodels = len(modelcodes[sid])
    for c in range(nmodels):

In [None]:

def get_loocv_predictions_code(projections, centroids, gammas, codes=None):

    ngam = len(gammas)
    if codes is None:
        # print('yehh')
        codes = np.zeros((projections.shape[0], 1))

    # print(codes)
    unique_codes = np.unique(codes, axis=1)
    predictions = np.empty(np.append(ngam, projections.shape))
    
    #unique_codes should be array of binary variables indicating model_indices and eval_indices for a given model.
    for c in range(len(unique_codes)):
        print(unique_codes[c])
        code_ind = np.where(codes == unique_codes[c])[0]
        #if len(model_indices) > 1:
        predictions[:, eval_indices] = np.asarray([get_loocv_predictions(projections, centroids, gammas[g], model_indices, eval_indices) for g in range(ngam)])

    return (predictions)



In [58]:
from itertools import product

In [60]:
list(product(list(range(2)), list(range(3))))

[(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)]

In [53]:
list(enumerate([range(5), range(2)]))

[(0, range(0, 5)), (1, range(0, 2))]

In [None]:
def get_loocv_predictions_code(projections, centroids, gammas, codes = None):

    if codes is None:
        #print('yehh')
        codes = np.zeros((projections.shape[0], 1))
        
    #print(codes)
    unique_codes = np.unique(codes, axis = 1)
    predictions = np.empty(np.append(ngam,projections.shape))
    
    for c in range(len(unique_codes)):
        print(unique_codes[c])
        code_ind = np.where(codes == unique_codes[c])[0]
        if len(code_ind) >1:
            predictions[:,code_ind] = np.asarray([get_loocv_predictions(projections[code_ind], centroids[code_ind], gammas[g]) for g in range(ngam)])
    
    return(predictions)

In [None]:
    #should a model be (everyone his own cre)
    #or, a single set of indices
    #if we want to merge models to evaluate loss of a compound model, we should do that at a later state
    #do this for leafs as well
    nmodels = model_indices.shape[0]
    msvds[sid].loocv_predictions_model = np.empty((nmodels, ntargets))
    for c in range(nmodels):
        msvds[sid].loocv_predictions_model[c] = get_loocv_predictions_modeleval(projections = msvds[sid].reg_proj_vcount_norm_renorm, 
                                                                 centroids = msvds[sid].centroids,
                                                                 gammas = gammas,
                                                                model_indices = np.where(model_indices[c] == 1)[0]
                                                                eval_indices = np.where(eval_indices[c] == 1)[0])
                                                                 #codes=np.asarray(np.vstack([leafs[sid], creline[sid]]), dtype = str).transpose())
