In [2]:
%load_ext autoreload
%autoreload 2

import os
import numpy as np
import pandas as pd 
import sys
import pickle
import itertools
import seaborn as sns
import matplotlib.pyplot as plt

workingdirectory = os.popen('git rev-parse --show-toplevel').read()[:-1]
sys.path.append(workingdirectory)
os.chdir(workingdirectory)

import allensdk.core.json_utilities as ju
from allensdk.core.mouse_connectivity_cache import MouseConnectivityCache

from mcmodels.core import Mask,ModelData,VoxelModelCache
from mcmodels.core.utils import get_structure_id, get_ordered_summary_structures,get_minorstructures,get_loss_paper
from mcmodels.utils import nonzero_unique, unionize
from mcmodels.core.experiment import get_voxeldata_msvd
from mcmodels.models.crossvalidation import get_best_hyperparameters,get_loss_best_hyp,get_loss, get_loocv_predictions_code
from mcmodels.core.utils import get_cre_status,get_minorstructure_dictionary,get_leaves_ontologicalorder
from mcmodels.core.utils import get_regionalized_normalized_data
from mcmodels.core.utils import get_connectivity
from mcmodels.core.utils import get_ontological_order_leaf
from mcmodels.core.utils import get_nw_loocv,get_wt_inds
from mcmodels.core.utils import get_countvec, get_twoormore



In [3]:
from sklearn.metrics.pairwise import pairwise_kernels
from mcmodels.regressors.nonparametric.nadaraya_watson import get_weights

#get where we actually modelled
def get_eval_indices(eval_index_matrices):
    eval_indices = {}
    major_structure_ids = np.asarray(list(eval_index_matrices.keys()))
    for sid in major_structure_ids:
        eval_indices[sid] = np.where(eval_index_matrices[sid].sum(axis = 0) > 0)[0]
    return(eval_indices)    



def get_weights(eval_centroids, model_centroids, gamma):
    weights = pairwise_kernels(X=eval_centroids, Y=model_centroids, metric='rbf', gamma=gamma, filter_params=True)
    return (weights)


def get_indices(ids):

    ids_unique = np.unique(ids)
    output = np.zeros((len(ids_unique), len(ids)), dtype = int)
    for i in range(len(ids_unique)):
        output[i,np.where(ids == ids_unique[i])[0] ] = 1
    return(output)

#get indices of firstlist in firstlisttest in categories defined by secondlist
def get_indices2(firstlist, firstlisttest, secondlist):
    
    sl_unique = np.unique(secondlist)
    output = np.zeros((len(sl_unique), len(secondlist)), dtype = int)
    for i in range(len(sl_unique)):
        output[i,np.intersect1d(np.where(np.isin(firstlist,firstlisttest))[0], np.where(secondlist == sl_unique[i])[0])] = 1
    return(output)

#nmodels = nleafs
#populate each with experiments that share summary structure
def get_indices_summaryinleaf(summarylist , leaflist):
    
    nexp = len(leaflist)
    leaf_unique = np.unique(leaflist)
    output = np.zeros((len(leaf_unique), nexp), dtype = int)
    
    for i in range(len(leaf_unique)):
        
        summary = summarylist[np.where(leaflist == leaf_unique[i])[0]][0]
        output[i,np.where(summarylist == summary)[0]] = 1
        
        
    return(output)

#get predictions at all eval_indices using model_indices
#if an eval_indices is also a model indice, leave it out of the model
#if a model index is not an eval index, it never gets left out
def get_nwloocv_predictions_singlemodel(projections, centroids, gamma, model_indices, eval_indices):
    
#def get_loocv_predictions(projections, centroids, gamma):
    
#     projections = np.asarray(projections, dtype=np.float32)
#     neval = len(eval_indices)
#     #nexp = centroids.shape[0]
#     predictions = np.empty(projections.shape)
#     weights = get_weights(centroids, gamma)
    
#     for i in range(neval):
#         otherindices = np.setdiff1d(model_indices, eval_indices[i])
#         #this order of operations is the fastest I found
#         weights_i = weights[eval_indices[i]][model_indices] / weights[model_indices[i]][otherindices].sum()
#         weights_i[i] = 0
#         weights_i = np.asarray(weights_i, dtype=np.float32)
#         pred = np.dot(weights_i, projections[model_indices])
#         predictions[i] = pred

    eval_index_val = np.where(eval_indices == 1)[0]
    model_index_val = np.where(model_indices == 1)[0]
    
    projections = np.asarray(projections, dtype=np.float32)
    
    nmod_ind = len(model_index_val)
    neval = len(eval_index_val)
    #nexp = centroids.shape[0]
    predictions = np.empty(projections.shape)
    #print(model_index_val.shape, eval_index_val.shape)

    if len(model_index_val) > 0 and  len(eval_index_val) > 0:
        weights = pairwise_kernels(centroids[model_index_val], centroids[eval_index_val], metric='rbf', gamma=gamma, filter_params=True) #get_weights(centroids, gamma)
        for i in range(neval):
            matchindex = np.where(model_index_val == eval_index_val[i])[0]
            otherindices = np.setdiff1d(np.asarray(list(range(nmod_ind))), matchindex)         
            #this order of operations is the fastest I found
            weights_i = weights[:,i] / weights[:,i][otherindices].sum()
            weights_i[matchindex] = 0
            weights_i = np.asarray(weights_i, dtype=np.float32)
            pred = np.dot(weights_i, projections[model_index_val])
            predictions[eval_index_val[i]] = pred

        
    return(predictions)    

def get_nwloocv_predictions_multimodel(projections, centroids, gammas, model_index_matrix, eval_index_matrix):
    

    
    ntargets = projections.shape[1]
    nexp = projections.shape[0]
    nmodels = model_index_matrix.shape[0]
    ngammas = len(gammas)
    
    projections = np.asarray(projections, dtype=np.float32)
    predictions = np.empty((nmodels, ngammas, nexp, ntargets))
    
    
    for m in range(nmodels):
        #print('m', m, len(np.where(model_index_matrix[m] ==1)[0]))
        predictions[m] = np.asarray([get_nwloocv_predictions_singlemodel(projections, centroids, gammas[g], model_index_matrix[m], eval_index_matrix[m]) for g in range(ngammas)])
    
    return(predictions)  

def combine_predictions(predictions, eval_index_matrix):
    
    nmodels, ngammas, nexp, ntargets = predictions.shape
    combined_predictions = np.empty((ngammas, nexp, ntargets))
    for m in range(nmodels):
        combined_predictions[:,np.where(eval_index_matrix[m] == 1)[0]] = predictions[m][:,np.where(eval_index_matrix[m] == 1)[0]]
        
    return(combined_predictions)

def get_nwloocv_predictions_multimodel_merge(projections, centroids, gammas, model_index_matrix, eval_index_matrix):
    
    predictions_unmerged = get_nwloocv_predictions_multimodel(projections, centroids, gammas, model_index_matrix, eval_index_matrix)
    print(predictions_unmerged.shape)
    predictions_merged = combine_predictions(predictions_unmerged, eval_index_matrix)
    
    return(predictions_merged)
#we should not pass model_index_matrices that are identical to eval_index_matrices and have only 1 element per model
#can do automatically in the cross validation code but would rther do it explicitly to ensure identical indexing b/w experiments
#1 model leads to removing the model index from eval indices
#should never have no elements in model_indices
def screen_indices(model_indices, eval_indices):
    
    eval_indices2 = eval_indices.copy()
    mod_loc = np.where(model_indices == 1)[0]
    if len(mod_loc) == 1:
        eval_indices2[mod_loc] = 0
    return(eval_indices2)

#this will not result in certain models having no indices, but could result in an empty eval index.  cactch later
#this will result in certain indices having no prediction.  this is fine.
#can merge (sum) the index matrix to see where predictions are actually generated
def screen_index_matrices(model_index_matrices, eval_index_matrices):
    
    nmodels = model_index_matrices.shape[0]
    eval_index_matrices2 = eval_index_matrices.copy()
    for m in range(nmodels):
        eval_index_matrices2[m] = screen_indices(model_index_matrices[m], eval_index_matrices[m])
    
    return(eval_index_matrices2)

#need code for removing experiments that have no model
#this can happen when the model set is a subset of the evaluation set.
#we will therefore generate predictions for a subset
#given a leaf is included, the eval set is the same
#however, we want to remove evals in leaves we don't have a wt for... of course one could say we are doing worse...
#but we also have a fewer number of models
def screen_index_matrices2(model_index_matrices, eval_index_matrices):
    
    nmodels = model_index_matrices.shape[0]
    include_per_model = model_index_matrices.sum(axis= 1)
    to_include = np.where(include_per_model > 0)[0]
    
    model_index_matrices2 = model_index_matrices
    eval_index_matrices2 = eval_index_matrices[to_include]
    model_index_matrices2 = model_index_matrices[to_include]
    
    return(model_index_matrices2, eval_index_matrices2)

In [4]:
#read data
TOP_DIR = '/Users/samsonkoelle/alleninstitute/mcm_2020/mcm_updated/'
INPUT_JSON = os.path.join(TOP_DIR, 'input_011520.json')
EXPERIMENTS_EXCLUDE_JSON = os.path.join(TOP_DIR, 'experiments_exclude.json')
FILE_DIR = '/Users/samsonkoelle/alleninstitute/mcm_2020/mcm_updated/'
OUTPUT_DIR = os.path.join(FILE_DIR, 'output')

input_data = ju.read(INPUT_JSON)
manifest_file = input_data.get('manifest_file')
manifest_file = os.path.join(TOP_DIR, manifest_file)
experiments_exclude = ju.read(EXPERIMENTS_EXCLUDE_JSON)

#its unclear why the hyperparameters are loaded from the output directory
cache = VoxelModelCache(manifest_file=manifest_file)
major_structures = input_data.get('structures')
major_structure_ids = [get_structure_id(cache, s) for s in major_structures]
data_info = pd.read_excel('/Users/samsonkoelle/alleninstitute/Whole Brain Cre Image Series_curation only.xlsx', 'all datasets curated_070919pull')
data_info.set_index("id", inplace=True)
ontological_order = get_ordered_summary_structures(cache)

mcc = MouseConnectivityCache(manifest_file = '../connectivity/mouse_connectivity_manifest.json')
st = mcc.get_structure_tree()
ai_map = st.get_id_acronym_map()
ia_map = {value: key for key, value in ai_map.items()}

#regionalize voxel model: compare with regional model
#regional parameters
cre = None
eid_set=None
high_res=False
threshold_injection = False

COARSE_STRUCTURE_SET_ID = 2
DEFAULT_STRUCTURE_SET_IDS = tuple([COARSE_STRUCTURE_SET_ID])
tree = cache.get_structure_tree()
default_structures = tree.get_structures_by_set_id(DEFAULT_STRUCTURE_SET_IDS)
default_structure_ids = [st['id'] for st in default_structures if st['id'] != 934]
#cre= True

In [5]:
#load data
msvds = {}
for sid in major_structure_ids:
    print(sid)
    voxel_data = ModelData(cache, sid)
    experiment_ids = voxel_data.get_experiment_ids(experiments_exclude=experiments_exclude, cre=cre)
    experiment_ids = np.asarray(list(experiment_ids))    
    #get injections and projections on the voxel level.
    #Note that a preprocessing screen is applied in AllenSDK to mask at projection and injection boundaries
    #Voxels intensity not in these regions in the corresponding projection vector will be 0.
    msvd = get_voxeldata_msvd(cache, sid,experiments_exclude,default_structure_ids,cre)
    msvds[sid]  = msvd

512
703
1089
1097
315
313
354
698
771
803
477
549


In [6]:
#get dictionaries of creline and leaf by experiment
creline = get_cre_status(data_info, msvds)
with open('data/info/leafs.pickle', 'rb') as handle:
    leafs = pickle.load(handle)
    
#get dictionary of minor structures for each experiment in each major division
#major division segregation is legacy code but convenient for fast cross validation in major division model
experiments_minor_structures = get_minorstructure_dictionary(msvds, data_info)

#get leaves in ontological order.  Where leafs don't exist, uses summary structure
ontological_order_leaves = get_leaves_ontologicalorder(msvd, ontological_order)

#Key isn't affected by which experiment we choose. This allows default masking to be inherited from the AllenSDK.
key = list(msvd.experiments.keys())[0]

#Identify keys denoting which voxels correspond to which structure in the ipsi and contra targets.
#contra_targetkey = msvd.experiments[list(msvd.experiments.keys())[0]].projection_mask.get_key(structure_ids=ontological_order_leaves, hemisphere_id=1)
#ipsi_targetkey = msvd.experiments[list(msvd.experiments.keys())[0]].projection_mask.get_key(structure_ids=ontological_order_leaves, hemisphere_id=2)
contra_targetkey = msvd.experiments[key].projection_mask.get_key(structure_ids=ontological_order, hemisphere_id=1)
ipsi_targetkey = msvd.experiments[key].projection_mask.get_key(structure_ids=ontological_order, hemisphere_id=2)

#get average intensities of projection structures given ipsi and contra keys
source_key = ontological_order #only relevant here when injection needs to be unionized, but currently a required argument
msvds = get_regionalized_normalized_data(msvds,cache, source_key,ipsi_targetkey,contra_targetkey)

#wt_2ormore = get_wt_inds(creline)

In [7]:
indices_leaf = {}
indices_wtinleaf = {}
indices_wtleaf = {}
indices_summary = {}
indices_summaryinleaf = {}
indices_major = {}
indices_majorinleaf = {}
indices_leaf2ormore = {}
indices_wtinleaf2ormore = {}
indices_leaf_reduced = {}
indices_wtinleaf_reduced = {}
for sid in major_structure_ids:
    
    #wt_leaf on leaf

    #get the indices of experiments sharing leafs (nmodels is number of leafs)
    indices_leaf[sid] = get_indices(leafs[sid]) #eval_indices
    indices_creleaf = get_indices(leafs[sid])
    
    #get the indices of the wts in the leaf (nmodels is number of leafs)
    indices_wtinleaf[sid] = get_indices2(creline[sid], np.asarray(['C57BL/6J']),leafs[sid]) #model_indices
    
    #get indices of experiments sharing summary structure x cre combination (nmodel is number of cre x leaf combinations)
    #indices_wtleaf[sid] = get_indices(creleafs_merged[sid])
    
    #get indices of experiments sharing summary structure(nmodel is number of summary structures)
    indices_summary[sid] = get_indices(experiments_minor_structures[sid])
    
    #get indices of experiments sharing major structure(nmodel is number of summary structures)
    indices_major[sid] = np.ones((1,experiments_minor_structures[sid].shape[0]))
    
    #get indices of experiments sharing same major structure as a leaf (nmodel is number of leafs)
    indices_majorinleaf[sid] = get_indices2(np.ones(len(leafs[sid])), np.asarray([1]),leafs[sid]) #model_indices
    
    #get indices of experiments in same summary structure as a leaf (nmodel is number of leafs)
    indices_summaryinleaf[sid] = get_indices_summaryinleaf(experiments_minor_structures[sid], leafs[sid])
    
    #evaluate models on leafs
    #model_indices, eval_indices = indices_majorinleaf, indices_leaf
    #model_indices, eval_indices = indices_summaryinleaf, indices_leaf
    #this is the most restrictive of these 3, so eval_indices_leaf2ormore is the smallest eval set
    indices_leaf2ormore[sid] = screen_index_matrices(indices_leaf[sid], indices_leaf[sid])
    
    indices_wtinleaf2ormore[sid] = screen_index_matrices(indices_wtinleaf[sid], indices_wtinleaf[sid])
    #need to find explicitly so can be used in other experiments
    #how do we line up with leaf model...
    #reduced modelset.  also
    #indices_wtinleaf_reduced, indices_leaf_reduced = screen_index_matrices2(indices_wtinleaf[sid], indices_leaf[sid])
    indices_leaf_reduced[sid], indices_wtinleaf_reduced[sid]  = screen_index_matrices2( indices_leaf2ormore[sid],indices_wtinleaf[sid])

    #if i want to only use indices_leaf_reduced as target in a different experiment, do i need to eliminate  
    #
    
    
    #leaf on leaf
    #eval_indices = indices_leaf
    #model_indices = indices_leaf

    #leaf on wt_leaf
    #model_indices = indices_leaf
    #eval_indices = indices_wtinleaf

    #summary on summary
    #model_indices = get_indices(experiments_minor_structures[sid])
    #eval_indices = get_indices(experiments_minor_structures[sid])

    #creleaf precise (nmodels is number of creleaf combinations)
    
    #indices_wtleaf = get_indices(creleafs[sid])

In [218]:
sid = 512
np.where( creline[sid] == 'C57BL/6J')[0].shape

(12,)

In [219]:
#the increase here doesn't make sense
indices_wtinleaf[sid] = get_indices2(creline[sid], np.asarray(['C57BL/6J']),leafs[sid]) 

In [220]:
indices_wtinleaf[sid].sum(axis = 1).sum()

12

In [221]:
output.sum(axis = 1).sum()

14

In [222]:
indices_wtinleaf[sid].shape

(13, 36)

In [223]:
np.unique(leafs[sid]).shape

(13,)

In [224]:
firstlist = creline[sid]
firstlisttest = np.asarray(['C57BL/6J'])
secondlist = leafs[sid]

In [225]:
    sl_unique = np.unique(secondlist)
    output = np.zeros((len(sl_unique), len(secondlist)), dtype = int)
    for i in range(len(sl_unique)):
        #print(len(np.where(secondlist == secondlist[i])[0]))
        #print(len(np.intersect1d(np.where(np.isin(firstlist,firstlisttest))[0], np.where(secondlist == secondlist[i])[0])))
        #print(secondlist[i])
        print(np.where(secondlist == sl_unique[i])[0])
        #int has length 12 
        output[i,np.intersect1d(np.where(np.isin(firstlist,firstlisttest))[0], np.where(secondlist == sl_unique[i])[0])] = 1


[ 0 12 16]
[ 6 28]
[22]
[10]
[17]
[ 9 19]
[35]
[ 5  7 14 15 30 31]
[3]
[24 25]
[ 4 13 18 29 33]
[ 1 27]
[ 2  8 11 20 21 23 26 32 34]


In [226]:
secondlist

array([  91, 1064, 1091, 1025, 1056, 1007,  846, 1007, 1091,  984,  968,
       1091,   91, 1056, 1007, 1007,   91,  976, 1056,  984, 1091, 1091,
        951, 1091, 1041, 1041, 1091, 1064,  846, 1056, 1007, 1007, 1091,
       1056, 1091,  989])

In [227]:
np.where(secondlist == secondlist[i])[0]

array([ 0, 12, 16])

In [228]:
np.where(np.isin(firstlist,firstlisttest))[0].shape

(12,)

In [229]:
np.where(secondlist == secondlist[i])[0]

array([ 0, 12, 16])

In [231]:
#where are the wildtypes in the leafs
indices_wtinleaf[512].sum(axis = 1)
#godzilla number of wildtypes and where they are in leafs

array([1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2])

In [232]:
#only has 14 'codes'
indices_wtinleaf[sid].sum(axis = 1).sum()

12

In [233]:
indices_wtinleaf2ormore[sid].sum(axis =1)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2])

In [8]:
for sid in major_structure_ids:

    msvds[sid].loocv_predictions_major_major = get_nwloocv_predictions_multimodel_merge(msvds[sid].reg_proj_vcount_norm_renorm, 
                                                                                   msvds[sid].centroids, 
                                                                                   gammas, 
                                                                                   indices_major[sid], 
                                                                                    indices_major[sid])

    print(sid)
    msvds[sid].loocv_predictions_major_leaf2 = get_nwloocv_predictions_multimodel_merge(msvds[sid].reg_proj_vcount_norm_renorm,
                                                                                        msvds[sid].centroids,
                                                                                        gammas, 
                                                                                        indices_majorinleaf[sid], 
                                                                                        indices_leaf2ormore[sid])
    msvds[sid].loocv_predictions_summary_leaf2 = get_nwloocv_predictions_multimodel_merge(msvds[sid].reg_proj_vcount_norm_renorm, 
                                                                                          msvds[sid].centroids, 
                                                                                          gammas, 
                                                                                          indices_summaryinleaf[sid], 
                                                                                          indices_leaf2ormore[sid])
    msvds[sid].loocv_predictions_leaf_leaf2 = get_nwloocv_predictions_multimodel_merge(msvds[sid].reg_proj_vcount_norm_renorm, 
                                                                                       msvds[sid].centroids, 
                                                                                       gammas, 
                                                                                       indices_leaf[sid], 
                                                                                       indices_leaf2ormore[sid])
#     msvds[sid].loocv_predictions_leaf_leaf = get_nwloocv_predictions_multimodel_merge(msvds[sid].reg_proj_vcount_norm_renorm, 
#                                                                                        msvds[sid].centroids, 
#                                                                                        gammas, 
#                                                                                        indices_leaf[sid], 
#                                                                                        indices_leaf[sid])

    msvds[sid].loocv_predictions_wtleaf_leaf2 = get_nwloocv_predictions_multimodel_merge(msvds[sid].reg_proj_vcount_norm_renorm, 
                                                                                       msvds[sid].centroids, 
                                                                                       gammas, 
                                                                                       indices_wtinleaf[sid], 
                                                                                       indices_leaf2ormore[sid])
    #say we wish to predict wild types
    msvds[sid].loocv_predictions_wtleaf_wtleaf2 = get_nwloocv_predictions_multimodel_merge(msvds[sid].reg_proj_vcount_norm_renorm, 
                                                                                       msvds[sid].centroids, 
                                                                                       gammas, 
                                                                                       indices_wtinleaf2ormore[sid], 
                                                                                        indices_wtinleaf2ormore[sid])
                                                                                          
    msvds[sid].loocv_predictions_leaf_wtleaf2 = get_nwloocv_predictions_multimodel_merge(msvds[sid].reg_proj_vcount_norm_renorm, 
                                                                                       msvds[sid].centroids, 
                                                                                       gammas, 
                                                                                       indices_leaf[sid], 
                                                                                        indices_wtinleaf2ormore[sid])
    #for comparison with prev exp
    msvds[sid].loocv_predictions_leaf_wtleaf = get_nwloocv_predictions_multimodel_merge(msvds[sid].reg_proj_vcount_norm_renorm, 
                                                                                       msvds[sid].centroids, 
                                                                                       gammas, 
                                                                                       indices_leaf[sid], 
                                                                                        indices_wtinleaf[sid])
    
    

    #predict all leafs using cre in that leaf 
    #indices_leaf2ormore is a reduced set of indices_leaf so that we dont have the evaluator be the only predictor
    #if we are evaluating leaf2ormore with cres we wont have this issue any more than already.
    #however, we have to not evaluate when there are no cres in the leaf

NameError: name 'gammas' is not defined

In [38]:
gammas = np.asarray([0.1,.5,1,2,10])
for sid in major_structure_ids:
    print(sid)
    msvds[sid].loocv_predictions_major_major = get_nwloocv_predictions_multimodel_merge(msvds[sid].reg_proj_vcount_norm_renorm, 
                                                                                       msvds[sid].centroids, 
                                                                                       gammas, 
                                                                                       indices_major[sid], 
                                                                                        indices_major[sid])


512
703
1089
1097
315




313
354
698
771
803
477
549




In [48]:
#     msvds[sid].loocv_predictions_major_major2 = get_nwloocv_predictions_multimodel_merge(msvds[sid].reg_proj_vcount_norm_renorm, 
#                                                                                        msvds[sid].centroids, 
#                                                                                        gammas, 
#                                                                                        indices_leaf[sid], 
#                                                                                        indices_leaf2ormore[sid])
a= [list(range(5))]
keys = np.asarray(list(itertools.product(*a)))

nwloocv_major_major = {}
reg_proj_vcount_norm_renorms = {}
for sid in major_structure_ids:
    nwloocv_major_major[sid] = msvds[sid].loocv_predictions_major_major
    reg_proj_vcount_norm_renorms[sid ] = msvds[sid].reg_proj_vcount_norm_renorm

eval_indices_major = get_eval_indices(indices_major)
losses_major_major = get_loss(reg_proj_vcount_norm_renorms, nwloocv_major_major,pred_ind = eval_indices_major, true_ind = eval_indices_major,keys = keys)
best_gamma_major_major = get_best_hyperparameters(losses_major_major,keys)
meanloss_nw_major_major = get_loss_best_hyp(losses_major_major, best_gamma_major_major)

0
1
2
3
4
5
6
7
8
9
10
11


In [49]:
nwloocv_major_major[549].shape

(5, 83, 577)

In [50]:
#changing target list to leafs rather than summaries
meanloss_nw_major_major

array([0.90097783, 0.77737892, 0.35946378, 0.29377341, 0.43029092,
       0.47135974, 0.40976477, 0.44626504, 0.45657043, 0.53027727,
       0.69085557, 0.42870434])

In [26]:
meanloss_nw_major_major

array([0.91745875, 0.88343212, 0.37942834, 0.30860234, 0.40127128,
       0.48650491, 0.4394712 , 0.53795851, 0.52413763, 0.56407093,
       0.73076523, 0.49477931])

In [51]:
#obviously im having some difficulty
meanloss_nw_finest_finest2

NameError: name 'meanloss_nw_finest_finest2' is not defined

In [97]:
meanloss_nw_finest_finest2

array([0.93199997, 1.12600816, 0.40212803, 0.36564319, 0.45510793,
       0.64921397, 0.53419902, 0.60271628, 0.65895914, 0.58846024,
       0.6907309 , 0.59767407])

In [10]:
nwloocv_leaf_leaf2 = {}
nwloocv_summary_leaf2 = {}
nwloocv_major_leaf2 = {}
nwloocv_wtleaf_wtleaf2 = {}
nwloocv_leaf_wtleaf2 = {}
nwloocv_leaf_wtleaf = {}
reg_proj_vcount_norm_renorms= {}
nwloocv_leaf_leaf = {}

for sid in major_structure_ids:
    nwloocv_leaf_leaf2[sid] = msvds[sid].loocv_predictions_leaf_leaf2
    reg_proj_vcount_norm_renorms[sid ] = msvds[sid].reg_proj_vcount_norm_renorm
    nwloocv_summary_leaf2[sid]= msvds[sid].loocv_predictions_summary_leaf2
    nwloocv_major_leaf2[sid] = msvds[sid].loocv_predictions_major_leaf2
    nwloocv_wtleaf_wtleaf2[sid] = msvds[sid].loocv_predictions_wtleaf_wtleaf2
    nwloocv_leaf_wtleaf2[sid] = msvds[sid].loocv_predictions_leaf_wtleaf2
    nwloocv_leaf_wtleaf[sid] = msvds[sid].loocv_predictions_leaf_wtleaf
    nwloocv_leaf_leaf[sid ] = msvds[sid].loocv_predictions_leaf_leaf
    
inds_good = {}
for sid in major_structure_ids:
    inds_good[sid] = np.asarray(list(range(msvds[sid].injections.shape[0]))) 

a= [list(range(5))]
keys = np.asarray(list(itertools.product(*a)))

eval_indices_leaf2ormore = get_eval_indices(indices_leaf2ormore)
#eval_indices_leaf  = get_eval_indices(indices_leaf)
eval_indices_wtinleaf2ormore = get_eval_indices(indices_wtinleaf2ormore)
eval_indices_wtinleaf = get_eval_indices(indices_wtinleaf)

losses_finest_finest2 = get_loss(reg_proj_vcount_norm_renorms, nwloocv_leaf_leaf2,pred_ind = eval_indices_leaf2ormore, true_ind = eval_indices_leaf2ormore,keys = keys)
losses_summary_finest2 = get_loss(reg_proj_vcount_norm_renorms, nwloocv_summary_leaf2,pred_ind = eval_indices_leaf2ormore, true_ind = eval_indices_leaf2ormore,keys = keys)
losses_major_finest2 = get_loss(reg_proj_vcount_norm_renorms, nwloocv_major_leaf2,pred_ind = eval_indices_leaf2ormore, true_ind = eval_indices_leaf2ormore,keys = keys)
losses_wtleaf_wtleaf2 = get_loss(reg_proj_vcount_norm_renorms, nwloocv_wtleaf_wtleaf2,pred_ind = eval_indices_wtinleaf2ormore, true_ind = eval_indices_wtinleaf2ormore,keys = keys)
losses_leaf_wtleaf2 = get_loss(reg_proj_vcount_norm_renorms, nwloocv_leaf_wtleaf2,pred_ind = eval_indices_wtinleaf2ormore, true_ind = eval_indices_wtinleaf2ormore,keys = keys)
losses_leaf_wtleaf = get_loss(reg_proj_vcount_norm_renorms, nwloocv_leaf_wtleaf,pred_ind = eval_indices_wtinleaf, true_ind = eval_indices_wtinleaf,keys = keys)
#losses_finest_finest = get_loss(reg_proj_vcount_norm_renorms, nwloocv_leaf_leaf,pred_ind = eval_indices_leaf, true_ind = eval_indices_leaf,keys = keys)


#best_gamma_finest_finest = get_best_hyperparameters(losses_finest_finest,keys)
best_gamma_finest_finest2 = get_best_hyperparameters(losses_finest_finest2,keys)
best_gamma_summary_finest2 = get_best_hyperparameters(losses_summary_finest2,keys)
best_gamma_major_finest2 = get_best_hyperparameters(losses_major_finest2,keys)
best_gamma_wtleaf_wtleaf2 = get_best_hyperparameters(losses_wtleaf_wtleaf2,keys)
best_gamma_leaf_wtleaf2 = get_best_hyperparameters(losses_leaf_wtleaf2,keys)
best_gamma_leaf_wtleaf = get_best_hyperparameters(losses_leaf_wtleaf,keys)

#meanloss_nw_finest_finest = get_loss_best_hyp(losses_finest_finest, best_gamma_finest_finest)
meanloss_nw_finest_finest2 = get_loss_best_hyp(losses_finest_finest2, best_gamma_finest_finest2)
meanloss_nw_summary_finest2 = get_loss_best_hyp(losses_summary_finest2, best_gamma_summary_finest2)
meanloss_nw_major_finest2 = get_loss_best_hyp(losses_major_finest2, best_gamma_major_finest2)
meanloss_nw_wtleaf_wtleaf2 = get_loss_best_hyp(losses_wtleaf_wtleaf2, best_gamma_wtleaf_wtleaf2)
meanloss_nw_leaf_wtleaf2 = get_loss_best_hyp(losses_leaf_wtleaf2, best_gamma_leaf_wtleaf2)
meanloss_nw_leaf_wtleaf = get_loss_best_hyp(losses_leaf_wtleaf, best_gamma_leaf_wtleaf)
meanloss_nwall_leaf_wtleaf = get_loss_best_hyp(losses_leaf_wtleaf2, best_gamma_finest_finest2)

AttributeError: 'VoxelDataset' object has no attribute 'loocv_predictions_leaf_leaf2'

In [None]:
losses_leaf_wtleaf2 = get_loss(reg_proj_vcount_norm_renorms, nwloocv_leaf_wtleaf2,pred_ind = eval_indices_wtinleaf2ormore, true_ind = eval_indices_wtinleaf2ormore,keys = keys)
losses_leaf_wtleaf = get_loss(reg_proj_vcount_norm_renorms, nwloocv_leaf_wtleaf,pred_ind = eval_indices_wtinleaf, true_ind = eval_indices_wtinleaf,keys = keys)


In [332]:
losses_leaf_wtleaf2_otherway = get_loss(reg_proj_vcount_norm_renorms, nwloocv_leaf_leaf2,pred_ind = eval_indices_wtinleaf2ormore, true_ind = eval_indices_wtinleaf2ormore,keys = keys)


In [337]:
#this has 12, godzilla has 8
losses_leaf_wtleaf[512].shape

(5, 12)

In [339]:
eval_indices_wtinleaf[512].shape

(12,)

In [344]:
sid = 512
np.where(get_indices2(creline[sid], np.asarray(['C57BL/6J']),leafs[sid]).sum(axis = 0) > 0)[0].shape


(12,)

In [None]:
#basically, get_indices2 might not account for the fact that some leafs only have the wt

In [345]:
indices_wtinleaf_reduced, indices_leaf_reduced = screen_index_matrices2(indices_wtinleaf[sid], indices_leaf[sid])
    

In [349]:
indices_wtinleaf_reduced.sum(axis = 1).sum()

12

In [381]:
#model_index_matrices are the indices of the leafs'
#indices_wtinleaf are the wild types
#need to make sure we dont have leafs with only 1 experiment
def screen_index_matrices2(model_index_matrices, eval_index_matrices):
    
    #nmodels = model_index_matrices.shape[0]
    include_per_model = model_index_matrices.sum(axis= 1)
    to_exclude = np.where(include_per_model <= 1)[0]
    #to_include = np.where(include_per_model > 0)[0]
    
    model_index_matrices2 = model_index_matrices.copy()
    eval_index_matrices2 = eval_index_matrices.copy()
    model_index_matrices2[to_exclude] = 0
    eval_index_matrices2[to_exclude] = 0
    
    return(model_index_matrices2, eval_index_matrices2)

In [365]:
indices_leaf_reduced, indices_wtinleaf_reduced  = screen_index_matrices2( indices_leaf2ormore[sid],indices_wtinleaf[sid])
    

In [372]:
model_index_matrices = indices_leaf2ormore[sid]
eval_index_matrices = indices_wtinleaf[sid]

In [395]:
eval_indices_wtinleaf_reduced = get_eval_indices(indices_wtinleaf_reduced)
losses_leaf_wtleaf2_otherway = get_loss(reg_proj_vcount_norm_renorms, nwloocv_leaf_leaf2,pred_ind = eval_indices_wtinleaf_reduced, true_ind = eval_indices_wtinleaf_reduced,keys = keys)

In [398]:
losses_leaf_wtleaf2_otherway[512] #finally... this is the same as before

array([[0.34174093, 1.65425879, 0.21638183, 1.14073591, 0.6425    ,
        1.12506149, 0.72780486, 1.04390712],
       [0.35124271, 1.65425879, 0.21026201, 1.32445258, 0.6425    ,
        1.12506149, 0.83424494, 1.22832153],
       [0.36399655, 1.65425879, 0.2399193 , 1.32447863, 0.6425    ,
        1.12506149, 0.86154553, 1.28456278],
       [0.38932652, 1.65425879, 0.30518207, 1.32447863, 0.6425    ,
        1.12506149, 0.86407479, 1.32012681],
       [0.44595791, 1.65425879, 0.48245589, 1.32447863, 0.6425    ,
        1.12506149,        nan,        nan]])

In [392]:

def get_loss(true_dict, prediction_dict, pred_ind=None, true_ind=None, keys=None):
    output = {}
    major_structure_ids = list(prediction_dict.keys())
    nms = len(major_structure_ids)
    ngam = prediction_dict[major_structure_ids[0]].shape[0]
    nalph = prediction_dict[major_structure_ids[0]].shape[1]
    for m in range(nms):
        sid = major_structure_ids[m]
        if pred_ind == None:
            # prediction_dict and true_dict will contain predictions for 'bad' experiments with no recorded injection
            # when we have the wild type predictions, the subset is what is good among the wild types
            # so 'true' subsetting is always good, since it is w.r.t. the full injection
            # but prediction needs good w.r.t. wt
            pind = np.asarray(list(range(prediction_dict[sid].shape[1])), dtype=int)
        else:
            pind = pred_ind[sid]
        if true_ind == None:
            tind = np.asarray(list(range(true_dict[sid].shape[0])), dtype=int)
        else:
            tind = true_ind[sid]

        nexp = len(pind)

        output[sid] = np.zeros(np.append([len(np.unique(keys[:, i])) for i in range(keys.shape[1])], nexp))

        for j in range(keys.shape[0]):
            output[sid][tuple(keys[j])] = np.asarray(
                [get_loss_paper(true_dict[sid][tind[i]], prediction_dict[sid][tuple(keys[j])][pind[i]]) for i in
                 range(nexp)])

    return (output)

In [None]:
pind = 

In [391]:
losses_leaf_wtleaf2_otherway[512].shape


(5, 13)

In [389]:
indices_wtinleaf_reduced[512].sum(axis = 1)

array([1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 2])

In [380]:
eval_index_matrices2.sum(axis  = 1).sum()

8

In [368]:
indices_wtinleaf_reduced.sum(axis = 1).sum()
#maybe should be 8? if only 8 of the wild types are in leafs with more than 1 experiment

5

In [369]:
indices_leaf2ormore[512].sum(axis = 1)

array([3, 2, 0, 0, 0, 2, 0, 6, 0, 2, 5, 2, 9])

In [370]:
#this is for 2 wts
indices_wtinleaf2ormore[512].sum(axis = 1)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2])

In [371]:
indices_wtinleaf[sid].sum(axis = 1)

array([1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2])

In [None]:
def get_eval_indices(eval_index_matrices):
    eval_indices = {}
    major_structure_ids = np.asarray(list(eval_index_matrices.keys()))
    for sid in major_structure_ids:
        eval_indices[sid] = np.where(eval_index_matrices[sid].sum(axis = 0) > 0)[0]
    return(eval_indices)    

In [331]:
meanloss_nwall_leaf_wtleaf

array([0.23437622,        nan, 0.1817548 , 0.1541542 , 0.30627729,
       0.15907457, 0.41965146, 0.10320351,        nan, 0.18120275,
       0.47311745, 0.23478083])

In [327]:
meanloss_nw_leaf_wtleaf

array([1.24103258, 2.        , 0.26152648, 0.46757392, 0.38090104,
       0.46156683, 0.78686578, 0.39213311, 0.75838845, 0.50065412,
       0.38147014, 0.37212881])

In [262]:
eval_indices_wtinleaf2ormore = get_eval_indices(indices_wtinleaf2ormore)

In [265]:
eval_indices_wtinleaf2ormore

{512: array([11, 21]),
 703: array([], dtype=int64),
 1089: array([  0,  11,  25,  34,  38,  39,  47,  49,  51,  59,  65,  70,  72,
         74,  83,  87,  90,  98, 103, 104, 116, 117, 120, 121]),
 1097: array([35, 51, 58]),
 315: array([   9,   13,   33,   41,   74,   81,   93,  101,  120,  121,  131,
         148,  151,  202,  237,  241,  254,  255,  258,  275,  286,  321,
         330,  331,  346,  348,  354,  360,  366,  381,  394,  398,  410,
         427,  429,  433,  441,  444,  458,  479,  492,  512,  523,  540,
         566,  573,  601,  630,  632,  676,  681,  696,  697,  701,  710,
         712,  749,  755,  765,  772,  778,  807,  811,  816,  829,  834,
         853,  863,  865,  873,  883,  893,  913,  923,  926,  951,  954,
         998, 1001, 1006, 1015, 1022, 1025, 1027, 1029, 1037, 1058, 1065,
        1070, 1072, 1086, 1100, 1125, 1127]),
 313: array([ 3,  9, 17, 19, 20, 25]),
 354: array([2, 5, 9]),
 698: array([ 1,  4, 21, 28, 29]),
 771: array([], dtype=int64),
 803

In [267]:
get_loss_paper(msvds[sid].reg_proj_vcount_norm_renorm[11], msvds[sid].reg_proj_vcount_norm_renorm[21])

0.23437621991390023

In [268]:
get_loss_paper(msvds[sid].reg_proj_vcount_norm_renorm[21], msvds[sid].reg_proj_vcount_norm_renorm[11])

0.23437621991390023

In [252]:
#36 experiments, but only 12 wt type.  13 different leafs.  one wild type leaf combo
len(np.unique(leafs[sid]))

13

In [250]:
len(np.where(creline[512]=='C57BL/6J')[0])

12

In [248]:
nwloocv_leaf_wtleaf2[512][0].sum(axis = 1).shape

(36,)

In [236]:
losses_leaf_wtleaf2[512]
#this is still different than in previous notebook godzilla
#why are they so different from each other... check predictions in evalindices
#generate those predictions manually

array([[0.21638183, 1.14073591],
       [0.21026201, 1.32445258],
       [0.2399193 , 1.32447863],
       [0.30518207, 1.32447863],
       [0.48245589, 1.32447863]])

In [237]:
np.nanmax(nwloocv_leaf_wtleaf2[sid])

0.970118522644043

In [261]:

nwloocv_leaf_wtleaf2[512][0].sum(axis = 1)

array([2.13929893, 2.13929893, 2.13929893, 2.13929893,        nan,
       2.13929893, 2.13929893, 2.13929893, 2.13929893,        nan,
       2.13929893, 3.26033067, 2.13929893, 2.13929893,        nan,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       2.13929893, 2.41753067, 2.13929893, 2.13929893,        nan,
       2.68548563, 2.68548563, 2.68548563, 2.68548563,        nan,
       0.        , 2.68548563, 1.3779863 , 2.13929893, 1.92727318,
       0.        ])

In [279]:
losses_leaf_wtleaf2[512]

array([[0.21638183, 1.14073591],
       [0.21026201, 1.32445258],
       [0.2399193 , 1.32447863],
       [0.30518207, 1.32447863],
       [0.48245589, 1.32447863]])

In [281]:
get_loss_paper(nwloocv_leaf_wtleaf2[512][0,21],nwloocv_leaf_wtleaf2[512][0,11])

1.1426680081123983

In [283]:
get_loss_paper(msvds[sid].reg_proj_vcount_norm_renorm[21],nwloocv_leaf_wtleaf2[512][0,21])

1.1407359061086297

In [284]:
get_loss_paper(msvds[sid].reg_proj_vcount_norm_renorm[11],nwloocv_leaf_wtleaf2[512][0,11])

0.21638183384258114

In [322]:
#these predictions don't seem right... messed up indices
sid = 512
msvds[sid].loocv_predictions_leaf_wtleaf2 = get_nwloocv_predictions_multimodel_merge(msvds[sid].reg_proj_vcount_norm_renorm, 
                                                                                   msvds[sid].centroids, 
                                                                                   gammas, 
                                                                                   indices_wtinleaf2ormore[sid], 
                                                                                    indices_wtinleaf2ormore[sid])


(13, 5, 36, 577)




In [323]:
get_loss_paper(msvds[sid].reg_proj_vcount_norm_renorm[11],msvds[512].loocv_predictions_leaf_wtleaf2[0,11])

0.23437620792567018

In [324]:
get_loss_paper(msvds[sid].reg_proj_vcount_norm_renorm[21],msvds[512].loocv_predictions_leaf_wtleaf2[0,21])

0.23437622420714097

In [310]:
msvds[512].loocv_predictions_leaf_wtleaf2.shape

(13, 5, 36, 577)

In [None]:
def get_nwloocv_predictions_singlemodel(projections, centroids, gamma, model_indices, eval_indices):
    
#def get_loocv_predictions(projections, centroids, gamma):
    
#     projections = np.asarray(projections, dtype=np.float32)
#     neval = len(eval_indices)
#     #nexp = centroids.shape[0]
#     predictions = np.empty(projections.shape)
#     weights = get_weights(centroids, gamma)
    
#     for i in range(neval):
#         otherindices = np.setdiff1d(model_indices, eval_indices[i])
#         #this order of operations is the fastest I found
#         weights_i = weights[eval_indices[i]][model_indices] / weights[model_indices[i]][otherindices].sum()
#         weights_i[i] = 0
#         weights_i = np.asarray(weights_i, dtype=np.float32)
#         pred = np.dot(weights_i, projections[model_indices])
#         predictions[i] = pred

    eval_index_val = np.where(eval_indices == 1)[0]
    model_index_val = np.where(model_indices == 1)[0]
    
    projections = np.asarray(projections, dtype=np.float32)
    
    nmod_ind = len(model_index_val)
    neval = len(eval_index_val)
    #nexp = centroids.shape[0]
    predictions = np.empty(projections.shape)
    #print(model_index_val.shape, eval_index_val.shape)

    if len(model_index_val) > 0 and  len(eval_index_val) > 0:
        weights = pairwise_kernels(centroids[model_index_val], centroids[eval_index_val], metric='rbf', gamma=gamma, filter_params=True) #get_weights(centroids, gamma)
        for i in range(neval):
            matchindex = np.where(model_index_val == eval_index_val[i])[0]
            otherindices = np.setdiff1d(np.asarray(list(range(nmod_ind))), matchindex)         
            #this order of operations is the fastest I found
            weights_i = weights[:,i] / weights[:,i][otherindices].sum()
            weights_i[matchindex] = 0
            weights_i = np.asarray(weights_i, dtype=np.float32)
            pred = np.dot(weights_i, projections[model_index_val])
            predictions[eval_index_val[i]] = pred

        
    return(predictions)    

def get_nwloocv_predictions_multimodel(projections, centroids, gammas, model_index_matrix, eval_index_matrix):
    

    
    ntargets = projections.shape[1]
    nexp = projections.shape[0]
    nmodels = model_index_matrix.shape[0]
    ngammas = len(gammas)
    
    projections = np.asarray(projections, dtype=np.float32)
    predictions = np.empty((nmodels, ngammas, nexp, ntargets))
    
    
    for m in range(nmodels):
        #print('m', m, len(np.where(model_index_matrix[m] ==1)[0]))
        predictions[m] = np.asarray([get_nwloocv_predictions_singlemodel(projections, centroids, gammas[g], model_index_matrix[m], eval_index_matrix[m]) for g in range(ngammas)])
    
    return(predictions)  

def combine_predictions(predictions, eval_index_matrix):
    
    nmodels, ngammas, nexp, ntargets = predictions.shape
    combined_predictions = np.empty((ngammas, nexp, ntargets))
    for m in range(nmodels):
        combined_predictions[:,np.where(eval_index_matrix[m] == 1)[0]] = predictions[m][:,np.where(eval_index_matrix[m] == 1)[0]]
        
    return(combined_predictions)

def get_nwloocv_predictions_multimodel_merge(projections, centroids, gammas, model_index_matrix, eval_index_matrix):
    
    predictions_unmerged = get_nwloocv_predictions_multimodel(projections, centroids, gammas, model_index_matrix, eval_index_matrix)
    print(predictions_unmerged.shape)
    predictions_merged = combine_predictions(predictions_unmerged, eval_index_matrix)
    
    return(predictions_merged)

In [297]:
np.where(eval_index_matrix[m] == 1)[0]


array([11, 21])

In [None]:
#so theres only 2 points worth evaluating?  34 in person distinct cre leaf combos
#there 2 leafs with double... so how is it 2 and not 4?

In [64]:
#this is consisntent with old result
meanloss_nw_finest_finest2

array([0.78692754, 0.29068181, 0.35803372, 0.27034826, 0.44147834,
       0.49263346, 0.31888494, 0.3827807 , 0.34821835, 0.49121184,
       0.64899939, 0.3360901 ])

In [70]:
#why not this??
meanloss_nw_leaf_wtleaf2

array([1.98668425,        nan, 0.63826926, 1.84877534, 0.33209361,
       1.78119019, 1.25544011, 0.10097336,        nan, 1.80148266,
       0.19512863, 1.63289607])

In [72]:
nwloocv_leaf_wtleaf2[512].shape

(5, 36, 577)

In [74]:
nwloocv_leaf_wtleaf2[512][0][0]

array([3.12427152e-03, 2.09801419e-05, 1.76919217e-03, 1.08384375e-35,
       0.00000000e+00, 9.42581147e-03, 1.22441968e-04, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 4.75900888e-05,
       8.56435072e-05, 2.20537637e-04, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 8.98282230e-03, 0.00000000e+00,
       1.94945571e-03, 0.00000000e+00, 4.25323188e-01, 7.72098952e-04,
       2.99081206e-03, 5.26415708e-04, 9.23750486e-05, 0.00000000e+00,
       8.47871415e-04, 0.00000000e+00, 5.58666179e-05, 2.31866681e-04,
       6.47603825e-04, 0.00000000e+00, 6.13230368e-05, 1.58015191e-05,
       0.00000000e+00, 1.06011739e-05, 1.36638922e-03, 4.21375917e-05,
       1.15039665e-03, 2.97770498e-07, 0.00000000e+00, 2.02284878e-06,
       2.12574087e-05, 6.46157772e-04, 4.58102766e-03, 0.00000000e+00,
       7.41571785e-05, 1.88214832e-03, 2.07855846e-05, 0.00000000e+00,
       2.91276792e-05, 0.00000000e+00, 0.00000000e+00, 2.30481179e-04,
      

In [None]:
#godzilla
#it appears i didnt restrict the 'creleaf' set enough and actually computed wildtypes in major structure
#we have 6 wtinleaf2ormore

In [239]:
indices_leaf[512].shape

(13, 36)

In [81]:
    sid = 512
    msvds[sid].loocv_predictions_leaf_wtleaf2 = get_nwloocv_predictions_multimodel_merge(msvds[sid].reg_proj_vcount_norm_renorm, 
                                                                                       msvds[sid].centroids, 
                                                                                       gammas, 
                                                                                       indices_leaf[sid], 
                                                                                        indices_wtinleaf2ormore[sid])


(13, 5, 36, 577)




In [246]:
eval_index_matrix.sum(axis = 1) #this is right

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2])

In [102]:
msvds[sid].loocv_predictions_leaf_wtleaf2[0].max()

0.9156172275543213

In [100]:
msvds[sid].loocv_predictions_leaf_wtleaf2.shape

(5, 36, 577)

In [316]:
sid = 512
projections = msvds[sid].reg_proj_vcount_norm_renorm

centroids =                                   msvds[sid].centroids
gamma = gammas[0]
model_index_matrix=                             indices_wtinleaf2ormore[sid]
eval_index_matrix=                indices_wtinleaf2ormore[sid]
model_indices = model_index_matrix[0]
eval_indices = eval_index_matrix[0]

In [317]:
msvds[512].loocv_predictions_leaf_wtleaf2 = get_nwloocv_predictions_multimodel(projections, centroids, gammas, model_index_matrix, eval_index_matrix)



In [302]:
get_loss_paper(asdf[-1][0][21,:], projections[21])

0.23437621546526288

In [None]:
def get_nwloocv_predictions_multimodel_merge(projections, centroids, gammas, model_index_matrix, eval_index_matrix):
    
    predictions_unmerged = get_nwloocv_predictions_multimodel(projections, centroids, gammas, model_index_matrix, eval_index_matrix)
    print(predictions_unmerged.shape)
    predictions_merged = combine_predictions(predictions_unmerged, eval_index_matrix)
    
    return(predictions_merged)

In [319]:
msvds[512].loocv_predictions_leaf_wtleaf2 =  get_nwloocv_predictions_multimodel_merge(projections, centroids, gammas, model_index_matrix, eval_index_matrix)

(13, 5, 36, 577)




In [305]:
get_loss_paper(asdf[0][21,:], projections[21])

0.23437621546526288

In [306]:
get_loss_paper(asdf[0][11,:], projections[11])

0.234376212459474

In [294]:
    eval_index_val = np.asarray([11,21])#np.where(eval_indices == 1)[0]
    model_index_val = np.asarray([11,21])#np.where(model_indices == 1)[0]
    
    projections = np.asarray(projections, dtype=np.float32)
    
    nmod_ind = len(model_index_val)
    neval = len(eval_index_val)
    #nexp = centroids.shape[0]
    predictions = np.empty(projections.shape)
    #print(model_index_val.shape, eval_index_val.shape)

    if len(model_index_val) > 0 and  len(eval_index_val) > 0:
        weights = pairwise_kernels(centroids[model_index_val], centroids[eval_index_val], metric='rbf', gamma=gamma, filter_params=True) #get_weights(centroids, gamma)
        for i in range(neval):
            matchindex = np.where(model_index_val == eval_index_val[i])[0]
            otherindices = np.setdiff1d(np.asarray(list(range(nmod_ind))), matchindex)         
            #this order of operations is the fastest I found
            weights_i = weights[:,i] / weights[:,i][otherindices].sum()
            weights_i[matchindex] = 0
            weights_i = np.asarray(weights_i, dtype=np.float32)
            pred = np.dot(weights_i, projections[model_index_val])
            print(pred.max())
            predictions[eval_index_val[i]] = pred


0.8285275
0.9855121


In [295]:
get_loss_paper(predictions[11],predictions[21])

0.2343762122189106

In [296]:
get_loss_paper(predictions[21],predictions[11])

0.2343762122189106

In [140]:
model_index_val

array([], dtype=int64)

In [144]:
eval_index_matrix.sum(axis = 1)

array([0, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 2, 0])

In [145]:
model_index_matrix.sum(axis = 1)

array([0, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 2, 0])

In [118]:
model_index_matrix.sum(axis = 1)

array([3, 2, 1, 1, 1, 2, 1, 6, 1, 2, 5, 2, 9])

In [131]:
model_index_matrix.sum(axis = 1).sum()

36

In [132]:
eval_index_matrix.sum(axis = 1)

array([0, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 2, 0])

In [135]:
model_index_matrix.sum(axis = 1).sum()

36

In [274]:
    predictions_unmerged = get_nwloocv_predictions_multimodel(projections, centroids, gammas, model_index_matrix, eval_index_matrix)




In [275]:
predictions = predictions_unmerged

In [276]:
    nmodels, ngammas, nexp, ntargets = predictions.shape
    combined_predictions = np.empty((ngammas, nexp, ntargets))
    for m in range(nmodels):
        combined_predictions[:,np.where(eval_index_matrix[m] == 1)[0]] = predictions[m][:,np.where(eval_index_matrix[m] == 1)[0]]


In [278]:
get_loss_paper(combined_predictions[0,11,:],combined_predictions[0,21,:])

0.2343762122189106

In [103]:
projections[0].max()


0.6157449559467582

In [89]:
predictions_merged

NameError: name 'predictions_merged' is not defined

In [78]:
msvds[sid].loocv_predictions_leaf_wtleaf2[0].max()

0.9156172275543213

In [82]:
msvds[sid].loocv_predictions_leaf_wtleaf2[0].shape

(36, 577)

In [9]:
meanloss_nw_major_finest2

NameError: name 'meanloss_nw_major_finest2' is not defined

In [65]:
losses = np.asarray([meanloss_nw_major_major,
                     meanloss_nw_finest_finest2, 
           meanloss_nw_summary_finest2,
            meanloss_nw_major_finest2  ,
                    meanloss_nw_wtleaf_wtleaf2,
                    meanloss_nw_leaf_wtleaf2,
                    meanloss_nw_leaf_wtleaf]).transpose()


losses2 = losses[[4,7,2,1,10,9,11,3,5,8,6,0]]
loss =pd.DataFrame(losses2)#, columns = ['all','allwt'])

In [66]:
loss

Unnamed: 0,0,1,2,3,4,5,6
0,0.430291,0.441478,0.437777,0.761299,0.185551,0.332094,0.332094
1,0.446265,0.382781,0.382707,0.241034,0.099833,0.100973,0.100973
2,0.359464,0.358034,0.318438,0.679845,0.137924,0.638269,0.638269
3,0.777379,0.290682,0.290682,0.290682,,,1.437976
4,0.690856,0.648999,0.648954,1.007649,0.131307,0.195129,0.209115
5,0.530277,0.491212,0.491212,0.763025,0.518663,1.801483,1.049773
6,0.428704,0.33609,0.337409,0.926016,0.557145,1.632896,1.248982
7,0.293773,0.270348,0.270348,0.828003,0.155343,1.848775,1.848775
8,0.47136,0.492633,0.453227,1.648834,0.165519,1.78119,1.667228
9,0.45657,0.348218,0.348218,0.614635,,,0.905921


In [47]:
#losses = losses_finest_finest2
#hyps = best_gamma_finest_finest2

def get_best_hyperparameters(losses, keys):
    
    major_structure_ids = np.asarray(list(losses.keys()))
    nms = len(major_structure_ids)
    nkey = keys.shape[1]
    output = np.empty((nms, nkey))
    for m in range(nms):
        print(m)
        sid = major_structure_ids[m]
        lvec = np.asarray([np.nanmean(losses[sid][key]) for key in keys])
        if np.any(~np.isnan(lvec)):
            output[m] = keys[np.nanargmin(lvec)]
        # if len(np.where(np.isnan(np.nanmean(losses[sid][:,:], axis = 1)))[0]) < losses[sid].shape[0]:
        #    output[m] = np.nanargmin(np.nanmean(losses[sid][:,:], axis = 1))

    output = np.asarray(output, dtype=int)
    return(output)
    
def get_loss_best_hyp(losses, hyps):
    major_structure_ids = np.asarray(list(losses.keys()))
    nms = len(major_structure_ids)
    output = np.zeros(nms)
    for m in range(nms):
        sid = major_structure_ids[m]
        output[m] = np.nanmean(losses[sid][hyps[m], :])
    return (output)

In [74]:
    major_structure_ids = np.asarray(list(losses.keys()))
    nms = len(major_structure_ids)
    output = np.zeros(nms)
    for m in range(nms):
        sid = major_structure_ids[m]
        output[m] = np.nanmean(losses[sid][hyps[m], :])


TypeError: 'NoneType' object is not subscriptable

In [80]:
best_gamma_finest_finest2

In [67]:
m = 0
sid = major_structure_ids[m]
lvec = np.asarray([np.nanmean(losses[sid][key]) for key in keys])


  This is separate from the ipykernel package so we can avoid doing imports until


True

In [48]:
m = 1
sid = major_structure_ids[m]
lvec = np.asarray([np.nanmean(losses[sid][key]) for key in keys])


  This is separate from the ipykernel package so we can avoid doing imports until


In [49]:
losses[sid]

array([], shape=(5, 0), dtype=float64)

In [43]:
losses[sid]

array([], shape=(5, 0), dtype=float64)

In [36]:
lvec

array([0.24061124, 0.24061124, 0.24061124, 0.24061124,        nan])

In [19]:
# #Set gammas for crossvalidation
# gammas = np.asarray([0.1,.5,1,2,10])

# for sid in major_structure_ids:
#     print(sid)
#     msvds[sid].loocv_predictions_all = get_loocv_predictions_code(projections = msvds[sid].regional_projection_vcount_norm_renorm, 
#                                                                  centroids = msvds[sid].centroids,
#                                                                  gammas = gammas)    
#     msvds[sid].loocv_predictions_leaf = get_loocv_predictions_code(projections = msvds[sid].reg_proj_vcount_norm_renorm, 
#                                                                  centroids = msvds[sid].centroids,
#                                                                  gammas = gammas,
#                                                                  codes = np.expand_dims(leafs[sid], axis = 1))
#     msvds[sid].loocv_predictions_cre = get_loocv_predictions_code(projections = msvds[sid].reg_proj_vcount_norm_renorm, 
#                                                                  centroids = msvds[sid].centroids,
#                                                                  gammas = gammas,
#                                                                  codes=np.expand_dims(np.asarray(creline[sid], dtype = str), axis = 1))
#     msvds[sid].loocv_predictions_creleaf = get_loocv_predictions_code(projections = msvds[sid].reg_proj_vcount_norm_renorm, 
#                                                                  centroids = msvds[sid].centroids,
#                                                                  gammas = gammas,
#                                                                  codes=np.asarray(np.vstack([leafs[sid], creline[sid]]), dtype = str).transpose())
    
# #this function is not suitable for generating predictions based off of a class that shouldn't itself be used for the prediction
# #the codes are used to segreate
# #we need to have two codes.
# #the first code denotes the different levels of model
# #the second should be a dictionary with key of the first code saying whos model we should use

In [None]:
#every experiment can be specified by an include 1 0 in model and include in evaluation 1 0 

In [6]:

# def get_loocv_predictions_code(projections, centroids, gammas, codes=None):

#     ngam = len(gammas)
#     if codes is None:
#         # print('yehh')
#         codes = np.zeros((projections.shape[0], 1))

#     # print(codes)
#     unique_codes = np.unique(codes, axis=1)
#     predictions = np.empty(np.append(ngam, projections.shape))

#     for c in range(len(unique_codes)):
#         print(unique_codes[c])
#         code_ind = np.where(codes == unique_codes[c])[0]
#         if len(code_ind) > 1:
#             predictions[:, code_ind] = np.asarray(
#                 [get_loocv_predictions(projections[code_ind], centroids[code_ind], gammas[g]) for g in range(ngam)])

#     return (predictions)



In [7]:
#codes are the levels for models and evals
#modelcodes are to be included in the model
#evalcodes are to be included in evalcodes
def generate_model_eval_indices(codes, modelcodes = None, evalcodes = None):
    
    if modelcodes is None:
        modelcodes = np.unique(np.asarray(list(codes.values())))
    if evalcodes is None:
        evalcodes = np.unique(np.asarray(list(codes.values())))
        
    for sid in np.asarray(list(codes.keys())):
        nexp = len(codes)
        #nmodels = len(modelcodes[sid])
        model_indices = np.zeros((nexp)) #np.asarray((nmodels, nexp))
        eval_indices = np.zeros((nexp)) #np.asarray((nmodels, nexp))
        #for c in range(nmodels):
        model_indices[np.where(np.isin(codes, modelcodes))[0]] = 1
        eval_indices[np.where(np.isin(codes, evalcodes))[0]] = 1
    
    return(model_indices, eval_indices)

In [8]:
def get_indices(codelist, testcode):
    
    output = np.zeros(len(codelist))
    output[np.where(np.isin(codelist, testcode))[0]] = 1
    
    return(output)

In [9]:
# def get_indices(codelist, testcode, codelist2, testcode2):
    
#     output = np.zeros(len(codelist))
#     output[np.where(np.isin(codelist, testcode))[0]] = 1
    
#     return(output)

In [11]:
# for sid in major_structure_ids

#     creline_options = np.unique(creline[sid])
#     for creline_option in creline_options:
#         model_indices = get_indices(creline[sid], creline_option)
#         eval_indices = get_indices(creline[sid], creline[sid][i])

In [12]:
#can

In [13]:
# model_indices is matrix of dimension nmodels x nexperiments
# eval_indices is matrix of dimension nmodels x nexperiments
# each leaf gets its own model

In [14]:
#get the indices of different leafs
#eval_indices = get_indices(leafs[sid])
#get the indices of the wts in that leaf
#model_indices = get_indices2(creline[sid], np.asarray(['C57BL/6J']),leafs[sid])

In [15]:
creleafs = {}
creleafs_merged = {}
for sid in major_structure_ids:
    creleafs[sid] = np.asarray(np.vstack([leafs[sid], creline[sid]]), dtype = str).transpose()
    creleafs_merged[sid] = [creleafs[sid][:,0][i]  + creleafs[sid][:,1][i] for i in range(creleafs[sid].shape[0])]

TypeError: get_indices() missing 1 required positional argument: 'testcode'

AttributeError: 'VoxelDataset' object has no attribute 'loocv_predictions_wtleaf_wtleaf2'

0
1
2
3
4
5
6
7
8
9
10
11


In [275]:
#these were the answers before... why changed???
mean_nw_all

array([0.78692754, 0.29068181, 0.35803372, 0.27034826, 0.44147834,
       0.49263346, 0.31888494, 0.3827807 , 0.34821835, 0.49121184,
       0.64899939, 0.3360901 ])

In [91]:
meanloss_nw_finest_finest2

array([0.7597419 , 0.47051428, 0.35582579, 0.28493421, 0.41721117,
       0.47354946, 0.35344191, 0.48337169, 0.41948756, 0.52509813,
       0.6907309 , 0.42712092])

In [92]:
major_structures

['CB',
 'CTXsp',
 'HPF',
 'HY',
 'Isocortex',
 'MB',
 'MY',
 'OLF',
 'P',
 'PAL',
 'STR',
 'TH']

In [282]:
from sklearn.metrics.pairwise import pairwise_kernels
from mcmodels.regressors.nonparametric.nadaraya_watson import get_weights


def get_weights(eval_centroids, model_centroids, gamma):
    weights = pairwise_kernels(X=eval_centroids, Y=model_centroids, metric='rbf', gamma=gamma, filter_params=True)
    return (weights)


def get_indices(ids):

    ids_unique = np.unique(ids)
    output = np.zeros((len(ids_unique), len(ids)), dtype = int)
    for i in range(len(ids_unique)):
        output[i,np.where(ids == ids_unique[i])[0] ] = 1
    return(output)

#get indices of firstlist in firstlisttest in categories defined by secondlist
def get_indices2(firstlist, firstlisttest, secondlist):
    
    sl_unique = np.unique(secondlist)
    output = np.zeros((len(sl_unique), len(secondlist)), dtype = int)
    for i in range(len(sl_unique)):
        output[i,np.intersect1d(np.where(np.isin(firstlist,firstlisttest))[0], np.where(secondlist == secondlist[i])[0])] = 1
    return(output)

#nmodels = nleafs
#populate each with experiments that share summary structure
def get_indices_summaryinleaf(summarylist , leaflist):
    
    nexp = len(leaflist)
    leaf_unique = np.unique(leaflist)
    output = np.zeros((len(leaf_unique), nexp), dtype = int)
    
    for i in range(len(leaf_unique)):
        
        summary = summarylist[np.where(leaflist == leaf_unique[i])[0]][0]
        output[i,np.where(summarylist == summary)[0]] = 1
        
        
    return(output)

#get predictions at all eval_indices using model_indices
#if an eval_indices is also a model indice, leave it out of the model
#if a model index is not an eval index, it never gets left out
def get_nwloocv_predictions_singlemodel(projections, centroids, gamma, model_indices, eval_indices):
    
#def get_loocv_predictions(projections, centroids, gamma):
    
#     projections = np.asarray(projections, dtype=np.float32)
#     neval = len(eval_indices)
#     #nexp = centroids.shape[0]
#     predictions = np.empty(projections.shape)
#     weights = get_weights(centroids, gamma)
    
#     for i in range(neval):
#         otherindices = np.setdiff1d(model_indices, eval_indices[i])
#         #this order of operations is the fastest I found
#         weights_i = weights[eval_indices[i]][model_indices] / weights[model_indices[i]][otherindices].sum()
#         weights_i[i] = 0
#         weights_i = np.asarray(weights_i, dtype=np.float32)
#         pred = np.dot(weights_i, projections[model_indices])
#         predictions[i] = pred

    eval_index_val = np.where(eval_indices == 1)[0]
    model_index_val = np.where(model_indices == 1)[0]
    
    projections = np.asarray(projections, dtype=np.float32)
    
    nmod_ind = len(model_index_val)
    neval = len(eval_index_val)
    #nexp = centroids.shape[0]
    predictions = np.empty(projections.shape)
    #print(model_index_val.shape, eval_index_val.shape)

    if len(model_index_val) > 0 and  len(eval_index_val) > 0:
        weights = pairwise_kernels(centroids[model_index_val], centroids[eval_index_val], metric='rbf', gamma=gamma, filter_params=True) #get_weights(centroids, gamma)
        for i in range(neval):
            matchindex = np.where(model_index_val == eval_index_val[i])[0]
            otherindices = np.setdiff1d(np.asarray(list(range(nmod_ind))), matchindex)         
            #this order of operations is the fastest I found
            weights_i = weights[:,i] / weights[:,i][otherindices].sum()
            weights_i[matchindex] = 0
            weights_i = np.asarray(weights_i, dtype=np.float32)
            pred = np.dot(weights_i, projections[model_index_val])
            predictions[eval_index_val[i]] = pred

        
    return(predictions)    

def get_nwloocv_predictions_multimodel(projections, centroids, gammas, model_index_matrix, eval_index_matrix):
    

    
    ntargets = projections.shape[1]
    nexp = projections.shape[0]
    nmodels = model_index_matrix.shape[0]
    ngammas = len(gammas)
    
    projections = np.asarray(projections, dtype=np.float32)
    predictions = np.empty((nmodels, ngammas, nexp, ntargets))
    
    
    for m in range(nmodels):
        #print('m', m, len(np.where(model_index_matrix[m] ==1)[0]))
        predictions[m] = np.asarray([get_nwloocv_predictions_singlemodel(projections, centroids, gammas[g], model_index_matrix[m], eval_index_matrix[m]) for g in range(ngammas)])
    
    return(predictions)  

def get_nwloocv_predictions_multimodel_merge(projections, centroids, gammas, model_index_matrix, eval_index_matrix):
    
    predictions_unmerged = get_nwloocv_predictions_multimodel(projections, centroids, gammas, model_index_matrix, eval_index_matrix)
    predictions_merged = combine_predictions(predictions_unmerged, eval_index_matrix)
    
    return(predictions_merged)
#we should not pass model_index_matrices that are identical to eval_index_matrices and have only 1 element per model
#can do automatically in the cross validation code but would rther do it explicitly to ensure identical indexing b/w experiments
#1 model leads to removing the model index from eval indices
#should never have no elements in model_indices
def screen_indices(model_indices, eval_indices):
    
    eval_indices2 = eval_indices.copy()
    mod_loc = np.where(model_indices == 1)[0]
    if len(mod_loc) == 1:
        eval_indices2[mod_loc] = 0
    return(eval_indices2)

#this will not result in certain models having no indices, but could result in an empty eval index.  cactch later
#this will result in certain indices having no prediction.  this is fine.
#can merge (sum) the index matrix to see where predictions are actually generated
def screen_index_matrices(model_index_matrices, eval_index_matrices):
    
    nmodels = model_index_matrices.shape[0]
    eval_index_matrices2 = eval_index_matrices.copy()
    for m in range(nmodels):
        eval_index_matrices2[m] = screen_indices(model_index_matrices[m], eval_index_matrices[m])
    
    return(eval_index_matrices2)

#need code for removing experiments that have no model
#this can happen when the model set is a subset of the evaluation set.
#we will therefore generate predictions for a subset
#given a leaf is included, the eval set is the same
#however, we want to remove evals in leaves we don't have a wt for... of course one could say we are doing worse...
#but we also have a fewer number of models
def screen_index_matrices2(model_index_matrices, eval_index_matrices):
    
    nmodels = model_index_matrices.shape[0]
    include_per_model = model_index_matrices.sum(axis= 1)
    to_include = np.where(include_per_model > 0)[0]
    
    model_index_matrices2 = model_index_matrices
    eval_index_matrices2 = eval_index_matrices[to_include]
    model_index_matrices2 = model_index_matrices[to_include]
    
    return(model_index_matrices2, eval_index_matrices2)

In [188]:
a= np.zeros(2)
b = a.copy()
b[1] = 1.
print(a)

[0. 0.]


In [None]:
msvds[sid].loocv_predictions_model = get_nwloocv_predictions_multimodel(projections = msvds[sid].reg_proj_vcount_norm_renorm, 
                                                                 centroids = msvds[sid].centroids,
                                                                 gammas = gammas,
                                                                model_indices = np.where(model_indices[c] == 1)[0]
                                                                eval_indices = np.where(eval_indices[c] == 1)[0])
                                                                 #codes=np.asarray(np.vstack([leafs[sid], creline[sid]]), dtype = str).transpose())


In [None]:
#get indices where creline is wt
get_indices(creleafs, creline, wt)

In [None]:
nmodels = number of leafs

In [None]:
eval_indicator = leaf
model_indicator = creleafs[leaf][wt]

In [None]:
codes = leafs
for leaf in leafs[sid]:
    modelcodes = np.asarray([leaf])
    evalcodes = np.asarray([leaf])
    generate_model_eval_indices(codes, modelcodes = None, evalcodes = None)

In [35]:
creleaf = np.asarray(np.vstack([leafs[sid], creline[sid]]), dtype = str).transpose()

In [None]:
#generate_model_eval_indices(leafs[sid], leafs[sid][i], leafs[sid][i])

for i in range(nexp):
    #get the model and evaluation indices for the model of the ith creleaf (i.e. sharing its code)
    #and evaluate on the ith leaf
    generate_model_eval_indices(creleafs[sid], creleafs[sid][i], leafs[sid], leafs[sid][i])

In [34]:
with open('data/info/leafs.pickle', 'rb') as handle:
    leafs = pickle.load(handle)
    

In [None]:
def merge_models(modelcodes):
    
    
    generate_model_eval_indices(codes, modelcodes = None, evalcodes = leafs[sid])

In [20]:
wt = np.asarray(['C57BL/6J'])
leafs
wt_wt_inds = generate_model_eval_indices(creline[sid], wt, wt)
#leafs wil
wt_wt_inds_leafindices = generate_model_eval_indices(creline, wt, wt)

all_all_inds = generate_model_eval_indices(codes)
all_wt_inds = generate_model_eval_indices(codes, evalcodes = wt)
wt_all_inds = generate_model_eval_indices(codes, modelcodes = wt)

NameError: name 'codes' is not defined

In [None]:
def generate_model_eval_indices(codes, codescompare):
    
    nmodels = len(modelcodes[sid])
    for c in range(nmodels):

In [None]:

def get_loocv_predictions_code(projections, centroids, gammas, codes=None):

    ngam = len(gammas)
    if codes is None:
        # print('yehh')
        codes = np.zeros((projections.shape[0], 1))

    # print(codes)
    unique_codes = np.unique(codes, axis=1)
    predictions = np.empty(np.append(ngam, projections.shape))
    
    #unique_codes should be array of binary variables indicating model_indices and eval_indices for a given model.
    for c in range(len(unique_codes)):
        print(unique_codes[c])
        code_ind = np.where(codes == unique_codes[c])[0]
        #if len(model_indices) > 1:
        predictions[:, eval_indices] = np.asarray([get_loocv_predictions(projections, centroids, gammas[g], model_indices, eval_indices) for g in range(ngam)])

    return (predictions)



In [58]:
from itertools import product

In [60]:
list(product(list(range(2)), list(range(3))))

[(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)]

In [53]:
list(enumerate([range(5), range(2)]))

[(0, range(0, 5)), (1, range(0, 2))]

In [None]:
def get_loocv_predictions_code(projections, centroids, gammas, codes = None):

    if codes is None:
        #print('yehh')
        codes = np.zeros((projections.shape[0], 1))
        
    #print(codes)
    unique_codes = np.unique(codes, axis = 1)
    predictions = np.empty(np.append(ngam,projections.shape))
    
    for c in range(len(unique_codes)):
        print(unique_codes[c])
        code_ind = np.where(codes == unique_codes[c])[0]
        if len(code_ind) >1:
            predictions[:,code_ind] = np.asarray([get_loocv_predictions(projections[code_ind], centroids[code_ind], gammas[g]) for g in range(ngam)])
    
    return(predictions)

In [None]:
    #should a model be (everyone his own cre)
    #or, a single set of indices
    #if we want to merge models to evaluate loss of a compound model, we should do that at a later state
    #do this for leafs as well
    nmodels = model_indices.shape[0]
    msvds[sid].loocv_predictions_model = np.empty((nmodels, ntargets))
    for c in range(nmodels):
        msvds[sid].loocv_predictions_model[c] = get_loocv_predictions_modeleval(projections = msvds[sid].reg_proj_vcount_norm_renorm, 
                                                                 centroids = msvds[sid].centroids,
                                                                 gammas = gammas,
                                                                model_indices = np.where(model_indices[c] == 1)[0]
                                                                eval_indices = np.where(eval_indices[c] == 1)[0])
                                                                 #codes=np.asarray(np.vstack([leafs[sid], creline[sid]]), dtype = str).transpose())
