In [1]:
%load_ext autoreload
%autoreload 2

import os
import numpy as np
import pandas as pd 
import sys
import pickle
import itertools
import seaborn as sns
import matplotlib.pyplot as plt

workingdirectory = os.popen('git rev-parse --show-toplevel').read()[:-1]
sys.path.append(workingdirectory)
os.chdir(workingdirectory)

import allensdk.core.json_utilities as ju
from allensdk.core.mouse_connectivity_cache import MouseConnectivityCache

from mcmodels.core import Mask,ModelData,VoxelModelCache
from mcmodels.core.utils import get_structure_id, get_ordered_summary_structures,get_minorstructures,get_loss_paper
from mcmodels.utils import nonzero_unique, unionize
from mcmodels.core.experiment import get_voxeldata_msvd
from mcmodels.models.crossvalidation import get_best_hyperparameters,get_loss_best_hyp,get_loocv_predictions,get_loss
from mcmodels.core.utils import get_cre_status,get_minorstructure_dictionary,get_leaves_ontologicalorder
from mcmodels.core.utils import get_regionalized_normalized_data
from mcmodels.core.utils import get_connectivity
from mcmodels.core.utils import get_ontological_order_leaf
from mcmodels.core.utils import get_nw_loocv,get_wt_inds
from mcmodels.core.utils import get_countvec



In [2]:
#read data
TOP_DIR = '/Users/samsonkoelle/alleninstitute/mcm_2020/mcm_updated/'
INPUT_JSON = os.path.join(TOP_DIR, 'input_011520.json')
EXPERIMENTS_EXCLUDE_JSON = os.path.join(TOP_DIR, 'experiments_exclude.json')
FILE_DIR = '/Users/samsonkoelle/alleninstitute/mcm_2020/mcm_updated/'
OUTPUT_DIR = os.path.join(FILE_DIR, 'output')

input_data = ju.read(INPUT_JSON)
manifest_file = input_data.get('manifest_file')
manifest_file = os.path.join(TOP_DIR, manifest_file)
experiments_exclude = ju.read(EXPERIMENTS_EXCLUDE_JSON)

#its unclear why the hyperparameters are loaded from the output directory
cache = VoxelModelCache(manifest_file=manifest_file)
major_structures = input_data.get('structures')
major_structure_ids = [get_structure_id(cache, s) for s in major_structures]
data_info = pd.read_excel('/Users/samsonkoelle/alleninstitute/Whole Brain Cre Image Series_curation only.xlsx', 'all datasets curated_070919pull')
data_info.set_index("id", inplace=True)
ontological_order = get_ordered_summary_structures(cache)

mcc = MouseConnectivityCache(manifest_file = '../connectivity/mouse_connectivity_manifest.json')
st = mcc.get_structure_tree()
ai_map = st.get_id_acronym_map()
ia_map = {value: key for key, value in ai_map.items()}

#regionalize voxel model: compare with regional model
#regional parameters
cre = None
eid_set=None
high_res=False
threshold_injection = False

COARSE_STRUCTURE_SET_ID = 2
DEFAULT_STRUCTURE_SET_IDS = tuple([COARSE_STRUCTURE_SET_ID])
tree = cache.get_structure_tree()
default_structures = tree.get_structures_by_set_id(DEFAULT_STRUCTURE_SET_IDS)
default_structure_ids = [st['id'] for st in default_structures if st['id'] != 934]
#cre= True

In [3]:
msvds = {}
#gammas = np.asarray([0.1])
for sid in major_structure_ids:
    print(sid)
    voxel_data = ModelData(cache, sid)
    print(cre)
    experiment_ids = voxel_data.get_experiment_ids(experiments_exclude=experiments_exclude, cre=cre)
    experiment_ids = np.asarray(list(experiment_ids))    
    msvd = get_voxeldata_msvd(cache, sid,experiments_exclude,default_structure_ids,cre)
    #msvd.l2losses, msvd.paperlosses,msvd.normspredict,msvd.normtrue = single_region_cv(msvd, gammas)
    msvds[sid]  = msvd

512
None
703
None
1089
None
1097
None
315
None
313
None
354
None
698
None
771
None
803
None
477
None
549
None


In [5]:
from mcmodels.models.crossvalidation import get_loocv_predictions_nnlinear_number_inj_norm

In [None]:
# creline = get_cre_status(data_info, msvds)
# experiments_minor_structures = get_minorstructure_dictionary(msvds, data_info)
# leavves = get_leaves_ontologicalorder(msvd, ontological_order)

# # contra_key = msvd.experiments[list(msvd.experiments.keys())[0]].projection_mask.get_key(structure_ids=leavves, hemisphere_id=1)
# # ipsi_key = msvd.experiments[list(msvd.experiments.keys())[0]].projection_mask.get_key(structure_ids=leavves, hemisphere_id=2)

# key = list(msvd.experiments.keys())[0]
# contra_key = msvd.experiments[key].projection_mask.get_key(structure_ids=ontological_order, hemisphere_id=1)
# ipsi_key = msvd.experiments[key].projection_mask.get_key(structure_ids=ontological_order, hemisphere_id=2)

# msvds = get_regionalized_normalized_data(msvds,cache, ontological_order,ipsi_key,contra_key)
# thres_ncomp = np.asarray([[1e-10,0]])
# wt_2ormore = get_wt_inds(creline)

#get dictionaries of creline and leaf by experiment
creline = get_cre_status(data_info, msvds)
with open('data/info/leafs.pickle', 'rb') as handle:
    leafs = pickle.load(handle)
    
#get dictionary of minor structures for each experiment in each major division
#major division segregation is legacy code but convenient for fast cross validation in major division model
experiments_minor_structures = get_minorstructure_dictionary(msvds, data_info)

#get leaves in ontological order.  Where leafs don't exist, uses summary structure
ontological_order_leaves = get_leaves_ontologicalorder(msvd, ontological_order)

#Key isn't affected by which experiment we choose. This allows default masking to be inherited from the AllenSDK.
key = list(msvd.experiments.keys())[0]

#Identify keys denoting which voxels correspond to which structure in the ipsi and contra targets.
#contra_targetkey = msvd.experiments[list(msvd.experiments.keys())[0]].projection_mask.get_key(structure_ids=ontological_order_leaves, hemisphere_id=1)
#ipsi_targetkey = msvd.experiments[list(msvd.experiments.keys())[0]].projection_mask.get_key(structure_ids=ontological_order_leaves, hemisphere_id=2)
contra_targetkey = msvd.experiments[key].projection_mask.get_key(structure_ids=ontological_order, hemisphere_id=1)
ipsi_targetkey = msvd.experiments[key].projection_mask.get_key(structure_ids=ontological_order, hemisphere_id=2)

#get average intensities of projection structures given ipsi and contra keys
source_key = ontological_order_leaves#_leaves #only relevant here when injection needs to be unionized, but currently a required argument
msvds = get_regionalized_normalized_data(msvds,cache, source_key,ipsi_targetkey,contra_targetkey)

#wt_2ormore = get_wt_inds(creline)


In [150]:
from sklearn.metrics.pairwise import pairwise_kernels
from mcmodels.regressors.nonparametric.nadaraya_watson import get_weights

def get_weights(eval_centroids, model_centroids, gamma):
    weights = pairwise_kernels(X=eval_centroids, Y=model_centroids, metric='rbf', gamma=gamma, filter_params=True)
    return (weights)


def get_indices(ids):

    ids_unique = np.unique(ids)
    output = np.zeros((len(ids_unique), len(ids)), dtype = int)
    for i in range(len(ids_unique)):
        output[i,np.where(ids == ids_unique[i])[0] ] = 1
    return(output)

#get indices of firstlist in firstlisttest in categories defined by secondlist
def get_indices2(firstlist, firstlisttest, secondlist):
    
    sl_unique = np.unique(secondlist)
    output = np.zeros((len(sl_unique), len(secondlist)), dtype = int)
    for i in range(len(sl_unique)):
        output[i,np.intersect1d(np.where(np.isin(firstlist,firstlisttest))[0], np.where(secondlist == secondlist[i])[0])] = 1
    return(output)

#nmodels = nleafs
#populate each with experiments that share summary structure
def get_indices_summaryinleaf(summarylist , leaflist):
    
    nexp = len(leaflist)
    leaf_unique = np.unique(leaflist)
    output = np.zeros((len(leaf_unique), nexp), dtype = int)
    
    for i in range(len(leaf_unique)):
        
        summary = summarylist[np.where(leaflist == leaf_unique[i])[0]][0]
        output[i,np.where(summarylist == summary)[0]] = 1
        
        
    return(output)

#get predictions at all eval_indices using model_indices
#if an eval_indices is also a model indice, leave it out of the model
#if a model index is not an eval index, it never gets left out
# def get_homo_loocv_predictions_multimodel(projections, injections, hyperparameters, model_indices, eval_indices):
    
# #def get_loocv_predictions(projections, centroids, gamma):
    
# #     projections = np.asarray(projections, dtype=np.float32)
# #     neval = len(eval_indices)
# #     #nexp = centroids.shape[0]
# #     predictions = np.empty(projections.shape)
# #     weights = get_weights(centroids, gamma)
    
# #     for i in range(neval):
# #         otherindices = np.setdiff1d(model_indices, eval_indices[i])
# #         #this order of operations is the fastest I found
# #         weights_i = weights[eval_indices[i]][model_indices] / weights[model_indices[i]][otherindices].sum()
# #         weights_i[i] = 0
# #         weights_i = np.asarray(weights_i, dtype=np.float32)
# #         pred = np.dot(weights_i, projections[model_indices])
# #         predictions[i] = pred

#     eval_index_val = np.where(eval_indices == 1)[0]
#     model_index_val = np.where(model_indices == 1)[0]
    
#     projections = np.asarray(projections, dtype=np.float32)
    
#     nmod_ind = len(model_index_val)
#     neval = len(eval_index_val)
#     #nexp = centroids.shape[0]
#     predictions = np.empty(projections.shape)
#     #print(model_index_val.shape, eval_index_val.shape)

#     if len(model_index_val) > 0 and  len(eval_index_val) > 0:
#         weights = pairwise_kernels(centroids[model_index_val], centroids[eval_index_val], metric='rbf', gamma=gamma, filter_params=True) #get_weights(centroids, gamma)
#         for i in range(neval):
#             matchindex = np.where(model_index_val == eval_index_val[i])[0]
#             otherindices = np.setdiff1d(np.asarray(list(range(nmod_ind))), matchindex)         
#             #this order of operations is the fastest I found
#             weights_i = weights[:,i] / weights[:,i][otherindices].sum()
#             weights_i[matchindex] = 0
#             weights_i = np.asarray(weights_i, dtype=np.float32)
#             pred = np.dot(weights_i, projections[model_index_val])
#             predictions[eval_index_val[i]] = pred

        
#     return(predictions)    

def get_homo_loocv(msvd, indices, loocv, hyperparameters):

    if len(indices) > 1:
        projections = msvd.reg_proj_vcount_norm_renorm
        injections = msvd.reg_inj_vcount_norm / np.expand_dims(np.linalg.norm(msvd.reg_inj_vcount_norm, axis = 1),1)
        injections[np.where(np.isnan(injections))] = 0.
        
        nreg = projections.shape[1]
        nexp = projections.shape[0]
        nhyp = hyperparameters.shape[0]
        loocv_predictions = np.zeros((nhyp, nexp, nreg))
        for g in range(nhyp):
            loocv_predictions[g, indices] = loocv(projections[indices], injections[indices], *hyperparameters[g])
        return (loocv_predictions)
    else:
        return (np.asarray([]))
    

def get_homo_loocv_predictions_singlemodel(projections, injections, thresh, number, model_indices, eval_indices):
    
    eval_index_val = np.where(eval_indices == 1)[0]
    model_index_val = np.where(model_indices == 1)[0]
    
    projections = np.asarray(projections, dtype=np.float32)
    injections = np.asarray(injections, dtype=np.float32)
    injections = injections / np.expand_dims(np.linalg.norm(injections, axis = 1),1)
    injections[np.where(np.isnan(injections))] = 0.
    
    nexp = projections.shape[0]
    print('nexp',nexp)
    predictions = np.zeros(projections.shape)
    homo_est = HomogeneousModel(kappa=np.inf)
    nmod_ind = len(model_index_val)
    #neval = len(eval_index_val)
    if len(model_index_val) > 0 and  len(eval_index_val) > 0:
        
        for i in range(nexp):
            matchindex = np.where(model_index_val == eval_index_val[i])[0]
            otherindices = np.setdiff1d(np.asarray(list(range(nmod_ind))), matchindex)         

            print('exp', i)
            inj, inds = get_reduced_matrix_ninj(injections[otherindices], thresh, number)
            homo_est.fit(inj, projections[otherindices])
            pred = homo_est.predict(injections[i:(i + 1)][:, inds])
            predictions[i] = pred / np.linalg.norm(pred)

    return (predictions)

def get_homo_loocv_predictions_multimodel(projections, injections, hyperparameters, model_index_matrix, eval_index_matrix):
    

    
    ntargets = projections.shape[1]
    nexp = projections.shape[0]
    nmodels = model_index_matrix.shape[0]
    nhyp = hyperparameters.shape[0]
    
    projections = np.asarray(projections, dtype=np.float32)
    predictions = np.empty((nmodels, nhyp, nexp, ntargets))
    
    for m in range(nmodels):
        #print('m', m, len(np.where(model_index_matrix[m] ==1)[0]))
        predictions[m] = np.asarray([get_homo_loocv_predictions_singlemodel(projections, injections, hyperparameters[g][0], hyperparameters[g][1], model_index_matrix[m], eval_index_matrix[m]) for g in range(nhyp)])
    
    return(predictions)  

def get_homo_loocv_predictions_multimodel_merge(projections, injections, hyperparameters, model_index_matrix, eval_index_matrix):
    
    predictions_unmerged = get_homo_loocv_predictions_multimodel(projections, injections, hyperparameters, model_index_matrix, eval_index_matrix)
    predictions_merged = combine_predictions(predictions_unmerged, eval_index_matrix)
    
    return(predictions_merged)
#we should not pass model_index_matrices that are identical to eval_index_matrices and have only 1 element per model
#can do automatically in the cross validation code but would rther do it explicitly to ensure identical indexing b/w experiments
#1 model leads to removing the model index from eval indices
#should never have no elements in model_indices
def screen_indices(model_indices, eval_indices):
    
    eval_indices2 = eval_indices.copy()
    mod_loc = np.where(model_indices == 1)[0]
    if len(mod_loc) == 1:
        eval_indices2[mod_loc] = 0
    return(eval_indices2)

#this will not result in certain models having no indices, but could result in an empty eval index.  cactch later
#this will result in certain indices having no prediction.  this is fine.
#can merge (sum) the index matrix to see where predictions are actually generated
def screen_index_matrices(model_index_matrices, eval_index_matrices):
    
    nmodels = model_index_matrices.shape[0]
    eval_index_matrices2 = eval_index_matrices.copy()
    for m in range(nmodels):
        eval_index_matrices2[m] = screen_indices(model_index_matrices[m], eval_index_matrices[m])
    
    return(eval_index_matrices2)

#need code for removing experiments that have no model
#this can happen when the model set is a subset of the evaluation set.
#we will therefore generate predictions for a subset
#given a leaf is included, the eval set is the same
#however, we want to remove evals in leaves we don't have a wt for... of course one could say we are doing worse...
#but we also have a fewer number of models
def screen_index_matrices2(model_index_matrices, eval_index_matrices):
    
    nmodels = model_index_matrices.shape[0]
    include_per_model = model_index_matrices.sum(axis= 1)
    to_include = np.where(include_per_model > 0)[0]
    
    model_index_matrices2 = model_index_matrices
    eval_index_matrices2 = eval_index_matrices[to_include]
    model_index_matrices2 = model_index_matrices[to_include]
    
    return(model_index_matrices2, eval_index_matrices2)

In [20]:
indices_leaf = {}
indices_wtinleaf = {}
indices_wtleaf = {}
indices_summary = {}
indices_summaryinleaf = {}
indices_major = {}
indices_majorinleaf = {}
indices_leaf2ormore = {}
indices_wtinleaf2ormore = {}
for sid in major_structure_ids:
    
    #wt_leaf on leaf

    #get the indices of experiments sharing leafs (nmodels is number of leafs)
    indices_leaf[sid] = get_indices(leafs[sid]) #eval_indices
    indices_creleaf = get_indices(leafs[sid])
    
    #get the indices of the wts in the leaf (nmodels is number of leafs)
    indices_wtinleaf[sid] = get_indices2(creline[sid], np.asarray(['C57BL/6J']),leafs[sid]) #model_indices
    
    #get indices of experiments sharing summary structure x cre combination (nmodel is number of cre x leaf combinations)
    #indices_wtleaf[sid] = get_indices(creleafs_merged[sid])
    
    #get indices of experiments sharing summary structure(nmodel is number of summary structures)
    indices_summary[sid] = get_indices(experiments_minor_structures[sid])
    
    #get indices of experiments sharing major structure(nmodel is number of summary structures)
    indices_major[sid] = np.ones((1,experiments_minor_structures[sid].shape[0]))
    
    #get indices of experiments sharing same major structure as a leaf (nmodel is number of leafs)
    indices_majorinleaf[sid] = get_indices2(np.ones(len(leafs[sid])), np.asarray([1]),leafs[sid]) #model_indices
    
    #get indices of experiments in same summary structure as a leaf (nmodel is number of leafs)
    indices_summaryinleaf[sid] = get_indices_summaryinleaf(experiments_minor_structures[sid], leafs[sid])
    
    #evaluate models on leafs
    #model_indices, eval_indices = indices_majorinleaf, indices_leaf
    #model_indices, eval_indices = indices_summaryinleaf, indices_leaf
    #this is the most restrictive of these 3, so eval_indices_leaf2ormore is the smallest eval set
    indices_leaf2ormore[sid] = screen_index_matrices(indices_leaf[sid], indices_leaf[sid])
    
    indices_wtinleaf2ormore[sid] = screen_index_matrices(indices_wtinleaf[sid], indices_wtinleaf[sid])
    #need to find explicitly so can be used in other experiments
    #how do we line up with leaf model...
    #reduced modelset.  also
    #indices_wtinleaf_reduced, indices_leaf_reduced = screen_index_matrices2(indices_wtinleaf[sid], indices_leaf[sid])
    
    #if i want to only use indices_leaf_reduced as target in a different experiment, do i need to eliminate  
    #
    
    
    #leaf on leaf
    #eval_indices = indices_leaf
    #model_indices = indices_leaf

    #leaf on wt_leaf
    #model_indices = indices_leaf
    #eval_indices = indices_wtinleaf

    #summary on summary
    #model_indices = get_indices(experiments_minor_structures[sid])
    #eval_indices = get_indices(experiments_minor_structures[sid])

    #creleaf precise (nmodels is number of creleaf combinations)
    
    #indices_wtleaf = get_indices(creleafs[sid])

In [21]:
msvds[sid].reg_inj_vcount_norm.shape

(83, 45)

In [None]:
# for sid in major_structure_ids:
#     print(sid)
#     #print(msvds[sid].projections.shape[0], len(wt_2ormore[sid]))
#     msvds[sid].loocv_predictions_all = get_homo_loocv(msvds[sid], np.asarray(list(range(msvds[sid].projections.shape[0]))), get_loocv_predictions_nnlinear_number_inj_norm, thres_ncomp)
#     #msvds[sid].loocv_predictions_wt = get_homo_loocv(msvds[sid], wt_2ormore[sid], get_loocv_predictions_nnlinear_number_inj_norm, thres_ncomp)


In [76]:
from mcmodels.models.homogeneous import svd_subset_selection, HomogeneousModel
from mcmodels.core.utils import get_reduced_matrix_ninj

In [77]:

# homo_loocv_predictions_all = {}
# homo_loocv_predictions_wt = {}
# homo_reg_proj_vcount_norm_renorms= {}
# for sid in major_structure_ids:
#     homo_loocv_predictions_all[sid ] = msvds[sid].loocv_predictions_all
#     homo_loocv_predictions_wt[sid ] = msvds[sid].loocv_predictions_wt
#     homo_reg_proj_vcount_norm_renorms[sid ] = msvds[sid].reg_proj_vcount_norm_renorm
    

In [78]:

def combine_predictions(predictions, eval_index_matrix):
    
    nmodels, ngammas, nexp, ntargets = predictions.shape
    combined_predictions = np.empty((ngammas, nexp, ntargets))
    for m in range(nmodels):
        combined_predictions[:,np.where(eval_index_matrix[m] == 1)[0]] = predictions[m][:,np.where(eval_index_matrix[m] == 1)[0]]
        
    return(combined_predictions)

In [114]:
np.asarray([[1e-10],[0]])

array([[1.e-10],
       [0.e+00]])

In [157]:
for sid in major_structure_ids:
    msvds[sid].loocv_predictions_model = get_homo_loocv_predictions_multimodel_merge(projections = msvds[sid].reg_proj_vcount_norm_renorm, 
                                                                     injections = msvds[sid].reg_inj_vcount_norm,
                                                                    hyperparameters = np.asarray([[1e-10,0]]),
                                                                    model_index_matrix = indices_major[sid],
                                                                    eval_index_matrix = indices_major[sid])
                                                                     #codes=np.asarray(np.vstack([leafs[sid], creline[sid]]), dtype = str).transpose())


nexp 36
exp 0
exp 1
exp 2
exp 3
exp 4
exp 5
exp 6
exp 7
exp 8
exp 9
exp 10
exp 11




exp 12
exp 13
exp 14
exp 15
exp 16
exp 17
exp 18
exp 19
exp 20
exp 21
exp 22
exp 23
exp 24
exp 25
exp 26
exp 27
exp 28
exp 29
exp 30
exp 31
exp 32
exp 33
exp 34
exp 35
nexp 7
exp 0
exp 1
exp 2
exp 3
exp 4
exp 5
exp 6
nexp 122
exp 0
exp 1
exp 2
exp 3
exp 4
exp 5
exp 6
exp 7
exp 8
exp 9
exp 10
exp 11
exp 12
exp 13
exp 14
exp 15
exp 16
exp 17
exp 18
exp 19
exp 20
exp 21
exp 22
exp 23
exp 24
exp 25
exp 26
exp 27
exp 28
exp 29
exp 30
exp 31
exp 32
exp 33
exp 34
exp 35
exp 36
exp 37
exp 38
exp 39
exp 40
exp 41
exp 42
exp 43
exp 44
exp 45
exp 46
exp 47
exp 48
exp 49
exp 50
exp 51
exp 52
exp 53
exp 54
exp 55
exp 56
exp 57
exp 58
exp 59
exp 60
exp 61
exp 62
exp 63
exp 64
exp 65
exp 66
exp 67
exp 68
exp 69
exp 70
exp 71
exp 72
exp 73
exp 74
exp 75
exp 76
exp 77
exp 78
exp 79
exp 80
exp 81
exp 82
exp 83
exp 84
exp 85
exp 86
exp 87
exp 88
exp 89
exp 90
exp 91
exp 92
exp 93
exp 94
exp 95
exp 96
exp 97
exp 98
exp 99
exp 100
exp 101
exp 102
exp 103
exp 104
exp 105
exp 106
exp 107
exp 108
exp 109
exp 

KeyboardInterrupt: 

In [152]:

homo_loocv_predictions_all = {}
#homo_loocv_predictions_wt = {}
homo_reg_proj_vcount_norm_renorms= {}
for sid in major_structure_ids[:2]:
    homo_loocv_predictions_all[sid ] = msvds[sid].loocv_predictions_model
    #homo_loocv_predictions_wt[sid ] = msvds[sid].loocv_predictions_wt
    homo_reg_proj_vcount_norm_renorms[sid ] = msvds[sid].reg_proj_vcount_norm_renorm
    

In [153]:
inds_good = {}
for sid in major_structure_ids[:2]:
    inds_good[sid] = np.asarray(list(range(msvds[sid].injections.shape[0])))
    
a = [list(range(1)), list(range(1))]
keys = np.asarray(list(itertools.product(*a)))


In [154]:

def get_loss(true_dict, prediction_dict, pred_ind=None, true_ind=None, keys=None):
    output = {}
    
    major_structure_ids = list(prediction_dict.keys())
    nms = len(major_structure_ids)
    ngam = prediction_dict[major_structure_ids[0]].shape[0]
    nalph = prediction_dict[major_structure_ids[0]].shape[1]
    for m in range(nms):
        sid = major_structure_ids[m]
        if pred_ind == None:
            # prediction_dict and true_dict will contain predictions for 'bad' experiments with no recorded injection
            # when we have the wild type predictions, the subset is what is good among the wild types
            # so 'true' subsetting is always good, since it is w.r.t. the full injection
            # but prediction needs good w.r.t. wt
            pind = np.asarray(list(range(prediction_dict[sid].shape[1])), dtype=int)
        else:
            pind = pred_ind[sid]
        if true_ind == None:
            tind = np.asarray(list(range(true_dict[sid].shape[0])), dtype=int)
        else:
            tind = true_ind[sid]

        nexp = len(pind)

        output[sid] = np.zeros((keys.shape[0], nexp))

        for j in range(keys.shape[0]):
            output[sid][j] = np.asarray(
                [get_loss_paper(true_dict[sid][tind[i]], prediction_dict[sid][j][pind[i]]) for i in
                 range(nexp)])

    return (output)

In [155]:
losses_all = get_loss(homo_reg_proj_vcount_norm_renorms, homo_loocv_predictions_all,pred_ind = inds_good, true_ind = inds_good,keys = keys)


In [156]:

sel_ga_all = get_best_hyperparameters(losses_all,keys)


mean_nw_all = get_loss_best_hyp(losses_all, sel_ga_all)

#mean_nw_wt = get_loss_best_hyp(losses_all, sel_ga_wt)

print(mean_nw_all)


losses = np.asarray([mean_nw_all]).transpose()
losses2 = losses[[4,7,2,1,10,9,11,3,5,8,6,0]]
loss =pd.DataFrame(losses2, columns = ['all','allwt'])

0
1
[0.73357498 0.8541805 ]


IndexError: index 4 is out of bounds for axis 0 with size 2

In [147]:
losses_all[512]

array([[1.79429952, 1.84022172, 1.20661765, 1.99502652, 1.43279907,
        1.99557181, 1.76841474, 1.94121895, 1.02176886, 1.93389254,
        1.96911868, 1.9698524 , 1.52393303, 1.97455856, 1.99993808,
        1.94585568, 1.35517941, 1.29777866, 1.37154365, 1.67780285,
        1.9648431 , 1.91962371, 1.94727464, 1.97926452, 1.95114343,
        1.84326631, 1.06150107, 1.37236168, 1.54600063, 1.618074  ,
        1.21057103, 1.75188756, 1.99358748, 1.97687266, 1.84841024,
        1.84299481]])

In [105]:
sel_ga_all

array([[0, 0],
       [0, 0]])

In [109]:
msvds[315].reg_proj_vcount_norm_renorm[0].shape

(577,)

In [111]:
homo_loocv_predictions_all[512 ]

array([[[1.26964645e-02, 3.08185627e-05, 1.31216450e-02, ...,
         1.92882362e-04, 3.58154386e-04, 1.86203950e-04],
        [2.97906734e-02, 1.05838342e-06, 6.33131200e-03, ...,
         0.00000000e+00, 1.08713975e-04, 1.90663955e-03],
        [3.26103419e-02, 1.45890322e-02, 6.21838699e-05, ...,
         7.82005554e-06, 1.12742873e-05, 4.12790716e-04],
        ...,
        [9.41307750e-03, 1.65668753e-05, 2.88871955e-03, ...,
         0.00000000e+00, 0.00000000e+00, 3.19308252e-04],
        [3.89679149e-02, 1.44760925e-02, 3.76606731e-05, ...,
         8.91638501e-06, 1.34690918e-05, 5.35137835e-04],
        [3.53414789e-02, 1.37833795e-02, 4.04842394e-05, ...,
         8.07139531e-06, 1.23910249e-05, 4.88802907e-04]]])

In [129]:
sid = 512
get_homo_loocv(msvds[sid], np.asarray(list(range(msvds[sid].projections.shape[0]))), get_loocv_predictions_nnlinear_number_inj_norm, thres_ncomp)


exp 0
exp 1
exp 2
exp 3
exp 4
exp 5
exp 6
exp 7
exp 8
exp 9
exp 10
exp 11
exp 12


  predictions[i] = pred / np.linalg.norm(pred)


exp 13
exp 14
exp 15
exp 16
exp 17
exp 18
exp 19
exp 20
exp 21
exp 22
exp 23
exp 24
exp 25
exp 26
exp 27
exp 28
exp 29
exp 30
exp 31
exp 32
exp 33
exp 34
exp 35


array([[[8.56070966e-03, 3.05101003e-05, 1.35357073e-02, ...,
         7.38799063e-05, 1.24148733e-04, 9.93363574e-05],
        [2.71225795e-02, 2.57367301e-05, 6.23216294e-03, ...,
         0.00000000e+00, 1.14158065e-04, 1.88135076e-03],
        [3.52268964e-02, 1.53093301e-02, 7.20390090e-05, ...,
         7.36269931e-06, 1.32501909e-05, 4.51150147e-04],
        ...,
        [7.39513477e-03, 3.51957024e-05, 2.86665931e-03, ...,
         0.00000000e+00, 0.00000000e+00, 3.00048589e-04],
        [4.77858670e-02, 2.00595204e-02, 5.85877387e-05, ...,
         8.99483348e-06, 2.13925159e-05, 7.38395029e-04],
        [4.40028347e-02, 1.91143900e-02, 6.05519745e-05, ...,
         8.28236080e-06, 1.99363203e-05, 6.84276223e-04]]])

In [121]:
thres_ncomp = np.asarray([[1e-10,0]])

In [131]:
get_homo_loocv_predictions_multimodel_merge(projections = msvds[sid].reg_proj_vcount_norm_renorm, 
                                                                    injections = msvds[sid].reg_inj_vcount_norm,
                                                                    hyperparameters = np.asarray([[1e-10,0]]),
                                                                    model_index_matrix = indices_major[sid],
                                                                    eval_index_matrix = indices_major[sid])

nexp 36
exp 0
exp 1
exp 2
exp 3
exp 4
exp 5
exp 6
exp 7
exp 8
exp 9
exp 10
exp 11
exp 12




exp 13
exp 14
exp 15
exp 16
exp 17
exp 18
exp 19
exp 20
exp 21
exp 22
exp 23
exp 24
exp 25
exp 26
exp 27
exp 28
exp 29
exp 30
exp 31
exp 32
exp 33
exp 34
exp 35


array([[[1.26964645e-02, 3.08185627e-05, 1.31216450e-02, ...,
         1.92882362e-04, 3.58154386e-04, 1.86203950e-04],
        [2.97906734e-02, 1.05838342e-06, 6.33131200e-03, ...,
         0.00000000e+00, 1.08713975e-04, 1.90663955e-03],
        [3.26103419e-02, 1.45890322e-02, 6.21838699e-05, ...,
         7.82005554e-06, 1.12742873e-05, 4.12790716e-04],
        ...,
        [9.41307750e-03, 1.65668753e-05, 2.88871955e-03, ...,
         0.00000000e+00, 0.00000000e+00, 3.19308252e-04],
        [3.89679149e-02, 1.44760925e-02, 3.76606731e-05, ...,
         8.91638501e-06, 1.34690918e-05, 5.35137835e-04],
        [3.53414789e-02, 1.37833795e-02, 4.04842394e-05, ...,
         8.07139531e-06, 1.23910249e-05, 4.88802907e-04]]])

In [133]:
get_homo_loocv_predictions_singlemodel(projections = msvds[sid].reg_proj_vcount_norm_renorm, 
                                                                    injections = msvds[sid].reg_inj_vcount_norm,
                                                                    thresh = 1e-10,
                                                                    number = 0, 
                                                                    model_indices = indices_major[sid][0],
                                                                    eval_indices = indices_major[sid][0])

nexp 36
exp 0
exp 1
exp 2
exp 3
exp 4
exp 5
exp 6
exp 7
exp 8
exp 9
exp 10
exp 11
exp 12




exp 13
exp 14
exp 15
exp 16
exp 17
exp 18
exp 19
exp 20
exp 21
exp 22
exp 23
exp 24
exp 25
exp 26
exp 27
exp 28
exp 29
exp 30
exp 31
exp 32
exp 33
exp 34
exp 35


array([[1.26964645e-02, 3.08185627e-05, 1.31216450e-02, ...,
        1.92882362e-04, 3.58154386e-04, 1.86203950e-04],
       [2.97906734e-02, 1.05838342e-06, 6.33131200e-03, ...,
        0.00000000e+00, 1.08713975e-04, 1.90663955e-03],
       [3.26103419e-02, 1.45890322e-02, 6.21838699e-05, ...,
        7.82005554e-06, 1.12742873e-05, 4.12790716e-04],
       ...,
       [9.41307750e-03, 1.65668753e-05, 2.88871955e-03, ...,
        0.00000000e+00, 0.00000000e+00, 3.19308252e-04],
       [3.89679149e-02, 1.44760925e-02, 3.76606731e-05, ...,
        8.91638501e-06, 1.34690918e-05, 5.35137835e-04],
       [3.53414789e-02, 1.37833795e-02, 4.04842394e-05, ...,
        8.07139531e-06, 1.23910249e-05, 4.88802907e-04]])

In [149]:
get_loocv_predictions_nnlinear_number_inj_norm(projections = msvds[sid].reg_proj_vcount_norm_renorm, 
                                                                    injections = msvds[sid].reg_inj_vcount_norm,
                                                                    thresh = 1e-10,
                                                                    number = 0)

exp 0
exp 1
exp 2
exp 3
exp 4
exp 5
exp 6
exp 7
exp 8
exp 9
exp 10
exp 11
exp 12


  predictions[i] = pred / np.linalg.norm(pred)


exp 13
exp 14
exp 15
exp 16
exp 17
exp 18
exp 19
exp 20
exp 21
exp 22
exp 23
exp 24
exp 25
exp 26
exp 27
exp 28
exp 29
exp 30
exp 31
exp 32
exp 33
exp 34
exp 35


array([[1.26964645e-02, 3.08185627e-05, 1.31216450e-02, ...,
        1.92882362e-04, 3.58154386e-04, 1.86203950e-04],
       [2.97906734e-02, 1.05838342e-06, 6.33131200e-03, ...,
        0.00000000e+00, 1.08713975e-04, 1.90663955e-03],
       [3.26103419e-02, 1.45890322e-02, 6.21838699e-05, ...,
        7.82005554e-06, 1.12742873e-05, 4.12790716e-04],
       ...,
       [9.41307750e-03, 1.65668753e-05, 2.88871955e-03, ...,
        0.00000000e+00, 0.00000000e+00, 3.19308252e-04],
       [3.89679149e-02, 1.44760925e-02, 3.76606731e-05, ...,
        8.91638501e-06, 1.34690918e-05, 5.35137835e-04],
       [3.53414789e-02, 1.37833795e-02, 4.04842394e-05, ...,
        8.07139531e-06, 1.23910249e-05, 4.88802907e-04]])

In [148]:
sid = 512
get_homo_loocv(msvds[sid], np.asarray(list(range(msvds[sid].projections.shape[0]))), get_loocv_predictions_nnlinear_number_inj_norm, thres_ncomp)


exp 0
exp 1
exp 2
exp 3
exp 4
exp 5
exp 6
exp 7
exp 8
exp 9
exp 10
exp 11
exp 12


  predictions[i] = pred / np.linalg.norm(pred)


exp 13
exp 14
exp 15
exp 16
exp 17
exp 18
exp 19
exp 20
exp 21
exp 22
exp 23
exp 24
exp 25
exp 26
exp 27
exp 28
exp 29
exp 30
exp 31
exp 32
exp 33
exp 34
exp 35


array([[[8.56070966e-03, 3.05101003e-05, 1.35357073e-02, ...,
         7.38799063e-05, 1.24148733e-04, 9.93363574e-05],
        [2.71225795e-02, 2.57367301e-05, 6.23216294e-03, ...,
         0.00000000e+00, 1.14158065e-04, 1.88135076e-03],
        [3.52268964e-02, 1.53093301e-02, 7.20390090e-05, ...,
         7.36269931e-06, 1.32501909e-05, 4.51150147e-04],
        ...,
        [7.39513477e-03, 3.51957024e-05, 2.86665931e-03, ...,
         0.00000000e+00, 0.00000000e+00, 3.00048589e-04],
        [4.77858670e-02, 2.00595204e-02, 5.85877387e-05, ...,
         8.99483348e-06, 2.13925159e-05, 7.38395029e-04],
        [4.40028347e-02, 1.91143900e-02, 6.05519745e-05, ...,
         8.28236080e-06, 1.99363203e-05, 6.84276223e-04]]])