## Imports

In [1]:
import numpy as np
import pandas as pd
import scipy
from scipy import optimize
from multiprocessing import Process
import copy
import random
from sklearn import preprocessing

## Import from Mrx3

In [67]:
# Load the two pre-computed matrices saved by the earlier step:
# `c` is later used as the NNLS design matrix and `e` supplies the
# right-hand-side columns.
c, e = (
    np.load(matrix_path)
    for matrix_path in (
        "/beegfs/home/pmatyskova/project/mrstep_c_miss_hm185cor_minres_mr1500_filled0.npy",
        "/beegfs/home/pmatyskova/project/mrstep_e_miss_hm185cor_minres_mr1500_filled0.npy",
    )
)

## Gene set size

In [69]:
# Number of leading rows kept from both matrices (same rows in each).
set_size = 1300
c, e = c[:set_size], e[:set_size]

## Solve a linear matrix equation

Options: `scipy.optimize.nnls` (non-negative least squares, used below) or `np.linalg.solve`.

In [71]:
# Placeholder fit against an all-ones target. Its solution `d` only serves as
# the first row that the per-column fits are stacked under; `a` is the
# residual returned by nnls.
Xe = np.ones(len(e))
d, a = optimize.nnls(c, Xe)

# Disabled experiment: standardising e and c before fitting.
# NOTE(review): StandardScaler scales per column (feature), not per row.
#s_scaler = preprocessing.StandardScaler() #applying row standardization
#e = s_scaler.fit_transform(e)
#c = s_scaler.fit_transform(c)

In [72]:
# Solve one non-negative least-squares problem per column of `e`, using `c`
# as the design matrix. Only the coefficient vector (element 0 of the nnls
# result) is kept.
nnls_rows = [scipy.optimize.nnls(c, e[:, i])[0] for i in range(e.shape[1])]

# Stack all solutions once instead of calling np.vstack inside the loop,
# which re-copies the growing array on every iteration.
# `np.atleast_2d(d)[:1]` keeps only the placeholder first row of `d`, so
# re-running this cell rebuilds `d` instead of appending a second set of rows.
d = np.vstack([np.atleast_2d(d)[:1], *nnls_rows])

## Save d matrix

In [73]:
# Drop the first row of `d` (the placeholder fit) and keep the rest.
d1 = d[1:, :]
d1.shape

(2031, 185)

In [74]:
# Persist the coefficient matrix (np.save appends the ".npy" extension).
np.save("/beegfs/home/pmatyskova/project/d_miss_hm185cor_minres_mr1300_filled0", d1)

## Quantitative model evaluation

In [75]:
# Tab-separated signature table. Its column labels are later used to name
# the columns of the prediction matrix.
hyp = pd.read_csv(
    '/beegfs/scratch/bruening_scratch/lsteuernagel/data/petra_data/hypoMap_avg_signatures/hypomap_avg_signatures_C185_rna.txt',
    sep="\t",
)
# Use the first column as the row index, then remove it from the data columns.
hyp = hyp.set_index(hyp.iloc[:, 0]).drop(['Unnamed: 0'], axis=1)

In [76]:
def annot_function(d, annot, ref, columns=None):
    """Attach annotation columns to a prediction matrix.

    Parameters
    ----------
    d : array-like
        Prediction matrix; one column per entry of ``columns``.
    annot : pandas.DataFrame
        Annotation table. Must contain a ``'merge'`` column holding the row
        position of ``d`` that each annotation row belongs to.
    ref : pandas.DataFrame
        Reference table with a ``'cluster'`` column; only prediction columns
        whose label appears there are kept.
    columns : sequence of labels, optional
        Column labels for ``d``. Defaults to ``hyp.columns`` (the notebook
        global), which is what the function always used before.

    Returns
    -------
    pandas.DataFrame
        The kept prediction columns joined with the columns of ``annot``
        (without the ``'merge'`` helper column).
    """
    if columns is None:
        columns = hyp.columns

    pred = pd.DataFrame(d)
    pred.columns = columns
    # Only keep cell types for which we have a reference entry.
    pred = pred.loc[:, pred.columns.isin(ref.loc[:, 'cluster'])]
    # assign() returns a new frame, so the key column is never written into a
    # slice of another frame (avoids SettingWithCopyWarning).
    pred = pred.assign(merge=pred.index)
    # Join explicitly on the helper key rather than on "all common columns".
    d_annot = pd.merge(pred, annot, on='merge')
    return d_annot.drop('merge', axis=1)

In [77]:
def eval_function(d, annot, ref):
    """Score each reference cell type against its ground-truth region.

    For every row of ``ref`` the score is

        sum(prediction in rows whose 'name' contains the region)
        / (sum(prediction over all rows) + 1e-6)

    where "contains" is a plain substring match, so child regions such as
    "Medial preoptic nucleus, central part" count towards
    "Medial preoptic nucleus".

    Parameters
    ----------
    d : array-like
        Prediction matrix, passed unchanged to ``annot_function``.
    annot : pandas.DataFrame
        Annotation table, passed to ``annot_function``; the annotated result
        must contain a ``'name'`` column.
    ref : pandas.DataFrame
        Reference table with ``'cluster'`` and ``'Region_ground_truth'``
        columns. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A copy of ``ref`` with an added ``'model_eval'`` column.
    """
    d_ann = annot_function(d, annot, ref)
    names = d_ann['name']

    model_eval = []
    # Iterate over values, not index labels, so ``ref`` does not need a
    # 0..n-1 index for the lookups to hit the right rows.
    for region, cluster in zip(ref['Region_ground_truth'], ref['cluster']):
        # Vectorised literal substring test; rows with a missing name are
        # treated as "not in region" instead of raising.
        in_region = names.str.contains(region, regex=False, na=False)
        values = d_ann[cluster]
        # skipna=False keeps NaN propagation identical to the builtin sum().
        # The small constant guards against division by zero.
        score_i = values[in_region].sum(skipna=False) / (values.sum(skipna=False) + 0.000001)
        model_eval.append(score_i)

    copy_ref = ref.copy()
    copy_ref['model_eval'] = model_eval
    return copy_ref

In [78]:
# Annotation table: add a 'merge' column holding each row's index (the join
# key used by the annotation helpers), then keep a fixed subset of columns by
# position.
# NOTE(review): position 14 is presumably the freshly added 'merge' column -
# confirm against the CSV layout.
ann_hypnoSFO = (
    pd.read_csv('/beegfs/home/pmatyskova/project/ish_annot_hypnoSFO.csv')
    .assign(merge=lambda frame: frame.index)
    .iloc[:, [0, 1, 2, 7, 14]]
)

# Ground-truth table: only the first two columns are needed.
gt_hm185 = pd.read_csv(
    '/beegfs/scratch/bruening_scratch/lsteuernagel/projects/analysis_projects/volumetric_analysis/hypoMap_region_annotation_withSpatial_C185.txt',
    sep="\t",
).iloc[:, 0:2]

In [79]:
# Keep one ground-truth row per cluster (the first occurrence) and renumber
# the rows 0..n-1.
gt_hm185 = gt_hm185.drop_duplicates(subset='cluster')
gt_hm185.index = np.arange(len(gt_hm185))

In [80]:
# Evaluate the fitted predictions against the ground-truth regions.
final_eval = eval_function(d=d1, annot=ann_hypnoSFO, ref=gt_hm185)

Unnamed: 0,cluster,Region_ground_truth,model_eval
0,C185-65: Unassigned.Mixed.GABA-2,Medial preoptic nucleus,0.000000
1,C185-71: Vip.Vipr2.GABA-2,Suprachiasmatic nucleus,0.598862
2,C185-72: Fam122b.Vipr2.GABA-2,Suprachiasmatic nucleus,0.000000
3,C185-73: Cck.Vipr2.GABA-2,Suprachiasmatic nucleus,0.166317
4,C185-11: Cbln2.Trh.GLU-2,Paraventricular hypothalamic nucleus,0.219861
...,...,...,...
63,C185-134: Frzb.Tanycytes,Arcuate hypothalamic nucleus,0.142445
64,C185-51: Tac2.GLU-5,Arcuate hypothalamic nucleus,0.000000
65,C185-61: Prkch.GLU-8,Lateral mammillary nucleus,0.999999
66,C185-64: Meis2.Mixed.GABA-2,Zona incerta,0.000000


## Permutation test on voxel randomized predictions

In [81]:
def voxelperm_annot_function(d, annot, ref, random_state=None, columns=None):
    """Evaluate predictions after randomly permuting their rows.

    The rows of the prediction matrix are shuffled and re-labelled with the
    original row index, so each row's values end up attached to a different
    row's annotation. The shuffled predictions are then scored exactly like
    the real ones: for every row of ``ref``,

        sum(prediction in rows whose 'name' contains the region)
        / (sum(prediction over all rows) + 1e-6)

    Parameters
    ----------
    d : array-like
        Prediction matrix; one column per entry of ``columns``.
    annot : pandas.DataFrame
        Annotation table with a ``'merge'`` column (row position in ``d``)
        and a ``'name'`` column (region name).
    ref : pandas.DataFrame
        Reference table with ``'cluster'`` and ``'Region_ground_truth'``
        columns. It is not modified.
    random_state : optional
        Forwarded to ``DataFrame.sample``. The default ``None`` keeps the
        previous unseeded behaviour; pass an int for a reproducible shuffle.
    columns : sequence of labels, optional
        Column labels for ``d``. Defaults to ``hyp.columns`` (the notebook
        global), which is what the function always used before.

    Returns
    -------
    pandas.DataFrame
        A copy of ``ref`` with an added ``'randmodel_eval'`` column.
    """
    if columns is None:
        columns = hyp.columns

    pred = pd.DataFrame(d)
    pred.columns = columns
    # Only keep cell types for which we have a reference entry.
    pred = pred.loc[:, pred.columns.isin(ref.loc[:, 'cluster'])]

    # Shuffle the row order, then restore the original index labels so the
    # shuffled values line up with different annotation rows.
    d_perm = pred.sample(frac=1, axis=0, random_state=random_state)
    d_perm.index = pred.index

    # Join on the helper key explicitly, then discard it.
    d_perm = d_perm.assign(merge=d_perm.index)
    d_annot = pd.merge(d_perm, annot, on='merge').drop('merge', axis=1)

    names = d_annot['name']
    model_eval = []
    # Iterate over values, not index labels, so ``ref`` does not need a
    # 0..n-1 index for the lookups to hit the right rows.
    for region, cluster in zip(ref['Region_ground_truth'], ref['cluster']):
        # Literal substring match so child regions ("..., medial part") count
        # towards their parent; rows with a missing name never match.
        in_region = names.str.contains(region, regex=False, na=False)
        values = d_annot[cluster]
        # skipna=False keeps NaN propagation identical to the builtin sum().
        score_i = values[in_region].sum(skipna=False) / (values.sum(skipna=False) + 0.000001)
        model_eval.append(score_i)

    copy2_ref = ref.copy()
    copy2_ref['randmodel_eval'] = model_eval
    return copy2_ref

In [82]:
# Seed NumPy's global RNG so the permutation baseline is reproducible across
# "Restart & Run All"; DataFrame.sample draws from it when no random_state is
# given.
VOXPERM_SEED = 0
np.random.seed(VOXPERM_SEED)

voxpermutation_iters = 200

# Collect one score column per permutation and assemble the table once.
# Writing into a zero-row DataFrame with `.iloc[:, i] = Series` relies on
# pandas-version-specific enlargement behaviour.
voxperm_scores = []
for i in range(voxpermutation_iters):
    dvoxperm_evaluation = voxelperm_annot_function(d1, ann_hypnoSFO, gt_hm185) #gt_hm286 or gt_hm286n or gt_hm185
    voxperm_scores.append(dvoxperm_evaluation['randmodel_eval'])

# Columns are labelled x1..xN, one per permutation; rows follow the reference table.
voxpermut_evals = pd.concat(
    voxperm_scores,
    axis=1,
    keys=list(map('x{}'.format, range(1, voxpermutation_iters+1))),
)

# Mean permuted score per cell type, shown next to the real score.
voxpermut_eval_tot = voxpermut_evals.mean(axis=1)
final_eval['voxpermut_eval'] = voxpermut_eval_tot
final_eval

Unnamed: 0,cluster,Region_ground_truth,model_eval,voxpermut_eval
0,C185-65: Unassigned.Mixed.GABA-2,Medial preoptic nucleus,0.000000,0.010700
1,C185-71: Vip.Vipr2.GABA-2,Suprachiasmatic nucleus,0.598862,0.004284
2,C185-72: Fam122b.Vipr2.GABA-2,Suprachiasmatic nucleus,0.000000,0.005293
3,C185-73: Cck.Vipr2.GABA-2,Suprachiasmatic nucleus,0.166317,0.006105
4,C185-11: Cbln2.Trh.GLU-2,Paraventricular hypothalamic nucleus,0.219861,0.018450
...,...,...,...,...
63,C185-134: Frzb.Tanycytes,Arcuate hypothalamic nucleus,0.142445,0.015346
64,C185-51: Tac2.GLU-5,Arcuate hypothalamic nucleus,0.000000,0.000000
65,C185-61: Prkch.GLU-8,Lateral mammillary nucleus,0.999999,0.000226
66,C185-64: Meis2.Mixed.GABA-2,Zona incerta,0.000000,0.144307


In [83]:
# Write the evaluation table (real and permuted scores) to disk.
final_eval.to_csv(path_or_buf="/beegfs/home/pmatyskova/project/feval_miss_hm185cor_minres_mr1300_filled0.csv")