In [17]:
import sys, os
from Deconvolution.BLADE import Framework
import numpy as np
from numpy import transpose as t
import itertools
import pickle
from scipy.optimize import nnls
from sklearn.svm import SVR
from sklearn.svm import NuSVR

from sklearn.metrics import mean_squared_error as mse
import pandas as pd

# modules for visualization
import qgrid
from matplotlib import pyplot as plt
import seaborn as sns

from sklearn.model_selection import LeaveOneOut


### Run BLADE with TCGA bulk and Puram scRNA-seq reference


#### Application of deconvolution methods

From here, we will apply the following three methods for further performance comparison:
1. BLADE (estimation of cellular fraction + group-mode/high-resolution-mode purification)
2. NNLS (estimation of fraction)
3. SVR followed by NNLS (estimation of fraction + group-mode purification) - similar to CIBERSORTx


##### 1. Application of BLADE

These are the key parameters used in BLADE (default settings are used for any parameter that is not specified):
- Hyperparameters (`hyperpars`): `Alpha`, `Alpha0`, `Kappa0` and `SigmaY` (stored under the key `SY` in the code below), each of which can be defined as a list of options. BLADE takes an empirical Bayes approach to find the optimal parameter set among all possible combinations.
- `Nrep`: Number of repeats for evaluating each parameter configuration.
- `Nrepfinal`: Number of repeated optimizations for the final parameter set.
- `Njob`: Number of parallel jobs.

## Prepare signature matrix as required

In [18]:
# Directory holding the Puram signature, pseudobulk and marker-gene files.
PURAM_DIR = "/home/cke/Puram"

# Per-cell-type standard deviation and mean expression tables, indexed by gene
# (one column per cell type).
df_Puram_std = pd.read_csv(PURAM_DIR + "/HNSCC2PuramGSE103322_qc_std_pseudotrain.tsv", sep='\t', index_col=0)
df_Puram_mean = pd.read_csv(PURAM_DIR + "/HNSCC2PuramGSE103322_qc_mean_pseudotrain.tsv", sep='\t', index_col=0)

# Pseudobulk expression table, transposed after loading.
# NOTE(review): presumably the file is samples x genes, so that the transpose
# is indexed by gene like the signature tables — confirm.
df_pseudo = pd.read_csv(PURAM_DIR + "/Puram_pseudobulk_fromraw_test.tsv", sep='\t', index_col=0).T

# Marker gene lists: only the first row of each file is kept, as a Series.
marker_genes_100DEGs = pd.read_csv(PURAM_DIR + "/top100DEGs_pseudobulk.txt", header=None).iloc[0, :]
marker_genes_100 = pd.read_csv(PURAM_DIR + "/top100markers_de_cor_symbol.txt", header=None).iloc[0, :]
marker_genes_50 = pd.read_csv(PURAM_DIR + "/top50markers_de_cor_symbol.txt", header=None).iloc[0, :]
marker_genes_20 = pd.read_csv(PURAM_DIR + "/top20markers_de_cor_symbol.txt", header=None).iloc[0, :]

# Marker set used by the single-run cells further down, which reference
# `marker_genes`. The 100-DEG list is chosen because the recorded outputs of
# those cells (876 shared genes) match the 100-DEG run of the batch loop.
marker_genes = marker_genes_100DEGs

In [None]:
# Generate train/test splits with leave-one-out cross-validation (LOO CV) and
# print the train and test indices of every split.
# NOTE(review): `bulk_pseudocount_raw` is not defined in any cell visible in
# this notebook, so this cell raises NameError on a fresh kernel — confirm
# where that matrix is supposed to be loaded.
loo = LeaveOneOut()
for train, test in loo.split(bulk_pseudocount_raw):
    print("sample train %s test %s" % (train, test))
    

In [12]:
def run_BLADE(marker_genes, df_Puram_std, df_Puram_mean, df_pseudo):
    """Assemble the gene-matched input tables for BLADE.

    Despite its name, this helper does not run BLADE. It restricts the
    signature tables to the requested marker genes and inner-joins them with
    the bulk table on the gene index, so the returned frames only contain
    genes present in both the signature and the bulk data.

    Parameters
    ----------
    marker_genes : pandas.Series or list-like
        Gene identifiers to keep; duplicates are dropped.
    df_Puram_std : pandas.DataFrame
        Per-cell-type expression standard deviation, indexed by gene.
    df_Puram_mean : pandas.DataFrame
        Per-cell-type mean expression, indexed by gene.
    df_pseudo : pandas.DataFrame
        Bulk expression, indexed by gene, one column per bulk sample.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(df_TCGA_shared, df_shared_mean, df_shared_std)``: the bulk columns,
        the signature means and the signature standard deviations for the
        shared genes.
    """
    marker_genes = pd.Series(marker_genes).drop_duplicates()

    # `.loc` with a list of labels raises KeyError as soon as one label is
    # missing, so keep only the markers present in both signature tables.
    in_signature = marker_genes.isin(df_Puram_mean.index) & marker_genes.isin(df_Puram_std.index)
    if not in_signature.all():
        print("Marker genes missing from signature (skipped): ", int((~in_signature).sum()))
    marker_genes = marker_genes[in_signature]

    df_Puram_std_filtered = df_Puram_std.loc[marker_genes, :]
    df_Puram_mean_filtered = df_Puram_mean.loc[marker_genes, :]

    # Inner joins on the gene index: signature columns first, bulk columns after.
    merge_genes_mean = pd.merge(df_Puram_mean_filtered, df_pseudo, left_index=True, right_index=True, how='inner')
    merge_genes_std = pd.merge(df_Puram_std_filtered, df_pseudo, left_index=True, right_index=True, how='inner')

    print("Get mean and std exp!")

    # Split the merged frames at the actual number of signature columns
    # instead of assuming exactly 10 annotated cell types.
    n_celltypes_mean = df_Puram_mean_filtered.shape[1]
    n_celltypes_std = df_Puram_std_filtered.shape[1]
    df_TCGA_shared = merge_genes_mean.iloc[:, n_celltypes_mean:]
    df_shared_mean = merge_genes_mean.iloc[:, :n_celltypes_mean]
    df_shared_std = merge_genes_std.iloc[:, :n_celltypes_std]

    print("Get common genes! ", df_shared_mean.shape[0])
    print("cell types: ", df_shared_mean.shape[1])
    print("bulk samples: ", df_TCGA_shared.shape[1])
    return df_TCGA_shared, df_shared_mean, df_shared_std

In [13]:
# Build the BLADE inputs (bulk table, signature mean, signature std) for one
# marker set. `marker_genes` must already hold one of the marker lists; the
# recorded output (876 shared genes) matches the 100-DEG list.
df_TCGA_shared, df_shared_mean, df_shared_std = run_BLADE(marker_genes, df_Puram_std, df_Puram_mean, df_pseudo)

Get mean and std exp!
Get common genes!  876
cell types:  10
bulk samples:  9


In [14]:
# Pair every marker list with the label used in its output file name, then
# expose the two parallel lists consumed by the batch loop.
marker_sets = [
    ('100DEGs', marker_genes_100DEGs),
    ('100', marker_genes_100),
    ('50', marker_genes_50),
    ('20', marker_genes_20),
]
list_markers = [genes for _, genes in marker_sets]
list_num = [label for label, _ in marker_sets]

In [16]:
# Dry run of the batch script: for every marker set, build the BLADE inputs
# and print the pickle path the results would be written to.
for label, markers in zip(list_num, list_markers):
    df_TCGA_shared, df_shared_mean, df_shared_std = run_BLADE(
        markers, df_Puram_std, df_Puram_mean, df_pseudo
    )
    print("start BLADE!")
    # NumPy versions of the bulk, mean and std tables, as passed to Framework.
    Y, mean, sd = (
        frame.to_numpy() for frame in (df_TCGA_shared, df_shared_mean, df_shared_std)
    )
    outfile = f'./BLADE/data/Puramfiltered_pseudobulk_BLADEout_{label}.pickle'
    print(outfile)
    # The BLADE fit and the pickle dump stay disabled in this dry run.
#     final_obj, best_obj, best_set, outs = Framework(
#         mean, sd, Y,
#         Alphas=hyperpars['Alpha'], Alpha0s=hyperpars['Alpha0'], 
#         Kappa0s=hyperpars['Kappa0'], SYs=hyperpars['SY'],
#         Nrep=Nrep, Njob=Njob, Nrepfinal=Nrepfinal)
#     pickle.dump(
#         {
#             'final_obj': final_obj,
#             'best_obj': best_obj,
#             'best_set': best_set,
#             'outs' : outs
#         }, open(outfile, 'wb')
#         )

Get mean and std exp!
Get common genes!  876
cell types:  10
bulk samples:  9
start BLADE!
./BLADE/data/Puramfiltered_pseudobulk_BLADEout_100DEGs.pickle
Get mean and std exp!
Get common genes!  915
cell types:  10
bulk samples:  9
start BLADE!
./BLADE/data/Puramfiltered_pseudobulk_BLADEout_100.pickle
Get mean and std exp!
Get common genes!  463
cell types:  10
bulk samples:  9
start BLADE!
./BLADE/data/Puramfiltered_pseudobulk_BLADEout_50.pickle
Get mean and std exp!
Get common genes!  194
cell types:  10
bulk samples:  9
start BLADE!
./BLADE/data/Puramfiltered_pseudobulk_BLADEout_20.pickle


In [7]:
# Inspect the signature mean table (rows: genes, columns: cell types).
df_Puram_mean

Unnamed: 0,B cell,Dendritic,Endothelial,Fibroblast,Macrophage,Mast,T cell,myocyte,other,tumor
C9orf152,0.000000,0.129314,0.052885,0.005461,0.000000,0.000000,0.002805,0.000000,0.180350,0.035203
RPS11,6.492917,5.855553,6.375462,5.998231,6.124608,4.881329,4.473081,5.786865,6.249870,6.651266
ELMO2,0.231680,1.369407,0.738443,0.812115,0.723999,0.965855,0.536432,0.548613,0.670335,1.162779
CREB3L1,0.000000,0.000000,0.026067,0.176889,0.000000,0.000000,0.001527,0.000000,0.437221,0.014168
PNMA1,0.453225,1.837884,1.065678,1.151217,0.230229,0.482366,0.410232,0.538573,0.432366,1.219110
...,...,...,...,...,...,...,...,...,...,...
PIK3IP1,0.157418,0.000000,1.018390,0.796913,1.598965,2.003856,3.261685,0.253657,0.941179,1.408405
SNRPD2,2.199768,2.046748,3.646981,2.856363,5.250042,2.220285,2.216675,3.812994,3.847157,6.123107
SLC39A6,0.382470,0.488054,0.343366,0.892032,0.843303,0.437975,0.426652,0.387895,1.241713,2.634749
CTSC,1.737483,3.486444,2.743636,1.961192,4.411457,2.088800,2.629155,1.266796,2.373988,3.919493


In [98]:
# Number of marker genes that are present in the signature index.
marker_genes.isin(df_Puram_mean.index).sum()

876

In [99]:
# writing a list of all DEGs
# import csv
# with open("compare_genes_test.txt",'w') as f:
#     write = csv.writer(f)
#     write.writerow(df_Puram_mean.index.tolist())

In [100]:
# Inspect the bulk expression restricted to the shared marker genes
# (rows: genes, columns: bulk samples).
df_TCGA_shared

Unnamed: 0,0,6,7,8,10,12,13,16,17,18,20,22,23,24,25,26,28
MIR4461,201133.2200,525544.7500,3142.921100,208558.160000,142554.61000,107386.016000,97362.48400,606043.2000,422924.6000,811308.1000,1.581472e+06,273109.7800,32629.800000,91803.8200,562450.060,475229.1200,184733.4000
PDE4C,2403.5480,2658.7458,21.780113,975.862400,554.24260,1229.643100,552.01100,4283.4850,1858.2470,3067.3530,3.149312e+03,1504.2003,302.019740,879.1892,5989.998,2309.5160,5435.8237
EPAS1,13034.5980,41034.9800,395.913020,11122.048000,5980.84330,2261.637000,7121.60350,47756.7730,32371.6390,36928.0860,3.011680e+04,41015.0000,29.630247,30245.7560,87970.055,44142.9400,107356.2100
LGALS9,7587.0576,11659.4410,0.000000,2480.692400,747.97760,4670.452600,8458.63100,20376.5310,2575.9110,6667.8105,3.117047e+03,1594.3037,2548.390100,3513.1140,15522.141,13376.6840,8730.4450
PDDC1,5756.5210,7803.6562,41.039944,1874.560900,1296.16610,2290.543200,827.51984,10820.4680,5156.0396,6751.4320,6.870435e+03,1789.3386,1260.093600,1384.8588,8843.993,4643.5010,7796.2993
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
GNG11,7145.7860,71095.1200,0.000000,21789.709000,15134.83300,2478.605500,3211.74660,111096.2500,44151.1200,33739.4650,3.529330e+03,12481.9840,0.000000,10244.9740,31012.424,20079.5940,68684.8900
DSC2,980.3828,6021.7910,1939.966300,74.301346,104.64166,1019.224800,1481.03520,4152.9350,29652.8960,9827.3640,5.417641e+04,8271.0260,0.000000,1585.1553,42107.510,17701.2970,8332.5640
RGS2,442737.2800,23337.7540,0.000000,48976.760000,69027.08000,384017.100000,120987.29000,724758.6000,193879.4500,384367.7200,5.340379e+04,113373.1400,15534.194000,216069.3100,404783.720,89468.0500,203109.8100
YWHAE,56611.6520,276873.0000,5867.562500,57174.957000,35570.56200,50986.734000,37257.73000,218181.9500,250634.3400,188840.1100,5.340487e+05,127783.4500,6522.911000,45664.6200,261133.470,156929.9400,155024.0200


In [102]:
# Inspect the signature means for the shared marker genes
# (rows: genes, columns: cell types).
df_shared_mean

Unnamed: 0,B cell,Dendritic,Endothelial,Fibroblast,Macrophage,Mast,T cell,myocyte,other,tumor
MIR4461,5.590770,6.447604,6.130216,4.745143,5.290368,5.358786,5.548533,3.480288,6.212385,6.555363
PDE4C,1.660976,1.742952,1.741246,1.783527,1.272856,1.779568,2.200499,1.391293,1.970503,1.219440
EPAS1,0.202552,0.232827,4.895536,3.536859,0.471268,2.206391,0.246861,0.275504,1.219639,2.007012
LGALS9,0.079975,3.482618,2.543983,0.294104,4.423526,1.463169,0.515896,0.000000,0.509081,0.622734
PDDC1,2.230254,2.334691,2.458050,2.312669,1.888216,2.441583,2.923569,1.904185,2.557850,1.893306
...,...,...,...,...,...,...,...,...,...,...
GNG11,0.008303,0.337393,6.096758,1.754329,0.176046,0.000000,0.005571,0.000000,0.531203,0.094131
DSC2,0.000000,0.724566,0.042762,0.030033,0.592182,0.000000,0.008683,0.201772,1.253575,3.273117
RGS2,3.353258,5.752627,0.778545,1.891414,7.420248,7.496888,3.817707,1.188729,1.379331,1.583197
YWHAE,4.085103,4.992411,6.026584,4.533413,5.706846,4.065982,2.886136,5.041878,5.521537,6.252306


In [103]:
# Inspect the signature standard deviations for the shared marker genes
# (rows: genes, columns: cell types).
df_shared_std

Unnamed: 0,B cell,Dendritic,Endothelial,Fibroblast,Macrophage,Mast,T cell,myocyte,other,tumor
MIR4461,2.771261,1.322891,2.582957,3.100253,2.683149,2.577139,2.430174,3.797055,2.784173,1.892720
PDE4C,0.753315,0.572979,0.706621,0.839213,0.606451,0.515157,0.523784,1.110109,0.986966,0.617054
EPAS1,0.936905,0.895370,2.421875,2.734070,1.174104,2.572555,1.075585,0.998555,2.063641,2.020875
LGALS9,0.532285,2.683842,2.600418,1.126008,1.985790,2.347989,1.618106,0.000000,1.414960,1.403489
PDDC1,0.687852,0.547174,0.850952,0.725201,0.761549,0.535213,0.527190,1.183509,1.155891,0.847387
...,...,...,...,...,...,...,...,...,...,...
GNG11,0.092755,1.311057,2.442785,2.700790,0.914934,0.000000,0.123143,0.000000,1.732624,0.620622
DSC2,0.000000,1.614030,0.360689,0.303682,1.355583,0.000000,0.184878,0.879736,1.934231,1.890718
RGS2,3.722500,3.273434,2.026612,2.992162,1.260457,2.595347,3.892897,2.401644,2.691856,2.394299
YWHAE,2.534912,2.082696,1.647219,2.165275,0.828796,2.533361,2.910775,2.489479,2.426815,0.749371


In [93]:
# Flag every cell-type column that holds a non-finite value. `isna()` alone
# only catches NaN and misses +/-inf, which does occur: the `mean` array
# displayed in a later cell contains -inf entries.
(~np.isfinite(df_shared_mean)).any()

B cell         False
Dendritic      False
Endothelial    False
Fibroblast     False
Macrophage     False
Mast           False
T cell         False
myocyte        False
other          False
tumor          False
dtype: bool

In [76]:
# Candidate values for each BLADE hyperparameter; every combination of these
# lists is evaluated by the empirical Bayes search.
hyperpars = dict(
    Alpha=[1, 10],
    Alpha0=[0.1, 1, 5],
    Kappa0=[1, 0.5, 0.1],
    SY=[1, 0.3, 0.5],
)

# Repeats per parameter configuration, repeats for the final parameter set,
# and number of parallel jobs.
Nrep, Nrepfinal, Njob = 3, 10, 10

In [77]:
# outfile = '/home/cke/BLADE/data/Puramfiltered_pseudobulk_BLADEout_2.pickle'

# pickle.dump(
#     {
#         'final_obj': final_obj,
#         'best_obj': best_obj,
#         'best_set': best_set,
#         'outs' : outs
#     }, open(outfile, 'wb')
#     )

In [94]:
# Inspect the NumPy signature-mean array handed to BLADE.
# NOTE(review): the recorded output contains -inf entries, and its values
# differ from the `df_shared_mean` table displayed in this notebook — confirm
# which transformation produced this array before fitting.
mean

array([[ 6.97383474,  6.95405568,  7.27243041, ...,  6.79606694,
         7.78030016,  7.33904036],
       [ 1.75962876,  1.82479378,  1.85677772, ...,  1.77680651,
         2.46920037,  1.15945546],
       [ 1.85722904,  1.16550407,  6.1807131 , ...,  1.3652669 ,
         3.7994877 ,  3.56055886],
       ...,
       [ 6.93881335,  7.64457082,  4.69549779, ...,  4.78151178,
         5.55882479,  4.38075476],
       [ 5.61343609,  5.85581798,  6.59178552, ...,  6.17387377,
         6.92529273,  6.44143567],
       [ 4.98628543,        -inf,        -inf, ...,        -inf,
         1.4235597 , -0.38934145]])

In [104]:
print("start BLADE!")
# Framework is given plain NumPy arrays: the bulk expression (Y) plus the
# signature mean and standard deviation.
Y = df_TCGA_shared.to_numpy()
mean = df_shared_mean.to_numpy()
sd = df_shared_std.to_numpy()

outfile = '/home/cke/BLADE/data/Puramfiltered_pseudobulk_BLADEout_3.pickle'

# Search over the hyperparameter grid, then fit the final model.
final_obj, best_obj, best_set, outs = Framework(
    mean, sd, Y,
    Alphas=hyperpars['Alpha'], Alpha0s=hyperpars['Alpha0'],
    Kappa0s=hyperpars['Kappa0'], SYs=hyperpars['SY'],
    Nrep=Nrep, Njob=Njob, Nrepfinal=Nrepfinal)

# Persist all four results. The `with` block closes the file handle, which the
# bare `open(...)` passed to `pickle.dump` never did.
with open(outfile, 'wb') as fh:
    pickle.dump(
        {
            'final_obj': final_obj,
            'best_obj': best_obj,
            'best_set': best_set,
            'outs': outs
        },
        fh,
    )

start BLADE!
all of 876 genes are used for optimization.
All samples are used during the optimization.
Initialization with Support vector regression


[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done   1 tasks      | elapsed:    2.1s
[Parallel(n_jobs=10)]: Done   2 out of  17 | elapsed:    2.2s remaining:   16.2s
[Parallel(n_jobs=10)]: Done   4 out of  17 | elapsed:    3.0s remaining:    9.7s
[Parallel(n_jobs=10)]: Done   6 out of  17 | elapsed:    5.1s remaining:    9.3s
[Parallel(n_jobs=10)]: Done   8 out of  17 | elapsed:    5.3s remaining:    6.0s
[Parallel(n_jobs=10)]: Done  10 out of  17 | elapsed:    6.5s remaining:    4.6s
[Parallel(n_jobs=10)]: Done  12 out of  17 | elapsed:    7.1s remaining:    3.0s
[Parallel(n_jobs=10)]: Done  14 out of  17 | elapsed:   24.4s remaining:    5.2s


No feature filtering is done (fsel = 0)


[Parallel(n_jobs=10)]: Done  17 out of  17 | elapsed: 15.8min finished
[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  g_Exp = g_Exp_Beta(Nu, Omega, Beta, B0, Ngene, Ncell, Nsample)
  g_Exp = g_Exp_Beta(Nu, Omega, Beta, B0, Ngene, Ncell, Nsample)
  g_Exp = g_Exp_Beta(Nu, Omega, 

  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
[Parallel(n_jobs=10)]: Done   5 tasks      | elapsed:  1.3min
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Ns

Done optimization, elapsed time (min): 12.863855449358622


In [105]:
# Show the fitted BLADE object returned by Framework (only its repr is displayed).
final_obj

<Deconvolution.BLADE.BLADE at 0x7f2b0da4cc70>

Given the configuration above, BLADE is applied to each of the simulation datasets created previously.  

BLADE produces several outcomes:
- `final_obj`: final BLADE object with optimized variational parameters
- `best_obj`: BLADE object trained with the best parameter set found by the Empirical Bayes framework. Empirical Bayes framework is applied after selecting a subset of samples (5 samples; indicated by `Ind_sample` below), and thus the outcome contains only 5 samples. If `Nsample` <= 5, `final_obj` is identical to `best_obj`.
- `best_set`: Best parameter set defined by Empirical Bayes framework.
- `outs`: Outcome of BLADE for every possible combination of hyperparameters, used in the Empirical Bayes framework. 


## Results

In [106]:
# Reload the results written by the BLADE run. The `with` block closes the
# file handle, which the bare `open(...)` never did.
# NOTE: unpickling can execute arbitrary code — only load pickle files that
# were produced by this analysis.
with open("/home/cke/BLADE/data/Puramfiltered_pseudobulk_BLADEout_3.pickle", 'rb') as fh:
    BLADE_out = pickle.load(fh)

In [107]:
obj = BLADE_out['final_obj']

# Gather the BLADE estimates under a per-method key:
#   Fraction  - transposed expected fractions from the fitted Beta parameters
#   Signature - obj.Nu averaged over its first axis (group-mode purification)
#   HighRes   - obj.Nu itself (high-resolution-mode purification)
outcomes = {
    'BLADE': dict(
        Fraction=t(obj.ExpF(obj.Beta)),
        Signature=np.mean(obj.Nu, axis=0),
        HighRes=obj.Nu,
    )
}

In [108]:
# Show the BLADE object restored from the pickle (only its repr is displayed).
obj

<Deconvolution.BLADE.BLADE at 0x7f2b0da22560>

In [109]:
# Inspect the high-resolution purification result (obj.Nu, a 3-D array in the
# recorded output).
# NOTE(review): the first axis is presumably the bulk sample — confirm.
outcomes['BLADE']['HighRes']

array([[[ 5.59083921e+00,  6.44849199e+00,  6.13113380e+00, ...,
          3.46914925e+00,  6.21386009e+00,  6.55981133e+00],
        [ 1.66097744e+00,  1.74295329e+00,  1.74124740e+00, ...,
          1.39129368e+00,  1.97050528e+00,  1.21944148e+00],
        [ 2.02574137e-01,  2.32845082e-01,  4.89353991e+00, ...,
          2.75517556e-01,  1.21979103e+00,  2.00784748e+00],
        ...,
        [ 3.35523013e+00,  5.76356166e+00,  7.78570631e-01, ...,
          1.18878989e+00,  1.37945428e+00,  1.58343068e+00],
        [ 4.08814639e+00,  4.99597196e+00,  6.03102912e+00, ...,
          5.04443418e+00,  5.52499723e+00,  6.25538873e+00],
        [ 1.18037957e+00,  5.49282765e-04,  2.27218723e-04, ...,
         -2.63053965e-04,  8.08728436e-02,  7.98081344e-03]],

       [[ 5.59212969e+00,  6.44818986e+00,  6.12173173e+00, ...,
          3.47402942e+00,  6.21354115e+00,  6.56467101e+00],
        [ 1.66097724e+00,  1.74295306e+00,  1.74125001e+00, ...,
          1.39129398e+00,  1.97050437e

In [110]:
# Wrap the estimated fractions in a DataFrame. It carries default integer
# labels at this point; the following cells attach sample and cell-type labels.
filtered_celltypefrac_BLADE = pd.DataFrame(outcomes['BLADE']['Fraction'])
filtered_celltypefrac_BLADE

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
0,0.094188,0.082175,0.087848,0.098185,0.226443,0.090333,0.099923,0.090596,0.073373,0.081633,0.067671,0.083702,0.09027,0.082582,0.082386,0.078246,0.086417
1,0.073839,0.062026,0.060814,0.076939,0.07768,0.071295,0.129871,0.06596,0.057871,0.064918,0.043148,0.061008,0.073219,0.060616,0.058202,0.056922,0.059757
2,0.095035,0.150224,0.089587,0.232026,0.092578,0.088692,0.102601,0.121637,0.078951,0.094818,0.073095,0.10092,0.08365,0.091355,0.090747,0.084635,0.090732
3,0.122324,0.200797,0.109284,0.111028,0.116599,0.113797,0.145841,0.198266,0.10745,0.152467,0.096878,0.166292,0.102175,0.276515,0.247923,0.263022,0.297942
4,0.086125,0.086657,0.076619,0.095256,0.088894,0.083901,0.101118,0.092243,0.085353,0.083549,0.068411,0.093036,0.080763,0.083728,0.088824,0.076508,0.083293
5,0.066003,0.059484,0.057766,0.07295,0.06819,0.065212,0.077596,0.060087,0.049421,0.061084,0.040147,0.046807,0.065763,0.049007,0.059546,0.0579,0.059243
6,0.224418,0.077274,0.086359,0.083334,0.088756,0.180515,0.094377,0.130564,0.082033,0.134514,0.073116,0.074815,0.138145,0.078948,0.085654,0.075407,0.078258
7,0.054155,0.059875,0.058072,0.06784,0.06335,0.056333,0.065278,0.059289,0.055556,0.059643,0.045459,0.060456,0.058829,0.06113,0.056896,0.058936,0.058879
8,0.06741,0.039022,0.204663,0.056028,0.071091,0.131184,0.050908,0.058493,0.036057,0.056446,0.047513,0.04401,0.179794,0.079152,0.039301,0.047897,0.055674
9,0.116502,0.182466,0.168987,0.106413,0.106418,0.118739,0.132488,0.122865,0.373934,0.210927,0.444561,0.268954,0.127391,0.136967,0.190523,0.200526,0.129805


In [111]:
# Label the columns with the bulk sample identifiers.
# NOTE(review): assumes `df_TCGA_shared` is still the frame the fit was run on,
# so that the column order matches — confirm.
filtered_celltypefrac_BLADE.columns = df_TCGA_shared.columns

In [112]:
# Label the rows with the cell-type names taken from the signature columns.
filtered_celltypefrac_BLADE.index = df_shared_mean.columns

In [113]:
# Round-trip the row labels through a Python list so that individual labels
# can be edited. With the rename below commented out, the labels are unchanged.
list_ind = filtered_celltypefrac_BLADE.index.tolist()
# list_ind[6] = 'other'
filtered_celltypefrac_BLADE.index=list_ind

In [114]:
filtered_celltypefrac_BLADE

Unnamed: 0,0,6,7,8,10,12,13,16,17,18,20,22,23,24,25,26,28
B cell,0.094188,0.082175,0.087848,0.098185,0.226443,0.090333,0.099923,0.090596,0.073373,0.081633,0.067671,0.083702,0.09027,0.082582,0.082386,0.078246,0.086417
Dendritic,0.073839,0.062026,0.060814,0.076939,0.07768,0.071295,0.129871,0.06596,0.057871,0.064918,0.043148,0.061008,0.073219,0.060616,0.058202,0.056922,0.059757
Endothelial,0.095035,0.150224,0.089587,0.232026,0.092578,0.088692,0.102601,0.121637,0.078951,0.094818,0.073095,0.10092,0.08365,0.091355,0.090747,0.084635,0.090732
Fibroblast,0.122324,0.200797,0.109284,0.111028,0.116599,0.113797,0.145841,0.198266,0.10745,0.152467,0.096878,0.166292,0.102175,0.276515,0.247923,0.263022,0.297942
Macrophage,0.086125,0.086657,0.076619,0.095256,0.088894,0.083901,0.101118,0.092243,0.085353,0.083549,0.068411,0.093036,0.080763,0.083728,0.088824,0.076508,0.083293
Mast,0.066003,0.059484,0.057766,0.07295,0.06819,0.065212,0.077596,0.060087,0.049421,0.061084,0.040147,0.046807,0.065763,0.049007,0.059546,0.0579,0.059243
T cell,0.224418,0.077274,0.086359,0.083334,0.088756,0.180515,0.094377,0.130564,0.082033,0.134514,0.073116,0.074815,0.138145,0.078948,0.085654,0.075407,0.078258
myocyte,0.054155,0.059875,0.058072,0.06784,0.06335,0.056333,0.065278,0.059289,0.055556,0.059643,0.045459,0.060456,0.058829,0.06113,0.056896,0.058936,0.058879
other,0.06741,0.039022,0.204663,0.056028,0.071091,0.131184,0.050908,0.058493,0.036057,0.056446,0.047513,0.04401,0.179794,0.079152,0.039301,0.047897,0.055674
tumor,0.116502,0.182466,0.168987,0.106413,0.106418,0.118739,0.132488,0.122865,0.373934,0.210927,0.444561,0.268954,0.127391,0.136967,0.190523,0.200526,0.129805


In [115]:
# Ignore this cell and the ones above it; use the code in runMuSiC to store
# and access the cell-type fraction file instead.
# Writes the fraction table transposed, i.e. one row per bulk sample.
filtered_celltypefrac_BLADE.T.to_csv("/home/cke/BLADE/data/filtered_celltypefrac_BLADE_pseudobulk.csv")