In [53]:
library(MuSiC)
library(Biobase)
library(ggplot2)
library(dplyr)
library(ggcorrplot)
library(reshape)
library(corrplot)
library(tidyr)
library(caret)

In [2]:
# Read the trailing command-line arguments passed to this script.
args <- commandArgs(trailingOnly = TRUE)
# Announce the tool being run; this script loads and runs MuSiC, so the banner
# names MuSiC (it previously named a different tool).
print("running MuSiC with following args:")

if (length(args) == 0) {
  stop("At least one argument must be supplied", call. = FALSE)
} else if (length(args) == 6) {
  # default subtype location: reuse the label location given as argument 3
  args[7] <- args[3]
}
print(args)

[1] "running BayesPrism with following args:"
[1] "/home/cke/.local/share/jupyter/runtime/kernel-5c0b67b6-fe1e-40f9-ad68-8463d9ef97e3.json"


In [None]:
# Example command-line arguments (not R code; kept as a comment):
#   /home/cke/Puram/scRNAlabels/ /home/cke/Puram/markers/top20markers_de_cor_symbol.txt test_fullpipeline_1_pseudobulk_top20markers_6LOT /home/cke/PseudoBulk/Results/MuSiC/

In [87]:
# Manual override of the script arguments for an interactive run.
# Positions 1-6 are filled in one vectorised assignment; their meaning is
# given by how they are unpacked into path_* / FS_setup variables.
args[1:6] <- c(
  "/home/cke/Real/InputToWrapper/real_scRNAref.tsv",
  "/home/cke/TCGA-HNSC.htseq_counts_exp2_symbol_samplexgene.tsv",
  "/home/cke/Puram/scRNAlabels/",
  "/home/cke/Puram/markers/top100markers_de_cor_symbol.txt",
  "test_fullpipeline_real_top100markers",
  "/home/cke/Real/Results/MuSiC/"
)
# Argument 7 reuses the value of argument 3.
args[7] <- args[3]

In [88]:
# Unpack the positional arguments into named variables.

# Expression inputs: single-cell reference and bulk matrix.
path_scRNA <- args[1]
path_bulk <- args[2]

# Label locations: simple categories and subtypes.
path_label <- args[3]
path_label_subtype <- args[7]

# Marker gene list; the value 'noFS' is compared against later in the script.
path_marker <- args[4]

# Setup name (checked later for the substring 'LOT') and output location.
FS_setup <- args[5]
path_out <- args[6]

In [85]:
# Display the single-cell reference path currently in use.
path_scRNA

In [89]:
# Load the single-cell reference and transpose it; after the transpose the
# column names are matched against label row names and the row names against
# marker genes further down.
df_Puram <- read.table(path_scRNA, sep = "\t", header = TRUE, row.names = 1, check.names = FALSE)
df_Puram <- t(df_Puram)

# Load the bulk expression table and transpose it the same way.
df_bulk <- read.table(path_bulk, sep = "\t", header = TRUE, row.names = 1, check.names = FALSE)
df_bulk <- t(df_bulk)

# for LOT setup, copy a test pseudobulk sample!
# The first column is appended again under the column name "0".
if (grepl("LOT", FS_setup)) {
  df_bulk_new <- cbind(df_bulk, df_bulk[, colnames(df_bulk)[1]])
  colnames(df_bulk_new) <- c(colnames(df_bulk), 0)
  df_bulk <- df_bulk_new
}

# Per-cell labels: simple categories and subtypes, both keyed by row name.
label <- read.table(paste0(path_label, "cellcategory_simple.csv"), sep = ",", header = TRUE, row.names = 1)
label_subtype <- read.table(paste0(path_label_subtype, "cellcategory_subtype.csv"), sep = ",", header = TRUE, row.names = 1)

# Align subtypes to the simple labels by row name rather than by position, so
# a subtype file with a different row order (or extra/missing rows) cannot
# silently attach the wrong subtype to a cell. Unmatched cells get NA.
label$subtype <- label_subtype$cell_category[
  match(rownames(label), rownames(label_subtype))
]
label$sampleID <- rownames(label)

# Keep only the cells that are present in the single-cell reference.
label <- subset(label, row.names(label) %in% colnames(df_Puram))
label_subtype <- subset(label_subtype, row.names(label_subtype) %in% colnames(df_Puram))

# Marker genes are only read when feature selection is enabled.
if (path_marker != "noFS") {
  marker_genes <- scan(path_marker, character(), sep = ",")
}

In [74]:
# Generate ExpressionSet objects, run the MuSiC main function and tidy the
# resulting cell type fractions.
#
# Arguments:
#   df_Puram_filtered  single-cell reference expression; converted with
#                      data.matrix() and used as assayData.
#   df_bulk            bulk expression; converted with data.matrix() and used
#                      as assayData.
#   label              data frame used as phenoData of the reference; the
#                      variable metadata declares the columns "sampleID",
#                      "cell_category" and "subtype".
#
# Returns:
#   A named list with two elements (also reachable by position 1 and 2):
#     MuSiC_res  matrix taken from Est.prop.weighted
#     NNLS_res   data frame taken from Est.prop.allgene
#   Both have their columns renamed and reordered by tidy_columns() below.
run_MuSiC <- function(df_Puram_filtered, df_bulk, label) {
  # Rename and reorder the cell type columns of one result table.
  # NOTE(review): the renaming is positional (column 5 before sorting, columns
  # 9 and 1 after sorting), so it presumably assumes a fixed set of 10 cell
  # types; order() on strings is also locale-dependent. Confirm against the
  # label file before reusing with other references.
  tidy_columns <- function(res) {
    colnames(res)[5] <- "other"
    # drop = FALSE keeps the table two-dimensional even with a single row.
    res <- res[, order(colnames(res)), drop = FALSE]
    colnames(res)[9] <- "T-cell"
    colnames(res)[1] <- "B-cell"
    res
  }

  # prepare scRNA-seq ref data, as expressionset object
  metadata <- data.frame(
    labelDescription = c("sampleID", "cell_category", "subtype"),
    row.names = c("sampleID", "cell_category", "subtype")
  )
  Puram.eset <- ExpressionSet(
    assayData = data.matrix(df_Puram_filtered),
    phenoData = new("AnnotatedDataFrame", data = label, varMetadata = metadata)
  )
  bulk_raw.eset <- ExpressionSet(assayData = data.matrix(df_bulk))

  # Estimate cell type proportions
  Est.prop.tcga_raw <- music_prop(
    bulk.eset = bulk_raw.eset, sc.eset = Puram.eset,
    clusters = "cell_category", samples = "sampleID"
  )

  MuSiC_res <- tidy_columns(data.matrix(Est.prop.tcga_raw$Est.prop.weighted))
  NNLS_res <- tidy_columns(data.frame(Est.prop.tcga_raw$Est.prop.allgene))

  # Name the elements so callers can use `$MuSiC_res` / `$NNLS_res`; an
  # unnamed list would make those accessors return NULL.
  list(MuSiC_res = MuSiC_res, NNLS_res = NNLS_res)
}

In [38]:
# Build the reference and bulk ExpressionSet objects, run MuSiC and return the
# weighted cell type fractions as a matrix with tidied column names.
#
# NOTE(review): this definition returns a bare matrix, whereas the other
# run_MuSiC definition in this file returns a list of two result tables;
# whichever is evaluated last is the one in effect. Confirm which is intended.
#
# Arguments:
#   df_Puram_filtered  single-cell reference expression (assayData).
#   df_bulk            bulk expression (assayData).
#   label              phenoData table with the columns declared below.
run_MuSiC <- function(df_Puram_filtered, df_bulk, label) {
  # Variable metadata: one description row per phenoData column.
  pheno_fields <- c("sampleID", "cell_category", "subtype")
  var_meta <- data.frame(labelDescription = pheno_fields, row.names = pheno_fields)

  # Single-cell reference as an ExpressionSet carrying the cell labels.
  sc_eset <- ExpressionSet(
    assayData = data.matrix(df_Puram_filtered),
    phenoData = new("AnnotatedDataFrame", data = label, varMetadata = var_meta)
  )
  # Bulk data as an ExpressionSet without phenoData.
  bulk_eset <- ExpressionSet(assayData = data.matrix(df_bulk))

  # Estimate cell type proportions.
  fit <- music_prop(
    bulk.eset = bulk_eset, sc.eset = sc_eset,
    clusters = "cell_category", samples = "sampleID"
  )

  # Positional renaming: column 5 before sorting, columns 9 and 1 after.
  fractions <- data.matrix(fit$Est.prop.weighted)
  colnames(fractions)[5] <- "other"
  fractions <- fractions[, order(colnames(fractions))]
  colnames(fractions)[9] <- "T-cell"
  colnames(fractions)[1] <- "B-cell"
  fractions
}

In [75]:
# Build the same ExpressionSet inputs that run_MuSiC constructs, at top level,
# so they can be inspected in the following cells.
metadata <- data.frame(
  labelDescription = c("sampleID", "cell_category", "subtype"),
  row.names = c("sampleID", "cell_category", "subtype")
)
# Single-cell reference with the cell labels attached as phenoData.
Puram.eset <- ExpressionSet(
  assayData = data.matrix(df_Puram_filtered),
  phenoData = new("AnnotatedDataFrame", data = label, varMetadata = metadata)
)
# Bulk expression without phenoData.
bulk_raw.eset <- ExpressionSet(assayData = data.matrix(df_bulk))

In [76]:
# Print the ExpressionSet built from the filtered single-cell reference and its labels.
Puram.eset

ExpressionSet (storageMode: lockedEnvironment)
assayData: 915 features, 4484 samples 
  element names: exprs 
protocolData: none
phenoData
  sampleNames: HN28_P15_D06_S330_comb HN28_P6_G05_S173_comb ...
    HNSCC20_P3_G06_S78_comb (4484 total)
  varLabels: cell_category subtype sampleID
  varMetadata: labelDescription
featureData: none
experimentData: use 'experimentData(object)'
Annotation:  

In [77]:
# Print the ExpressionSet built from the bulk expression matrix.
bulk_raw.eset

ExpressionSet (storageMode: lockedEnvironment)
assayData: 546 features, 38929 samples 
  element names: exprs 
protocolData: none
phenoData: none
featureData: none
experimentData: use 'experimentData(object)'
Annotation:  

In [78]:
# Display the per-cell label table (cell_category, subtype, sampleID).
label

Unnamed: 0_level_0,cell_category,subtype,sampleID
Unnamed: 0_level_1,<chr>,<chr>,<chr>
HN28_P15_D06_S330_comb,Fibroblast,Fibroblast,HN28_P15_D06_S330_comb
HN28_P6_G05_S173_comb,Fibroblast,Fibroblast,HN28_P6_G05_S173_comb
HN26_P14_D11_S239_comb,tumor,tumor 26,HN26_P14_D11_S239_comb
HN26_P14_H05_S281_comb,Fibroblast,Fibroblast,HN26_P14_H05_S281_comb
HN26_P25_H09_S189_comb,tumor,tumor 26,HN26_P25_H09_S189_comb
HN26_P14_H06_S282_comb,tumor,tumor 26,HN26_P14_H06_S282_comb
HN25_P25_C04_S316_comb,tumor,tumor 25,HN25_P25_C04_S316_comb
HN26_P25_A11_S107_comb,Fibroblast,Fibroblast,HN26_P25_A11_S107_comb
HN26_P25_C09_S129_comb,tumor,tumor 26,HN26_P25_C09_S129_comb
HNSCC26_P24_H05_S377_comb,tumor,tumor 26,HNSCC26_P24_H05_S377_comb


In [90]:
# Display the transposed bulk expression matrix.
df_bulk

Unnamed: 0,TCGA-BB-4224-01A,TCGA-H7-7774-01A,TCGA-CV-6943-01A,TCGA-CN-5374-01A,TCGA-CQ-6227-01A,TCGA-CV-6959-01A,TCGA-F7-A61V-01A,TCGA-CV-7413-01A,TCGA-CV-7247-01A,TCGA-CR-5249-01A,⋯,TCGA-CV-6960-11A,TCGA-CV-A464-01A,TCGA-C9-A47Z-01A,TCGA-CN-6010-01A,TCGA-WA-A7GZ-11A,TCGA-CV-7235-01A,TCGA-CX-7086-01A,TCGA-CV-6935-11A,TCGA-P3-A6SW-01A,TCGA-HD-A6HZ-01A
TSPAN6,2237,2740,2686,2086,10167,1154,1978,1930,1066,2538,⋯,3340,929,1707,2218,2537,492,2741,8492,770,923
TNMD,2,0,0,1,9,6,1,1,1,0,⋯,0,2,0,0,41,0,0,3,1,0
DPM1,1606,1691,1649,2333,3021,2766,1762,1668,1760,1268,⋯,1388,2332,1926,1574,1171,976,1952,1578,1569,1183
SCYL3,1063,803,917,1288,537,527,482,671,600,1066,⋯,608,472,410,852,557,376,1003,818,620,607
C1orf112,1208,317,402,1105,459,747,331,476,874,950,⋯,206,386,210,729,127,363,1103,281,591,344
FGR,256,330,1535,639,362,906,147,477,204,924,⋯,304,331,134,550,165,488,214,419,462,925
CFH,177,5472,15358,5565,6963,12856,793,3669,2791,5289,⋯,6363,7799,1264,5558,4651,5178,1809,7368,2082,2482
FUCA2,2371,2316,3490,2065,4883,6120,2364,3366,1837,2584,⋯,1242,3028,1356,2921,647,2138,2639,1355,1603,1769
GCLC,2741,8370,4671,2902,3140,7448,2827,2324,9779,2971,⋯,2100,2494,3710,8749,3029,20007,13623,2276,31675,3949
NFYA,2515,816,2080,1777,2448,3527,1159,1459,1916,2572,⋯,1369,1711,1671,1596,848,4131,2530,1583,1473,899


In [91]:
# Display the single-cell reference matrix passed to run_MuSiC
# (restricted to marker genes unless path_marker is 'noFS').
df_Puram_filtered

Unnamed: 0,HN28_P15_D06_S330_comb,HN28_P6_G05_S173_comb,HN26_P14_D11_S239_comb,HN26_P14_H05_S281_comb,HN26_P25_H09_S189_comb,HN26_P14_H06_S282_comb,HN25_P25_C04_S316_comb,HN26_P25_A11_S107_comb,HN26_P25_C09_S129_comb,HNSCC26_P24_H05_S377_comb,⋯,HNSCC20_P3_B10_S22_comb,HNSCC20_P13_B11_S215_comb,HNSCC20_P3_C08_S32_comb,HNSCC17_P4_H03_S183_comb,HNSCC20_P3_F09_S69_comb,HNSCC17_P4_G12_S180_comb,HNSCC20_P13_C05_S221_comb,HNSCC17_P4_C12_S132_comb,HNSCC20_P3_H08_S92_comb,HNSCC20_P3_G06_S78_comb
APBB2,2.588050e+01,0.9299743,19.8700580,1.360206e+02,0.0000000,0.000000,0.000000,87.0884700,0.0000000,0.000000,⋯,0.000000,0.4500004,0.5299968,0.000000e+00,9.099917e-01,5.709965,0.000000,26.700508,0.0000000,0.0000000
SLC10A7,0.000000e+00,0.0000000,0.0000000,0.000000e+00,0.0000000,0.000000,0.000000,0.0000000,0.0000000,0.000000,⋯,5.380048,0.0000000,0.0000000,0.000000e+00,0.000000e+00,1.160023,0.000000,0.000000,0.0000000,71.9304660
CHD1,1.896077e+01,2.0300280,7.6399930,0.000000e+00,25.6301860,1.739980,0.000000,14.6502110,2.9299796,0.000000,⋯,7.830041,12.6294260,0.0000000,0.000000e+00,2.709964e+00,0.000000,15.770133,0.000000,1.9699738,0.0000000
TAOK3,0.000000e+00,0.0000000,63.2383840,0.000000e+00,112.6691060,18.040266,15.530114,108.8195300,0.0000000,0.000000,⋯,0.000000,24.4809600,0.0000000,0.000000e+00,1.519994e+00,0.000000,0.000000,37.899147,6.4599504,0.0000000
HNF1A-AS1,2.409973e+00,2.8599942,0.9600167,2.349991e+00,1.2000098,0.630003,13.370685,1.5899955,0.8100261,7.480058,⋯,1.290035,1.7299755,0.0000000,0.000000e+00,4.270046e+00,2.569959,3.660024,0.550000,1.4799789,0.0000000
CIAO1,0.000000e+00,0.0000000,87.7231200,0.000000e+00,44.4903220,55.650227,14.270435,0.0000000,40.4411100,130.548870,⋯,20.270237,91.6402600,80.6998400,0.000000e+00,6.552216e+01,210.468400,2.409973,0.000000,15.6205240,113.4795300
TNNT3,0.000000e+00,0.0000000,0.0000000,0.000000e+00,91.1693300,0.000000,0.000000,0.0000000,0.0000000,0.000000,⋯,0.000000,0.0000000,0.0000000,0.000000e+00,0.000000e+00,0.000000,0.000000,0.000000,0.0000000,0.0000000
CD82,3.399972e+00,0.0000000,68.0500700,0.000000e+00,200.4637800,267.650880,0.000000,1.2600248,241.6418600,104.089966,⋯,50.851803,190.1209100,2.4899700,2.168552e+02,5.755207e+01,165.767930,199.067950,19.979992,39.6915320,81.6986160
EMC8,1.703966e+02,0.0000000,46.0999700,0.000000e+00,0.0000000,28.979078,7.810030,0.0000000,0.0000000,0.000000,⋯,26.100029,66.4279300,97.3090600,9.397033e+01,0.000000e+00,0.000000,0.000000,42.190964,0.0000000,73.7100000
CHST6,0.000000e+00,4.6899550,0.8500373,7.099965e-01,0.1899996,0.550000,10.090312,97.5398940,0.3900025,5.639995,⋯,1.570009,0.8100261,1.0700366,4.500004e-01,1.960022e+00,2.260009,6.649975,0.000000,2.1799650,0.4500004


In [92]:
# Restrict the single-cell reference to the marker genes, unless feature
# selection is disabled with the sentinel value 'noFS'.
if (path_marker != "noFS") {
  df_Puram_filtered <- subset(df_Puram, row.names(df_Puram) %in% marker_genes)
} else {
  df_Puram_filtered <- df_Puram
}

# Run the deconvolution on the (possibly filtered) reference.
list_res <- run_MuSiC(df_Puram_filtered, df_bulk, label)

# Extract by position: the first element is the weighted MuSiC estimate and
# the second the all-gene (NNLS) estimate. Positional access works whether or
# not the list elements are named, whereas `$MuSiC_res` / `$NNLS_res` yield
# NULL when the list has no names.
MuSiC_res <- list_res[[1]]
NNLS_res <- list_res[[2]]

Creating Relative Abundance Matrix...

Creating Variance Matrix...

Creating Library Size Matrix...

Used 824 common genes...

Used 10 cell types in deconvolution...

TCGA-BB-4224-01A has common genes 810 ...

TCGA-H7-7774-01A has common genes 780 ...

TCGA-CV-6943-01A has common genes 794 ...

TCGA-CN-5374-01A has common genes 802 ...

TCGA-CQ-6227-01A has common genes 790 ...

TCGA-CV-6959-01A has common genes 794 ...

TCGA-F7-A61V-01A has common genes 781 ...

TCGA-CV-7413-01A has common genes 796 ...

TCGA-CV-7247-01A has common genes 785 ...

TCGA-CR-5249-01A has common genes 793 ...

TCGA-CQ-5331-01A has common genes 783 ...

TCGA-BB-8601-01A has common genes 780 ...

TCGA-CV-A45W-01A has common genes 793 ...

TCGA-CQ-7063-01A has common genes 771 ...

TCGA-CN-5373-01A has common genes 783 ...

TCGA-CV-A45T-01A has common genes 793 ...

TCGA-HD-A4C1-01A has common genes 778 ...

TCGA-CV-7438-11A has common genes 787 ...

TCGA-CV-5444-01A has common genes 790 ...

TCGA-BA-5556-01A

TCGA-CV-7421-01A has common genes 788 ...

TCGA-CQ-5323-01A has common genes 781 ...

TCGA-UF-A718-01A has common genes 794 ...

TCGA-KU-A6H7-01A has common genes 790 ...

TCGA-CN-5363-01A has common genes 782 ...

TCGA-CN-4730-01A has common genes 781 ...

TCGA-CN-5370-01A has common genes 800 ...

TCGA-UF-A71D-01A has common genes 781 ...

TCGA-UF-A71E-01A has common genes 784 ...

TCGA-CV-7416-11A has common genes 781 ...

TCGA-CV-7104-01A has common genes 784 ...

TCGA-CV-A45P-01A has common genes 790 ...

TCGA-CV-7238-01A has common genes 800 ...

TCGA-CV-7099-01A has common genes 781 ...

TCGA-CV-A6JO-01B has common genes 777 ...

TCGA-CV-5971-01A has common genes 790 ...

TCGA-D6-A6EN-01A has common genes 762 ...

TCGA-CN-4729-01A has common genes 794 ...

TCGA-CV-6960-01A has common genes 784 ...

TCGA-IQ-A61H-01A has common genes 792 ...

TCGA-CV-6956-01A has common genes 783 ...

TCGA-MT-A51W-01A has common genes 781 ...

TCGA-CQ-6221-01A has common genes 777 ...

TCGA-CV-724

TCGA-H7-A6C5-11A has common genes 794 ...

TCGA-H7-8501-01A has common genes 774 ...

TCGA-IQ-7632-01A has common genes 777 ...

TCGA-CV-A6JZ-01A has common genes 781 ...

TCGA-HD-7832-01A has common genes 769 ...

TCGA-CV-A6JE-01A has common genes 774 ...

TCGA-CN-4725-01A has common genes 787 ...

TCGA-BA-5151-01A has common genes 788 ...

TCGA-CN-4726-01A has common genes 774 ...

TCGA-F7-A624-01A has common genes 775 ...

TCGA-BA-A6DL-01A has common genes 789 ...

TCGA-BA-6869-01A has common genes 797 ...

TCGA-QK-A6IH-01A has common genes 771 ...

TCGA-HD-7754-01A has common genes 792 ...

TCGA-CV-A6JN-01A has common genes 778 ...

TCGA-TN-A7HI-01A has common genes 804 ...

TCGA-CR-7365-01A has common genes 782 ...

TCGA-CV-5440-01A has common genes 792 ...

TCGA-CV-6956-11A has common genes 783 ...

TCGA-MZ-A5BI-01A has common genes 798 ...

TCGA-BB-4228-01A has common genes 794 ...

TCGA-QK-A6II-01A has common genes 760 ...

TCGA-CV-7252-11A has common genes 793 ...

TCGA-CV-597

In [103]:
# Display the first element of the run_MuSiC result; single-bracket indexing
# keeps it wrapped in a one-element list.
list_res[1]

Unnamed: 0,B-cell,Dendritic,Endothelial,Fibroblast,Macrophage,Mast,myocyte,other,T-cell,tumor
TCGA-BB-4224-01A,0.000000e+00,3.964843e-04,0.007915017,0.019124071,0.003512223,7.138458e-05,5.595701e-03,0.000000e+00,4.616951e-03,0.9587681691
TCGA-H7-7774-01A,6.878687e-04,1.124047e-03,0.003222896,0.000410047,0.009727659,2.148952e-04,2.163649e-05,8.790180e-03,2.285588e-03,0.9735151836
TCGA-CV-6943-01A,1.163821e-02,1.356121e-03,0.023924146,0.111994251,0.111859930,5.352875e-04,1.263657e-03,6.448526e-05,1.268459e-03,0.7360954495
TCGA-CN-5374-01A,8.863048e-03,5.272455e-03,0.019330301,0.016299824,0.041702588,2.607036e-04,1.659818e-02,9.214550e-04,5.741275e-03,0.8850101726
TCGA-CQ-6227-01A,9.230941e-05,0.000000e+00,0.025547257,0.185134693,0.012943776,2.515838e-04,7.379379e-02,0.000000e+00,0.000000e+00,0.7022365865
TCGA-CV-6959-01A,2.758318e-03,0.000000e+00,0.022741526,0.046153678,0.031534268,1.706089e-04,8.240982e-02,0.000000e+00,0.000000e+00,0.8142317773
TCGA-F7-A61V-01A,8.154545e-04,0.000000e+00,0.007714979,0.002575240,0.000000000,0.000000e+00,3.004476e-02,5.658383e-02,0.000000e+00,0.9022657364
TCGA-CV-7413-01A,2.928950e-04,8.011098e-05,0.007436195,0.068289047,0.030191181,1.079430e-04,0.000000e+00,0.000000e+00,0.000000e+00,0.8936026288
TCGA-CV-7247-01A,0.000000e+00,0.000000e+00,0.020838423,0.042469657,0.000000000,1.491673e-05,7.312261e-02,0.000000e+00,1.633980e-04,0.8633909921
TCGA-CR-5249-01A,1.894002e-02,9.936199e-03,0.020447085,0.067472288,0.129776063,1.028893e-03,7.640565e-06,2.954841e-03,1.911089e-03,0.7475258848
