In [1]:
import sys, os
import argparse

from Deconvolution.BLADE import Framework
import numpy as np
from numpy import transpose as t
import itertools
import pickle
from scipy.optimize import nnls
from scipy.stats import gaussian_kde
from matplotlib.colors import LogNorm
from sklearn.svm import SVR
from sklearn.svm import NuSVR

from sklearn.metrics import mean_squared_error as mse
import pandas as pd
from tqdm import trange,tqdm
# modules for visualization
import qgrid
from matplotlib import pyplot as plt
import seaborn as sns
import anndata as ad
import scanpy as sc
import scanorama
from sklearn.model_selection import LeaveOneOut,StratifiedKFold,KFold
import cycler

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Configure scanpy logging and figure defaults for this notebook session.
sc.settings.verbosity = 3             # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_header()  # prints the package versions shown in the cell output
sc.settings.set_figure_params(dpi=100, facecolor='white')

scanpy==1.9.1 anndata==0.8.0 umap==0.5.3 numpy==1.21.5 scipy==1.8.0 pandas==1.4.2 scikit-learn==1.0.2 statsmodels==0.13.2 python-igraph==0.9.9 louvain==0.7.1 pynndescent==0.5.7


In [3]:
def getloclist(wd,keyword=["top","marker","DEG"]):
    """Recursively collect paths of files under ``wd`` whose name contains a keyword.

    Parameters
    ----------
    wd : str
        Root directory, walked with ``os.walk``.
    keyword : str or iterable of str
        Substrings to look for in each file name. A single string is treated
        as one keyword.

    Returns
    -------
    list of str
        ``os.path.join(root, file)`` for every file whose name contains at
        least one keyword; each file is listed once, in ``os.walk`` order.
    """
    # A bare string would otherwise be iterated character by character and
    # match almost every file name.
    keys = (keyword,) if isinstance(keyword, str) else tuple(keyword)
    loc_list = []
    for root, _dirs, files in os.walk(wd):
        for file in files:
            if any(key in file for key in keys):
                loc_list.append(os.path.join(root, file))  # get file location
    return loc_list

In [29]:
# NOTE: `path_adata` is not defined above this cell; it is assigned in the
# "testing code block" cells further down, so this cell relies on those having run first.
final_adata = sc.read_h5ad(path_adata)
# NOTE(review): presumably a workaround for a missing 'base' entry in uns['log1p']
# after reloading the h5ad file — confirm.
final_adata.uns['log1p']["base"] = None
final_adata_mean = final_adata.copy()
# log1p is applied to the copy only; `final_adata` itself is not passed to it.
sc.pp.log1p(final_adata_mean)



### Prepare scRNA signature file for CIBERSORTx input
Run BayesPrism first so that its signature file (`signature.csv`) is in place; the cells below relabel that file for CIBERSORTx.

In [4]:
def prepare_CIBERSORTx(path_label,path_signature):
    """Relabel a single-cell signature matrix by cell category and save it as TSV.

    Parameters
    ----------
    path_label : str
        Directory containing ``cellcategory_simple.csv``. The file is read
        with its first column as index and must have a ``cell_category``
        column.
    path_signature : str
        CSV file (first column used as index) whose column names are looked
        up in the label table.

    Returns
    -------
    None
        Writes ``<path_signature without its extension>_toCIBERSORTx.tsv``
        (tab separated) next to the input file.
    """
    # os.path.join works with or without a trailing slash on path_label.
    labels = pd.read_csv(os.path.join(path_label, "cellcategory_simple.csv"), index_col=0)
    dict_label = labels['cell_category'].to_dict()
    scRNAref = pd.read_csv(path_signature, index_col=0)
    # Columns without an entry in the label table become NaN after map().
    scRNAref.columns = scRNAref.columns.map(dict_label)
    # splitext strips only the final extension; splitting on the first "."
    # would truncate the path at any dot in a directory or file name.
    stem = os.path.splitext(path_signature)[0]
    scRNAref.to_csv(stem + "_toCIBERSORTx.tsv", sep='\t')
    # see prepare_BLADE for test set, signature file from runBP

In [8]:
# CAUTION: this cell is not part of the pipeline script; run it manually in the
# notebook after the pipeline script has finished.
# It collects every file under the results folder whose name contains
# "_signature.tsv" and writes a CIBERSORTx-ready copy of each.
list_CIBERSORTx_signature = getloclist("/home/cke/PseudoBulk/Results/",keyword=["_signature.tsv"])
for i in list_CIBERSORTx_signature:
    prepare_CIBERSORTx("/home/cke/Puram/scRNAlabels/",i)

## File preparation
### Pseudobulk: Cross-validation of dataset

In [5]:
def prepare_BLADE(final_adata_mean,final_adata,mode,out):
    """Write BLADE signature matrices (mean / std per cell type), optionally per leave-one-out split.

    Parameters
    ----------
    final_adata_mean : object with ``to_df()``
        Expression data; ``to_df()`` must return a cells x genes DataFrame.
    final_adata : object with ``obs``
        Per-cell annotations indexed like the expression frame; needs a
        ``Cell_type`` column and, in pseudobulk mode, a ``batch`` column.
    mode : {'real', 'pseudobulk'}
        'real' writes one signature from all cells (``<out>real_mean.tsv`` and
        ``<out>real_std.tsv``). 'pseudobulk' runs leave-one-batch-out: for each
        left-out batch it writes the training signature plus the left-out
        per-batch sum (``*_test.tsv`` and its transpose for CIBERSORTx).
    out : str
        Output path prefix (file names are appended by plain concatenation).

    Returns
    -------
    list
        ``[]`` in 'real' mode; otherwise one single-element list of left-out
        batch labels per split.

    Raises
    ------
    ValueError
        If ``mode`` is neither 'real' nor 'pseudobulk'.
    """
    if mode not in ('real', 'pseudobulk'):
        raise ValueError("mode must be 'real' or 'pseudobulk', got %r" % (mode,))

    # Convert once; to_df() is loop-invariant.
    expr = final_adata_mean.to_df()
    gene_cols = list(expr.columns)
    merge_celltype = pd.merge(expr,final_adata.obs,left_index=True,right_index=True)

    def _write_signature(cells, stem):
        # Aggregate gene columns only, so non-numeric obs columns never reach
        # mean()/std(). std() is the sample standard deviation (ddof=1);
        # cell types with a single cell give NaN, which is stored as 0.
        by_type = cells.groupby(['Cell_type'])[gene_cols]
        by_type.mean().fillna(0).T.to_csv(stem+"_mean.tsv",sep='\t')
        by_type.std().fillna(0).T.to_csv(stem+"_std.tsv",sep='\t')

    if mode == 'real':
        # save signature matrices for TCGA deconv
        _write_signature(merge_celltype, out+mode)
        return []

    # NOTE(review): the pseudobulk profile is summed from `final_adata_mean`;
    # if the caller passes a log1p-transformed object these are sums of log
    # values — confirm that is intended.
    merge_celltype_pseudobulk = merge_celltype.groupby(['batch'])[gene_cols].sum()
    sample = merge_celltype_pseudobulk.index.tolist()
    list_LOT = []
    for train_index, test_index in LeaveOneOut().split(sample):
        print("LOT TRAIN:", train_index, "TEST:", test_index)
        train_sample = merge_celltype_pseudobulk.index[train_index].tolist()
        train = merge_celltype[merge_celltype['batch'].isin(train_sample)]
        test_sample = merge_celltype_pseudobulk.index[test_index].tolist()
        list_LOT.append(test_sample)
        print("Leaving out: ",test_sample)
        test = merge_celltype_pseudobulk[merge_celltype_pseudobulk.index.isin(test_sample)]
        stem = out+mode+"_LOT"+"".join(str(i) for i in test_sample)
        _write_signature(train, stem)
        # save the leftout testset for all methods
        test.to_csv(stem+"_test.tsv",sep='\t')
        # for CIBERSORTx
        test.T.to_csv(stem+"_test_transpose.tsv",sep='\t')
    return list_LOT

In [55]:
def prepare_BLADE_kfoldCV(final_adata_mean,final_adata,mode,out,n_splits=5):
    """Write BLADE signature matrices (mean / std per cell type), optionally per stratified CV fold.

    Parameters
    ----------
    final_adata_mean : object with ``to_df()``
        Expression data; ``to_df()`` must return a cells x genes DataFrame.
    final_adata : object with ``obs``
        Per-cell annotations indexed like the expression frame; needs a
        ``Cell_type`` column and, in pseudobulk mode, a ``batch`` column.
    mode : {'real', 'pseudobulk'}
        'real' writes one signature from all cells (``<out>real_mean.tsv`` and
        ``<out>real_std.tsv``). 'pseudobulk' splits the cells with
        ``StratifiedKFold`` (stratified on ``batch``) and, per fold, writes the
        training signature plus the per-batch sum of the held-out cells
        (``*_test.tsv`` and its transpose for CIBERSORTx).
    out : str
        Output path prefix (file names are appended by plain concatenation).
    n_splits : int, default 5
        Number of folds; only used in pseudobulk mode.

    Returns
    -------
    list of str
        ``[]`` in 'real' mode; otherwise the fold numbers ``'1'..'n_splits'``.

    Raises
    ------
    ValueError
        If ``mode`` is neither 'real' nor 'pseudobulk'.
    """
    if mode not in ('real', 'pseudobulk'):
        raise ValueError("mode must be 'real' or 'pseudobulk', got %r" % (mode,))

    # Convert once; to_df() is loop-invariant.
    expr = final_adata_mean.to_df()
    gene_cols = list(expr.columns)
    merge_celltype = pd.merge(expr,final_adata.obs,left_index=True,right_index=True)

    def _write_signature(cells, stem):
        # Aggregate gene columns only, so non-numeric obs columns never reach
        # mean()/std(). std() is the sample standard deviation (ddof=1);
        # cell types with a single cell give NaN, which is stored as 0.
        by_type = cells.groupby(['Cell_type'])[gene_cols]
        by_type.mean().fillna(0).T.to_csv(stem+"_mean.tsv",sep='\t')
        by_type.std().fillna(0).T.to_csv(stem+"_std.tsv",sep='\t')

    if mode == 'real':
        # save signature matrices for TCGA deconv
        _write_signature(merge_celltype, out+mode)
        return []

    singlecells = merge_celltype.index.tolist()
    batch = merge_celltype['batch'].tolist()
    list_CV = []
    folds = StratifiedKFold(n_splits=n_splits).split(singlecells,batch)
    for count, (train_index, test_index) in enumerate(folds, start=1):
        print("CV TRAIN:",count , len(train_index), "TEST:", len(test_index))
        # Positional selection: avoids the label round trip, which would
        # duplicate rows if the cell index were not unique.
        train = merge_celltype.iloc[train_index]
        list_CV.append(str(count))
        # NOTE: main() passes a log1p-transformed `final_adata_mean`, so this sum
        # is over log values; prepare_Rdeconv_kfoldCV writes the same
        # "*_CV<n>_test.tsv" file names, so whichever runs last determines them.
        test = merge_celltype.iloc[test_index].groupby(['batch'])[gene_cols].sum()
        stem = out+mode+"_CV"+str(count)
        _write_signature(train, stem)
        # save the leftout testset for all methods
        test.to_csv(stem+"_test.tsv",sep='\t')
        # for CIBERSORTx
        test.T.to_csv(stem+"_test_transpose.tsv",sep='\t')
    return list_CV

In [56]:
# Write the BLADE inputs for every CV fold (default n_splits=5) in pseudobulk mode;
# uses the `final_adata` / `final_adata_mean` objects loaded in an earlier cell.
list_CV = prepare_BLADE_kfoldCV(final_adata_mean,final_adata,'pseudobulk',"/home/cke/PseudoBulk/InputToWrapper/")

CV TRAIN: 1 3587 TEST: 897
CV TRAIN: 2 3587 TEST: 897
CV TRAIN: 3 3587 TEST: 897
CV TRAIN: 4 3587 TEST: 897
CV TRAIN: 5 3588 TEST: 896


In [6]:
def prepare_Rdeconv(final_adata,mode,out):
    """Write single-cell reference / pseudobulk test files for the R-based methods (leave-one-out).

    Parameters
    ----------
    final_adata : object with ``to_df()`` and ``obs``
        ``to_df()`` must return a cells x genes DataFrame; ``obs`` needs a
        ``batch`` column in pseudobulk mode.
    mode : {'real', 'pseudobulk'}
        'real' writes the full expression frame to ``<out>real_scRNAref.tsv``.
        'pseudobulk' runs leave-one-batch-out and, per left-out batch, writes
        the training cells (``*_scRNAtrain.tsv``), the left-out per-batch sum
        (``*_test.tsv``) and its transpose for CIBERSORTx.
    out : str
        Output path prefix (file names are appended by plain concatenation).

    Returns
    -------
    list
        ``[]`` in 'real' mode; otherwise one single-element list of left-out
        batch labels per split.

    Raises
    ------
    ValueError
        If ``mode`` is neither 'real' nor 'pseudobulk'.
    """
    if mode not in ('real', 'pseudobulk'):
        raise ValueError("mode must be 'real' or 'pseudobulk', got %r" % (mode,))

    # Convert once; to_df() is loop-invariant.
    expr = final_adata.to_df()
    if mode == 'real':
        expr.to_csv(out+mode+"_scRNAref.tsv",sep='\t')
        return []

    gene_cols = list(expr.columns)
    merge_sample = pd.merge(expr,final_adata.obs,left_index=True,right_index=True)
    # Sum gene columns only, so obs columns never take part in the aggregation.
    merge_sample_pseudobulk = merge_sample.groupby(['batch'])[gene_cols].sum()
    sample = merge_sample_pseudobulk.index.tolist()
    list_LOT = []
    for train_index, test_index in LeaveOneOut().split(sample):
        print("LOT TRAIN:", train_index, "TEST:", test_index)
        train_sample = merge_sample_pseudobulk.index[train_index].tolist()
        train = merge_sample.loc[merge_sample['batch'].isin(train_sample), gene_cols]
        test_sample = merge_sample_pseudobulk.index[test_index].tolist()
        list_LOT.append(test_sample)
        print("Leaving out: ",test_sample)
        test = merge_sample_pseudobulk[merge_sample_pseudobulk.index.isin(test_sample)]
        stem = out+mode+"_LOT"+"".join(str(i) for i in test_sample)
        train.to_csv(stem+"_scRNAtrain.tsv",sep='\t')
        test.to_csv(stem+"_test.tsv",sep='\t')
        # for CIBERSORTx
        test.T.to_csv(stem+"_test_transpose.tsv",sep='\t')
    return list_LOT

In [54]:
def prepare_Rdeconv_kfoldCV(final_adata,mode,out,n_splits=5):
    """Write single-cell reference / pseudobulk test files for the R-based methods (stratified k-fold).

    Parameters
    ----------
    final_adata : object with ``to_df()`` and ``obs``
        ``to_df()`` must return a cells x genes DataFrame; ``obs`` needs a
        ``batch`` column in pseudobulk mode.
    mode : {'real', 'pseudobulk'}
        'real' writes the full expression frame to ``<out>real_scRNAref.tsv``.
        'pseudobulk' splits the cells with ``StratifiedKFold`` (stratified on
        ``batch``) and, per fold, writes the training cells
        (``*_scRNAtrain.tsv``), the per-batch sum of the held-out cells
        (``*_test.tsv``) and its transpose for CIBERSORTx.
    out : str
        Output path prefix (file names are appended by plain concatenation).
    n_splits : int, default 5
        Number of folds; only used in pseudobulk mode.

    Returns
    -------
    list of str
        ``[]`` in 'real' mode; otherwise the fold numbers ``'1'..'n_splits'``.

    Raises
    ------
    ValueError
        If ``mode`` is neither 'real' nor 'pseudobulk'.
    """
    if mode not in ('real', 'pseudobulk'):
        raise ValueError("mode must be 'real' or 'pseudobulk', got %r" % (mode,))

    # Convert once; to_df() is loop-invariant.
    expr = final_adata.to_df()
    if mode == 'real':
        expr.to_csv(out+mode+"_scRNAref.tsv",sep='\t')
        return []

    gene_cols = list(expr.columns)
    # The split must operate on this function's own merged frame; the earlier
    # code referenced a name that only exists inside the BLADE helpers.
    merge_sample = pd.merge(expr,final_adata.obs,left_index=True,right_index=True)
    singlecells = merge_sample.index.tolist()
    batch = merge_sample['batch'].tolist()
    list_CV = []
    folds = StratifiedKFold(n_splits=n_splits).split(singlecells,batch)
    for count, (train_index, test_index) in enumerate(folds, start=1):
        print("CV TRAIN:",count , len(train_index), "TEST:", len(test_index))
        train = merge_sample.iloc[train_index][gene_cols]
        list_CV.append(str(count))
        # Sum gene columns only, so obs columns never take part in the aggregation.
        test = merge_sample.iloc[test_index].groupby(['batch'])[gene_cols].sum()
        stem = out+mode+"_CV"+str(count)
        train.to_csv(stem+"_scRNAtrain.tsv",sep='\t')
        test.to_csv(stem+"_test.tsv",sep='\t')
        # for CIBERSORTx
        test.T.to_csv(stem+"_test_transpose.tsv",sep='\t')
    return list_CV

In [52]:
# Write the per-fold inputs for the R-based methods in pseudobulk mode (default n_splits=5).
list_CV = prepare_Rdeconv_kfoldCV(final_adata,'pseudobulk',"/home/cke/PseudoBulk/InputToWrapper/")

CV TRAIN: 1 3587 TEST: 897
CV TRAIN: 2 3587 TEST: 897
CV TRAIN: 3 3587 TEST: 897
CV TRAIN: 4 3587 TEST: 897
CV TRAIN: 5 3588 TEST: 896


In [57]:
# Display the fold identifiers returned by the most recent prepare_*_kfoldCV call.
list_CV

['1', '2', '3', '4', '5']

### Running each method from the command line

In [58]:
def run_cmd(mode,out,out_res,dict_FS,folder_marker,
            test_sample=False,path_label="home/cke/Puram/scRNAlabels/",
            path_bulk=False,name="unnamed_job"):
    """Launch MuSiC, BayesPrism and BLADE as background shell jobs, with feature selection.

    MuSiC and BayesPrism are started once per entry of ``dict_FS``; BLADE is
    started once and receives ``folder_marker`` (feature selection is done
    inside its wrapper). Every command ends in ``&`` and is handed to
    ``os.system``, so this function does not wait for the jobs.

    Parameters
    ----------
    mode : str
        'real' uses ``<out>real_scRNAref.tsv`` / ``_std.tsv`` / ``_mean.tsv``
        with ``path_bulk`` as the bulk matrix. 'pseudobulk' uses the
        ``<out>pseudobulk_CV<test_sample>_*`` files. Any other value does nothing.
    out : str
        Path prefix of the input files prepared in the previous steps.
    out_res : str
        Results prefix; ``MuSiC/``, ``BayesPrism/`` and ``BLADE/`` are appended.
    dict_FS : dict
        Maps a feature-selection setup name to its marker file path.
    folder_marker : str
        Marker folder passed to BLADE via ``--folder_marker``.
    test_sample : optional
        Fold identifier used in pseudobulk file and job names (``str()`` is applied).
    path_label : str
        Cell label directory passed to the R scripts.
        NOTE(review): the default lacks a leading "/", unlike the absolute label
        path used elsewhere in this notebook; callers in this file always pass
        it explicitly.
    path_bulk : str
        Bulk expression matrix; required in 'real' mode.
    name : str
        Job name prefix.

    Raises
    ------
    ValueError
        In 'real' mode when ``path_bulk`` is not provided.
    """
    import shlex  # function-scope import: only needed for quoting shell arguments

    def _shell_line(program, *arguments):
        # Quote every argument so paths containing spaces or shell
        # metacharacters survive; plain paths and job names are left unchanged.
        return " ".join([program] + [shlex.quote(str(a)) for a in arguments] + ["&"])

    # "out" is folder where input files prepared in last steps
    if mode == 'real':
        if not path_bulk:
            raise ValueError("path_bulk is required when mode == 'real'")
        prefix = out+mode
        reference = prefix+"_scRNAref.tsv"  # scRNA signature matrix
        bulk = path_bulk  # real bulk RNAseq matrix
        fs_job_suffix = ""
        blade_job = name+"_"+mode
    elif mode == "pseudobulk":
        prefix = out+mode+"_CV"+str(test_sample)
        reference = prefix+"_scRNAtrain.tsv"  # signature matrix
        bulk = prefix+"_test.tsv"  # testset pseudobulk matrix
        fs_job_suffix = "_"+str(test_sample)+"CV"
        blade_job = name+"_"+mode+"_"+str(test_sample)+"_CV"
    else:
        return

    for FS_setup, marker_file in dict_FS.items():
        print(mode,"mode: now with feature selection setup: \r",FS_setup)
        job = name+"_"+mode+"_"+FS_setup+fs_job_suffix  # job name, FS_setup
        cmd_MuSiC = _shell_line("Rscript /home/cke/runscripts/runMuSiC.r",
                                reference, bulk, path_label, marker_file, job,
                                out_res+"MuSiC/")  # output frac folder
        cmd_BP = _shell_line("Rscript /home/cke/runscripts/runBP.r",
                             reference, bulk, path_label, marker_file, job,
                             out_res+"BayesPrism/",  # output frac folder
                             out_res)  # output CIBERSORTx prelim signature
        print("now running in cmd: \r",cmd_MuSiC)
        print("now running in cmd: \r",cmd_BP)
        os.system(cmd_MuSiC)
        os.system(cmd_BP)

    # for BLADE, feature selection is done within wrapper
    cmd_BLADE = _shell_line("python /home/cke/runscripts/runBLADE.py",
                            prefix+"_std.tsv",  # std signature matrix
                            prefix+"_mean.tsv",  # mean signature matrix
                            bulk,
                            out_res+"BLADE/",  # output folder
                            "--folder_marker", folder_marker,  # marker folder
                            "--name", blade_job)  # job name, background run
    print("now running in cmd: \r",cmd_BLADE)
    os.system(cmd_BLADE)

In [59]:
def run_cmd_noFS(mode,out,out_res,test_sample=False,
                 path_label="home/cke/Puram/scRNAlabels/",
                 path_bulk=False,name="unnamed_noFS_job"):
    """Launch MuSiC, BayesPrism and BLADE as background shell jobs, without feature selection.

    The R scripts receive the literal ``noFS`` in place of a marker file and
    BLADE is started without ``--folder_marker``. Every command ends in ``&``
    and is handed to ``os.system``, so this function does not wait for the jobs.

    Parameters
    ----------
    mode : str
        'real' uses ``<out>real_scRNAref.tsv`` / ``_std.tsv`` / ``_mean.tsv``
        with ``path_bulk`` as the bulk matrix. 'pseudobulk' uses the
        ``<out>pseudobulk_CV<test_sample>_*`` files. Any other value does nothing.
    out : str
        Path prefix of the input files prepared in the previous steps.
    out_res : str
        Results prefix; ``MuSiC/``, ``BayesPrism/`` and ``BLADE/`` are appended.
    test_sample : optional
        Fold identifier used in pseudobulk file and job names (``str()`` is applied).
    path_label : str
        Cell label directory passed to the R scripts.
        NOTE(review): the default lacks a leading "/", unlike the absolute label
        path used elsewhere in this notebook; callers in this file always pass
        it explicitly.
    path_bulk : str
        Bulk expression matrix; required in 'real' mode.
    name : str
        Job name prefix.

    Raises
    ------
    ValueError
        In 'real' mode when ``path_bulk`` is not provided.
    """
    import shlex  # function-scope import: only needed for quoting shell arguments

    def _shell_line(program, *arguments):
        # Quote every argument so paths containing spaces or shell
        # metacharacters survive; plain paths and job names are left unchanged.
        return " ".join([program] + [shlex.quote(str(a)) for a in arguments] + ["&"])

    # "out" is folder where input files prepared in last steps
    marker_file = "noFS"
    if mode == 'real':
        if not path_bulk:
            raise ValueError("path_bulk is required when mode == 'real'")
        prefix = out+mode
        reference = prefix+"_scRNAref.tsv"  # scRNA signature matrix
        bulk = path_bulk  # real bulk RNAseq matrix
        job = name+"_"+mode+"_"+"noFS"  # job name, FS_setup
        blade_job = name+"_"+mode+"_noFS"
    elif mode == "pseudobulk":
        prefix = out+mode+"_CV"+str(test_sample)
        reference = prefix+"_scRNAtrain.tsv"  # signature matrix
        bulk = prefix+"_test.tsv"  # testset pseudobulk matrix
        job = name+"_"+mode+"_"+"noFS"+"_"+str(test_sample)+"_CV"  # job name, FS_setup
        blade_job = name+"_"+mode+"_"+str(test_sample)+"_CV_noFS"
    else:
        return

    print(mode,"mode: no feature selection\r")
    cmd_MuSiC = _shell_line("Rscript /home/cke/runscripts/runMuSiC.r",
                            reference, bulk, path_label, marker_file, job,
                            out_res+"MuSiC/")  # output frac folder
    cmd_BP = _shell_line("Rscript /home/cke/runscripts/runBP.r",
                         reference, bulk, path_label, marker_file, job,
                         out_res+"BayesPrism/",  # output frac folder
                         out_res)  # output CIBERSORTx prelim signature
    print("now running in cmd: \r",cmd_MuSiC)
    print("now running in cmd: \r",cmd_BP)
    os.system(cmd_MuSiC)
    os.system(cmd_BP)

    # BLADE gets no marker folder here, so it runs without feature selection
    cmd_BLADE = _shell_line("python /home/cke/runscripts/runBLADE.py",
                            prefix+"_std.tsv",  # std signature matrix
                            prefix+"_mean.tsv",  # mean signature matrix
                            bulk,
                            out_res+"BLADE/",  # output folder
                            "--name", blade_job)  # job name, background run
    print("now running in cmd: \r",cmd_BLADE)
    os.system(cmd_BLADE)

In [60]:
def main(path_adata,mode,out,out_res,path_label,path_bulk=False,folder_marker=False,
         name="unnamed_job",keyword=["top","marker","DEG"]):
    """Prepare the deconvolution inputs and launch every method.

    Reads the AnnData file, makes a log1p-transformed copy for the BLADE
    signature, writes the input files under ``out`` and then starts the jobs:
    once in 'real' mode, once per CV fold in 'pseudobulk' mode. Marker files
    found under ``folder_marker`` (matched by ``keyword``) switch on the
    feature-selection variant; without ``folder_marker`` the no-FS variant runs.
    """
    # "out" is folder where input files prepared in last steps
    adata_raw = sc.read_h5ad(path_adata)
    adata_raw.uns['log1p']["base"] = None
    adata_log = adata_raw.copy()
    sc.pp.log1p(adata_log)

    use_fs = bool(folder_marker)
    if use_fs:
        print("running Feature selection!\r")
        # setup name = marker file name up to its first "_"; a later file with
        # the same prefix replaces an earlier one
        dict_FS = {
            os.path.split(marker_path)[1].split("_")[0]: marker_path
            for marker_path in getloclist(folder_marker,keyword)
        }
    else:
        print("no Feature selection!\r")

    if mode not in ('real', 'pseudobulk'):
        return

    # both preparation steps run for every supported mode; the fold list of
    # the second call is the one that is used
    prepare_BLADE_kfoldCV(adata_log,adata_raw,mode,out)
    list_CV = prepare_Rdeconv_kfoldCV(adata_raw,mode,out)

    if mode == 'real':
        if use_fs:
            run_cmd(mode,out,out_res,dict_FS,folder_marker,
                    path_label=path_label,path_bulk=path_bulk,name=name)
        else:
            run_cmd_noFS(mode,out,out_res,
                         path_label=path_label,path_bulk=path_bulk,name=name)
        return

    for test_sample in list_CV:
        if use_fs:
            run_cmd(mode,out,out_res,dict_FS,folder_marker,
                    test_sample=test_sample,path_label=path_label,name=name)
        else:
            run_cmd_noFS(mode,out,out_res,
                         test_sample=test_sample,path_label=path_label,name=name)

In [61]:
def parse_args():
    """
        Parses inputs from the commandline.
        :return: inputs as a Namespace object
    """
    parser = argparse.ArgumentParser(description='Generates pipeline')
    # Arguments
    parser.add_argument('path_adata', help='directory of preprocessed raw scRNA anndata object')
    parser.add_argument('mode', help='scheme for data processing', choices=['pseudobulk','real'])
    parser.add_argument('out', help='output CV input directory')
    parser.add_argument('out_res', help='output of decon methods directory')
    parser.add_argument('path_label', help='labels of single-cell type identity directory')
    parser.add_argument('--path_bulk', help='bulk rnaseq data directory',default=False)
    parser.add_argument('--folder_marker', help='the folder where markers is stored',default=False)
    # keyword must be spelled `default`; a misspelled keyword makes add_argument raise TypeError
    parser.add_argument('--name', help='give this job a name to help remember',default='unnamed_job')
    # nargs='+' keeps the value a list of keywords, matching the list default
    parser.add_argument('--keyword', nargs='+', help='keyword in marker file name to identify them',default=["top","marker","DEG"])
    return parser.parse_args()

In [None]:
if __name__ == "__main__":
    # Script entry point: copy the parsed options into module-level names and
    # hand them to main().
    args = parse_args()
    path_adata, mode, out = args.path_adata, args.mode, args.out
    out_res, path_label, path_bulk = args.out_res, args.path_label, args.path_bulk
    name, folder_marker, keyword = args.name, args.folder_marker, args.keyword

    if name != "unnamed_job":
        print(name," - pipeline initiated! Welcome, contact author for support: kechanglin1998@hotmail.com\r")
    else:
        print("You did not name this job!\r")

    # real mode cannot run without a bulk matrix
    if mode == "real" and path_bulk == False:
        raise ValueError("no bulk RNAseq data input! LOAD UP YOUR AMMO!\r")
    if mode in ("real", "pseudobulk"):
        main(path_adata,mode,out,out_res,path_label,
             path_bulk=path_bulk,folder_marker=folder_marker,
             name=name,keyword=keyword)


In [62]:
# testing code block: settings for a 'real' mode run (TCGA bulk matrix);
# these module-level names are read by the driver cell below.
path_adata = "/home/cke/Puram/Puram_scanpy.h5ad"
mode = "real"
out = "/home/cke/PseudoBulk/InputToWrapper/"
out_res = "/home/cke/Real/TCGAResults/"
path_label = "/home/cke/Puram/scRNAlabels/"
path_bulk = "/home/cke/TCGA-HNSC.htseq_counts_exp2_symbol_samplexgene.tsv"
name = "test_fullpipeline_1"
folder_marker = "/home/cke/Puram/markers/" # only the top20 marker file is prepared so far, to save time

keyword = ["top","marker","DEG"]

In [17]:
# testing code block: settings for a 'pseudobulk' mode run (no bulk matrix needed).
# NOTE: this cell assigns the same names as the 'real' settings cell above, so
# whichever of the two was executed last decides what the driver cell runs.
path_adata = "/home/cke/Puram/Puram_scanpy.h5ad"
mode = "pseudobulk"
out = "/home/cke/PseudoBulk/InputToWrapper/"
out_res = "/home/cke/PseudoBulk/Results/"
path_label = "/home/cke/Puram/scRNAlabels/"
path_bulk = False
name = "test_fullpipeline_1"
folder_marker = "/home/cke/Puram/markers/" # only the top20 marker file is prepared so far, to save time

keyword = ["top","marker","DEG"]

In [63]:
# testing code block: run the pipeline with the settings assigned in the
# configuration cells above (same flow as the script entry point).

if name != "unnamed_job":
    print(name," - pipeline initiated! Welcome, contact author for support: kechanglin1998@hotmail.com\r")
else:
    print("You did not name this job!\r")

# real mode cannot run without a bulk matrix
if mode == "real" and path_bulk == False:
    raise ValueError("no bulk RNAseq data input! LOAD UP YOUR AMMO!\r")
if mode in ("real", "pseudobulk"):
    main(path_adata,mode,out,out_res,path_label,
         path_bulk=path_bulk,folder_marker=folder_marker,
         name=name,keyword=keyword)

test_fullpipeline_1  - pipeline initiated! Welcome, contact author for support: kechanglin1998@hotmail.com
running Feature selection!
 top20markersw with feature selection setup: 
 Rscript /home/cke/runscripts/runMuSiC.r /home/cke/PseudoBulk/InputToWrapper/real_scRNAref.tsv /home/cke/TCGA-HNSC.htseq_counts_exp2_symbol_samplexgene.tsv /home/cke/Puram/scRNAlabels/ /home/cke/Puram/markers/top20markers_de_cor_symbol.txt test_fullpipeline_1_real_top20markers /home/cke/Real/TCGAResults/MuSiC/ &
 Rscript /home/cke/runscripts/runBP.r /home/cke/PseudoBulk/InputToWrapper/real_scRNAref.tsv /home/cke/TCGA-HNSC.htseq_counts_exp2_symbol_samplexgene.tsv /home/cke/Puram/scRNAlabels/ /home/cke/Puram/markers/top20markers_de_cor_symbol.txt test_fullpipeline_1_real_top20markers /home/cke/Real/TCGAResults/BayesPrism/ /home/cke/Real/TCGAResults/ &


Loading required package: nnls
Loading required package: ggplot2


now running in cmd:  python /home/cke/runscripts/runBLADE.py /home/cke/PseudoBulk/InputToWrapper/real_std.tsv /home/cke/PseudoBulk/InputToWrapper/real_mean.tsv /home/cke/TCGA-HNSC.htseq_counts_exp2_symbol_samplexgene.tsv /home/cke/Real/TCGAResults/BLADE/ --folder_marker /home/cke/Puram/markers/ --name test_fullpipeline_1_real &


Loading required package: snowfall
Loading required package: snow
Loading required package: BiocGenerics

Attaching package: ‘BiocGenerics’

The following objects are masked from ‘package:stats’:

    IQR, mad, sd, var, xtabs

The following objects are masked from ‘package:base’:

    anyDuplicated, append, as.data.frame, basename, cbind, colnames,
    dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
    grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
    order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
    rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
    union, unique, unsplit, which.max, which.min

Welcome to Bioconductor

    Vignettes contain introductory material; view with
    'browseVignettes()'. To cite Bioconductor, see
    'citation("Biobase")', and for packages 'citation("pkgname")'.


Attaching package: ‘dplyr’

The following object is masked from ‘package:Biobase’:

    combine

The following objects are ma

[1] "running BayesPrism with following args:"
[1] "/home/cke/PseudoBulk/InputToWrapper/real_scRNAref.tsv"       
[2] "/home/cke/TCGA-HNSC.htseq_counts_exp2_symbol_samplexgene.tsv"
[3] "/home/cke/Puram/scRNAlabels/"                                
[4] "/home/cke/Puram/markers/top20markers_de_cor_symbol.txt"      
[5] "test_fullpipeline_1_real_top20markers"                       
[6] "/home/cke/Real/TCGAResults/MuSiC/"                           
[7] "/home/cke/Puram/scRNAlabels/"                                


replacing previous import ‘gplots::lowess’ by ‘stats::lowess’ when loading ‘TED’ 


[1] "running BayesPrism with following args:"
[1] "/home/cke/PseudoBulk/InputToWrapper/real_scRNAref.tsv"       
[2] "/home/cke/TCGA-HNSC.htseq_counts_exp2_symbol_samplexgene.tsv"
[3] "/home/cke/Puram/scRNAlabels/"                                
[4] "/home/cke/Puram/markers/top20markers_de_cor_symbol.txt"      
[5] "test_fullpipeline_1_real_top20markers"                       
[6] "/home/cke/Real/TCGAResults/BayesPrism/"                      
[7] "/home/cke/Real/TCGAResults/"                                 
[8] "/home/cke/Puram/scRNAlabels/"                                


[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.


now with feature selection setup:  top20markers
Get mean and std exp!
Get common genes!  189
cell types:  10
bulk samples:  546
start BLADE!
all of 189 genes are used for optimization.
All samples are used during the optimization.
Initialization with Support vector regression


[Parallel(n_jobs=10)]: Done   5 tasks      | elapsed:    2.4s
[Parallel(n_jobs=10)]: Done  12 tasks      | elapsed:    3.8s
[Parallel(n_jobs=10)]: Done  21 tasks      | elapsed:    5.3s
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    7.6s
[Parallel(n_jobs=10)]: Done  41 tasks      | elapsed:   10.1s
[Parallel(n_jobs=10)]: Done  52 tasks      | elapsed:   12.2s
[Parallel(n_jobs=10)]: Done  65 tasks      | elapsed:   15.7s
[Parallel(n_jobs=10)]: Done  78 tasks      | elapsed:   18.6s
[Parallel(n_jobs=10)]: Done  93 tasks      | elapsed:   21.3s
[Parallel(n_jobs=10)]: Done 108 tasks      | elapsed:   24.2s
[Parallel(n_jobs=10)]: Done 125 tasks      | elapsed:   29.3s
[Parallel(n_jobs=10)]: Done 142 tasks      | elapsed:   32.9s
[Parallel(n_jobs=10)]: Done 161 tasks      | elapsed:   36.8s
[Parallel(n_jobs=10)]: Done 180 tasks      | elapsed:   40.5s
[Parallel(n_jobs=10)]: Done 201 tasks      | elapsed:   45.1s
[Parallel(n_jobs=10)]: Done 222 tasks      | elapsed:   50.3s
[Paralle

[1] "Gene symbols detected. Cleaning up genes based on gene symbols. Recommend to use EMSEMBLE IDs for more unique mapping."
A total of  8  genes from RB chrM chrX chrY  have been excluded 
A total of  0  lowly expressed genes have been excluded 
[1] "removing non-numeric genes..."
[1] "removing outlier genes..."


Creating Relative Abundance Matrix...
[Parallel(n_jobs=10)]: Done 401 tasks      | elapsed:  1.6min


Number of outlier genes filtered= 7 
[1] "aligning reference and mixture..."
[1] "run first sampling"
Start run... This may take a while 
R Version:  R version 4.1.3 (2022-03-10) 



snowfall 1.84-6.2 initialized (using snow 0.4-4): parallel execution on 10 CPUs.

[Parallel(n_jobs=10)]: Done 430 tasks      | elapsed:  1.7min
Creating Variance Matrix...
[Parallel(n_jobs=10)]: Done 461 tasks      | elapsed:  1.9min
Creating Library Size Matrix...
Used 189 common genes...
Used 10 cell types in deconvolution...
TCGA-BB-4224-01A has common genes 188 ...
TCGA-H7-7774-01A has common genes 185 ...
TCGA-CV-6943-01A has common genes 185 ...
TCGA-CN-5374-01A has common genes 187 ...
TCGA-CQ-6227-01A has common genes 187 ...
TCGA-CV-6959-01A has common genes 186 ...
TCGA-F7-A61V-01A has common genes 183 ...
TCGA-CV-7413-01A has common genes 185 ...
TCGA-CV-7247-01A has common genes 184 ...
TCGA-CR-5249-01A has common genes 186 ...
TCGA-CQ-5331-01A has common genes 186 ...
TCGA-BB-8601-01A has common genes 184 ...
TCGA-CV-A45W-01A has common genes 189 ...
TCGA-CQ-7063-01A has common genes 183 ...
TCGA-CN-5373-01A has common genes 185 ...
TCGA-CV-A45T-01A has common genes 186 ..

TCGA-CQ-5324-01A has common genes 182 ...
TCGA-CQ-5333-01A has common genes 185 ...
TCGA-CN-4737-01A has common genes 187 ...
TCGA-C9-A480-01A has common genes 188 ...
TCGA-D6-6824-01A has common genes 184 ...
TCGA-CN-A63T-01A has common genes 187 ...
TCGA-CV-7252-01A has common genes 183 ...
TCGA-CV-7255-11A has common genes 187 ...
TCGA-CV-6003-01A has common genes 184 ...
TCGA-F7-A50I-01A has common genes 184 ...
TCGA-CV-7407-01A has common genes 184 ...
TCGA-CN-5366-01A has common genes 185 ...
TCGA-CN-6011-01A has common genes 180 ...
TCGA-F7-A61S-01A has common genes 184 ...
TCGA-CV-7433-01A has common genes 184 ...
TCGA-CR-6480-01A has common genes 186 ...
TCGA-CV-7423-11A has common genes 186 ...
TCGA-CV-A45O-01A has common genes 181 ...
TCGA-CV-6933-01A has common genes 187 ...
TCGA-CV-A45V-01A has common genes 184 ...
TCGA-CN-A49C-01A has common genes 186 ...
TCGA-DQ-5630-01A has common genes 188 ...
TCGA-UP-A6WW-01A has common genes 188 ...
TCGA-CV-7406-11A has common genes 

TCGA-HD-8635-11A has common genes 182 ...
TCGA-P3-A5Q6-01A has common genes 185 ...
TCGA-F7-A620-01A has common genes 185 ...
TCGA-CN-A49A-01A has common genes 186 ...
TCGA-CV-5430-01A has common genes 186 ...
TCGA-F7-8489-01A has common genes 183 ...
TCGA-KU-A6H8-01A has common genes 186 ...
TCGA-CR-7370-01A has common genes 185 ...
TCGA-CN-4735-01A has common genes 183 ...
TCGA-CN-A6V3-01A has common genes 188 ...
TCGA-CN-5367-01A has common genes 187 ...
TCGA-CV-5431-01A has common genes 185 ...
TCGA-F7-A622-01A has common genes 184 ...
TCGA-UF-A7JA-01A has common genes 185 ...
TCGA-CV-A6JD-01A has common genes 185 ...
TCGA-CR-6481-01A has common genes 187 ...
TCGA-BA-A6DD-01A has common genes 183 ...
TCGA-WA-A7GZ-01A has common genes 187 ...
TCGA-TN-A7HJ-01A has common genes 188 ...
TCGA-UF-A7JC-01A has common genes 183 ...
TCGA-P3-A5Q5-01A has common genes 186 ...
TCGA-UF-A71A-06A has common genes 187 ...
TCGA-D6-6515-01A has common genes 185 ...
TCGA-BA-A4IG-01A has common genes 

  g_Exp = g_Exp_Beta(Nu, Omega, Beta, B0, Ngene, Ncell, Nsample)
  g_Exp = g_Exp_Beta(Nu, Omega, Beta, B0, Ngene, Ncell, Nsample)
  g_Exp = g_Exp_Beta(Nu, Omega, Beta, B0, Ngene, Ncell, Nsample)
  g_Exp = g_Exp_Beta(Nu, Omega, Beta, B0, Ngene, Ncell, Nsample)
  g_Exp = g_Exp_Beta(Nu, Omega, Beta, B0, Ngene, Ncell, Nsample)
  g_Exp = g_Exp_Beta(Nu, Omega, Beta, B0, Ngene, Ncell, Nsample)
  g_Exp = g_Exp_Beta(Nu, Omega, Beta, B0, Ngene, Ncell, Nsample)
  g_Exp = g_Exp_Beta(Nu, Omega, Beta, B0, Ngene, Ncell, Nsample)
  g_Exp = g_Exp_Beta(Nu, Omega, Beta, B0, Ngene, Ncell, Nsample)
  g_Exp = g_Exp_Beta(Nu, Omega, Beta, B0, Ngene, Ncell, Nsample)
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  ret

[1] "merge subtypes"
        Fibroblast tumor B cell myocyte other Macrophage Endothelial T cell
Min.         0.000 0.000  0.000   0.000 0.000      0.000       0.000  0.000
1st Qu.      0.034 0.620  0.000   0.000 0.000      0.004       0.008  0.001
Median       0.101 0.788  0.002   0.000 0.000      0.011       0.015  0.004
Mean         0.148 0.730  0.009   0.029 0.031      0.019       0.021  0.009
3rd Qu.      0.204 0.887  0.009   0.011 0.016      0.027       0.027  0.009
Max.         0.956 0.999  0.207   0.826 0.993      0.237       0.218  0.119
        Dendritic  Mast
Min.        0.000 0.000
1st Qu.     0.000 0.000
Median      0.001 0.000
Mean        0.004 0.000
3rd Qu.     0.003 0.000
Max.        0.089 0.004
[1] "pooling information across samples"
Start optimization... This may take a while 


snowfall 1.84-6.2 initialized (using snow 0.4-4): parallel execution on 10 CPUs.


Stopping cluster



[1] "run final sampling"
Start run... This may take a while 


snowfall 1.84-6.2 initialized (using snow 0.4-4): parallel execution on 10 CPUs.



  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.l

  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.l

  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.l

  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.l

  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
[Parallel(n_jobs=10)]: Done   5 tasks      | elapsed:  4.9min
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
Error in file(file, "rt") : cannot open the connection
Calls: read.table -> file
In file(file, "rt") :
  cannot open file '/home/cke/PseudoBulk/Results/BayesPrism/cellcategory_subtype.csv': No such file or directory
Execution halted
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
[Parallel(n_jobs=10)]: Done   5 tasks      | elapsed:  4.9min
  return -self.Nsample

[Parallel(n_jobs=10)]: Done   5 tasks      | elapsed:  5.1min
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
[Parallel(n_jobs=10)]: Done   5 tasks      | elapsed:  5.0min
[Parallel(n_jobs=10)]: Done  12 tasks      | elapsed:  5.1min
[Parallel(n_jobs=10)]: Done  12 tasks      | elapsed:  5.2min
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
[Parallel(n_jobs=10)]: Done  12 tasks      | elapsed:  5.2min
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:  5.2min
[Parallel(n_jobs=10)]: Done  12 tasks      | elapsed:  5.2min
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
[Parallel(n_jobs=10)]: Done  12 tasks      | elapsed:  5.2min
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsampl

  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
[Parallel(n_jobs=10)]: Done  65 tasks      | elapsed:  5.5min
[Parallel(n_jobs=10)]: Done   5 tasks      | elapsed:  5.5min
[Parallel(n_jobs=10)]: Done  41 tasks      | elapsed:  5.5min
[Parallel(n_jobs=10)]: Done  41 tasks      | elapsed:  5.6min
[Parallel(n_jobs=10)]: Done  41 tasks      | elapsed:  5.5min
[Parallel(n_jobs=10)]: Done  65 tasks      | elapsed:  5.5min
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
[Parallel(n_jobs=10)]: Done  21 tasks      | elapsed:  5.5min
[Parallel(n_jobs=10)]: Done  78 tasks      | elapsed:  5.6min
[Parallel(n_jobs=10)]: Done  78 tasks      | elapsed:  5.5min
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:  5.6min
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
Error in file(file, "rt") : cannot open the connection
Calls: read.table -> file
In file(file, "rt") :
 

[Parallel(n_jobs=10)]: Done  21 tasks      | elapsed:  5.8min
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:  5.7min
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
[Parallel(n_jobs=10)]: Done 108 tasks      | elapsed:  5.8min
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
[Parallel(n_jobs=10)]: Done  41 tasks      | elapsed:  5.8min
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:  5.9min
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
[Parallel(n_jobs=10)]: Done  78 tasks      | elapsed:  5.9min
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  retur

[Parallel(n_jobs=10)]: Done 142 tasks      | elapsed:  5.9min
[Parallel(n_jobs=10)]: Done  93 tasks      | elapsed:  6.0min
[Parallel(n_jobs=10)]: Done  52 tasks      | elapsed:  6.0min
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
[Parallel(n_jobs=10)]: Done  93 tasks      | elapsed:  6.0min
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
[Parallel(n_jobs=10)]: Done 142 tasks      | elapsed:  6.0min
[Parallel(n_jobs=10)]: Done  52 tasks      | elapsed:  5.9min
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
[Parallel(n_jobs=10)]: Done  65 tasks      | elapsed:  6.0min
[Parallel(n_jobs=10)]: Done  65 tasks      | elapsed:  6.0min
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
  return -self.Nsample*np.sum(np.log(Omega))
  return PX+PY+PF-QX-QF
[Parallel(n_jobs=10)]: Done  65 tasks      | elapsed:  6.1min
  return -self.Nsample*np.su

[Parallel(n_jobs=10)]: Done 160 out of 162 | elapsed:  6.3min remaining:    4.8s
[Parallel(n_jobs=10)]: Done 160 out of 162 | elapsed:  6.4min remaining:    4.8s
[Parallel(n_jobs=10)]: Done 162 out of 162 | elapsed:  6.4min finished
[Parallel(n_jobs=10)]: Done 162 out of 162 | elapsed:  6.3min finished
[Parallel(n_jobs=10)]: Done 162 out of 162 | elapsed:  6.4min finished
Error in file(file, "rt") : cannot open the connection
Calls: read.table -> file
In file(file, "rt") :
  cannot open file '/home/cke/PseudoBulk/Results/cellcategory_subtype.csv': No such file or directory
Execution halted
Error in file(file, "rt") : cannot open the connection
Calls: read.table -> file
In file(file, "rt") :
  cannot open file '/home/cke/PseudoBulk/Results/cellcategory_subtype.csv': No such file or directory
Execution halted


No feature filtering is done (fsel = 0)
Done optimization, elapsed time (min): 6.416964999834696
export to:  /home/cke/PseudoBulk/Results/BLADE/test_fullpipeline_1_pseudobulk_28_LOT_BLADEout_top20markers.pickle


Error in file(file, "rt") : cannot open the connection
Calls: read.table -> file
In file(file, "rt") :
  cannot open file '/home/cke/PseudoBulk/Results/cellcategory_subtype.csv': No such file or directory
Execution halted


No feature filtering is done (fsel = 0)
Done optimization, elapsed time (min): 6.514795935153961
export to:  /home/cke/PseudoBulk/Results/BLADE/test_fullpipeline_1_pseudobulk_17_LOT_BLADEout_top20markers.pickle


Error in file(file, "rt") : cannot open the connection
Calls: read.table -> file
In file(file, "rt") :
  cannot open file '/home/cke/PseudoBulk/Results/cellcategory_subtype.csv': No such file or directory
Execution halted
[Parallel(n_jobs=10)]: Done 160 out of 162 | elapsed:  6.5min remaining:    4.9s


No feature filtering is done (fsel = 0)
Done optimization, elapsed time (min): 6.523650479316712
export to:  /home/cke/PseudoBulk/Results/BLADE/test_fullpipeline_1_pseudobulk_26_LOT_BLADEout_top20markers.pickle
No feature filtering is done (fsel = 0)
Done optimization, elapsed time (min): 6.598040560881297
export to:  /home/cke/PseudoBulk/Results/BLADE/test_fullpipeline_1_pseudobulk_6_LOT_BLADEout_top20markers.pickle
No feature filtering is done (fsel = 0)
Done optimization, elapsed time (min): 6.602287614345551
export to:  /home/cke/PseudoBulk/Results/BLADE/test_fullpipeline_1_pseudobulk_18_LOT_BLADEout_top20markers.pickle
No feature filtering is done (fsel = 0)
Done optimization, elapsed time (min): 6.605280884106954
export to:  /home/cke/PseudoBulk/Results/BLADE/test_fullpipeline_1_pseudobulk_16_LOT_BLADEout_top20markers.pickle
No feature filtering is done (fsel = 0)
Done optimization, elapsed time (min): 6.520261077086131
export to:  /home/cke/PseudoBulk/Results/BLADE/test_fullpipe

[Parallel(n_jobs=10)]: Done 162 out of 162 | elapsed:  7.3min finished


No feature filtering is done (fsel = 0)
Done optimization, elapsed time (min): 7.382211204369863
export to:  /home/cke/PseudoBulk/Results/BLADE/test_fullpipeline_1_pseudobulk_22_LOT_BLADEout_top20markers.pickle
