In [1]:
import os
import pandas as pd
from IPython.display import Image
from deside.utility import check_dir
from deside.decon_cf import DeSide
from deside.plot import plot_predicted_result

### Using pre-trained model

In [2]:
# Bulk gene expression profiles (GEPs) in TPM format.
# The CSV stores genes as rows and samples as columns; the first column
# (gene names) becomes the index.
bulk_tpm_file_path = './datasets/TCGA/tpm/LUAD/LUAD_TPM.csv'
bulk_tpm = pd.read_csv(filepath_or_buffer=bulk_tpm_file_path, index_col=0)

# Quick sanity check: (n_gene, n_sample), followed by a two-row preview.
print(bulk_tpm.shape)
bulk_tpm.head(2)

(19712, 515)


Unnamed: 0_level_0,TCGA-55-8508-01A,TCGA-67-3771-01A,TCGA-55-A4DG-01A,TCGA-91-7771-01A,TCGA-91-6849-01A,TCGA-64-5781-01A,TCGA-44-6146-01B,TCGA-97-7552-01A,TCGA-80-5608-01A,TCGA-91-6829-01A,...,TCGA-55-A4DF-01A,TCGA-67-3773-01A,TCGA-55-7573-01A,TCGA-50-5068-01A,TCGA-49-AARN-01A,TCGA-78-7150-01A,TCGA-MP-A4TA-01A,TCGA-55-7907-01A,TCGA-55-5899-01A,TCGA-55-7574-01A
gene_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
TRIM71,0.015,0.041,0.491,0.155,0.016,0.011,1.366,1.079,0.2,0.483,...,0.094,0.0,2.909,0.024,0.155,0.009,0.088,0.053,0.011,0.688
CCR4,2.516,5.237,8.719,12.704,7.546,1.258,12.888,11.499,0.893,1.354,...,6.534,7.799,8.997,7.397,1.229,0.884,2.706,5.959,4.158,22.56


In [3]:
# Output directory for this example; check_dir is expected to create it
# when it does not exist yet (helper from deside.utility -- confirm there).
result_dir = './results/E1'
check_dir(result_dir)

# File that will receive the predicted cell fractions.
y_pred_file_path = os.path.join(result_dir, 'y_pred.csv')

In [4]:
# Load the pre-trained DeSide model from its directory.
model_dir = './DeSide_model/'
deside_model = DeSide(model_dir=model_dir)

# Run the prediction with the pre-trained model and write the result
# to y_pred_file_path.
# - transpose=True: the bulk TPM file is laid out as genes by samples
#   (rows by columns).
# - scaling_by_constant is what the manuscript used; Scaden used
#   scaling_by_sample instead.
deside_model.predict(
    input_file=bulk_tpm_file_path,
    output_file_path=y_pred_file_path,
    exp_type='TPM',
    transpose=True,
    scaling_by_sample=False,
    scaling_by_constant=True,
)

   Start to predict cell fractions by pre-trained model...
6168 common genes will be used, 13544 genes will be removed.
   > 6168 genes included in pre-trained model and will be used for prediction.
   The shape of X is: (515, 6168), (n_sample, n_gene)


2023-05-11 17:22:33.911683: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


   Pre-trained model loaded from ./DeSide_model/model_DeSide.h5.
   Model prediction done.


In [5]:
# Read the predicted cell fractions back from disk; the first column
# (sample IDs) becomes the index.
y_pred = pd.read_csv(filepath_or_buffer=y_pred_file_path, index_col=0)

# Shape check, then a two-row preview of the predictions.
print(y_pred.shape)
y_pred.head(2)

(515, 12)


Unnamed: 0,B Cells,CD4 T,CD8 T,DC,Endothelial Cells,Fibroblasts,Macrophages,Mast Cells,NK,Neutrophils,1-others,Cancer Cells
TCGA-55-8508-01A,0.109,0.022,0.005,0.007,0.058,0.113,0.073,0.002,0.01,0.013,0.588,0.588
TCGA-67-3771-01A,0.044,0.02,0.018,0.005,0.032,0.105,0.216,0.0,0.009,0.001,0.552,0.552


In [6]:
# Plot the predicted cell proportions; figures are saved under result_dir.
plot_predicted_result(
    cell_frac_result_fp=y_pred_file_path,
    bulk_exp_fp=bulk_tpm_file_path,
    cancer_type='LUAD',
    model_name='DeSide',
    result_dir=result_dir,
    font_scale=2,
)

   Using previous figure, ./results/E1/CD8A_vs_predicted_CD8 T_proportion.png
   Cell types:  B Cells, CD4 T, CD8 T, Cancer Cells, DC, Endothelial Cells, Fibroblasts, Macrophages, Mast Cells, NK, Neutrophils


In [7]:
# Display the figure of predicted cell proportions (presumably written to
# result_dir by plot_predicted_result -- confirm the file name there).
# `filename=` reads the local file and embeds the image bytes in the cell
# output, so the figure still renders when the notebook is shared or exported
# without the ./results folder; `url=` only stores a link to the path.
# The path is built from result_dir instead of repeating the directory literal.
Image(filename=os.path.join(result_dir, 'pred_cell_prop_before_decon.png'), width=1200)