In [1]:
# Libraries
import scanpy as sc
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import deconomix as dcx
from scipy.stats import spearmanr, pearsonr
from eval_functions import create_composition_overview, create_performance_overview

In [2]:
# Cell-type label aggregation: lookup tables from each DISCO cell-type label
# to its "major" and "minor" group, as listed in the aggregation CSV.
ct_aggregation = pd.read_csv('../Data/DISCO/cell_type_aggregation.csv')
disco_labels = ct_aggregation['Cell type (DISCO)']
ct_major = {
    disco: major
    for disco, major in zip(disco_labels, ct_aggregation['Cell type (major)'])
}
ct_minor = {
    disco: minor
    for disco, minor in zip(disco_labels, ct_aggregation['Cell type (minor)'])
}
# Display the aggregation table as the cell output.
ct_aggregation

Unnamed: 0,Cell type (DISCO),Cell type (minor),Cell type (major)
0,Arterial EC,Arterial EC,Endothelial cell
1,B cell,B cell,B cell
2,Breast basal cell,Healthy Epithelial,Healthy Epithelial
3,Breast cancer specific luminal cell,Cancer Epithelial,Cancer Epithelial
4,Breast cancer specific proliferation luminal cell,Cancer Epithelial,Cancer Epithelial
5,CD4 T,CD4 T cell,CD4 T cell
6,CFD fibroblast,Fibroblast,Fibroblast
7,CXCL1/2/3 fibroblast,Fibroblast,Fibroblast
8,CXCL13 exhausted CD8 T,CD8 T cell,CD8 T cell
9,Capillary EC,Capillary EC,Endothelial cell


In [3]:
# Load the preprocessed DISCO cancer data.
data = pd.read_pickle('../Data/Preprocessed/disco_cancer_norm.pkl')
# Relabel every column with its minor cell type (lookup via ct_minor).
minor_labels = data.columns.map(ct_minor)
# Columns labelled 'Cancer Epithelial' are renamed to 'hidden'
# (presumably the component treated as unknown downstream; confirm in eval_functions).
data = (
    data
    .set_axis(minor_labels, axis=1)
    .rename(columns={'Cancer Epithelial': 'hidden'})
)

In [4]:
# Train/test split: each column of `data` is assigned to train (0) or test (1)
# with equal probability; the fixed seed makes the assignment reproducible.
np.random.seed(42)
n_obs = data.shape[1]
train_test_split = np.random.choice([0, 1], size=n_obs, p=[0.5, 0.5])

# Boolean column mask; labels are only 0 or 1, so the complement selects the test columns.
is_train = train_test_split == 0

# The training set excludes all 'hidden' columns; the test set keeps them.
data_train = data.loc[:, is_train].drop(columns='hidden')
data_test = data.loc[:, ~is_train]

In [5]:
# Table S1: composition overview of the train and test sets, exported to LaTeX.
table_S1 = create_composition_overview(data_train, data_test)

# Formatting options for the LaTeX export (one 'l' column followed by four 'c' columns).
table_S1_latex_options = {
    'index': False,
    'na_rep': '-',
    'float_format': '%.3f',
    'column_format': 'l|c|c|c|c',
}
table_S1.to_latex(buf="../Outputs/Table_S1.tex", **table_S1_latex_options)

# Display the table as the cell output.
table_S1

Unnamed: 0,cell type,train number,train percentage,test number,test percentage
0,Arterial EC,335.0,0.555,346,0.351
1,B cell,2524.0,4.184,2505,2.538
2,CD4 T cell,13376.0,22.175,13382,13.559
3,CD8 T cell,10012.0,16.598,9971,10.103
4,Capillary EC,977.0,1.62,1051,1.065
5,Dendritic cell,1679.0,2.783,1571,1.592
6,Fibroblast,8023.0,13.301,7990,8.095
7,Granulocyte,449.0,0.744,439,0.445
8,Healthy Epithelial,5049.0,8.37,5171,5.239
9,IgA plasma,568.0,0.942,570,0.578


In [6]:
# Table S2: performance overview computed from the train and test sets, exported to LaTeX.
table_S2 = create_performance_overview(data_train, data_test)

# Formatting options for the LaTeX export (one 'l' column followed by five 'c' columns).
table_S2_latex_options = {
    'index': False,
    'na_rep': '-',
    'float_format': '%.3f',
    'column_format': 'l|c|c|c|c|c',
}
table_S2.to_latex(buf="../Outputs/Table_S2.tex", **table_S2_latex_options)

# Display the table as the cell output.
table_S2

  0%|          | 0/1000 [00:00<?, ?it/s]

  r_val, _ = spearmanr(C_true.loc[celltype], C_est.loc[celltype])


  0%|          | 0/200 [00:00<?, ?it/s]

  r_val, _ = spearmanr(C_true.loc[celltype], C_est.loc[celltype])


  0%|          | 0/200 [00:00<?, ?it/s]

  r_val, _ = spearmanr(C_true.loc[celltype], C_est.loc[celltype])


  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  r_val, _ = spearmanr(C_true.loc[celltype], C_est.loc[celltype])


  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  r_val, _ = spearmanr(C_true.loc[celltype], C_est.loc[celltype])


  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

Unnamed: 0,cell type,Train Naive,Train DTD,Test Naive,Test DTD,Test ADTD
0,Arterial EC,0.202,0.675,0.044 ± 0.027,0.454 ± 0.036,0.535 ± 0.032
1,B cell,0.282,0.849,0.184 ± 0.04,0.805 ± 0.014,0.805 ± 0.016
2,CD4 T cell,0.47,0.844,0.414 ± 0.014,0.816 ± 0.012,0.776 ± 0.014
3,CD8 T cell,0.409,0.759,0.317 ± 0.021,0.686 ± 0.024,0.687 ± 0.024
4,Capillary EC,0.257,0.709,0.171 ± 0.027,0.603 ± 0.018,0.643 ± 0.018
5,Dendritic cell,0.319,0.742,0.038 ± 0.031,0.558 ± 0.016,0.62 ± 0.017
6,Fibroblast,0.668,0.943,0.662 ± 0.017,0.897 ± 0.005,0.903 ± 0.005
7,Granulocyte,0.54,0.743,0.463 ± 0.041,0.587 ± 0.032,0.626 ± 0.033
8,Healthy Epithelial,0.725,0.939,0.355 ± 0.031,0.347 ± 0.029,0.483 ± 0.028
9,IgA plasma,0.777,0.88,0.788 ± 0.014,0.835 ± 0.013,0.828 ± 0.012
