# Trajectory inference for hematopoiesis in mouse

Reconstructing myeloid and erythroid differentiation for data of [Paul et al. (2015)](http://doi.org/10.1016/j.cell.2015.11.013).

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as pl
from matplotlib import rcParams
import scanpy as sc
import utils as ut
import glob
from sklearn.metrics import classification_report
from scipy.stats import pearsonr
from utils import de_score
import random
import seaborn as sns
import matplotlib.pyplot as plt


import warnings
warnings.filterwarnings('ignore')
from scipy.stats import ranksums,ttest_ind


In [None]:
# Global scanpy configuration for the whole notebook.
sc.settings.verbosity = 0  # verbosity: errors (0), warnings (1), info (2), hints (3)
#sc.logging.print_versions()
# Default figure style: dpi=300, no axis frame, figsize (3, 3), white background.
sc.settings.set_figure_params(dpi=300, frameon=False, figsize=(3, 3), facecolor='white')

In [None]:
# List all .h5ad files in the review directory (absolute, machine-specific path).
h5ad_list = glob.glob("D:/pyws/trainsource/saved/adata/review/*.h5ad")

In [None]:
# Probe the dataset-name parsing used later in the evaluation loop: keep the text
# before the first "1214", then take the second backslash-separated component.
# NOTE(review): relies on Windows-style "\" separators and on "1214" appearing in
# every file name (presumably a date stamp) — confirm before running elsewhere.
h5ad_list[4].split("1214")[0].split("\\")[1]

In [None]:
# Show the files that were found.
h5ad_list

In [None]:
# Dataset name -> adata.obs column used to group / colour cells for that dataset.
CELL_TYPE_KEY = {"GSE117872_HN137":"cell_color","GSE117872_HN120":"cell_color","GSE110894":"Sample name"}

In [None]:
# Load the first file and display the shape of its expression matrix.
f= h5ad_list[0]
adata = sc.read_h5ad(filename=f)
adata.X.shape

In [None]:
# Uncomment to restrict the evaluation loop to the first file only.
#h5ad_list = [h5ad_list[0]]

# Generate 3 figures comparing predictions before transfer, after transfer, and the ground truth

The F1 score (together with average precision and AUROC) before and after transfer learning is also saved to a CSV file.

In [None]:
# Random score test
def ran_test_score(adata, n_iters=1000, n_genes=50):
    """Correlate random gene-set scores with the reference scores (null distribution).

    The observed Pearson correlations are computed first from ``adata.obs``:
    ``"1_score"`` vs ``"Sensitive_score"`` and ``"0_score"`` vs
    ``"Resistant_score"``.  Then, ``n_iters`` times, ``n_genes`` genes are drawn
    at random from ``adata.var.index``, scored with ``sc.tl.score_genes`` into
    ``obs["l1_score"]``, and that random score is correlated with both
    reference scores.

    Parameters
    ----------
    adata
        Annotated data object whose ``obs`` holds the four score columns named
        above.  The caller's object is never modified.
    n_iters
        Number of random gene sets to draw.
    n_genes
        Number of genes per random set (50 was previously hard-coded).

    Returns
    -------
    tuple
        ``(adata, ran_results1, s_score, s_pval, ran_results2, r_score, r_pval)``.
        ``adata`` is a copy of the input carrying ``obs["l1_score"]`` from the
        last random draw (the input itself when ``n_iters <= 0``);
        ``ran_results1`` / ``ran_results2`` are the random correlations with
        ``"Sensitive_score"`` / ``"Resistant_score"``; the remaining values are
        the observed correlations and their p-values.

    Raises
    ------
    ValueError
        From ``random.sample`` when ``n_genes`` exceeds the number of genes.
    """
    ran_results1 = []
    ran_results2 = []

    s_score, s_pval = pearsonr(adata.obs["1_score"], adata.obs["Sensitive_score"])
    r_score, r_pval = pearsonr(adata.obs["0_score"], adata.obs["Resistant_score"])

    if n_iters <= 0:
        return adata, ran_results1, s_score, s_pval, ran_results2, r_score, r_pval

    # Copy once and score in place afterwards.  Calling score_genes(copy=True)
    # inside the loop duplicated the whole object on every iteration.
    adata = adata.copy()
    gene_pool = list(adata.var.index)

    for _ in range(n_iters):
        gene_set = random.sample(gene_pool, n_genes)
        sc.tl.score_genes(adata, gene_list=gene_set, score_name="l1_score")

        rand_score1, _ = pearsonr(adata.obs["l1_score"], adata.obs["Sensitive_score"])
        rand_score2, _ = pearsonr(adata.obs["l1_score"], adata.obs["Resistant_score"])

        ran_results1.append(rand_score1)
        ran_results2.append(rand_score2)

    return adata, ran_results1, s_score, s_pval, ran_results2, r_score, r_pval

In [None]:
from sklearn.metrics import (auc, average_precision_score,
                             classification_report, mean_squared_error,
                             precision_recall_curve, r2_score, roc_auc_score)


def _save_null_histogram(random_scores, observed_score, out_path):
    """Save a histogram of random-gene-set correlations with the observed one marked.

    random_scores: correlations obtained from random gene sets (null distribution).
    observed_score: correlation of the real score, drawn as a red dashed line.
    out_path: file the figure is written to; the current figure is cleared afterwards.
    """
    plt.hist(random_scores)
    plt.xlim(xmin=-1, xmax=1)
    plt.axvline(observed_score, color='r', linestyle='dashed', linewidth=1)
    # The black line is the mean of the distribution actually plotted.  The
    # resistant figure used to draw the mean of the sensitive distribution here.
    plt.axvline(np.mean(random_scores), color='k', linestyle='dashed', linewidth=1)
    _, max_ylim = plt.ylim()

    # NOTE(review): the p-value label is fixed text, not derived from random_scores.
    plt.text(-0.95, max_ylim*0.8, 'p < 0.001', color='k')
    plt.text(-0.95, max_ylim*0.9, 'r: {:.2f}'.format(observed_score), color='k')

    plt.savefig(out_path)
    plt.clf()


# Long-format accumulators: one entry per (dataset, model) pair, later turned
# into the `result_compare` table.
names = []
score = []
model = []

for f in h5ad_list:

    adata = sc.read_h5ad(filename=f)
    pretrain_label = adata.obs['sens_label_pret']
    sens_label = adata.obs['sens_label']

    print(adata.obs.columns)

    grouth_truth = adata.obs['sensitive']
    sens_pb_pret = adata.obs['sens_preds_pret']
    sens_label_pret = adata.obs['sens_label_pret']
    sens_pb = adata.obs['sens_preds']

    # Dataset name: text before the first "1214" in the path, then the second
    # backslash-separated component (Windows-style paths).
    name = f.split("1214")[0].split("\\")[1]

    if name == "GSE110894":
        # NOTE(review): the label/score Series above were extracted before this
        # filter, so the metrics below still include the "EMPTY" samples while
        # the UMAP and gene scores do not — confirm that this is intended.
        adata = adata[(adata.obs["Sample name"] != "EMPTY") &
                      (adata.obs["Sample name"] != "EMPTY "), :]

    # Metrics of the pre-trained model (before transfer).
    report_dict_pret = classification_report(grouth_truth, pretrain_label, output_dict=True)
    classification_report_pret_df = pd.DataFrame(report_dict_pret).T
    f1score_pret = report_dict_pret['weighted avg']['f1-score']
    ap_pret = average_precision_score(grouth_truth, sens_pb_pret)
    # AUROC is a ranking metric, so it gets the continuous predictions, like
    # average precision above.  It was previously computed from the thresholded
    # labels, which collapses the ROC curve to a single operating point.
    # Presumably sens_preds* hold the predicted score of the positive class — confirm.
    auroc_pret = roc_auc_score(grouth_truth, sens_pb_pret)

    # Metrics of the transferred model.
    report_dict = classification_report(grouth_truth, sens_label, output_dict=True)
    classification_report_df = pd.DataFrame(report_dict).T
    f1score = report_dict['weighted avg']['f1-score']
    ap = average_precision_score(grouth_truth, sens_pb)
    auroc = roc_auc_score(grouth_truth, sens_pb)

    names.append(name)
    score.append(f1score_pret)
    model.append("before")

    names.append(name)
    score.append(f1score)
    model.append("transfer")

    result = pd.DataFrame({"f1": [f1score_pret, f1score],
                           "ap": [ap_pret, ap],
                           "auroc": [auroc_pret, auroc]}, index=["pretrain", "transfer"])

    print()
    # Save the figure
    sc.pl.umap(adata, color=['sens_preds_pret', 'sens_preds', 'sensitive'], save="report_compare"+name + '.pdf')
    # Save the scores before and after transfer
    result.to_csv("saved/results/report_compare"+name + '.csv')

    # Compute the predicted sensitive/resistant gene scores
    adata = ut.de_score(adata, clustername='sens_label')

    # Null distributions from 1000 random gene sets
    adata, s_ran, s_score, s_pval, r_ran, r_score, r_pval = ran_test_score(adata, 1000)

    _save_null_histogram(s_ran, s_score, "saved/figures/random_sensitive_gene"+name + '.svg')
    _save_null_histogram(r_ran, r_score, "saved/figures/random_resistant_gene"+name + '.svg')

In [None]:
# Long-format score table: one row per (dataset, model) pair, built from the
# three lists filled by the evaluation loop.
result_compare = pd.DataFrame({"data":names,
                      "transfer":model,
                      "score":score})

In [None]:
# Inspect the per-cell annotations of whatever `adata` is currently bound to
# (the last file processed by the loop when cells are run in order).
adata.obs

# Plot the UMAP highlighting the wrong predictions

In [None]:
# Boolean mask: cells whose 'sens_label' disagrees with the 'sensitive' column.
idx = adata.obs["sensitive"] != adata.obs["sens_label"]

In [None]:
# Start with every cell unflagged (missing value).
adata.obs['wrong'] = pd.NA

In [None]:
# Flag only the mismatching cells; all others stay missing, so the column
# ends up with at most one category ("Wrong").
adata.obs.loc[idx,'wrong'] = "Wrong"
adata.obs['wrong'] = adata.obs.wrong.astype('category')

In [None]:
#sc.pl.umap(adata,color=['ABCC2', 'BIRC3', 'CCND1', 'CFLAR', 'CHEK2', 'ERCC1', 'MT2A', 'POLB', 'GCLC','GSTT2','TP53','MDM2','RAC1'],size=20)

In [None]:
# UMAP coloured by the 'wrong' flag, so mismatching cells stand out.
sc.pl.umap(adata,color="wrong",size=20)

In [None]:
# Per-group error table for the current dataset: within each group of the
# dataset's cell-type column, 'count' is the number of non-missing 'wrong'
# entries (flagged cells) and 'size' is the total number of cells.
df = (
    adata.obs[[CELL_TYPE_KEY[name], "wrong"]]
    .groupby([CELL_TYPE_KEY[name]])
    .aggregate(['count', 'size'])
    .reset_index()
)
df['data'] = name
df

In [None]:
# Display the long-format score table.
result_compare

# Barplot of score before and after the transfer

In [None]:

# Grouped bars: one group per dataset, one bar per model ("before" / "transfer").
# High-resolution (dpi=800), wide (5 x 3) figure settings for this plot.
sc.settings.set_figure_params(
    dpi=800,
    frameon=False,
    figsize=(5, 3),
    facecolor='white',
)
g = sns.barplot(
    x="data",
    y="score",
    hue="transfer",
    palette="Greens_r",
    data=result_compare,
)
# Drop the legend and the left/bottom spines for a cleaner panel.
plt.legend().remove()
sns.despine(bottom=True, left=True)

# Tilt the dataset names so they do not overlap.
plt.setp(g.get_xticklabels(), rotation=-45)

# Figure 2 A panel

In [None]:
# Keep the original prediction labels under 'pred_binary' before relabelling.
adata.obs["pred_binary"] = adata.obs["sens_label"]
# Relabel categories 1 / 0 as "Sensitive" / "Resistant" (uses the .cat accessor,
# so 'sens_label' must be categorical).
adata.obs.sens_label = adata.obs.sens_label.cat.rename_categories({1:"Sensitive", 0:"Resistant"})

In [None]:
#adata.obs.sensitivity = adata.obs.sensitivity.cat.rename_categories({"Sensitive":"Response", "Resistant":"Resistant"})

In [None]:
# Rename the columns to the titles wanted on the figure panels.
# NOTE(review): the evaluation loop reads its ground truth from 'sensitive',
# whereas the column renamed here is 'sensitivity' — confirm that both exist.
adata.obs=adata.obs.rename(columns={"sensitivity": "Ground Truth", "sens_label": "Prediction"})

In [None]:
# Figure 2A, part 1: ground truth next to the prediction.
sc.settings.set_figure_params(dpi=250, frameon=False, figsize=(3, 3), facecolor='white')
ax = sc.pl.umap(adata,color=['Ground Truth',"Prediction"],size=40,show=False,wspace=0.1)
# Keep a single legend, attached to the second panel.
ax[0].legend().remove()
ax[1].legend(loc ='lower center',bbox_to_anchor=(-0.2, 0),frameon=False) 
plt.savefig("saved/figures/figure2A_1"+name + ".pdf")

In [None]:
# NOTE(review): self-assignment; the column values are unchanged by this line.
adata.obs["Sample name"] = adata.obs["Sample name"]

In [None]:
# Figure 2A, part 2: UMAP coloured by the 'rest_preds' column.
ax = sc.pl.umap(adata,color=["rest_preds"],size=40,palette="Paired",show=False)
plt.savefig("saved/figures/figure2A_2"+name + ".pdf")

In [None]:
# Inspect the sample labels before they are shortened.
adata.obs["Sample name"]

In [None]:
# Replace the long sample labels with short ones for the figure legends
# (uses the .cat accessor, so 'Sample name' must be categorical).
adata.obs["Sample name"] = adata.obs["Sample name"].cat.rename_categories(
    {
        '101 CELL CONTROL': "control",
        "MA9 IBET RESISTANT CELLS": "IBET Resistant",
        'MA9 IBET RESISTANT CELLS- WITHDRAWAL': "IBET Resistant(withdraw)",
        'MA9 PARENTALS DMSO': "IBET DMSO",
        "MA9 PARENTALS IBET 400NMOL": "IBET 400NMOL",
    }
)

In [None]:
# Drop the cells labelled "control" (the short label assigned by the rename cell).
adata = adata[adata.obs["Sample name"]!="control"]

In [None]:
# Figure 2A, part 3: UMAP coloured by the dataset's cell-type column.
ax = sc.pl.umap(adata,color=[CELL_TYPE_KEY[name]],size=40,palette="Paired",show=False)
ax.legend(loc ='lower center',bbox_to_anchor=(0.5, -0.55),frameon=False) 
plt.savefig("saved/figures/figure2A_3"+name + ".pdf")

In [None]:
# Three-panel comparison: ground truth, prediction and cell type, each with its
# legend placed below the panel.
ax = sc.pl.umap(adata,color=['Ground Truth',"Prediction",CELL_TYPE_KEY[name]],size=25,palette="Paired",show=False)
ax[0].legend(loc ='lower center',bbox_to_anchor=(0.5, -0.25)) 
ax[1].legend(loc ='lower center',bbox_to_anchor=(0.5, -0.25)) 
ax[2].legend(loc ='lower center',bbox_to_anchor=(0.5, -0.5)) 
# NOTE(review): unlike the other figures this one is written to the working
# directory rather than "saved/figures/" — confirm that this is intended.
plt.savefig("figure2A_compare"+name + ".pdf")
plt.close()

In [None]:
# List the annotation columns currently available.
adata.obs.columns

In [None]:
# Recompute the gene scores grouped by 'pred_binary' ('sens_label' was renamed
# to "Prediction" earlier in the notebook).
# NOTE(review): presumably ut.de_score adds the '1_score' / '0_score' columns
# read below — confirm in utils.de_score.
adata = ut.de_score(adata,clustername='pred_binary')

In [None]:
adata.obs

In [None]:
# Pearson correlation between the predicted and the reference scores.
sens_score,s_pval = pearsonr(adata.obs["1_score"],adata.obs["Sensitive_score"])
resistant_score,r_pval = pearsonr(adata.obs["0_score"],adata.obs["Resistant_score"])

In [None]:
# Figure 2A, part 4: '1_score' next to 'Sensitive_score'.
ax = sc.pl.umap(adata,color=["1_score","Sensitive_score"],size=40,palette="Set2",show=False)
ax[0].legend(loc ='lower center',bbox_to_anchor=(0.5, -0.55),frameon=False) 
ax[1].legend(loc ='lower center',bbox_to_anchor=(0.5, -0.55),frameon=False) 
plt.savefig("saved/figures/figure2A_4"+name + ".pdf")
plt.clf()

In [None]:
# Figure 2A, part 5: '0_score' next to 'Resistant_score'.
ax = sc.pl.umap(adata,color=["0_score","Resistant_score"],size=40,palette="flare",show=False)
ax[0].legend(loc ='lower center',bbox_to_anchor=(0.5, -0.55),frameon=False) 
ax[1].legend(loc ='lower center',bbox_to_anchor=(0.5, -0.55),frameon=False) 
plt.savefig("saved/figures/figure2A_5"+name + ".pdf")
plt.clf()

In [None]:
# import random

# import warnings
# warnings.filterwarnings('ignore')


# ran_results1 = []
# ran_results2 = []

# s_results = []
# r_results = []

# sc.settings.verbosity = 0

# for i in range(0,1000):

#     gl1 = random.sample(list(adata.var.index),50)
#     gl2 = random.sample(list(adata.var.index),50)
#     cl1 = random.sample(list(adata.obs.index),200)

#     cdata = adata[cl1,]

#     cdata=sc.tl.score_genes(cdata, gene_list=gl1,score_name="l1_score",copy=True)
#     #cdata=sc.tl.score_genes(cdata, gene_list=gl2,score_name="l2_score",copy=True)

#     rand_score1,rand_pval1 = pearsonr(cdata.obs["l1_score"],cdata.obs["Sensitive_score"])
#     rand_score2,rand_pval2 = pearsonr(cdata.obs["l1_score"],cdata.obs["Resistant_score"])

#     #sens_score,s_pval = pearsonr(adata.obs["l1_score"],adata.obs["l1_score"])
#     ran_results1.append(rand_score1)
#     ran_results2.append(rand_score2)

#     s_score,s_pval = pearsonr(cdata.obs["1_score"],cdata.obs["Sensitive_score"])
#     s_results.append(s_score)
    
#     r_score,r_pval = pearsonr(cdata.obs["0_score"],cdata.obs["Resistant_score"])
#     r_results.append(r_score)

In [None]:

def ran_test_score(adata, n_iters=1000, n_genes=50):
    """Correlate random gene-set scores with the reference scores (null distribution).

    The observed Pearson correlations are computed first from ``adata.obs``:
    ``"1_score"`` vs ``"Sensitive_score"`` and ``"0_score"`` vs
    ``"Resistant_score"``.  Then, ``n_iters`` times, ``n_genes`` genes are drawn
    at random from ``adata.var.index``, scored with ``sc.tl.score_genes`` into
    ``obs["l1_score"]``, and that random score is correlated with both
    reference scores.

    Parameters
    ----------
    adata
        Annotated data object whose ``obs`` holds the four score columns named
        above.  The caller's object is never modified.
    n_iters
        Number of random gene sets to draw.
    n_genes
        Number of genes per random set (50 was previously hard-coded).

    Returns
    -------
    tuple
        ``(adata, ran_results1, s_score, s_pval, ran_results2, r_score, r_pval)``.
        ``adata`` is a copy of the input carrying ``obs["l1_score"]`` from the
        last random draw (the input itself when ``n_iters <= 0``);
        ``ran_results1`` / ``ran_results2`` are the random correlations with
        ``"Sensitive_score"`` / ``"Resistant_score"``; the remaining values are
        the observed correlations and their p-values.

    Raises
    ------
    ValueError
        From ``random.sample`` when ``n_genes`` exceeds the number of genes.
    """
    ran_results1 = []
    ran_results2 = []

    s_score, s_pval = pearsonr(adata.obs["1_score"], adata.obs["Sensitive_score"])
    r_score, r_pval = pearsonr(adata.obs["0_score"], adata.obs["Resistant_score"])

    if n_iters <= 0:
        return adata, ran_results1, s_score, s_pval, ran_results2, r_score, r_pval

    # Copy once and score in place afterwards.  Calling score_genes(copy=True)
    # inside the loop duplicated the whole object on every iteration.
    adata = adata.copy()
    gene_pool = list(adata.var.index)

    for _ in range(n_iters):
        gene_set = random.sample(gene_pool, n_genes)
        sc.tl.score_genes(adata, gene_list=gene_set, score_name="l1_score")

        rand_score1, _ = pearsonr(adata.obs["l1_score"], adata.obs["Sensitive_score"])
        rand_score2, _ = pearsonr(adata.obs["l1_score"], adata.obs["Resistant_score"])

        ran_results1.append(rand_score1)
        ran_results2.append(rand_score2)

    return adata, ran_results1, s_score, s_pval, ran_results2, r_score, r_pval

In [None]:
# Quick run with only 10 random gene sets.
adata,s_ran, s_score,s_pval ,r_ran,r_score,r_pval = ran_test_score(adata,10)

In [None]:
r_ran

In [None]:
# NOTE(review): s_results, r_results, ran_results1 and ran_results2 are only
# assigned in the commented-out cell earlier in the notebook; these lines raise
# NameError unless those names already exist in the session.
w1,p1 = ranksums(s_results, ran_results1,alternative ="greater")
w2,p2 = ranksums(r_results, ran_results2,alternative ="greater")

In [None]:
np.max(ran_results1)

In [None]:
# NOTE(review): `npr_ran` is not defined anywhere in the visible notebook —
# looks like a typo (perhaps `r_ran` or an `np.<func>(r_ran)` call); confirm.
npr_ran

In [None]:
# Histogram of the random-gene-set correlations with 'Resistant_score'.
# Red dashed line: observed correlation; black dashed line: mean of the random ones.
plt.hist(r_ran)
plt.xlim(-1, 1)
dashed_line = dict(linestyle='dashed', linewidth=1)
plt.axvline(r_score, color='r', **dashed_line)
plt.axvline(np.mean(r_ran), color='k', **dashed_line)

min_ylim, max_ylim = plt.ylim()
# NOTE(review): the p-value label is fixed text, not computed from r_ran.
plt.text(-0.95, 0.8 * max_ylim, 'p < 0.001', color='k')
plt.text(-0.95, 0.9 * max_ylim, f'r: {r_score:.2f}', color='k')
plt.savefig(f"saved/figures/random_resistant_gene{name}.svg")
plt.clf()

In [None]:
# Histogram of the random-gene-set correlations with 'Sensitive_score'.
# Red dashed line: observed correlation; black dashed line: mean of the random ones.
plt.hist(s_ran)
plt.xlim(xmin=-1, xmax = 1)
plt.axvline(s_score, color='r', linestyle='dashed', linewidth=1)
plt.axvline(np.mean(s_ran), color='k', linestyle='dashed', linewidth=1)

min_ylim, max_ylim = plt.ylim()
#plt.text(-1, max_ylim*0.7, 'Mean: {:.2f}'.format(np.mean(s_ran)))
# NOTE(review): the p-value label is fixed text, not computed from s_ran.
plt.text(-0.95, max_ylim*0.8, 'p < 0.001', color='k')
plt.text(-0.95, max_ylim*0.9, 'r: {:.2f}'.format(s_score), color='k')
# File name spelled "sensitive" so it matches the figure written by the
# evaluation loop (it was misspelled "senstive", producing a second file).
plt.savefig("saved/figures/random_sensitive_gene"+name + '.svg')
plt.clf()

In [None]:
# NOTE(review): s_results, r_results and ran_results1 are only assigned in the
# commented-out cell earlier in the notebook, and w1 comes from the ranksums
# cell that depends on them; the cells below fail with NameError unless those
# names already exist in the session.
ttest_ind(s_results, ran_results1,alternative ="greater")

In [None]:
np.mean(s_results)

In [None]:
np.mean(r_results)

In [None]:
w1

In [None]:
r_results

In [None]:
# p-value of the observed '0_score' vs 'Resistant_score' correlation.
r_pval