In [None]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
import scanpy as sc
import anndata
import bbknn
import os
from scipy import sparse
import matplotlib.pyplot as plt
# from scanpy_base_moudle_update2 import *
# import scrublet as scr
import datetime
import harmonypy as hm

sc.settings.verbosity = 3
#sc.logging.print_versions()
# Set the figure resolution and other default plot styling
sc.settings.set_figure_params(dpi=150, figsize = (4, 3), fontsize=12)

import matplotlib.font_manager
# List the font families Matplotlib can find, so that the availability of 'Arial'
# (selected below) can be checked.
# `get_fontconfig_fonts()` was deprecated in Matplotlib 3.5 and later removed;
# `findSystemFonts()` is the supported way to enumerate installed font files.
flist = matplotlib.font_manager.findSystemFonts()
names = sorted({matplotlib.font_manager.FontProperties(fname=fname).get_name() for fname in flist})
print(names)

params={
        #'font.style':'italic',
        'font.weight':'normal',    #or 'bold'
        }
plt.rcParams.update(params)

plt.rcParams['font.family']='Arial'

In [None]:
adata = sc.read('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/Stromal.h5ad')
adata

In [None]:
# adata = sc.AnnData(X=adata.raw.X, var=adata.raw.var, obs = adata.obs)

# Preprocessing

In [None]:
# QC overview (genes per cell, total counts, mitochondrial percentage) used to choose
# the cut-off for removing cells with an excessive mitochondrial fraction
sc.pl.violin(adata, ['n_genes_by_counts', 'total_counts', 'pct_counts_mt'],jitter=0.4, multi_panel=True)

In [None]:
# Keep cells whose `pct_counts_mt` is below 35.
# `.copy()` turns the AnnData view returned by the subsetting into a real object,
# because later cells add columns to `adata.obs`.
adata = adata[adata.obs.pct_counts_mt < 35, :].copy()
adata

In [None]:
## Further doublet removal: drop every cell with signal for any of the marker genes below
sc.settings.set_figure_params(dpi=150, figsize = (4, 3), fontsize=7)

# `adata` may still be a view after the QC subsetting above; materialise it before
# adding obs columns so AnnData does not have to copy implicitly.
if adata.is_view:
    adata = adata.copy()

# obs column that receives the per-cell score -> marker gene it is computed from
compartment_markers = {
    'count_CD45': 'PTPRC',
    'count_CD3D': 'CD3D',
    'count_CD3E': 'CD3E',
    'count_CD79A': 'CD79A',
    'count_CD79B': 'CD79B',
    'count_EPCAM': 'EPCAM',
}

# Compute the compartment scores (row sum of .X over the marker gene)
for obs_key, gene in compartment_markers.items():
    gene_mask = adata.var_names.isin([gene])
    # .X may be sparse, in which case the row sum is an (n_cells, 1) matrix; flatten it
    # to 1-D. A gene missing from var_names gives an all-zero column.
    adata.obs[obs_key] = np.asarray(adata[:, gene_mask].X.sum(axis=1)).ravel()

# A cell counts as 'Single' only if every marker score is below 0.1
# NOTE(review): the 0.1 cut-off assumes .X holds non-negative expression values — confirm.
is_single = (adata.obs[list(compartment_markers)] < 0.1).all(axis=1)
adata.obs['Compartments'] = np.where(is_single, 'Single', 'Doublets')
#sc.pl.umap(adata, color = ['Compartments'])

# Boolean selection avoids label look-ups; copy so downstream cells work on a real object.
adata = adata[is_single.values, :].copy()
adata

In [None]:
"""
def hvg_regress_scale(adata):
    # 注意，HVG要求输入的矩阵必须是对数化处理的
    sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)
    #sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5,batch_key='pan_development_state')
    #print("Highly variable genes intersection: %d"%sum(adata.var.highly_variable_intersection))
    #print("Number of batches where gene is variable:")
    #print(adata.var.highly_variable_nbatches.value_counts())
    adata = adata[:, adata.var.highly_variable]
    print(adata)
    
    sc.pp.scale(adata, zero_center=False)

    return adata

adata.raw = adata
adata = hvg_regress_scale(adata)
"""

# Annotation

## BBKNN

In [None]:
def progress_pca_bbknn_umap_tsne(adata, n_pcs=20, batch_key='batch_name'):
    """Build a BBKNN neighbour graph and compute a UMAP from it.

    Despite the name, this function runs neither PCA nor t-SNE: only
    ``sc.external.pp.bbknn`` and ``sc.tl.umap`` are called. Per the original
    author's note, BBKNN expects data that has already been normalised and
    PCA-reduced, and it is used here in place of ``sc.pp.neighbors`` to deal
    with batch effects.

    Parameters
    ----------
    adata
        Object handed to both scanpy calls; their return values are not used.
    n_pcs
        Forwarded to ``bbknn`` as ``n_pcs``.
    batch_key
        Forwarded to ``bbknn`` as ``batch_key``.

    Returns
    -------
    The ``adata`` object that was passed in.
    """
    bbknn_options = dict(batch_key=batch_key, n_pcs=n_pcs, neighbors_within_batch=3)
    sc.external.pp.bbknn(adata, **bbknn_options)

    # UMAP on the graph produced above
    sc.tl.umap(adata)

    return adata

adata = progress_pca_bbknn_umap_tsne(adata)

In [None]:
sc.pl.umap(adata, color=['tissue'], frameon=False, title='')

In [None]:
print(adata.obs['tissue'].cat.categories)

old_colors = np.array(adata.uns['tissue_colors'])
new_colors = old_colors
new_colors

In [None]:
# Fixed tissue palette, bound by category name instead of by position so that a change
# in the category order (or an extra category) cannot attach a colour to the wrong tissue.
tissue_palette = {
    'inferior turbinate': '#ff0000',
    'middle turbinate': '#0077b2',
    'polyp': '#60b55c',
}

new_colors = np.array(adata.uns['tissue_colors'])
for position, tissue in enumerate(adata.obs['tissue'].cat.categories):
    # Categories without an entry in the palette keep their current colour.
    if tissue in tissue_palette:
        new_colors[position] = tissue_palette[tissue]

adata.uns['tissue_colors'] = new_colors

In [None]:
sc.settings.set_figure_params(dpi=400, figsize = (4, 4), fontsize=7)
sc.pl.umap(adata, color=['tissue'], frameon=False, title='')

In [None]:
sc.pl.umap(adata, color=['majority_voting'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')

In [None]:
def add_leiden(adata, resolutions=(0.5, 1)):
    """Run Leiden clustering at several resolutions and keep every result.

    ``sc.tl.leiden`` is called once per resolution, in the given order. After each
    run the freshly written ``adata.obs['leiden']`` is stored again under
    ``adata.obs['leiden-all-<resolution>']``, so ``adata.obs['leiden']`` ends up
    holding the result of the last resolution.

    Parameters
    ----------
    adata
        Object passed to ``sc.tl.leiden``; its ``obs`` receives the new columns.
    resolutions
        Iterable of resolution values. The default ``(0.5, 1)`` reproduces the
        original hard-coded behaviour. The column suffix is ``str(resolution)``,
        so ``1`` and ``1.0`` give different column names.

    Returns
    -------
    The ``adata`` object that was passed in.
    """
    for resolution in resolutions:
        print(f"Performing clustering with a resolution of {resolution}")
        sc.tl.leiden(adata, resolution=resolution)
        # Each run overwrites obs['leiden']; keep it under a resolution-specific key.
        adata.obs[f'leiden-all-{resolution}'] = adata.obs['leiden']

    return adata

adata = add_leiden(adata)

In [None]:
sc.settings.set_figure_params(dpi=400, figsize = (4, 4), fontsize=7)
sc.pl.umap(adata, color=['leiden-all-1'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')

In [None]:
import cosg as cosg
import time

# Run COSG grouped by 'majority_voting' (results go to adata.uns['cosg']) and time it.
# time.clock() was removed in Python 3.8; perf_counter() is the supported timer
# (it measures elapsed wall-clock time).
t0 = time.perf_counter()
cosg.cosg(
    adata,
    key_added='cosg',
    mu=1,
    n_genes_user=50,
    groupby='majority_voting',
)
runtime_cosg = time.perf_counter() - t0

sc.settings.set_figure_params(dpi=200, figsize = (4, 4), fontsize=20)
sc.pl.rank_genes_groups_dotplot(
    adata,
    groupby='majority_voting',
    cmap='Spectral_r',
    standard_scale='var',
    n_genes=4,
    key='cosg',
)

In [None]:
import cosg as cosg
import time

# Run COSG grouped by 'leiden-all-1' (results go to adata.uns['cosg']) and time it.
# time.clock() was removed in Python 3.8; perf_counter() is the supported timer
# (it measures elapsed wall-clock time).
t0 = time.perf_counter()
cosg.cosg(
    adata,
    key_added='cosg',
    mu=1,
    n_genes_user=50,
    groupby='leiden-all-1',
)
runtime_cosg = time.perf_counter() - t0

sc.settings.set_figure_params(dpi=200, figsize = (4, 4), fontsize=20)
sc.pl.rank_genes_groups_dotplot(
    adata,
    groupby='leiden-all-1',
    cmap='Spectral_r',
    standard_scale='var',
    n_genes=4,
    key='cosg',
)

In [None]:
sc.pl.umap(adata, color=['PTGDS','CCL19','CCL21'], add_outline=False, frameon=False)

In [None]:
# Marker-gene panel for the stromal compartment, keyed by cell type.
# NOTE(review): 'Immature RSPO3-fibro' and 'RSPO3-fibro' list the same three genes, and
# several genes (CCL21, SOD2, HMGA1) appear under more than one cell type.
marker_S_dict = {
    'Glia': ['CRYAB', 'S100B', 'ALDH1A1'],
    'Arterial endo': ['GJA4', 'HEY1', 'HEY2'],        # arteries
    'Venous endo': ['ACKR1', 'VWF', 'ADGRG6'],        # veins
    'Capillary endo': ['RGCC', 'VWA1', 'CA4'],        # capillaries
    'Lymphatic endo': ['TFF3', 'LYVE1', 'CCL21'],     # lymphatic vessels
    'Pericytes': ['MYL9', 'ACTA2', 'TINAGL1', 'NOTCH3'],
    'Smooth muscle cells': ['ACTG2', 'CNN1', 'NPNT'],
    'Follicular dendritic cells': ['FDCSP', 'CXCL13', 'CLU'],
    'T reticular cells': ['PTGDS', 'CCL19', 'CCL21'],

    'Immature RSPO3-fibro': ['APOE', 'CTSC', 'CFD'],
    'RSPO3-fibro': ['APOE', 'CTSC', 'CFD'],
    # The embryonic-development paper defines the next population as fibroblast precursors:
    # low expression of mature-fibroblast markers, abundant in the embryo, and expressing
    # HMGA2, which has been reported to take part in embryonic fibroblast formation.
    # The UC and CD papers call it inflammation-associated or activated fibroblasts: its
    # proportion rises in both CD and UC and it expresses chemokine and MMP-family mRNAs.
    'Activated RSPO3-Fibro': ['SOD2', 'CHI3L1', 'HMGA1'],

    'RSPO3+Fibro': ['RSPO3', 'DCN', 'OGN'],
    'Activated RSPO3+Fibro': ['EGR1', 'SOD2', 'HMGA1'],
    'WNT5B+Fibro': ['WNT5B', 'F3', 'POSTN'],
    'Cycling fibro': ['MKI67', 'STMN1'],
}

sc.settings.set_figure_params(dpi=150, figsize = (4, 3), fontsize=18)
# Dot plot of the panel per 'leiden-all-1' cluster, read from adata.raw and scaled per gene
mp = sc.pl.dotplot(
    adata,
    marker_S_dict,
    'leiden-all-1',
    dendrogram=False,
    use_raw=True,
    cmap='Reds',
    standard_scale='var',
    colorbar_title=None,
    return_fig=True,
)
mp.style(grid=True, cmap='Reds').show()  # alternative colormap: RdYlBu_r

## Harmony

In [None]:
def progress_pca_harmony_umap(adata):
    """Correct the stored PCA with Harmony, then rebuild neighbours and UMAP.

    Reads ``adata.obsm['X_pca']`` (no PCA is computed here), runs
    ``hm.run_harmony`` on it with ``'batch_name'`` as the batch variable, stores
    the transposed ``Z_corr`` as ``adata.obsm['X_pca_harmony']``, and then calls
    ``sc.pp.neighbors`` on that representation followed by ``sc.tl.umap``.

    Parameters
    ----------
    adata
        Object providing ``obsm['X_pca']`` and ``obs``; it is also the object
        passed to the scanpy calls.

    Returns
    -------
    The ``adata`` object that was passed in.
    """
    # Batch correction with Harmony
    pca_embedding = adata.obsm['X_pca']
    harmony_out = hm.run_harmony(
        pca_embedding,
        adata.obs,
        vars_use=['batch_name'],
        max_iter_harmony=20,
    )
    # NOTE(review): the transpose presumes Z_corr is (components x cells) — confirm for
    # the installed harmonypy version.
    adata.obsm['X_pca_harmony'] = harmony_out.Z_corr.T

    neighbor_options = {'n_neighbors': 10, 'use_rep': 'X_pca_harmony', 'n_pcs': 20}
    sc.pp.neighbors(adata, **neighbor_options)

    # UMAP on the Harmony-based neighbour graph
    sc.tl.umap(adata)

    return adata

adata = progress_pca_harmony_umap(adata)

In [None]:
def add_leiden(adata, resolutions=(0.5, 1)):
    """Run Leiden clustering at several resolutions and keep every result.

    ``sc.tl.leiden`` is called once per resolution, in the given order. After each
    run the freshly written ``adata.obs['leiden']`` is stored again under
    ``adata.obs['leiden-all-<resolution>']``, so ``adata.obs['leiden']`` ends up
    holding the result of the last resolution.

    Parameters
    ----------
    adata
        Object passed to ``sc.tl.leiden``; its ``obs`` receives the new columns.
    resolutions
        Iterable of resolution values. The default ``(0.5, 1)`` reproduces the
        original hard-coded behaviour. The column suffix is ``str(resolution)``,
        so ``1`` and ``1.0`` give different column names.

    Returns
    -------
    The ``adata`` object that was passed in.
    """
    for resolution in resolutions:
        print(f"Performing clustering with a resolution of {resolution}")
        sc.tl.leiden(adata, resolution=resolution)
        # Each run overwrites obs['leiden']; keep it under a resolution-specific key.
        adata.obs[f'leiden-all-{resolution}'] = adata.obs['leiden']

    return adata

adata = add_leiden(adata)

In [None]:
sc.settings.set_figure_params(dpi=400, figsize = (4, 4), fontsize=7)
sc.pl.umap(adata, color=['tissue'], frameon=False, title='')
sc.pl.umap(adata, color=['majority_voting'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')
sc.pl.umap(adata, color=['leiden-all-1'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')

In [None]:
sc.pl.umap(adata, color=['PTGDS','CCL19','CCL21','PIEZO2'], add_outline=False, frameon=False)
sc.pl.umap(adata, color=['PI16','PIEZO1','MKI67','CXCL8'], add_outline=False, frameon=False)
sc.pl.umap(adata, color=['SOD2','CHI3L1','HMGA1','CCL21'], add_outline=False, frameon=False)

In [None]:
import cosg as cosg
import time

# Run COSG grouped by 'leiden-all-1' (results go to adata.uns['cosg']) and time it.
# time.clock() was removed in Python 3.8; perf_counter() is the supported timer
# (it measures elapsed wall-clock time).
t0 = time.perf_counter()
cosg.cosg(
    adata,
    key_added='cosg',
    mu=1,
    n_genes_user=50,
    groupby='leiden-all-1',
)
runtime_cosg = time.perf_counter() - t0

sc.settings.set_figure_params(dpi=200, figsize = (4, 4), fontsize=20)
sc.pl.rank_genes_groups_dotplot(
    adata,
    groupby='leiden-all-1',
    cmap='Spectral_r',
    standard_scale='var',
    n_genes=4,
    key='cosg',
)

In [None]:
pd.DataFrame(adata.uns['cosg']['names'])['14'].head(30)

In [None]:
# Marker-gene panel for the stromal compartment, keyed by cell type.
# NOTE(review): 'Immature RSPO3-fibro' and 'RSPO3-fibro' list the same three genes, and
# several genes (CCL21, SOD2, HMGA1) appear under more than one cell type.
marker_S_dict = {
    'Glia': ['CRYAB', 'S100B', 'ALDH1A1'],
    'Arterial endo': ['GJA4', 'HEY1', 'HEY2'],        # arteries
    'Venous endo': ['ACKR1', 'VWF', 'ADGRG6'],        # veins
    'Capillary endo': ['RGCC', 'VWA1', 'CA4'],        # capillaries
    'Lymphatic endo': ['TFF3', 'LYVE1', 'CCL21'],     # lymphatic vessels
    'Pericytes': ['MYL9', 'ACTA2', 'TINAGL1', 'NOTCH3'],
    'Smooth muscle cells': ['ACTG2', 'CNN1', 'NPNT'],
    'Follicular dendritic cells': ['FDCSP', 'CXCL13', 'CLU'],
    'T reticular cells': ['PTGDS', 'CCL19', 'CCL21'],

    'Immature RSPO3-fibro': ['APOE', 'CTSC', 'CFD'],
    'RSPO3-fibro': ['APOE', 'CTSC', 'CFD'],
    # The embryonic-development paper defines the next population as fibroblast precursors:
    # low expression of mature-fibroblast markers, abundant in the embryo, and expressing
    # HMGA2, which has been reported to take part in embryonic fibroblast formation.
    # The UC and CD papers call it inflammation-associated or activated fibroblasts: its
    # proportion rises in both CD and UC and it expresses chemokine and MMP-family mRNAs.
    'Activated RSPO3-Fibro': ['SOD2', 'CHI3L1', 'HMGA1'],

    'RSPO3+Fibro': ['RSPO3', 'DCN', 'OGN'],
    'Activated RSPO3+Fibro': ['EGR1', 'SOD2', 'HMGA1'],
    'WNT5B+Fibro': ['WNT5B', 'F3', 'POSTN'],
    'Cycling fibro': ['MKI67', 'STMN1'],
}

sc.settings.set_figure_params(dpi=150, figsize = (4, 3), fontsize=18)
# Dot plot of the panel per 'leiden-all-1' cluster, read from adata.raw and scaled per gene
mp = sc.pl.dotplot(
    adata,
    marker_S_dict,
    'leiden-all-1',
    dendrogram=False,
    use_raw=True,
    cmap='Reds',
    standard_scale='var',
    colorbar_title=None,
    return_fig=True,
)
mp.style(grid=True, cmap='Reds').show()  # alternative colormap: RdYlBu_r

In [None]:
print("Performing clustering with a resolution of 1.5")
sc.tl.leiden(adata, resolution=1.5)
adata.obs['leiden-all-1.5'] = adata.obs['leiden']

In [None]:
sc.settings.set_figure_params(dpi=400, figsize = (4, 4), fontsize=7)
sc.pl.umap(adata, color=['leiden-all-1.5'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')
sc.pl.umap(adata, color=['leiden-all-1.5'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False)

In [None]:
import cosg as cosg
import time

# Run COSG grouped by 'leiden-all-1.5' (results go to adata.uns['cosg']) and time it.
# time.clock() was removed in Python 3.8; perf_counter() is the supported timer
# (it measures elapsed wall-clock time).
t0 = time.perf_counter()
cosg.cosg(
    adata,
    key_added='cosg',
    mu=1,
    n_genes_user=50,
    groupby='leiden-all-1.5',
)
runtime_cosg = time.perf_counter() - t0

sc.settings.set_figure_params(dpi=200, figsize = (4, 4), fontsize=20)
sc.pl.rank_genes_groups_dotplot(
    adata,
    groupby='leiden-all-1.5',
    cmap='Spectral_r',
    standard_scale='var',
    n_genes=4,
    key='cosg',
)

In [None]:
adata

In [None]:
# Remove 'leiden-all-1.5' clusters 17, 27 and 28 (noted as doublets in the annotation cell).
# NOTE(review): the cluster ids are tied to one particular clustering result — re-check
# them whenever the clustering is re-run.
keep_mask = ~adata.obs["leiden-all-1.5"].isin(['17','27','28'])
singlet_index = adata.obs.index[keep_mask]
# Copy so that `adata` is a real object, not a view; later cells add obs columns to it.
adata = adata[keep_mask.values, :].copy()
adata

In [None]:
# Marker-gene panel for the stromal compartment, keyed by cell type.
# NOTE(review): 'Immature RSPO3-fibro' and 'RSPO3-fibro' list the same three genes, and
# several genes (CCL21, SOD2, HMGA1) appear under more than one cell type.
marker_S_dict = {
    'Glia': ['CRYAB', 'S100B', 'ALDH1A1'],
    'Arterial endo': ['GJA4', 'HEY1', 'HEY2'],        # arteries
    'Venous endo': ['ACKR1', 'VWF', 'ADGRG6'],        # veins
    'Capillary endo': ['RGCC', 'VWA1', 'CA4'],        # capillaries
    'Lymphatic endo': ['TFF3', 'LYVE1', 'CCL21'],     # lymphatic vessels
    'Pericytes': ['MYL9', 'ACTA2', 'TINAGL1', 'NOTCH3'],
    'Smooth muscle cells': ['ACTG2', 'CNN1', 'NPNT'],
    'Follicular dendritic cells': ['FDCSP', 'CXCL13', 'CLU'],
    'T reticular cells': ['PTGDS', 'CCL19', 'CCL21'],

    'Immature RSPO3-fibro': ['APOE', 'CTSC', 'CFD'],
    'RSPO3-fibro': ['APOE', 'CTSC', 'CFD'],
    # The embryonic-development paper defines the next population as fibroblast precursors:
    # low expression of mature-fibroblast markers, abundant in the embryo, and expressing
    # HMGA2, which has been reported to take part in embryonic fibroblast formation.
    # The UC and CD papers call it inflammation-associated or activated fibroblasts: its
    # proportion rises in both CD and UC and it expresses chemokine and MMP-family mRNAs.
    'Activated RSPO3-Fibro': ['SOD2', 'CHI3L1', 'HMGA1'],

    'RSPO3+Fibro': ['RSPO3', 'DCN', 'OGN'],
    'Activated RSPO3+Fibro': ['EGR1', 'SOD2', 'HMGA1'],
    'WNT5B+Fibro': ['WNT5B', 'F3', 'POSTN'],
    'Cycling fibro': ['MKI67', 'STMN1'],
}

sc.settings.set_figure_params(dpi=150, figsize = (4, 3), fontsize=18)
# Dot plot of the panel per 'leiden-all-1.5' cluster, read from adata.raw and scaled per gene
mp = sc.pl.dotplot(
    adata,
    marker_S_dict,
    'leiden-all-1.5',
    dendrogram=False,
    use_raw=True,
    cmap='Reds',
    standard_scale='var',
    colorbar_title=None,
    return_fig=True,
)
mp.style(grid=True, cmap='Reds').show()  # alternative colormap: RdYlBu_r

In [None]:
pd.DataFrame(adata.uns['cosg']['names'])['11'].head(30)

In [None]:
## cell annotation
# Note: clusters 17, 27 and 28 are doublets of epithelial cells with fibroblasts and
# endothelial cells.

leiden_15 = adata.obs["leiden-all-1.5"]


def _cells_in_clusters(cluster_ids):
    """Return the obs index of the cells whose 'leiden-all-1.5' label is in `cluster_ids`."""
    return adata.obs.index[leiden_15.isin(cluster_ids)]


# Glia
S01_index = _cells_in_clusters(['18'])

# Endothelium: arterial, venous, capillary, lymphatic
S02_index = _cells_in_clusters(['9'])
S03_index = _cells_in_clusters(['1','4'])
S04_index = _cells_in_clusters(['7','13','22'])
S05_index = _cells_in_clusters(['24'])

# Fibroblast
S06_index = _cells_in_clusters(['0','2','20','26'])
# MKI67+ cycling fibroblast
S07_index = _cells_in_clusters(['25'])
# PI16+ adventitial fibroblast
S08_index = _cells_in_clusters(['5'])
# PTGDS+ adventitial fibroblast (author's note: expresses factors promoting mast-cell degranulation)
S09_index = _cells_in_clusters(['23'])
# PIEZO2+ fibroblast
S10_index = _cells_in_clusters(['6','12'])
# OXTR+ fibroblast (expresses the oxytocin receptor)
S11_index = _cells_in_clusters(['16'])
# Activated fibroblast: expresses CXCL1, CXCL3, CXCL5, CXCL8, CCL20, MMP1, MMP3;
# possibly related to interferon stimulation
S12_index = _cells_in_clusters(['21'])

# Pericytes
S13_index = _cells_in_clusters(['3','15','19'])
# Activated pericytes: express CCL2, CCL8, CCL13, CCL19, CCL21, CXCL9
S14_index = _cells_in_clusters(['11'])
# Smooth muscle cells
S15_index = _cells_in_clusters(['8','10','14'])

In [None]:
# Every cell starts as glia; the cell types below then overwrite their own cells.
# NOTE(review): any cell not covered by S02..S15 therefore stays 'C06-S01-Glia'.
adata.obs['annotation'] = 'C06-S01-Glia'

# `.at` is a scalar accessor; assigning to a whole set of row labels is what `.loc` is for.
annotation_by_index = [
    (S02_index, 'C06-S02-Arterial endo'),
    (S03_index, 'C06-S03-Venous endo'),
    (S04_index, 'C06-S04-Capillary endo'),
    (S05_index, 'C06-S05-Lymphatic endo'),

    (S06_index, 'C06-S06-Fibroblast'),
    (S07_index, 'C06-S07-Cycling fibroblast'),
    (S08_index, 'C06-S08-Adventitial fibroblast'),
    (S09_index, 'C06-S09-PTGDS+Adventitial fibroblast'),
    (S10_index, 'C06-S10-PIEZO2+fibroblast'),
    (S11_index, 'C06-S11-OXTR+fibroblast'),
    (S12_index, 'C06-S12-Activated fibroblast'),

    (S13_index, 'C06-S13-Pericytes'),
    (S14_index, 'C06-S14-Activated Pericytes'),
    (S15_index, 'C06-S15-Smooth muscle cells'),
]
for cell_index, label in annotation_by_index:
    adata.obs.loc[cell_index, 'annotation'] = label

In [None]:
sc.settings.set_figure_params(dpi=400, figsize = (4, 4), fontsize=7)
sc.pl.umap(adata, color=['annotation'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')

In [None]:
import cosg as cosg
import time

# Run COSG grouped by 'annotation' (results go to adata.uns['cosg']) and time it.
# time.clock() was removed in Python 3.8; perf_counter() is the supported timer
# (it measures elapsed wall-clock time).
t0 = time.perf_counter()
cosg.cosg(
    adata,
    key_added='cosg',
    mu=1,
    n_genes_user=50,
    groupby='annotation',
)
runtime_cosg = time.perf_counter() - t0

sc.settings.set_figure_params(dpi=200, figsize = (4, 4), fontsize=20)
sc.pl.rank_genes_groups_dotplot(
    adata,
    groupby='annotation',
    cmap='Spectral_r',
    standard_scale='var',
    n_genes=4,
    key='cosg',
)

In [None]:
adata.write('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/Stromal.h5ad')

# Visualization

In [None]:
adata = sc.read('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/Stromal.h5ad')
adata

In [None]:
sc.tl.rank_genes_groups(adata, 
                        groupby = 'annotation',
                        method='wilcoxon')

In [None]:
adata.obs['annotation'].cat.categories

In [None]:
file_path = '/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/csv/'

In [None]:
# Write one CSV of the rank_genes_groups results per annotation category.
# Create the output directory first so the export does not fail when it is missing.
os.makedirs(file_path, exist_ok=True)
for group_name in list(adata.obs['annotation'].cat.categories):
    result_DEG = sc.get.rank_genes_groups_df(adata, group=group_name)
    result_DEG.to_csv(os.path.join(file_path, group_name + '.csv'))

In [None]:
def progress_pca_bbknn_umap_tsne(adata, n_pcs=20, batch_key='batch_name'):
    """Build a BBKNN neighbour graph and compute a UMAP from it.

    Despite the name, this function runs neither PCA nor t-SNE: only
    ``sc.external.pp.bbknn`` and ``sc.tl.umap`` are called. Per the original
    author's note, BBKNN expects data that has already been normalised and
    PCA-reduced, and it is used here in place of ``sc.pp.neighbors`` to deal
    with batch effects.

    Parameters
    ----------
    adata
        Object handed to both scanpy calls; their return values are not used.
    n_pcs
        Forwarded to ``bbknn`` as ``n_pcs``.
    batch_key
        Forwarded to ``bbknn`` as ``batch_key``.

    Returns
    -------
    The ``adata`` object that was passed in.
    """
    bbknn_options = dict(batch_key=batch_key, n_pcs=n_pcs, neighbors_within_batch=3)
    sc.external.pp.bbknn(adata, **bbknn_options)

    # UMAP on the graph produced above
    sc.tl.umap(adata)

    return adata

adata = progress_pca_bbknn_umap_tsne(adata)

In [None]:
print(adata.obs['tissue'].cat.categories)

# Fixed tissue palette, bound by category name instead of by position so that a change
# in the category order (or an extra category) cannot attach a colour to the wrong tissue.
tissue_palette = {
    'inferior turbinate': '#ff0000',
    'middle turbinate': '#0077b2',
    'polyp': '#60b55c',
}

old_colors = np.array(adata.uns['tissue_colors'])
new_colors = old_colors.copy()
for position, tissue in enumerate(adata.obs['tissue'].cat.categories):
    # Categories without an entry in the palette keep their current colour.
    if tissue in tissue_palette:
        new_colors[position] = tissue_palette[tissue]

adata.uns['tissue_colors'] = new_colors

sc.settings.set_figure_params(dpi=400, figsize = (4, 4), fontsize=7)
sc.pl.umap(adata, color=['tissue'], frameon=False, title='')

In [None]:
sc.settings.set_figure_params(dpi=400, figsize = (4, 4), fontsize=7)
sc.pl.umap(adata, color=['annotation'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')

In [None]:
sc.pl.umap(adata, color=['annotation'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False)

In [None]:
pd.DataFrame(adata.uns['cosg']['names'])['C06-S11-OXTR+fibroblast'].head(30)

In [None]:
sc.settings.set_figure_params(dpi=300, figsize = (4, 4), fontsize=12)

from matplotlib import cm, colors
import colorcet as cc

# Expression colormap: five light greys (Greys_r sampled between 0.8 and 0.9) at the low
# end, followed by colorcet's CET_L20 sampled from 0.2 to 1.0 (i.e. its upper 80 %).
cet_l20 = colors.LinearSegmentedColormap.from_list('my_colormap', cc.CET_L20)
colors2 = cet_l20(np.linspace(0.2, 1, 128))
colors3 = plt.cm.Greys_r(np.linspace(0.8, 0.9, 5))
colorsComb = np.vstack([colors3, colors2])
mymap = colors.LinearSegmentedColormap.from_list('my_colormap', colorsComb)

sc.pl.umap(
    adata,
    color=['OXTR','OXT','COL1A2'],
    frameon=False,
    color_map=mymap,
)

In [None]:
sc.pl.umap(adata, color=['OXTR','COL10A1','SCG2','ALOX5AP','FAP','ALOX5','VIM','ACTA1'], add_outline=False, frameon=False)

In [None]:
sc.settings.set_figure_params(dpi=300, figsize = (4, 4), fontsize=12)

from matplotlib import cm, colors
import colorcet as cc

# Expression colormap: five light greys (Greys_r sampled between 0.8 and 0.9) at the low
# end, followed by colorcet's CET_L20 sampled from 0.2 to 1.0 (i.e. its upper 80 %).
cet_l20 = colors.LinearSegmentedColormap.from_list('my_colormap', cc.CET_L20)
colors2 = cet_l20(np.linspace(0.2, 1, 128))
colors3 = plt.cm.Greys_r(np.linspace(0.8, 0.9, 5))
colorsComb = np.vstack([colors3, colors2])
mymap = colors.LinearSegmentedColormap.from_list('my_colormap', colorsComb)

sc.pl.umap(
    adata,
    color=['TWIST2','SERPINB2','KISS1'],
    frameon=False,
    color_map=mymap,
)

In [None]:
sc.tl.rank_genes_groups(adata, 'tissue', groups=['polyp'], reference='middle turbinate', method='wilcoxon')
sc.pl.rank_genes_groups(adata, groups=['polyp'], n_genes=20)

In [None]:
# DE table for the 'polyp' group, re-indexed by gene name so genes can be looked up directly
dedf = sc.get.rank_genes_groups_df(adata, group="polyp").set_index('names')
print(dedf)
dedf.loc[['CCL26','CCL24','CCL11','CCL13'], :]

In [None]:
sc.tl.dendrogram(adata, groupby='annotation')

In [None]:
adata.obs['compartment-stromal'] = 'others'
# fibroblast
fibroblast_annotations = [
    'C06-S06-Fibroblast', 'C06-S07-Cycling fibroblast', 'C06-S08-Adventitial fibroblast',
    'C06-S09-PTGDS+Adventitial fibroblast', 'C06-S10-PIEZO2+fibroblast', 'C06-S11-OXTR+fibroblast',
    'C06-S12-Activated fibroblast',
]
fib_index = adata.obs.index[adata.obs["annotation"].isin(fibroblast_annotations)]
# `.at` is a scalar accessor; assigning to a whole set of row labels is what `.loc` is for.
adata.obs.loc[fib_index, 'compartment-stromal'] = 'Fibroblast'

In [None]:
# NOTE(review): this rebinds `adata` to the fibroblast-only subset, so every later cell
# (including the pericyte/SMC selection and the `adata.write(...)` back to Stromal.h5ad)
# no longer sees the other stromal cells — confirm this is intended.
# `.copy()` makes the subset a real object; later cells write results into it.
adata = adata[fib_index, :].copy()
adata

In [None]:
adata.obs['tissue'].cat.categories

In [None]:
sc.tl.rank_genes_groups(adata, 'tissue', groups=['polyp'], reference='inferior turbinate', method='wilcoxon')
sc.pl.rank_genes_groups(adata, groups=['polyp'], n_genes=20)

In [None]:
# Wilcoxon test on 'tissue': polyp against the inferior turbinate reference
sc.tl.rank_genes_groups(
    adata,
    'tissue',
    groups=['polyp'],
    reference='inferior turbinate',
    method='wilcoxon',
)
# Result table re-indexed by gene name so genes can be looked up directly
dedf = sc.get.rank_genes_groups_df(adata, group="polyp").set_index('names')
print(dedf)
dedf.loc[['TWIST2','SERPINB2','KISS1'], :]

In [None]:
# Wilcoxon test on 'tissue': polyp against the middle turbinate reference
sc.tl.rank_genes_groups(
    adata,
    'tissue',
    groups=['polyp'],
    reference='middle turbinate',
    method='wilcoxon',
)
# Result table re-indexed by gene name so genes can be looked up directly
dedf = sc.get.rank_genes_groups_df(adata, group="polyp").set_index('names')
print(dedf)
dedf.loc[['TWIST2','SERPINB2','KISS1'], :]

In [None]:
help(sc.pl.dotplot)

In [None]:
sc.pl.dotplot(adata, ['TWIST2','ALOX5AP','ALOX5'], groupby='annotation',standard_scale='var')

In [None]:
import cosg as cosg
import time

# Run COSG grouped by 'compartment-stromal' (results go to adata.uns['cosg']) and time it.
# NOTE(review): if `adata` has already been reduced to `fib_index`, this column holds the
# single value 'Fibroblast' — confirm that a one-group comparison is what is wanted.
# time.clock() was removed in Python 3.8; perf_counter() is the supported timer
# (it measures elapsed wall-clock time).
t0 = time.perf_counter()
cosg.cosg(
    adata,
    key_added='cosg',
    mu=1,
    n_genes_user=50,
    groupby='compartment-stromal',
)
runtime_cosg = time.perf_counter() - t0

sc.settings.set_figure_params(dpi=200, figsize = (4, 4), fontsize=20)
sc.pl.rank_genes_groups_dotplot(
    adata,
    groupby='compartment-stromal',
    cmap='Spectral_r',
    standard_scale='var',
    n_genes=15,
    key='cosg',
)

In [None]:
sc.pl.umap(adata, color=['DCN','APOD','COL1A2','FBLN1'], add_outline=False, frameon=False)
sc.pl.umap(adata, color=['LUM','SFRP2','SFRP1','ITGBL1'], add_outline=False, frameon=False)
sc.pl.umap(adata, color=['IGF2','NBL1','MRC2','ISLR'], add_outline=False, frameon=False)

In [None]:
import cosg as cosg
import time

# Run COSG grouped by 'annotation' (results go to adata.uns['cosg']) and time it.
# time.clock() was removed in Python 3.8; perf_counter() is the supported timer
# (it measures elapsed wall-clock time).
t0 = time.perf_counter()
cosg.cosg(
    adata,
    key_added='cosg',
    mu=1,
    n_genes_user=50,
    groupby='annotation',
)
runtime_cosg = time.perf_counter() - t0

sc.settings.set_figure_params(dpi=200, figsize = (4, 4), fontsize=20)
sc.pl.rank_genes_groups_dotplot(
    adata,
    groupby='annotation',
    cmap='Spectral_r',
    standard_scale='var',
    n_genes=4,
    key='cosg',
)

In [None]:
adata.obs['annotation'].cat.categories

In [None]:
pd.DataFrame(adata.uns['cosg']['names'])['C06-S11-OXTR+fibroblast'].head(30)

In [None]:
pd.DataFrame(adata.uns['cosg']['names'])['C06-S10-PIEZO2+fibroblast'].head(30)

In [None]:
pd.DataFrame(adata.uns['cosg']['names'])['C06-S09-PTGDS+Adventitial fibroblast'].head(30)

In [None]:
pd.DataFrame(adata.uns['cosg']['names'])['C06-S12-Activated fibroblast'].head(30)

In [None]:
pd.DataFrame(adata.uns['cosg']['names'])['C06-S14-Activated Pericytes'].head(30)

In [None]:
import cosg as cosg
import time

# Run COSG grouped by 'annotation' (results go to adata.uns['cosg']) and time it;
# this time the dot plot shows 15 genes per group.
# time.clock() was removed in Python 3.8; perf_counter() is the supported timer
# (it measures elapsed wall-clock time).
t0 = time.perf_counter()
cosg.cosg(
    adata,
    key_added='cosg',
    mu=1,
    n_genes_user=50,
    groupby='annotation',
)
runtime_cosg = time.perf_counter() - t0

sc.settings.set_figure_params(dpi=200, figsize = (4, 4), fontsize=20)
sc.pl.rank_genes_groups_dotplot(
    adata,
    groupby='annotation',
    cmap='Spectral_r',
    standard_scale='var',
    n_genes=15,
    key='cosg',
)

In [None]:
tf_GENES = '/mnt/data/project/scenic/auxilliaries/lambert2018_c.txt'
amps_pd = pd.read_table(tf_GENES)
len(amps_pd)

In [None]:
# Keep only the listed genes ('Gene_name' column) that are present in adata.raw,
# preserving the order of the list.
# NOTE(review): the original comment here was just "141" — presumably a count from an
# earlier run; confirm against the current output.
raw_gene_names = adata.raw.var_names
amps_list = [gene for gene in list(amps_pd['Gene_name']) if gene in raw_gene_names]
len(amps_list)

In [None]:
adata_c = sc.AnnData(X=adata.raw.X, var=adata.raw.var, obs = adata.obs)

In [None]:
# Restrict to the genes in `amps_list`; copy so that `adata_c` is a real object rather
# than a view, because COSG stores its results on it afterwards.
adata_c = adata_c[:, amps_list].copy()
adata_c

In [None]:
## TFs

import cosg as cosg
import time

# Run COSG on the gene-restricted object `adata_c`, grouped by 'annotation'
# (results go to adata_c.uns['cosg']), and time it.
# time.clock() was removed in Python 3.8; perf_counter() is the supported timer
# (it measures elapsed wall-clock time).
t0 = time.perf_counter()
cosg.cosg(
    adata_c,
    key_added='cosg',
    mu=1,
    n_genes_user=50,
    groupby='annotation',
)
runtime_cosg = time.perf_counter() - t0

sc.settings.set_figure_params(dpi=200, figsize = (4, 4), fontsize=20)
sc.pl.rank_genes_groups_dotplot(
    adata_c,
    groupby='annotation',
    cmap='Spectral_r',
    standard_scale='var',
    n_genes=4,
    key='cosg',
)

In [None]:
# Cell counts per tissue (rows) and annotation (columns); margins=True adds the "All" totals
Groups_tab_1 = pd.crosstab(
    index=adata.obs['tissue'],
    columns=adata.obs['annotation'],
    margins=True,
)
# Divide every row by its "All" total to get the fraction of each annotation within a tissue
MyTab_1 = Groups_tab_1.div(Groups_tab_1["All"], axis=0)
# Drop the margin column and the margin row
MyTab2_1 = MyTab_1.drop(columns="All").drop(index="All")
MyTab2_1.T

In [None]:
# Two bar heights to compare.
# NOTE(review): these values are hard-coded — presumably copied from the fraction table
# above; confirm they still match the current data.
height = [0.000950, 0.066718]
bars = (' ', ' ')
x_pos = np.arange(len(bars))

fig, ax = plt.subplots(figsize=(2.5, 5))
# One colour per bar, black outline
ax.bar(x_pos, height, color=['green', 'red'], edgecolor='#000000')

# Blank x tick labels; fixed y ticks
plt.xticks(x_pos, bars, fontsize = 30)
plt.yticks([0.0, 0.02, 0.04, 0.06, 0.08 ], fontsize = 20)

ax.grid(False)

print(ax.axis())
#ax.axis([-0.54, 2.5400000000000005, 0.0, 1])

# Hide the top and right frame lines
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)

plt.show()

In [None]:
# Put the tissues (rows) in a fixed order.
order = ['inferior turbinate', 'middle turbinate', 'polyp']
MyTab2_1 = MyTab2_1.loc[order]
#categories = IMM_group[::-1]

MyTab2_1.columns = pd.CategoricalIndex(MyTab2_1.columns.values)

# Sort the columns (axis=1) by the new categorical ordering
MyTab2_1 = MyTab2_1.sort_index(axis=1)

# One stacked bar per tissue, one segment per annotation
ax = MyTab2_1.plot.bar(
            figsize=(3.7,5),
            stacked=True,
            edgecolor = '#000000',
            linewidth=0.4,
            width=0.8,
            fontsize=10,
             )

ax.set_title("", fontsize=12)
ax.set_ylabel("Fraction of cells", fontsize=12)
ax.set_xlabel("", fontsize=12)
# The original `plt.ylim=1.0` did not set a limit: it replaced the pyplot function
# `plt.ylim` with the float 1.0 for the rest of the session. Set the limit on the axes.
ax.set_ylim(0, 1.0)

# Reverse the legend so its entries follow the stacking order from top to bottom
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles[::-1], labels[::-1], loc='center left', bbox_to_anchor=(1, 0.6), fontsize=8)
ax.grid(False)

plt.show()

In [None]:
# NOTE(review): an earlier cell rebinds `adata` to the fibroblast-only subset
# (`adata = adata[fib_index, :]`), so when the notebook is run top to bottom this
# overwrites Stromal.h5ad with that subset — confirm this is intended.
adata.write('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/Stromal.h5ad')

**Fibroblast**

In [None]:
# Select the fibroblast annotations into their own object.
fibroblast_labels = [
    'C06-S06-Fibroblast', 'C06-S07-Cycling fibroblast', 'C06-S08-Adventitial fibroblast',
    'C06-S09-PTGDS+Adventitial fibroblast', 'C06-S10-PIEZO2+fibroblast',
    'C06-S11-OXTR+fibroblast', 'C06-S12-Activated fibroblast',
]
adata_FB_index = adata.obs.index[adata.obs["annotation"].isin(fibroblast_labels)]
# Copy so that `adata_FB` is a real object rather than a view; the following cells
# store dendrogram / COSG / rank_genes_groups results on it.
adata_FB = adata[adata_FB_index, :].copy()
adata_FB

In [None]:
sc.tl.dendrogram(adata_FB, groupby='annotation')

import cosg as cosg
import time

# Run COSG on the fibroblast object, grouped by 'annotation'
# (results go to adata_FB.uns['cosg']), and time it.
# time.clock() was removed in Python 3.8; perf_counter() is the supported timer
# (it measures elapsed wall-clock time).
t0 = time.perf_counter()
cosg.cosg(
    adata_FB,
    key_added='cosg',
    mu=1,
    n_genes_user=50,
    groupby='annotation',
)
runtime_cosg = time.perf_counter() - t0

sc.settings.set_figure_params(dpi=200, figsize = (4, 4), fontsize=15)
sc.pl.rank_genes_groups_dotplot(
    adata_FB,
    groupby='annotation',
    cmap='Spectral_r',
    standard_scale='var',
    n_genes=10,
    key='cosg',
)

In [None]:
sc.tl.rank_genes_groups(adata_FB, 
                        groupby = 'annotation',
                        method='wilcoxon')

In [None]:
result_DEG = sc.get.rank_genes_groups_df(adata_FB,group="C06-S11-OXTR+fibroblast")
result_DEG

In [None]:
# Keep genes with log fold change >= 1 and adjusted p-value <= 0.01.
# Columns are addressed by name ('logfoldchanges', 'pvals_adj', 'names') instead of by
# position, so the filter does not depend on the column order of the result table.
is_hit = (result_DEG['logfoldchanges'] >= 1) & (result_DEG['pvals_adj'] <= 1e-2)
gene_list = result_DEG.loc[is_hit, 'names'].tolist()
len(gene_list)

In [None]:
result_DEG_1 = result_DEG.set_index('names').loc[gene_list,:]
result_DEG_1

In [None]:
result_DEG_1.to_csv('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/csv/oxtr.csv')

In [None]:
# Cell counts per tissue (rows) and annotation (columns) within the fibroblast object;
# margins=True adds the "All" totals used for the normalisation below.
Groups_tab_1 = pd.crosstab(index=adata_FB.obs['tissue'],
                        columns=adata_FB.obs['annotation'], margins=True)
# Row-wise fractions, then drop the margin column and row
MyTab_1= Groups_tab_1.div(Groups_tab_1["All"], axis=0)
MyTab2_1 = MyTab_1.drop(columns="All")
MyTab2_1 = MyTab2_1.drop(index="All")

# Put the tissues (rows) in a fixed order.
order = ['inferior turbinate', 'middle turbinate', 'polyp']
MyTab2_1 = MyTab2_1.loc[order]
#categories = IMM_group[::-1]

MyTab2_1.columns = pd.CategoricalIndex(MyTab2_1.columns.values)

# Sort the columns (axis=1) by the new categorical ordering
MyTab2_1 = MyTab2_1.sort_index(axis=1)

# One stacked bar per tissue, one segment per annotation
ax = MyTab2_1.plot.bar(
            figsize=(3.7,5),
            stacked=True,
            edgecolor = '#000000',
            linewidth=0.4,
            width=0.8,
            fontsize=10,
             )

ax.set_title("", fontsize=12)
ax.set_ylabel("Fraction of cells", fontsize=12)
ax.set_xlabel("", fontsize=12)
# The original `plt.ylim=1.0` did not set a limit: it replaced the pyplot function
# `plt.ylim` with the float 1.0 for the rest of the session. Set the limit on the axes.
ax.set_ylim(0, 1.0)

# Reverse the legend so its entries follow the stacking order from top to bottom
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles[::-1], labels[::-1], loc='center left', bbox_to_anchor=(1, 0.6), fontsize=8)
ax.grid(False)

plt.show()

**Pericytes-SMC**

In [None]:
# Subset to the pericyte and smooth-muscle annotations.
adata_PS_index = adata.obs[
    adata.obs["annotation"].isin([
        'C06-S13-Pericytes',
        'C06-S14-Activated Pericytes',
        'C06-S15-Smooth muscle cells',
    ])
].index
adata_PS = adata[adata_PS_index, :]
adata_PS

In [None]:
# Stacked bar chart: for each tissue, the fraction of adata_PS cells in each annotation.

# Cell counts per (tissue, annotation); margins=True appends the "All" totals.
Groups_tab_1 = pd.crosstab(index=adata_PS.obs['tissue'],
                           columns=adata_PS.obs['annotation'], margins=True)
# Divide each row by its "All" total so that every tissue's fractions sum to 1.
MyTab_1 = Groups_tab_1.div(Groups_tab_1["All"], axis=0)
# Drop the margin row/column; only real tissues and annotations remain.
MyTab2_1 = MyTab_1.drop(columns="All").drop(index="All")

# Fix the order of the bars (rows are tissues).
order = ['inferior turbinate', 'middle turbinate', 'polyp']
MyTab2_1 = MyTab2_1.loc[order]

# Sort the annotation columns by their categorical ordering.
MyTab2_1.columns = pd.CategoricalIndex(MyTab2_1.columns.values)
MyTab2_1 = MyTab2_1.sort_index(axis=1)

ax = MyTab2_1.plot.bar(
    figsize=(3.7, 5),
    stacked=True,
    edgecolor='#000000',
    linewidth=0.4,
    width=0.8,
    fontsize=10,
)

ax.set_title("", fontsize=12)
ax.set_ylabel("Fraction of cells", fontsize=12)
ax.set_xlabel("", fontsize=12)
# The original `plt.ylim=1.0` rebound the pyplot function to a float instead of
# setting the limit; set the limit on the Axes directly.
ax.set_ylim(0, 1.0)

# Reverse the legend entries so they read top-to-bottom like the stacked segments.
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles[::-1], labels[::-1], loc='center left', bbox_to_anchor=(1, 0.6), fontsize=8)
ax.grid(False)

plt.show()

**Endothelial cells**

In [None]:
# Subset to the endothelial annotations.
adata_EC_index = adata.obs[
    adata.obs["annotation"].isin([
        'C06-S02-Arterial endo',
        'C06-S03-Venous endo',
        'C06-S04-Capillary endo',
        'C06-S05-Lymphatic endo',
    ])
].index
adata_EC = adata[adata_EC_index, :]
adata_EC

In [None]:
# Stacked bar chart: for each tissue, the fraction of adata_EC cells in each annotation.

# Cell counts per (tissue, annotation); margins=True appends the "All" totals.
Groups_tab_1 = pd.crosstab(index=adata_EC.obs['tissue'],
                           columns=adata_EC.obs['annotation'], margins=True)
# Divide each row by its "All" total so that every tissue's fractions sum to 1.
MyTab_1 = Groups_tab_1.div(Groups_tab_1["All"], axis=0)
# Drop the margin row/column; only real tissues and annotations remain.
MyTab2_1 = MyTab_1.drop(columns="All").drop(index="All")

# Fix the order of the bars (rows are tissues).
order = ['inferior turbinate', 'middle turbinate', 'polyp']
MyTab2_1 = MyTab2_1.loc[order]

# Sort the annotation columns by their categorical ordering.
MyTab2_1.columns = pd.CategoricalIndex(MyTab2_1.columns.values)
MyTab2_1 = MyTab2_1.sort_index(axis=1)

ax = MyTab2_1.plot.bar(
    figsize=(3.7, 5),
    stacked=True,
    edgecolor='#000000',
    linewidth=0.4,
    width=0.8,
    fontsize=10,
)

ax.set_title("", fontsize=12)
ax.set_ylabel("Fraction of cells", fontsize=12)
ax.set_xlabel("", fontsize=12)
# The original `plt.ylim=1.0` rebound the pyplot function to a float instead of
# setting the limit; set the limit on the Axes directly.
ax.set_ylim(0, 1.0)

# Reverse the legend entries so they read top-to-bottom like the stacked segments.
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles[::-1], labels[::-1], loc='center left', bbox_to_anchor=(1, 0.6), fontsize=8)
ax.grid(False)

plt.show()

# 受配体

## 趋化因子

In [None]:
# Chemokine receptors: 42 candidate gene symbols, grouped by family.
R_CC = 'CCR1,CCR2,CCR3,CCR4,CCR5,CCR6,CCR7,CCR8,CCR9,CCR10'.split(',')
R_CXC = 'CXCR1,CXCR2,CXCR3,CXCR4,CXCR5,CXCR6'.split(',')
R_C = ['XCR1']
R_CX3C = ['CX3CR1']
R_Atypical = 'ACKR1,ACKR2,ACKR3,ACKR4,CCRL2'.split(',')
R_other = 'C5AR1,CMKLR1,FPR1,LRP6,GPR35,HRH4,DPP4,IDE,SLC7A1,CNR2,GPRC5D,VSIR,GPR101,FFAR2,GPR42,PGRMC2,GPR75,FCGR2A,KIR2DL3'.split(',')

# Pool all families, then keep only the symbols present in adata.raw.
R_Chemotaxis = [
    gene
    for gene in R_CC + R_CXC + R_C + R_CX3C + R_Atypical + R_other
    if gene in adata.raw.var_names
]

In [None]:
# Chemokine ligands: 53 candidate gene symbols, grouped by family.
L_CXC = 'CXCL1,CXCL2,CXCL3,CXCL4,CXCL5,CXCL6,CXCL7,CXCL8,CXCL9,CXCL10,CXCL11,CXCL12,CXCL13,CXCL14,CXCL15,CXCL16,CXCL17'.split(',')
L_C = 'XCL1,XCL2'.split(',')
# The original string ended with a trailing comma, which made split(',') append
# an empty gene name '' to this list.
L_CC = 'CCL1,CCL2,CCL3,CCL4,CCL5,CCL6,CCL7,CCL8,CCL9,CCL10,CCL11,CCL12,CCL13,CCL14,CCL15,CCL16,CCL17,CCL18,CCL19,CCL20,CCL21,CCL22,CCL23,CCL24,CCL25,CCL26,CCL27,CCL28'.split(',')
L_CX3C = ['CX3CL1']
L_other = 'C5,CKLF,CCL3L3,PF4,SPP1'.split(',')

# Pool all families, then keep only the symbols present in adata.raw.
L_Chemotaxis = L_CXC+L_C+L_CC+L_CX3C+L_other
L_Chemotaxis = [x for x in L_Chemotaxis if x in adata.raw.var_names]

In [None]:
# Chemokine receptors: dot plot per annotation, genes on the y-axis (swap_axes),
# expression scaled per gene (standard_scale='var'), values read from adata.raw.
mp = sc.pl.dotplot(
    adata, R_Chemotaxis, 'annotation',
    use_raw=True,
    standard_scale='var',
    swap_axes=True,
    dendrogram=False,
    cmap='RdYlBu_r',
    var_group_rotation=45,
    colorbar_title='Scaled expression in var',
    return_fig=True,
)
mp.add_totals().style(grid=True).show()

# Chemokine ligands: same plot settings as above.
mp = sc.pl.dotplot(
    adata, L_Chemotaxis, 'annotation',
    use_raw=True,
    standard_scale='var',
    swap_axes=True,
    dendrogram=False,
    cmap='RdYlBu_r',
    var_group_rotation=45,
    colorbar_title='Scaled expression in var',
    return_fig=True,
)
mp.add_totals().style(grid=True).show()

## 细胞因子

In [None]:
# Cytokine receptors.
# The interleukin and interferon receptor families have no genes listed yet.
# ''.split(',') evaluates to [''] (one empty gene name), so use real empty lists.
IL_R = []
IFN_R = []
# NOTE(review): these two lists are identical to EGF_R and NOTCH_R in the
# growth-factor section; they look like copy-paste placeholders rather than
# TNF / CSF receptor genes -- confirm the intended gene sets.
TNF_R = 'EGFR,NRG1'.split(',')
CSF_R = 'NOTCH1,NOTCH2,NOTCH3,NOTCH4'.split(',')

# Pool all families, then keep only the symbols present in adata.raw.
cytokines_R = IL_R+IFN_R+TNF_R+CSF_R
cytokines_R = [x for x in cytokines_R if x in adata.raw.var_names]

In [None]:
# Cytokine ligands, grouped by family.
IL_L = 'IL1A,IL1B,IL2,IL4,IL5,IL6,IL7,IL10,IL11,IL13,IL15,IL16,IL17A,IL17C,IL17F,IL18,IL19,IL20,IL21,IL22,IL23A,IL24,IL26,IL32,IL33,IL34,IL37'.split(',')
IFN_L = 'IFNE,IFNG,IFNL1'.split(',')
# NOTE(review): these two lists are identical to EGF_L and NOTCH_L in the
# growth-factor section; they look like copy-paste placeholders rather than
# TNF / CSF ligand genes -- confirm the intended gene sets.
TNF_L = 'EGF,NRG1,AREG,BTC,CNTF,COPA,EPGN,GRN,HBEGF,MIF,TGFA,TGFB1,EREG'.split(',')
CSF_L = 'DLL1,DLL3,DLL4,JAG1,JAG2,SCGB3A1,TNF,IL24,WNT4'.split(',')

# Pool all families. IL24 is listed in both IL_L and CSF_L, so drop repeats
# (dict.fromkeys keeps the first occurrence and preserves order) before keeping
# only the symbols present in adata.raw.
cytokines_L = list(dict.fromkeys(IL_L+IFN_L+TNF_L+CSF_L))
cytokines_L = [x for x in cytokines_L if x in adata.raw.var_names]

In [None]:
# Cytokine ligands: dot plot per annotation, genes on the y-axis (swap_axes),
# expression scaled per gene (standard_scale='var'), values read from adata.raw.
mp = sc.pl.dotplot(
    adata, cytokines_L, 'annotation',
    use_raw=True,
    standard_scale='var',
    swap_axes=True,
    dendrogram=False,
    cmap='RdYlBu_r',
    var_group_rotation=45,
    colorbar_title='Scaled expression in var',
    return_fig=True,
)
mp.add_totals().style(grid=True).show()

## 生长因子

In [None]:
# Growth-factor receptors, grouped by pathway.
WNT_R = 'FZD1,FZD2,FZD3,FZD4,FZD5,FZD6,FZD7,FZD8,FZD9,FZD10,CD36,ROR1,ROR2,RYK,LRP1,SMO,ANTXR1,EPHA7,PTPRK,LDLR,KLRG2,NOTCH1'.split(',')
# NOTE(review): BMR1A, BMR1B, ACR2A and AVR2B do not look like standard gene
# symbols (possibly protein entry names for BMPR1A/BMPR1B/ACVR2A/ACVR2B); if
# they are absent from adata.raw they are silently dropped by the filter below
# -- confirm the intended symbols.
BMP_R = 'BMPR1A,BMPR1B,BMR1A,BMR1B,BMPR2,ACR2A,AVR2B,ACVR1,SMO,PTPRK,SLAMF1,PLAUR'.split(',')
EGF_R = 'EGFR,NRG1'.split(',')
NOTCH_R = 'NOTCH1,NOTCH2,NOTCH3,NOTCH4'.split(',')

# Pool all pathways. SMO, PTPRK and NOTCH1 appear in more than one list, so
# drop repeats (dict.fromkeys keeps the first occurrence and preserves order)
# before keeping only the symbols present in adata.raw.
GROW_R = list(dict.fromkeys(WNT_R+BMP_R+EGF_R+NOTCH_R))
GROW_R = [x for x in GROW_R if x in adata.raw.var_names]

In [None]:
# Growth-factor ligands, grouped by pathway.
WNT_L = 'WNT1,WNT2,WNT2B,WNT3,WNT3A,WNT4,WNT5A,WNT5B,WNT7A,WNT7B,WNT11'.split(',')
BMP_L = 'BMP2,BMP3,BMP4,BMP5,BMP6,BMP7,BMP8A,BMP8B'.split(',')
EGF_L = 'EGF,NRG1,AREG,BTC,CNTF,COPA,EPGN,GRN,HBEGF,MIF,TGFA,TGFB1,EREG'.split(',')
NOTCH_L = 'DLL1,DLL3,DLL4,JAG1,JAG2,SCGB3A1,TNF,IL24,WNT4'.split(',')

# Pool all pathways. WNT4 appears in both WNT_L and NOTCH_L, so drop repeats
# (dict.fromkeys keeps the first occurrence and preserves order) before keeping
# only the symbols present in adata.raw.
GROW_L = list(dict.fromkeys(WNT_L+BMP_L+EGF_L+NOTCH_L))
GROW_L = [x for x in GROW_L if x in adata.raw.var_names]

In [None]:
# Growth-factor receptors: dot plot per annotation, genes on the y-axis (swap_axes),
# expression scaled per gene (standard_scale='var'), values read from adata.raw.
mp = sc.pl.dotplot(
    adata, GROW_R, 'annotation',
    use_raw=True,
    standard_scale='var',
    swap_axes=True,
    dendrogram=False,
    cmap='RdYlBu_r',
    var_group_rotation=45,
    colorbar_title='Scaled expression in var',
    return_fig=True,
)
mp.add_totals().style(grid=True).show()

# Growth-factor ligands: same plot settings as above.
mp = sc.pl.dotplot(
    adata, GROW_L, 'annotation',
    use_raw=True,
    standard_scale='var',
    swap_axes=True,
    dendrogram=False,
    cmap='RdYlBu_r',
    var_group_rotation=45,
    colorbar_title='Scaled expression in var',
    return_fig=True,
)
mp.add_totals().style(grid=True).show()