In [None]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
import scanpy as sc
import anndata
import bbknn
import os
from scipy import sparse
import matplotlib.pyplot as plt
# from scanpy_base_moudle_update2 import *
# import scrublet as scr
import datetime
import harmonypy as hm

# Verbose scanpy logging for the whole notebook.
sc.settings.verbosity = 3
#sc.logging.print_versions()
# Default figure resolution and style.
sc.settings.set_figure_params(dpi=150, figsize=(4, 3), fontsize=12)

import matplotlib.font_manager

# Print the font names matplotlib knows about, to check that the family
# selected below is available. font_manager.get_fontconfig_fonts() was
# deprecated in Matplotlib 3.5 and is gone from newer releases, so the font
# manager's own registry is read instead.
flist = [font.fname for font in matplotlib.font_manager.fontManager.ttflist]
names = [font.name for font in matplotlib.font_manager.fontManager.ttflist]
print(names)

params = {
    #'font.style':'italic',
    'font.weight': 'normal',    # or 'bold'
}
plt.rcParams.update(params)

plt.rcParams['font.family'] = 'Arial'

In [None]:
# Load the AnnData object stored in EPI.h5ad and display its summary.
adata = sc.read('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/EPI.h5ad')
adata

# Annotation

## BBKNN

In [None]:
def progress_pca_bbknn_umap_tsne(adata, n_pcs=20, batch_key='batch_name', neighbors_within_batch=3):
    """Build a batch-balanced neighbour graph with BBKNN, then compute a UMAP.

    Despite the name, no PCA or t-SNE is run here. BBKNN needs data that has
    already been normalised and PCA-reduced, so those steps must have been
    done before the call. BBKNN is used in place of ``sc.pp.neighbors``.

    Parameters
    ----------
    adata
        AnnData object to process; it is modified and returned.
    n_pcs
        Number of principal components passed to BBKNN.
    batch_key
        Column of ``adata.obs`` that identifies the batch of each cell.
    neighbors_within_batch
        Number of neighbours BBKNN picks per batch (previously hard-coded to 3).

    Returns
    -------
    The ``adata`` object that was passed in.
    """
    # Batch-effect handling: BBKNN replaces the usual neighbours step.
    sc.external.pp.bbknn(
        adata,
        batch_key=batch_key,
        n_pcs=n_pcs,
        neighbors_within_batch=neighbors_within_batch,
    )

    # UMAP embedding computed from the BBKNN graph.
    sc.tl.umap(adata)

    return adata

adata = progress_pca_bbknn_umap_tsne(adata)

In [None]:
# Show the tissue categories so the palette entries can be matched to them by position.
print(adata.obs['tissue'].cat.categories)

old_colors = np.array(adata.uns['tissue_colors'])
# Independent copy: editing new_colors must not silently rewrite old_colors
# (a plain assignment would make both names point at the same array).
new_colors = old_colors.copy()
new_colors

In [None]:
# Overwrite the first three palette entries by position:
#   0 -> inferior turbinate, 1 -> middle turbinate, 2 -> polyp
new_colors[0] = '#ff0000'
new_colors[1] = '#0077b2'
new_colors[2] = '#60b55c'

# Store the edited palette so subsequent 'tissue' plots pick it up.
adata.uns['tissue_colors'] = new_colors

In [None]:
# High-resolution, small-font figure settings, then a UMAP coloured by tissue with no frame or title.
sc.settings.set_figure_params(dpi=400, figsize = (4, 4), fontsize=7)
sc.pl.umap(adata, color=['tissue'], frameon=False, title='')

In [None]:
sc.pl.umap(adata, color=['majority_voting'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')

In [None]:
def add_leiden(adata):
    """Run Leiden clustering at resolutions 0.5, 1 and 2 and keep every labelling.

    After each run the labels found in ``adata.obs['leiden']`` are stored
    under ``adata.obs['leiden-all-<resolution>']``. When the function returns,
    ``adata.obs['leiden']`` holds the labels of the last run (resolution 2).

    Returns the ``adata`` object that was passed in.
    """
    for resolution in (0.5, 1, 2):
        print(f"Performing clustering with a resolution of {resolution}")
        sc.tl.leiden(adata, resolution=resolution)
        adata.obs[f'leiden-all-{resolution}'] = adata.obs['leiden']
    return adata

adata = add_leiden(adata)

In [None]:
sc.pl.umap(adata, color=['leiden-all-1'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')

In [None]:
sc.pl.umap(adata, color=['leiden-all-2'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')

In [None]:
import cosg as cosg
import time

# Rank marker genes for each 'leiden-all-2' cluster with COSG and time the call.
# time.clock() was removed in Python 3.8; time.perf_counter() is the
# documented replacement for measuring elapsed time.
t0 = time.perf_counter()
cosg.cosg(adata,
          key_added='cosg',
          mu=1,
          n_genes_user=50,
          groupby='leiden-all-2')
runtime_cosg = time.perf_counter() - t0

# Dot plot of the top 4 COSG genes per cluster, each gene scaled across groups.
sc.settings.set_figure_params(dpi=200, figsize=(4, 4), fontsize=20)
sc.pl.rank_genes_groups_dotplot(adata,
                                groupby='leiden-all-2',
                                cmap='Spectral_r',
                                standard_scale='var',
                                n_genes=4,
                                key='cosg')

In [None]:
pd.DataFrame(adata.uns['cosg']['names'])['26'].head(30)

In [None]:
from matplotlib import cm, colors
import colorcet as cc

# Turn colorcet's CET_L20 colour list into a continuous colormap.
mymap = colors.LinearSegmentedColormap.from_list('my_colormap', cc.CET_L20)

# Sample 128 colours from the 20%-100% range of that map (the low end is skipped).
colors2 = mymap(np.linspace(0.2, 1, 128)) # 20%-100%
# Five light greys taken from the reversed Greys map, stacked in front of the sampled colours.
colors3 = plt.cm.Greys_r(np.linspace(0.8,0.9,5))
colorsComb = np.vstack([colors3, colors2])
# Final colormap used for gene-expression UMAPs: greys first, then CET_L20.
mymap = colors.LinearSegmentedColormap.from_list('my_colormap', colorsComb)

In [None]:
# 
sc.pl.umap(adata, color=['S100A8','SPRR1B','KRT13','CPA4'], add_outline=False, frameon=False,color_map = mymap)

In [None]:
# Candidate marker genes per epithelial cell type, used to interpret the clusters.
# The trailing "# E.." tags are the author's own labels.
marker_genes_dict = {'Basal 1': ['DLK2','KRT15','KHDRBS2'], # E1
    'Basal 2': ['DAPL1','NOTCH1'], # E1
    'Cycling Basal': ['MKI67','NUSAP1'], # E2
    'Hillock': ['KRT14','KRT6A','KRT13'], # E3
    'Squamous': ['KRT78','SPRR3'], # E4
    'Duct': ['MIA'], # E5
    'Club': ['SCGB3A1','SCGB1A1'], # E6
    'Goblet': ['MUC5AC','TFF1','BPIFA2','PGC','MUC6','TFF2'], # E7
    'Deuterosomal': ['FOXN4','CDC20B'], # E9
    'Ciliated': ['PIFO','OMG','CFAP54','CCDC40'], # E10
    'Ionocyte':['FOXI1','ASCL3'], # E11
    'Brush/Tuft': ['BMX','RGS13'], # E9
    'Neuroendocrine': ['PCSK1N','BEX1'], # E10
    'Melanocyte': ['PMEL','MLANA'], # E10
}
sc.settings.set_figure_params(dpi=150, figsize = (4, 3), fontsize=18)
# Dot plot of the markers per 'leiden-all-2' cluster, read from adata.raw and
# scaled per gene; return_fig=True hands back the plot object for styling below.
mp = sc.pl.dotplot(adata, 
              marker_genes_dict, 
              'leiden-all-2', 
              dendrogram=False, 
              #figsize=(3, 4),
              use_raw=True,
              cmap = 'Reds',
              var_group_rotation=45,
              #swap_axes=True,
              standard_scale='var',
              colorbar_title=None,
              return_fig=True
              # , save='_' + sample_name + '_fig11.png'
             ) 
# Add grid lines and render.
mp.style(grid=True,cmap = 'Reds').show() # RdYlBu_r

## Harmony(Figure)

In [None]:
def progress_pca_harmony_umap(adata, batch_key='batch_name', n_pcs=20,
                              n_neighbors=10, max_iter_harmony=20):
    """Correct batch effects with Harmony, then rebuild neighbours and the UMAP.

    Harmony is run on the existing PCA coordinates (``adata.obsm['X_pca']``);
    the corrected coordinates are stored in ``adata.obsm['X_pca_harmony']``
    and used as the representation for the neighbour graph.

    Parameters
    ----------
    adata
        AnnData object with ``obsm['X_pca']`` already computed; it is
        modified and returned.
    batch_key
        Column of ``adata.obs`` handed to Harmony as the batch variable.
    n_pcs
        Number of components of the corrected representation used for the
        neighbour graph.
    n_neighbors
        Neighbourhood size for ``sc.pp.neighbors``.
    max_iter_harmony
        Iteration cap passed to ``hm.run_harmony``.

    Returns
    -------
    The ``adata`` object that was passed in.

    Raises
    ------
    KeyError
        If ``adata.obsm`` has no ``'X_pca'`` entry.
    """
    if 'X_pca' not in adata.obsm:
        raise KeyError("adata.obsm['X_pca'] is missing; run PCA before Harmony")

    # Harmony batch correction on the PCA embedding.
    harmony_out = hm.run_harmony(
        adata.obsm['X_pca'],
        adata.obs,
        vars_use=[batch_key],
        max_iter_harmony=max_iter_harmony,
    )
    # Z_corr is transposed before storing; presumably it is laid out as
    # (components x cells) -- confirm for the installed harmonypy version.
    adata.obsm['X_pca_harmony'] = harmony_out.Z_corr.T
    sc.pp.neighbors(adata, n_neighbors=n_neighbors, use_rep='X_pca_harmony', n_pcs=n_pcs)

    # UMAP embedding computed from the corrected neighbour graph.
    sc.tl.umap(adata)

    return adata

adata = progress_pca_harmony_umap(adata)

In [None]:
def add_leiden(adata):
    """Run Leiden clustering at resolutions 0.5 and 1 and keep both labellings.

    This redefinition replaces the earlier three-resolution ``add_leiden``.
    After each run the labels found in ``adata.obs['leiden']`` are stored
    under ``adata.obs['leiden-all-<resolution>']``; on return
    ``adata.obs['leiden']`` holds the resolution-1 labels.

    Returns the ``adata`` object that was passed in.
    """
    for resolution in (0.5, 1):
        print(f"Performing clustering with a resolution of {resolution}")
        sc.tl.leiden(adata, resolution=resolution)
        adata.obs[f'leiden-all-{resolution}'] = adata.obs['leiden']

    return adata

adata = add_leiden(adata)

In [None]:
# UMAPs of the current embedding coloured by tissue, by the 'majority_voting'
# labels, and by the resolution-1 Leiden clusters (labels drawn on the data).
sc.settings.set_figure_params(dpi=400, figsize = (4, 4), fontsize=7)
sc.pl.umap(adata, color=['tissue'], frameon=False, title='')
sc.pl.umap(adata, color=['majority_voting'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')
sc.pl.umap(adata, color=['leiden-all-1'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')

In [None]:
import cosg as cosg
import time

# Rank marker genes for each 'leiden-all-1' cluster with COSG and time the call.
# time.clock() was removed in Python 3.8; time.perf_counter() is the
# documented replacement for measuring elapsed time.
t0 = time.perf_counter()
cosg.cosg(adata,
          key_added='cosg',
          mu=1,
          n_genes_user=50,
          groupby='leiden-all-1')
runtime_cosg = time.perf_counter() - t0

# Dot plot of the top 4 COSG genes per cluster, each gene scaled across groups.
sc.settings.set_figure_params(dpi=200, figsize=(4, 4), fontsize=20)
sc.pl.rank_genes_groups_dotplot(adata,
                                groupby='leiden-all-1',
                                cmap='Spectral_r',
                                standard_scale='var',
                                n_genes=4,
                                key='cosg')

In [None]:
# Same COSG dot plot as the previous cell, widened to the top 10 genes per cluster.
sc.settings.set_figure_params(dpi=200, figsize = (4, 4), fontsize=20)
sc.pl.rank_genes_groups_dotplot(adata,groupby='leiden-all-1',
                                cmap='Spectral_r',
                                 standard_scale='var',
                                       n_genes=10,key='cosg')

In [None]:
pd.DataFrame(adata.uns['cosg']['names'])['8'].head(30)

In [None]:
# Candidate marker genes per epithelial cell type (revised relative to the
# BBKNN section: extra Hillock genes, different Brush/Tuft and Neuroendocrine genes).
# The trailing "# E.." tags are the author's own labels.
marker_genes_dict = {'Basal 1': ['DLK2','KRT15','KHDRBS2'], # E1
    'Basal 2': ['DAPL1','NOTCH1'], # E1
    'Cycling Basal': ['MKI67','NUSAP1'], # E2
    'Hillock': ['KRT14','KRT6A','KRT13','ECM1','S100A11','CLDN3'], # E3
    'Squamous': ['KRT78','SPRR3'], # E4
    'Duct': ['MIA'], # E5
    'Club': ['SCGB3A1','SCGB1A1'], # E6
    'Goblet': ['MUC5AC','TFF1','BPIFA2','PGC','MUC6','TFF2'], # E7
    'Deuterosomal': ['FOXN4','CDC20B'], # E9
    'Ciliated': ['PIFO','OMG','CFAP54','CCDC40'], # E10
    'Ionocyte':['FOXI1','ASCL3'], # E11
    'Brush/Tuft': ['LRMP','ASCL2'], # E9
    'Neuroendocrine': ['PCSK1N','CHGA'], # E10
    'Melanocyte': ['PMEL','MLANA'], # E10
}
sc.settings.set_figure_params(dpi=150, figsize = (4, 3), fontsize=18)
# Dot plot of the markers per 'leiden-all-1' cluster, read from adata.raw and
# scaled per gene; return_fig=True hands back the plot object for styling below.
mp = sc.pl.dotplot(adata, 
              marker_genes_dict, 
              'leiden-all-1', 
              dendrogram=False, 
              #figsize=(3, 4),
              use_raw=True,
              cmap = 'Reds',
              var_group_rotation=45,
              #swap_axes=True,
              standard_scale='var',
              colorbar_title=None,
              return_fig=True
              # , save='_' + sample_name + '_fig11.png'
             ) 
# Add grid lines and render.
mp.style(grid=True,cmap = 'Reds').show() # RdYlBu_r

In [None]:
sc.pl.umap(adata, color=['KRT5','TP63','SCGB1A1','FOXJ1','MUC5AC','LYZ','SLPI','AZGP1','BPIFB1','PRB1','PRB2','EPCAM'], add_outline=False, frameon=False,color_map = mymap)

In [None]:
## Cell annotation: collect the cell barcodes of each annotated group
# Note: clusters 16, 18 and 19 are doublets of epithelial cells with immune
# cells, fibroblasts and endothelial cells respectively; cluster 7 has no clear
# signature and is provisionally labelled as secretory precursor cells.

# Basal resting
E01_index = adata.obs[adata.obs["leiden-all-1"].isin(['0', '3'])].index
# Basal cycling
E02_index = adata.obs[adata.obs["leiden-all-1"].isin(['14'])].index
# Club
E03_index = adata.obs[adata.obs["leiden-all-1"].isin(['1', '2', '20'])].index
# Goblet
E04_index = adata.obs[adata.obs["leiden-all-1"].isin(['11'])].index
# Secretory precursor
E05_index = adata.obs[adata.obs["leiden-all-1"].isin(['7'])].index
# SMG (submucosal gland)
E06_index = adata.obs[adata.obs["leiden-all-1"].isin(['4', '5', '6'])].index
# MUC5B+ SMG
E07_index = adata.obs[adata.obs["leiden-all-1"].isin(['9'])].index
# MMP7+ SMG
E08_index = adata.obs[adata.obs["leiden-all-1"].isin(['10'])].index
# PRB1+ SMG
E09_index = adata.obs[adata.obs["leiden-all-1"].isin(['12'])].index

# Ionocyte
E10_index = adata.obs[adata.obs["leiden-all-1"].isin(['17'])].index
# Ciliated
E11_index = adata.obs[adata.obs["leiden-all-1"].isin(['13'])].index
# S100A8+ APC
E12_index = adata.obs[adata.obs["leiden-all-1"].isin(['15'])].index
# KRT4+ EMT cell
E13_index = adata.obs[adata.obs["leiden-all-1"].isin(['8'])].index

In [None]:
adata

In [None]:
# Barcodes of every cell outside Leiden clusters 16, 18 and 19 (the clusters flagged as doublets).
singlet_index = adata.obs.loc[~adata.obs["leiden-all-1"].isin(['16','18','19']), :].index
singlet_index

In [None]:
# Keep only the singlet cells. Slicing an AnnData yields a view; the cells
# below write new obs columns to this object, so take an explicit copy instead
# of relying on anndata's implicit copy-on-modify (which emits a warning).
adata_single = adata[singlet_index, :].copy()
adata_single

In [None]:
adata = adata_single

In [None]:
# Every cell starts as 'Basal resting' (so E01_index is not needed here); the
# other groups then overwrite that default for their own barcodes.
# DataFrame.at is a single-value accessor and newer pandas rejects an Index of
# labels as its key; .loc is the label-based setter for many rows at once.
adata.obs['annotation'] = 'C01-E01-Basal resting'
adata.obs.loc[E02_index, 'annotation'] = 'C01-E02-Basal cycling'
adata.obs.loc[E03_index, 'annotation'] = 'C01-E03-Club'
adata.obs.loc[E04_index, 'annotation'] = 'C01-E04-Goblet'
adata.obs.loc[E05_index, 'annotation'] = 'C01-E05-Secretory precusor'
adata.obs.loc[E06_index, 'annotation'] = 'C01-E06-SMG'
adata.obs.loc[E07_index, 'annotation'] = 'C01-E07-MUC5B+SMG'
adata.obs.loc[E08_index, 'annotation'] = 'C01-E08-MMP7+SMG'
adata.obs.loc[E09_index, 'annotation'] = 'C01-E09-PRB1+SMG'
adata.obs.loc[E10_index, 'annotation'] = 'C01-E10-Ionocyte'
adata.obs.loc[E11_index, 'annotation'] = 'C01-E11-Ciliated'
adata.obs.loc[E12_index, 'annotation'] = 'C01-E12-S100A8+APC'
adata.obs.loc[E13_index, 'annotation'] = 'C01-E13-KRT4+EMT cell'

In [None]:
# UMAPs coloured by tissue, by the 'majority_voting' labels, and by the new
# 'annotation' column (labels drawn on the data).
sc.settings.set_figure_params(dpi=400, figsize = (4, 4), fontsize=7)
sc.pl.umap(adata, color=['tissue'], frameon=False, title='')
sc.pl.umap(adata, color=['majority_voting'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')
sc.pl.umap(adata, color=['annotation'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')

In [None]:
# Save the annotated, doublet-filtered object.
# NOTE(review): this is the same path the notebook loads from, so the input
# file is overwritten and the removed cells cannot be recovered from it.
adata.write('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/EPI.h5ad')

# Visualization

In [None]:
# Reload the AnnData object written at the end of the annotation section and display its summary.
adata = sc.read('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/EPI.h5ad')
adata

In [None]:
def progress_pca_bbknn_umap_tsne(adata, n_pcs=20, batch_key='batch_name', neighbors_within_batch=3):
    """Build a batch-balanced neighbour graph with BBKNN, then compute a UMAP.

    Despite the name, no PCA or t-SNE is run here. BBKNN needs data that has
    already been normalised and PCA-reduced, so those steps must have been
    done before the call. BBKNN is used in place of ``sc.pp.neighbors``.

    Parameters
    ----------
    adata
        AnnData object to process; it is modified and returned.
    n_pcs
        Number of principal components passed to BBKNN.
    batch_key
        Column of ``adata.obs`` that identifies the batch of each cell.
    neighbors_within_batch
        Number of neighbours BBKNN picks per batch (previously hard-coded to 3).

    Returns
    -------
    The ``adata`` object that was passed in.
    """
    # Batch-effect handling: BBKNN replaces the usual neighbours step.
    sc.external.pp.bbknn(
        adata,
        batch_key=batch_key,
        n_pcs=n_pcs,
        neighbors_within_batch=neighbors_within_batch,
    )

    # UMAP embedding computed from the BBKNN graph.
    sc.tl.umap(adata)

    return adata

adata = progress_pca_bbknn_umap_tsne(adata)

In [None]:
# Show the tissue categories so the palette entries can be matched to them by position.
print(adata.obs['tissue'].cat.categories)

old_colors = np.array(adata.uns['tissue_colors'])
# Independent copy: editing new_colors must not silently rewrite old_colors
# (a plain assignment would make both names point at the same array).
new_colors = old_colors.copy()

# Overwrite the first three palette entries by position:
#   0 -> inferior turbinate, 1 -> middle turbinate, 2 -> polyp
new_colors[0] = '#ff0000'
new_colors[1] = '#0077b2'
new_colors[2] = '#60b55c'

adata.uns['tissue_colors'] = new_colors

# UMAP coloured by tissue with the updated palette.
sc.settings.set_figure_params(dpi=400, figsize=(4, 4), fontsize=7)
sc.pl.umap(adata, color=['tissue'], frameon=False, title='')

In [None]:
# Rename the KRT4+ EMT group to KRT14+ EMT.
# NOTE(review): the annotation cell earlier in this notebook stores this group
# as 'C01-E13-KRT4+EMT cell'; if the loaded file carries that label, the
# selection below is empty and nothing is renamed -- confirm the stored label.
E_index = adata.obs.loc[adata.obs["annotation"].isin(['C01-E12-KRT4+EMT cell']), :].index
# Rebuild the column from a plain list of strings so a label that is not an
# existing category can be assigned.
adata.obs['annotation'] = adata.obs['annotation'].to_list()
# .loc instead of .at: .at is a single-value accessor and newer pandas rejects
# an Index of labels as its key.
adata.obs.loc[E_index, 'annotation'] = 'C01-E12-KRT14+EMT cell'

In [None]:
# UMAP coloured by annotation, with the labels drawn on the data.
sc.settings.set_figure_params(dpi=400, figsize = (4, 4), fontsize=7)
sc.pl.umap(adata, color=['annotation'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')

In [None]:
sc.pl.umap(adata, color=['annotation'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False)

**注意，E05-Secretory precusor细胞大概率是低质量细胞，删除**

In [None]:
# Barcodes of every cell except those annotated 'C01-E05-Secretory precusor',
# which the markdown note above marks as probable low-quality cells to remove.
singlet_index = adata.obs.loc[~adata.obs["annotation"].isin(['C01-E05-Secretory precusor']), :].index
singlet_index

In [None]:
# Keep only the retained cells. Slicing an AnnData yields a view; the cells
# below rewrite obs['annotation'] on this object, so take an explicit copy
# instead of relying on anndata's implicit copy-on-modify (which emits a warning).
adata_single = adata[singlet_index, :].copy()
adata_single

In [None]:
adata = adata_single

In [None]:
## Cell annotation: collect the cell barcodes of each annotated group
# Note: clusters 16, 18 and 19 are doublets of epithelial cells with immune
# cells, fibroblasts and endothelial cells respectively; cluster 7 has no clear
# signature and was provisionally labelled as secretory precursor cells.

# Basal resting
E01_index = adata.obs[adata.obs["leiden-all-1"].isin(['0', '3'])].index
# Basal cycling
E02_index = adata.obs[adata.obs["leiden-all-1"].isin(['14'])].index
# Club
E03_index = adata.obs[adata.obs["leiden-all-1"].isin(['1', '2', '20'])].index
# Goblet
E04_index = adata.obs[adata.obs["leiden-all-1"].isin(['11'])].index
# Secretory precursor (group removed, so no index is built for it)
#E05_index = adata.obs.loc[adata.obs["leiden-all-1"].isin(['7']), :].index
# SMG (submucosal gland)
E06_index = adata.obs[adata.obs["leiden-all-1"].isin(['4', '5', '6'])].index
# MUC5B+ SMG
E07_index = adata.obs[adata.obs["leiden-all-1"].isin(['9'])].index
# MMP7+ SMG
E08_index = adata.obs[adata.obs["leiden-all-1"].isin(['10'])].index
# PRB1+ SMG
E09_index = adata.obs[adata.obs["leiden-all-1"].isin(['12'])].index

# Ionocyte
E10_index = adata.obs[adata.obs["leiden-all-1"].isin(['17'])].index
# Ciliated
E11_index = adata.obs[adata.obs["leiden-all-1"].isin(['13'])].index
# S100A8+ APC
E12_index = adata.obs[adata.obs["leiden-all-1"].isin(['15'])].index
# KRT14+ EMT cell
E13_index = adata.obs[adata.obs["leiden-all-1"].isin(['8'])].index

In [None]:
# Final labels, renumbered after the secretory-precursor group was dropped
# (so the E06..E13 index variables map to labels E05..E12).
# Every cell starts as 'Basal resting'; the other groups overwrite that default.
# DataFrame.at is a single-value accessor and newer pandas rejects an Index of
# labels as its key; .loc is the label-based setter for many rows at once.
adata.obs['annotation'] = 'C01-E01-Basal resting'
adata.obs.loc[E02_index, 'annotation'] = 'C01-E02-Basal cycling'
adata.obs.loc[E03_index, 'annotation'] = 'C01-E03-Club'
adata.obs.loc[E04_index, 'annotation'] = 'C01-E04-Goblet'

adata.obs.loc[E06_index, 'annotation'] = 'C01-E05-SMG'
adata.obs.loc[E07_index, 'annotation'] = 'C01-E06-MUC5B+SMG'
adata.obs.loc[E08_index, 'annotation'] = 'C01-E07-MMP7+SMG'
adata.obs.loc[E09_index, 'annotation'] = 'C01-E08-PRB1+SMG'
adata.obs.loc[E10_index, 'annotation'] = 'C01-E09-Ionocyte'
adata.obs.loc[E11_index, 'annotation'] = 'C01-E10-Ciliated'
adata.obs.loc[E12_index, 'annotation'] = 'C01-E11-S100A8+APC'
adata.obs.loc[E13_index, 'annotation'] = 'C01-E12-KRT14+EMT cell'

In [None]:
sc.pl.umap(adata, color=['annotation'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, title='')

In [None]:
sc.settings.set_figure_params(dpi=300, figsize = (4, 4), fontsize=12)

from matplotlib import cm, colors
import colorcet as cc

# Turn colorcet's CET_L20 colour list into a continuous colormap.
mymap = colors.LinearSegmentedColormap.from_list('my_colormap', cc.CET_L20)

# Sample 128 colours from the 20%-100% range of that map (the low end is skipped).
colors2 = mymap(np.linspace(0.2, 1, 128)) # 20%-100%
# Five light greys taken from the reversed Greys map, stacked in front of the sampled colours.
colors3 = plt.cm.Greys_r(np.linspace(0.8,0.9,5))
colorsComb = np.vstack([colors3, colors2])
mymap = colors.LinearSegmentedColormap.from_list('my_colormap', colorsComb)

# Expression UMAPs for three panels of four genes, drawn with the custom colormap.
sc.pl.umap(adata, color=['OXT','AVP','OXTR','HLA-DQA2'], frameon=False, color_map = mymap)
sc.pl.umap(adata, color=['HLA-DQA1','DEFB1','CXCL9','CXCL10'], frameon=False, color_map = mymap)
sc.pl.umap(adata, color=['CXCL11','EREG','DUOX2','LYPD3'], frameon=False, color_map = mymap)

In [None]:
sc.pl.umap(adata, color=['tissue'], frameon=False, title='')

In [None]:
adata.obs['annotation'].cat.categories

In [None]:
# Compute the dendrogram over the 'annotation' groups.
sc.tl.dendrogram(adata, groupby='annotation')

In [None]:
import cosg as cosg
import time

# Rank marker genes for each 'annotation' group with COSG and time the call.
# time.clock() was removed in Python 3.8; time.perf_counter() is the
# documented replacement for measuring elapsed time.
t0 = time.perf_counter()
cosg.cosg(adata,
          key_added='cosg',
          mu=1,
          n_genes_user=50,
          groupby='annotation')
runtime_cosg = time.perf_counter() - t0

# Dot plot of the top 4 COSG genes per group, each gene scaled across groups.
sc.settings.set_figure_params(dpi=200, figsize=(4, 4), fontsize=20)
sc.pl.rank_genes_groups_dotplot(adata,
                                groupby='annotation',
                                cmap='Spectral_r',
                                standard_scale='var',
                                n_genes=4,
                                key='cosg')

In [None]:
import cosg as cosg
import time

# Rank marker genes for each 'annotation' group with COSG and time the call.
# time.clock() was removed in Python 3.8; time.perf_counter() is the
# documented replacement for measuring elapsed time.
t0 = time.perf_counter()
cosg.cosg(adata,
          key_added='cosg',
          mu=1,
          n_genes_user=50,
          groupby='annotation')
runtime_cosg = time.perf_counter() - t0

# Dot plot of the top 15 COSG genes per group, each gene scaled across groups.
sc.settings.set_figure_params(dpi=200, figsize=(4, 4), fontsize=20)
sc.pl.rank_genes_groups_dotplot(adata,
                                groupby='annotation',
                                cmap='Spectral_r',
                                standard_scale='var',
                                n_genes=15,
                                key='cosg')

In [None]:
# Read the gene table at tf_GENES and show how many rows it has.
# The file name and the "## TFs" cell further down suggest it is a
# transcription-factor list, although the variables are called amps_* -- TODO confirm.
tf_GENES = '/mnt/data/project/scenic/auxilliaries/lambert2018_c.txt'
amps_pd = pd.read_table(tf_GENES)
len(amps_pd)

In [None]:
# 141
# Keep only the listed genes that are present in adata.raw, then count them.
amps_list = list(amps_pd['Gene_name'])
amps_list = [x for x in amps_list if x in adata.raw.var_names]
len(amps_list)

In [None]:
# New AnnData built from the raw matrix and raw gene table, reusing the current cell metadata.
adata_c = sc.AnnData(X=adata.raw.X, var=adata.raw.var, obs = adata.obs)

In [None]:
# Restrict to the genes in amps_list. Slicing an AnnData yields a view; COSG is
# run on this object next with key_added='cosg', so take an explicit copy
# instead of relying on anndata's implicit copy-on-modify (which emits a warning).
adata_c = adata_c[:, amps_list].copy()
adata_c

In [None]:
## TFs

import cosg as cosg
import time

# Rank the genes of the restricted object (adata_c) per 'annotation' group
# with COSG and time the call.
# time.clock() was removed in Python 3.8; time.perf_counter() is the
# documented replacement for measuring elapsed time.
t0 = time.perf_counter()
cosg.cosg(adata_c,
          key_added='cosg',
          mu=1,
          n_genes_user=50,
          groupby='annotation')
runtime_cosg = time.perf_counter() - t0

# Dot plot of the top 4 COSG genes per group, each gene scaled across groups.
sc.settings.set_figure_params(dpi=200, figsize=(4, 4), fontsize=20)
sc.pl.rank_genes_groups_dotplot(adata_c,
                                groupby='annotation',
                                cmap='Spectral_r',
                                standard_scale='var',
                                n_genes=4,
                                key='cosg')

In [None]:
# Count cells per tissue (rows) and annotation (columns); margins=True adds
# an "All" row and column holding the totals.
Groups_tab_1 = pd.crosstab(index=adata.obs['tissue'],  # rows: tissue
                        columns=adata.obs['annotation'], margins=True)               # columns: annotation, plus "All" totals
# Divide each row by its "All" total to get per-tissue fractions.
MyTab_1= Groups_tab_1.div(Groups_tab_1["All"], axis=0)
# Drop the totals column and row, leaving the fraction table only.
MyTab2_1 = MyTab_1.drop(columns="All")
MyTab2_1 = MyTab2_1.drop(index="All")
MyTab2_1.T

In [None]:
# Put the tissues in a fixed order along the x-axis.
order = ['inferior turbinate', 'middle turbinate', 'polyp']
MyTab2_1 = MyTab2_1.loc[order, :]

# Make the annotation columns categorical and sort them, which fixes the
# stacking order of the bars.
MyTab2_1.columns = pd.CategoricalIndex(MyTab2_1.columns.values)
MyTab2_1 = MyTab2_1.sort_index(axis=1)

# One stacked bar per tissue; each segment is the fraction of one annotation.
ax = MyTab2_1.plot.bar(
    figsize=(3.7, 5),
    stacked=True,
    edgecolor='#000000',
    linewidth=0.4,
    width=0.8,
    fontsize=10,
)

ax.set_title("", fontsize=12)
ax.set_ylabel("Fraction of cells", fontsize=12)
ax.set_xlabel("", fontsize=12)
# The original `plt.ylim=1.0` set no limit at all: it replaced the
# pyplot.ylim function with a float for the rest of the session.
ax.set_ylim(0, 1.0)

# Reverse the legend so its entries read top-to-bottom like the stacked bars.
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles[::-1], labels[::-1], loc='center left', bbox_to_anchor=(1, 0.6), fontsize=8)
ax.grid(False)

plt.show()

In [None]:
# Save the current object (final annotation, COSG results).
# NOTE(review): this is the same path the notebook loads from, so the
# previously stored file is overwritten.
adata.write('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/EPI.h5ad')

In [None]:
# Count cells per batch (rows) and tissue (columns); margins=True adds "All" totals.
Groups_tab_1 = pd.crosstab(index=adata.obs['batch_name'],  # rows: batch
                        columns=adata.obs['tissue'], margins=True)               # columns: tissue, plus "All" totals
Groups_tab_1

In [None]:
# Select the cells of nine named batches (three each for 0222, 0224 and 0226)
# and take the matching subset. Despite the variable names, this filter is by
# batch, not by singlet status.
singlet_index = adata.obs.loc[adata.obs["batch_name"].isin(['Qilu_0222-1-220211','Qilu_0222-2-220211','Qilu_0222-3-220211',
                                                            'Qilu_0224-1-220211','Qilu_0224-2-220211','Qilu_0224-3-220211',
                                                            'Qilu_0226-1-220211','Qilu_0226-2-220211','Qilu_0226-3-220211']), :].index
adata_single = adata[singlet_index, :]
adata_single

In [None]:
# Count cells per tissue (rows) and annotation (columns) in the batch subset;
# margins=True adds an "All" row and column holding the totals.
Groups_tab_1 = pd.crosstab(index=adata_single.obs['tissue'],
                           columns=adata_single.obs['annotation'], margins=True)
# Per-tissue fractions, with the totals row and column dropped afterwards.
MyTab_1 = Groups_tab_1.div(Groups_tab_1["All"], axis=0)
MyTab2_1 = MyTab_1.drop(columns="All")
MyTab2_1 = MyTab2_1.drop(index="All")

# Put the tissues in a fixed order along the x-axis.
order = ['inferior turbinate', 'middle turbinate', 'polyp']
MyTab2_1 = MyTab2_1.loc[order, :]

# Make the annotation columns categorical and sort them, which fixes the
# stacking order of the bars.
MyTab2_1.columns = pd.CategoricalIndex(MyTab2_1.columns.values)
MyTab2_1 = MyTab2_1.sort_index(axis=1)

# One stacked bar per tissue; each segment is the fraction of one annotation.
ax = MyTab2_1.plot.bar(
    figsize=(3.7, 5),
    stacked=True,
    edgecolor='#000000',
    linewidth=0.4,
    width=0.8,
    fontsize=10,
)

ax.set_title("", fontsize=12)
ax.set_ylabel("Fraction of cells", fontsize=12)
ax.set_xlabel("", fontsize=12)
# The original `plt.ylim=1.0` set no limit at all: it replaced the
# pyplot.ylim function with a float for the rest of the session.
ax.set_ylim(0, 1.0)

# Reverse the legend so its entries read top-to-bottom like the stacked bars.
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles[::-1], labels[::-1], loc='center left', bbox_to_anchor=(1, 0.6), fontsize=8)
ax.grid(False)

plt.show()

In [None]:
# Expression UMAPs for four panels of four genes each (default colormap).
sc.pl.umap(adata, color=['S100A2','KRT5','KRT15','POSTN'], frameon=False)
sc.pl.umap(adata, color=['SERPINB3','KRT19','S100A6','AGR2'], frameon=False)
sc.pl.umap(adata, color=['LYZ','SLPI','AZGP1','PIGR'], frameon=False)
sc.pl.umap(adata, color=['CAPS','C9orf24','TSPAN1','PIFO'], frameon=False)

In [None]:
# Reload the saved AnnData object and display its summary.
adata = sc.read('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/EPI.h5ad')
adata

In [None]:
pd.DataFrame(adata.uns['cosg']['names'])['C01-E08-PRB1+SMG'].head(30)

In [None]:
pd.DataFrame(adata.uns['cosg']['names'])['C01-E07-MMP7+SMG'].head(30)

In [None]:
pd.DataFrame(adata.uns['cosg']['names'])['C01-E06-MUC5B+SMG'].head(30)

In [None]:
pd.DataFrame(adata.uns['cosg']['names'])['C01-E12-KRT14+EMT cell'].head(30)

In [None]:
pd.DataFrame(adata.uns['cosg']['names'])['C01-E11-S100A8+APC'].head(30)

In [None]:
sc.settings.set_figure_params(dpi=300, figsize = (4, 4), fontsize=12)

from matplotlib import cm, colors
import colorcet as cc

# Turn colorcet's CET_L20 colour list into a continuous colormap.
mymap = colors.LinearSegmentedColormap.from_list('my_colormap', cc.CET_L20)

# Sample 128 colours from the 20%-100% range of that map (the low end is skipped).
colors2 = mymap(np.linspace(0.2, 1, 128)) # 20%-100%
# Five light greys taken from the reversed Greys map, stacked in front of the sampled colours.
colors3 = plt.cm.Greys_r(np.linspace(0.8,0.9,5))
colorsComb = np.vstack([colors3, colors2])
mymap = colors.LinearSegmentedColormap.from_list('my_colormap', colorsComb)

# Expression UMAPs for OXTR, OXT and AVP with the custom colormap.
sc.pl.umap(adata, color=['OXTR','OXT','AVP'], frameon=False, color_map = mymap)

In [None]:
# Reload the saved AnnData object and display its summary.
adata = sc.read('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/EPI.h5ad')
adata

In [None]:
# Restrict adata to the cells annotated 'C01-E10-Ciliated'.
# Note that this rebinds `adata`, so later cells see only this subset.
E_index = adata.obs.loc[adata.obs["annotation"].isin(['C01-E10-Ciliated']), :].index
adata = adata[E_index, :]
adata

In [None]:
# Restrict adata further to cells from the inferior turbinate.
# Slicing an AnnData yields a view; the ranking steps that follow store their
# results on this object, so take an explicit copy instead of relying on
# anndata's implicit copy-on-modify (which emits a warning).
E_index = adata.obs.loc[adata.obs["tissue"].isin(['inferior turbinate']), :].index
adata = adata[E_index, :].copy()
adata

In [None]:
import cosg as cosg
import time

# Rank marker genes for each 'Health' group with COSG and time the call.
# time.clock() was removed in Python 3.8; time.perf_counter() is the
# documented replacement for measuring elapsed time.
t0 = time.perf_counter()
cosg.cosg(adata,
          key_added='cosg',
          mu=1,
          n_genes_user=50,
          groupby='Health')
runtime_cosg = time.perf_counter() - t0

# Dot plot of the top 15 COSG genes per group, each gene scaled across groups.
sc.settings.set_figure_params(dpi=200, figsize=(4, 4), fontsize=15)
sc.pl.rank_genes_groups_dotplot(adata,
                                groupby='Health',
                                cmap='Spectral_r',
                                standard_scale='var',
                                n_genes=15,
                                key='cosg')

In [None]:
# Wilcoxon rank-sum differential expression between the 'Health' groups.
sc.tl.rank_genes_groups(adata, 
                        groupby = 'Health',
                        method='wilcoxon')

In [None]:
# Differential-expression table for the "healthy control" group.
result_DEG = sc.get.rank_genes_groups_df(adata,group="healthy control")
result_DEG

In [None]:
# Keep the genes with log fold change >= 1 and adjusted p-value <= 0.01.
# The original row loop read these by position (columns 2 and 4 of the table
# returned by sc.get.rank_genes_groups_df: names, scores, logfoldchanges,
# pvals, pvals_adj) and its comment stated the opposite direction and the raw
# p-value. Named columns make the criterion explicit.
significant = (result_DEG['logfoldchanges'] >= 1) & (result_DEG['pvals_adj'] <= 1e-2)
gene_list = result_DEG.loc[significant, 'names'].tolist()
len(gene_list)

In [None]:
# Re-index the table by gene name and keep only the rows of the selected genes.
result_DEG_1 = result_DEG.set_index('names').loc[gene_list,:]
result_DEG_1

In [None]:
# Export the filtered table to ciliated.csv.
result_DEG_1.to_csv('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/csv/ciliated.csv')

In [None]:
# Wilcoxon rank-sum differential expression between the 'annotation' groups.
# NOTE(review): the cells directly above rebind `adata` to the ciliated /
# inferior-turbinate subset, which holds a single annotation. Presumably the
# full EPI.h5ad object has to be reloaded before this cell when running
# top-to-bottom -- confirm.
sc.tl.rank_genes_groups(adata, 
                        groupby = 'annotation',
                        method='wilcoxon')

In [None]:
result_DEG = sc.get.rank_genes_groups_df(adata,group="C01-E01-Basal resting")
result_DEG

In [None]:
result_DEG.to_csv('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/csv/Basal_resting.csv')

In [None]:
result_DEG = sc.get.rank_genes_groups_df(adata,group="C01-E02-Basal cycling")
result_DEG

In [None]:
result_DEG.to_csv('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/csv/Basal_cycling.csv')

In [None]:
result_DEG = sc.get.rank_genes_groups_df(adata,group="C01-E03-Club")
result_DEG.to_csv('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/csv/Club.csv')
result_DEG

In [None]:
result_DEG = sc.get.rank_genes_groups_df(adata,group="C01-E04-Goblet")
result_DEG.to_csv('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/csv/C01-E04-Goblet.csv')
result_DEG

In [None]:
result_DEG = sc.get.rank_genes_groups_df(adata,group="C01-E05-SMG")
result_DEG.to_csv('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/csv/C01-E05-SMG.csv')
result_DEG

In [None]:
result_DEG = sc.get.rank_genes_groups_df(adata,group="C01-E06-MUC5B+SMG")
result_DEG.to_csv('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/csv/C01-E06-MUC5B+SMG.csv')
result_DEG

In [None]:
result_DEG = sc.get.rank_genes_groups_df(adata,group="C01-E07-MMP7+SMG")
result_DEG.to_csv('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/csv/C01-E07-MMP7+SMG.csv')
result_DEG

In [None]:
result_DEG = sc.get.rank_genes_groups_df(adata,group="C01-E08-PRB1+SMG")
result_DEG.to_csv('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/csv/C01-E08-PRB1+SMG.csv')
result_DEG

In [None]:
result_DEG = sc.get.rank_genes_groups_df(adata,group="C01-E09-Ionocyte")
result_DEG.to_csv('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/csv/C01-E09-Ionocyte.csv')
result_DEG

In [None]:
result_DEG = sc.get.rank_genes_groups_df(adata,group="C01-E10-Ciliated")
result_DEG.to_csv('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/csv/C01-E10-Ciliated.csv')
result_DEG

In [None]:
result_DEG = sc.get.rank_genes_groups_df(adata,group="C01-E11-S100A8+APC")
result_DEG.to_csv('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/csv/C01-E11-S100A8+APC.csv')
result_DEG

In [None]:
adata.obs['annotation'].cat.categories

In [None]:
result_DEG = sc.get.rank_genes_groups_df(adata,group="C01-E12-KRT14+EMT cell")
result_DEG.to_csv('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/csv/C01-E12-KRT14+EMT_cell.csv')
result_DEG

# 受配体

## 趋化因子

In [None]:
# Chemokine receptors (42)

# Receptor families, written as comma-separated symbol strings.
R_CC = 'CCR1,CCR2,CCR3,CCR4,CCR5,CCR6,CCR7,CCR8,CCR9,CCR10'.split(',')
R_CXC = 'CXCR1,CXCR2,CXCR3,CXCR4,CXCR5,CXCR6'.split(',')
R_C = ['XCR1']
R_CX3C = ['CX3CR1']
R_Atypical = 'ACKR1,ACKR2,ACKR3,ACKR4,CCRL2'.split(',')
R_other = 'C5AR1,CMKLR1,FPR1,LRP6,GPR35,HRH4,DPP4,IDE,SLC7A1,CNR2,GPRC5D,VSIR,GPR101,FFAR2,GPR42,PGRMC2,GPR75,FCGR2A,KIR2DL3'.split(',')

# Combine the families and keep only the genes present in adata.raw.
R_Chemotaxis = R_CC+R_CXC+R_C+R_CX3C+R_Atypical+R_other
R_Chemotaxis = [x for x in R_Chemotaxis if x in adata.raw.var_names]

In [None]:
# Chemokine ligands (54 listed -> 46, per the author's original note)

# Ligand families, written as comma-separated symbol strings.
L_CXC = 'CXCL1,CXCL2,CXCL3,CXCL4,CXCL5,CXCL6,CXCL7,CXCL8,CXCL9,CXCL10,CXCL11,CXCL12,CXCL13,CXCL14,CXCL15,CXCL16,CXCL17'.split(',')
L_C = 'XCL1,XCL2'.split(',')
# The trailing comma that used to end this string produced an empty-string
# entry after split(','); it has been removed.
L_CC = 'CCL1,CCL2,CCL3,CCL4,CCL5,CCL6,CCL7,CCL8,CCL9,CCL10,CCL11,CCL12,CCL13,CCL14,CCL15,CCL16,CCL17,CCL18,CCL19,CCL20,CCL21,CCL22,CCL23,CCL24,CCL25,CCL26,CCL27,CCL28'.split(',')
L_CX3C = ['CX3CL1']
L_other = 'C5,CKLF,CCL3L3,PF4,SPP1'.split(',')

# Combine the families and keep only the genes present in adata.raw.
L_Chemotaxis = L_CXC+L_C+L_CC+L_CX3C+L_other
L_Chemotaxis = [x for x in L_Chemotaxis if x in adata.raw.var_names]

In [None]:
# Chemokine receptors: dot plot per annotation, values from adata.raw, scaled
# per gene, with genes on the rows (swap_axes=True).
mp = sc.pl.dotplot(adata, 
              R_Chemotaxis, 
              'annotation', 
              dendrogram=False, 
              #figsize=(8,10),
              use_raw=True,
              cmap = 'RdYlBu_r',
              var_group_rotation=45,
              swap_axes=True,
              standard_scale='var',
              colorbar_title='Scaled expression in var',
              return_fig=True
              # , save='_' + sample_name + '_fig11.png'
             ) 
# Add the per-group cell totals and grid lines, then render.
mp.add_totals().style(grid=True).show()

# Chemokine ligands: same plot for the ligand list.
mp = sc.pl.dotplot(adata, 
              L_Chemotaxis, 
              'annotation', 
              dendrogram=False, 
              #figsize=(8,10),
              use_raw=True,
              cmap = 'RdYlBu_r',
              var_group_rotation=45,
              swap_axes=True,
              standard_scale='var',
              colorbar_title='Scaled expression in var',
              return_fig=True
              # , save='_' + sample_name + '_fig11.png'
             ) 
mp.add_totals().style(grid=True).show()

## 细胞因子

In [None]:
# Cytokine receptors
# No interleukin or interferon receptors have been listed yet. These were
# written as ''.split(','), which yields [''] (one empty-string entry) rather
# than an empty list.
IL_R = []
IFN_R = []
# NOTE(review): the two lists below are identical to EGF_R and NOTCH_R in the
# growth-factor section -- confirm that TNF/CSF receptors were not intended.
TNF_R = 'EGFR,NRG1'.split(',')
CSF_R = 'NOTCH1,NOTCH2,NOTCH3,NOTCH4'.split(',')

# Combine the families and keep only the genes present in adata.raw.
cytokines_R = IL_R+IFN_R+TNF_R+CSF_R
cytokines_R = [x for x in cytokines_R if x in adata.raw.var_names]

In [None]:
# Cytokine ligands
IL_L = 'IL1A,IL1B,IL2,IL4,IL5,IL6,IL7,IL10,IL11,IL13,IL15,IL16,IL17A,IL17C,IL17F,IL18,IL19,IL20,IL21,IL22,IL23A,IL24,IL26,IL32,IL33,IL34,IL37'.split(',')
IFN_L = 'IFNE,IFNG,IFNL1'.split(',')
# NOTE(review): the two lists below are identical to EGF_L and NOTCH_L in the
# growth-factor section -- confirm that TNF/CSF ligands were not intended.
# IL24 is also listed in both IL_L and CSF_L, so it occurs twice in cytokines_L.
TNF_L = 'EGF,NRG1,AREG,BTC,CNTF,COPA,EPGN,GRN,HBEGF,MIF,TGFA,TGFB1,EREG'.split(',')
CSF_L = 'DLL1,DLL3,DLL4,JAG1,JAG2,SCGB3A1,TNF,IL24,WNT4'.split(',')

# Combine the families and keep only the genes present in adata.raw.
cytokines_L = IL_L+IFN_L+TNF_L+CSF_L
cytokines_L = [x for x in cytokines_L if x in adata.raw.var_names]

In [None]:
# Cytokine ligands: dot plot per annotation, values from adata.raw, scaled
# per gene, with genes on the rows (swap_axes=True).
mp = sc.pl.dotplot(adata, 
              cytokines_L, 
              'annotation', 
              dendrogram=False, 
              #figsize=(8,10),
              use_raw=True,
              cmap = 'RdYlBu_r',
              var_group_rotation=45,
              swap_axes=True,
              standard_scale='var',
              colorbar_title='Scaled expression in var',
              return_fig=True
              # , save='_' + sample_name + '_fig11.png'
             ) 
# Add the per-group cell totals and grid lines, then render.
mp.add_totals().style(grid=True).show()

## 生长因子

In [None]:
# Growth-factor receptors
# NOTE(review): SMO and PTPRK appear in both WNT_R and BMP_R, and NOTCH1 in
# both WNT_R and NOTCH_R, so they occur more than once in GROW_R. Entries such
# as BMR1A, BMR1B, ACR2A and AVR2B do not look like gene symbols and are
# presumably removed by the var_names filter below -- confirm.
WNT_R = 'FZD1,FZD2,FZD3,FZD4,FZD5,FZD6,FZD7,FZD8,FZD9,FZD10,CD36,ROR1,ROR2,RYK,LRP1,SMO,ANTXR1,EPHA7,PTPRK,LDLR,KLRG2,NOTCH1'.split(',')
BMP_R = 'BMPR1A,BMPR1B,BMR1A,BMR1B,BMPR2,ACR2A,AVR2B,ACVR1,SMO,PTPRK,SLAMF1,PLAUR'.split(',')
EGF_R = 'EGFR,NRG1'.split(',')
NOTCH_R = 'NOTCH1,NOTCH2,NOTCH3,NOTCH4'.split(',')

# Combine the families and keep only the genes present in adata.raw.
GROW_R = WNT_R+BMP_R+EGF_R+NOTCH_R
GROW_R = [x for x in GROW_R if x in adata.raw.var_names]

In [None]:
# Growth-factor ligands
# NOTE(review): WNT4 appears in both WNT_L and NOTCH_L, so it occurs twice in GROW_L.
WNT_L = 'WNT1,WNT2,WNT2B,WNT3,WNT3A,WNT4,WNT5A,WNT5B,WNT7A,WNT7B,WNT11'.split(',')
BMP_L = 'BMP2,BMP3,BMP4,BMP5,BMP6,BMP7,BMP8A,BMP8B'.split(',')
EGF_L = 'EGF,NRG1,AREG,BTC,CNTF,COPA,EPGN,GRN,HBEGF,MIF,TGFA,TGFB1,EREG'.split(',')
NOTCH_L = 'DLL1,DLL3,DLL4,JAG1,JAG2,SCGB3A1,TNF,IL24,WNT4'.split(',')

# Combine the families and keep only the genes present in adata.raw.
GROW_L = WNT_L+BMP_L+EGF_L+NOTCH_L
GROW_L = [x for x in GROW_L if x in adata.raw.var_names]

In [None]:
# Growth-factor receptors: dot plot per annotation, values from adata.raw,
# scaled per gene, with genes on the rows (swap_axes=True).
mp = sc.pl.dotplot(adata, 
              GROW_R, 
              'annotation', 
              dendrogram=False, 
              #figsize=(8,10),
              use_raw=True,
              cmap = 'RdYlBu_r',
              var_group_rotation=45,
              swap_axes=True,
              standard_scale='var',
              colorbar_title='Scaled expression in var',
              return_fig=True
              # , save='_' + sample_name + '_fig11.png'
             ) 
# Add the per-group cell totals and grid lines, then render.
mp.add_totals().style(grid=True).show()

# Growth-factor ligands: same plot for the ligand list.
mp = sc.pl.dotplot(adata, 
              GROW_L, 
              'annotation', 
              dendrogram=False, 
              #figsize=(8,10),
              use_raw=True,
              cmap = 'RdYlBu_r',
              var_group_rotation=45,
              swap_axes=True,
              standard_scale='var',
              colorbar_title='Scaled expression in var',
              return_fig=True
              # , save='_' + sample_name + '_fig11.png'
             ) 
mp.add_totals().style(grid=True).show()