In [None]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
import scanpy as sc
import anndata
import bbknn
import os
from scipy import sparse
import matplotlib.pyplot as plt
# from scanpy_base_moudle_update2 import *
# import scrublet as scr
import datetime
import harmonypy as hm

# Global scanpy logging level used for the whole notebook.
sc.settings.verbosity = 3
#sc.logging.print_versions()
# Figure resolution and default plot styling.
sc.settings.set_figure_params(dpi=150, figsize = (4, 3), fontsize=12)

import matplotlib.font_manager

# Print the names of the fonts installed on this machine so that the
# 'font.family' chosen below can be checked against what is available.
# `get_fontconfig_fonts()` was deprecated in Matplotlib 3.5 and dropped in
# later releases; `findSystemFonts()` is the supported way to list font files.
flist = matplotlib.font_manager.findSystemFonts()
names = []
for fname in flist:
    try:
        names.append(matplotlib.font_manager.FontProperties(fname=fname).get_name())
    except (RuntimeError, OSError):
        # A corrupt or unreadable font file must not abort the whole listing.
        continue
print(names)

params={
        #'font.style':'italic',
        'font.weight':'normal',    # or 'bold'
        }
plt.rcParams.update(params)

plt.rcParams['font.family']='Arial'

# CellTypist

In [None]:
# Load the IMM dataset from disk.
adata = sc.read('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/IMM.h5ad')
# Rebuild a fresh AnnData from the `.raw` slot (matrix + gene table), keeping
# only the per-cell metadata; this is the object handed to CellTypist below.
# NOTE(review): presumably `.raw` holds the full-gene log-normalised matrix
# that CellTypist expects — confirm against the upstream preprocessing.
adata = sc.AnnData(X=adata.raw.X, var=adata.raw.var, obs = adata.obs)
adata

In [None]:
import celltypist
from celltypist import models

models.models_path

In [None]:
model = models.Model.load(model = 'Immune_All_Low.pkl')
#The model summary information.
model

In [None]:
#Examine cell types contained in the model.
model.cell_types

In [None]:
predictions = celltypist.annotate(adata, model = 'Immune_All_Low.pkl', majority_voting = True, mode = 'best match')

In [None]:
adata = predictions.to_adata()
adata

In [None]:
# Reload the IMM object from disk and store the CellTypist majority-voting
# labels in a new `annotation` column.
adata_imm = sc.read('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/IMM.h5ad')
# Assigning a Series aligns on the obs index, so both objects must share the
# same cell identifiers.
adata_imm.obs['annotation'] = adata.obs['majority_voting']

In [None]:
adata_imm.write('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/IMM.h5ad')

# Visualization

In [None]:
adata = sc.read('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/IMM.h5ad')
adata

In [None]:
def progress_pca_bbknn_umap_tsne(adata, n_pcs=20, batch_key='batch_name', neighbors_within_batch=3):
    """Build a batch-balanced neighbour graph with BBKNN and compute a UMAP.

    Despite its name, this function runs neither PCA nor t-SNE. BBKNN takes
    the place of the usual neighbour-graph step and requires data that has
    already been normalised and reduced by PCA.

    Parameters
    ----------
    adata
        Annotated data matrix; the scanpy calls update it in place.
    n_pcs
        Number of principal components passed to BBKNN.
    batch_key
        Name of the ``adata.obs`` column holding the batch labels.
    neighbors_within_batch
        Passed to BBKNN unchanged; defaults to the previously hard-coded 3.

    Returns
    -------
    The same ``adata`` object, for call-site convenience.
    """
    # Batch correction at the graph level (used instead of sc.pp.neighbors).
    sc.external.pp.bbknn(
        adata,
        batch_key=batch_key,
        n_pcs=n_pcs,
        neighbors_within_batch=neighbors_within_batch,
    )

    # UMAP embedding computed from the BBKNN graph.
    sc.tl.umap(adata)

    return adata

adata = progress_pca_bbknn_umap_tsne(adata)

In [None]:
# Show the category order: the palette positions below follow this order.
print(adata.obs['tissue'].cat.categories)

# Work on plain Python lists. A NumPy string array has a fixed item width, so
# assigning a colour string longer than the stored ones would be silently
# truncated; a list also gives a real copy instead of an alias.
old_colors = list(adata.uns['tissue_colors'])
new_colors = list(old_colors)

# inferior turbinate
new_colors[0] = '#ff0000'
# middle turbinate
new_colors[1] = '#0077b2'
# polyp
new_colors[2] = '#60b55c'

adata.uns['tissue_colors'] = new_colors

sc.settings.set_figure_params(dpi=400, figsize = (4, 4), fontsize=7)
sc.pl.umap(adata, color=['tissue'], frameon=False, title='')

In [None]:
sc.pl.umap(adata, color=['majority_voting'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, title='')

sc.pl.umap(adata, color=['annotation'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, title='')

In [None]:
import cosg as cosg
import time

# Marker-gene ranking per `annotation` group with COSG, timed for reference.
# time.clock() was removed in Python 3.8; perf_counter() is its replacement
# for measuring elapsed time.
t0 = time.perf_counter()
cosg.cosg(adata,
          key_added='cosg',
          mu=1,
          n_genes_user=50,
          groupby='annotation')
runtime_cosg = time.perf_counter() - t0

# Dot plot of the top 4 COSG genes per group, scaled per gene.
sc.settings.set_figure_params(dpi=200, figsize = (4, 4), fontsize=20)
sc.pl.rank_genes_groups_dotplot(adata,
                                groupby='annotation',
                                cmap='Spectral_r',
                                standard_scale='var',
                                n_genes=4,
                                key='cosg')

In [None]:
sc.pl.umap(adata, color=['EPCAM','KRT8'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False)

# Fine_tune

删除双胞

In [None]:
adata = sc.read('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/IMM.h5ad')
adata

In [None]:
# Keep every cell whose annotation is NOT one of the three non-immune labels
# (endothelial, epithelial, fibroblast); per the heading above, cells carrying
# those labels in this dataset are treated as doublets and removed.
singlet_index = adata.obs.loc[~adata.obs["annotation"].isin(['Endothelial cells','Epithelial cells','Fibroblasts']), :].index
singlet_index

In [None]:
adata = adata[singlet_index, :]
adata

In [None]:
adata.write('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/IMM.h5ad')

## T/ILCs

In [None]:
adata = sc.read('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/IMM.h5ad')
adata

In [None]:
adata.obs['annotation'].cat.categories

In [None]:
# Cell identifiers of all NK, ILC and T-cell annotations; these define the
# T/ILC subset analysed in this section.
T_index = adata.obs.loc[adata.obs["annotation"].isin(['CD16+ NK cells', 'CD16- NK cells','ILC3',
                                                      'Regulatory T cells','Tcm/Naive helper T cells','Tem/Effector helper T cells',
                                                      'Tem/Temra cytotoxic T cells','Tem/Trm cytotoxic T cells','Trm cytotoxic T cells',
                                                      'Type 1 helper T cells','Type 17 helper T cells']), :].index
T_index

In [None]:
# Take an explicit copy: the following cells write to .obsm/.obs/.uns, and
# modifying an AnnData view triggers an implicit copy with a warning.
adata = adata[T_index, :].copy()
adata

In [None]:
def progress_pca_harmony_umap(adata, batch_key='batch_name', n_pcs=20, n_neighbors=10, max_iter_harmony=20):
    """Batch-correct an existing PCA with Harmony, then build neighbours and UMAP.

    PCA is not computed here; ``adata.obsm['X_pca']`` must already exist.
    The Harmony-corrected embedding is stored in ``adata.obsm['X_pca_harmony']``
    and used as the representation for the neighbour graph.

    Parameters
    ----------
    adata
        Annotated data matrix; updated in place.
    batch_key
        ``adata.obs`` column passed to Harmony as the variable to integrate over.
    n_pcs, n_neighbors
        Forwarded to ``sc.pp.neighbors``.
    max_iter_harmony
        Forwarded to ``hm.run_harmony``.

    Returns
    -------
    The same ``adata`` object.

    Raises
    ------
    KeyError
        If ``adata.obsm['X_pca']`` is missing.
    """
    if 'X_pca' not in adata.obsm:
        raise KeyError("adata.obsm['X_pca'] is missing; run sc.tl.pca(adata) before Harmony integration")

    # Harmony batch correction on the PCA coordinates.
    harmony_out = hm.run_harmony(
        adata.obsm['X_pca'],
        adata.obs,
        vars_use=[batch_key],
        max_iter_harmony=max_iter_harmony,
    )
    # The corrected matrix is transposed before being stored in .obsm.
    adata.obsm['X_pca_harmony'] = harmony_out.Z_corr.T
    sc.pp.neighbors(adata, n_neighbors=n_neighbors, use_rep='X_pca_harmony', n_pcs=n_pcs)

    # UMAP embedding from the Harmony-based neighbour graph.
    sc.tl.umap(adata)

    return adata

adata = progress_pca_harmony_umap(adata)

In [None]:
def add_leiden(adata, resolutions=(2,)):
    """Run Leiden clustering and keep each result under its own obs column.

    For every resolution ``r`` in ``resolutions`` the labels are copied from
    ``adata.obs['leiden']`` to ``adata.obs['leiden-all-<r>']``. After the call
    ``adata.obs['leiden']`` holds the labels of the last resolution.

    Parameters
    ----------
    adata
        Annotated data matrix; updated in place.
    resolutions
        Iterable of Leiden resolutions. The default reproduces the original
        single run at resolution 2.

    Returns
    -------
    The same ``adata`` object.
    """
    for resolution in resolutions:
        print(f"Performing clustering with a resolution of {resolution}")
        sc.tl.leiden(adata, resolution=resolution)
        adata.obs[f'leiden-all-{resolution}'] = adata.obs['leiden']

    return adata

adata = add_leiden(adata)

In [None]:
sc.settings.set_figure_params(dpi=400, figsize = (4, 4), fontsize=7)
sc.pl.umap(adata, color=['tissue'], frameon=False, title='')
sc.pl.umap(adata, color=['majority_voting'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')
sc.pl.umap(adata, color=['annotation'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')
sc.pl.umap(adata, color=['leiden-all-2'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')

In [None]:
# Keep the singlets: drop Leiden clusters 23, 24 and 27.
Single_index = adata.obs.loc[~adata.obs["leiden-all-2"].isin(['23','24','27']), :].index

# Explicit copy: later cells write results into this object, and modifying an
# AnnData view triggers an implicit copy with a warning.
adata = adata[Single_index, :].copy()
adata

In [None]:
import cosg as cosg
import time

# Marker-gene ranking per Leiden cluster with COSG, timed for reference.
# time.clock() was removed in Python 3.8; perf_counter() is its replacement
# for measuring elapsed time.
t0 = time.perf_counter()
cosg.cosg(adata,
          key_added='cosg',
          mu=1,
          n_genes_user=50,
          groupby='leiden-all-2')
runtime_cosg = time.perf_counter() - t0

# Dot plot of the top 4 COSG genes per cluster, scaled per gene.
sc.settings.set_figure_params(dpi=200, figsize = (4, 4), fontsize=20)
sc.pl.rank_genes_groups_dotplot(adata,
                                groupby='leiden-all-2',
                                cmap='Spectral_r',
                                standard_scale='var',
                                n_genes=4,
                                key='cosg')

In [None]:
sc.settings.set_figure_params(dpi=400, figsize = (4, 4), fontsize=7)
sc.pl.umap(adata, color=['tissue'], frameon=False, title='')
sc.pl.umap(adata, color=['majority_voting'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')
sc.pl.umap(adata, color=['annotation'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')
sc.pl.umap(adata, color=['leiden-all-2'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')

In [None]:
import cosg as cosg
import time

# Marker-gene ranking per `annotation` group with COSG, timed for reference.
# time.clock() was removed in Python 3.8; perf_counter() is its replacement
# for measuring elapsed time.
t0 = time.perf_counter()
cosg.cosg(adata,
          key_added='cosg',
          mu=1,
          n_genes_user=50,
          groupby='annotation')
runtime_cosg = time.perf_counter() - t0

# Dot plot of the top 4 COSG genes per group, scaled per gene.
sc.settings.set_figure_params(dpi=200, figsize = (4, 4), fontsize=20)
sc.pl.rank_genes_groups_dotplot(adata,
                                groupby='annotation',
                                cmap='Spectral_r',
                                standard_scale='var',
                                n_genes=4,
                                key='cosg')

In [None]:
sc.tl.dendrogram(adata, groupby='leiden-all-2')

In [None]:
# Curated marker genes per T/NK/ILC population, shown per Leiden cluster.
# 'NK and ILC1' previously listed 'XCL1' twice; the second entry is 'XCL2',
# the gene plotted next to XCL1 in the ILC1 feature plots further below.
marker_genes_dict = {
    'Pan-marker': ['PTPRC','CD3D','CD3E'],
    'CD4/CD8': ['CD4','CD8A','CD8B'],
    'non_alpha_beta T': ['TYROBP','FCER1G','TRDC'],
    'NK and ILC1': ['GNLY','GZMA','GZMB','GZMK','IFNG','PRF1','NKG7','EOMES','CCL3','CCL4','CCL5','XCL1','XCL2'],
    'ILC2 and 3': ['AREG','DLL1','IL2RA','KIT','IL7R','IL22','IL23R','CCR6','LTB','LST1','IL4I1','CCL20'],
    'gamma_delta T': ['GFI1','MYBL1','TRDC','CCL5','ITGA1','TRAC','ENTPD1','ABI3','CD247','CD7','CDH17'],
    'Naive/Tcm': ['TCF7','LEF1','RBAK','CCR7','KLF2','SELL','NOSIP','CD27'],
    'CD4+Treg': ['FOXP3','BATF','TIGIT','CTLA4','TNFRSF4','TNFRSF18','ICA1'],
    'CD4+Type 1 cytokines Trm': ['IFNG','IL2','TNF','CXCR3','CCR9','CCL5'],
    'CD4+Type 3 cytokines Trm': ['IL17A','IL17F','IL22','CCR6','DPP4','IL23R','CCL20'],
    'CD8+Cytotoxic Trm': ['IL7R','KLRB1','S100A4','MGAT4A','FKBP11','CD9','SPINK2'],
    'CD8+CTL Tem': ['GZMK','GZMB','GZMH','KLRG1','NKG7','CD44','CCL3','CCL4','CCL5','CST7'],
    'CD8+IEL': ['KLRD1','CD7','CCL5','HOPX','ENTPD1','KLRC1','KLRC2','KLRC3','NR4A1','NR4A2','NR4A3'],
}
sc.settings.set_figure_params(dpi=150, figsize = (4, 3), fontsize=18)
# return_fig=True yields the plot object so that it can be styled before
# being shown.
mp = sc.pl.dotplot(adata,
                   marker_genes_dict,
                   'leiden-all-2',
                   dendrogram=True,
                   use_raw=True,
                   cmap='Reds',
                   var_group_rotation=45,
                   standard_scale='var',
                   colorbar_title=None,
                   return_fig=True)
mp.style(grid=True,cmap = 'Reds').show() # RdYlBu_r

In [None]:
# Feature plots (UMAP coloured by gene expression) used for manual annotation.
sc.settings.set_figure_params(dpi=150, figsize = (4, 3), fontsize=15)

# Cycling/ Naive T / Tcm
sc.pl.umap(adata, color=['MKI67','CCR7','SELL','LEF1'], frameon=False)

# Distinguish T cells from non-T cells by CD3D expression
sc.pl.umap(adata, color=['CD3D','CD4','CD8A','CD8B'], frameon=False)
# Distinguish ILC1 from gamma-delta T cells: ILC1 express these 4 markers but not CD3D; gamma-delta T cells express these markers together with CD3D
sc.pl.umap(adata, color=['KLRB1','TYROBP','FCER1G','TRDC'], frameon=False)

# NK
sc.pl.umap(adata, color=['GNLY','GZMA','GZMB','GZMK'], frameon=False)
sc.pl.umap(adata, color=['IFNG','PRF1','NKG7'], frameon=False)

In [None]:
# Feature plots for the ILC subsets, grouped by the population each marker
# set is meant to identify.
# ILC3 transcription factors
sc.pl.umap(adata, color=['LST1','IL4I1','DLL1'], frameon=False)
sc.pl.umap(adata, color=['GFI1','IKZF2','ARNTL','TCF7'], frameon=False)

# ILC1
sc.pl.umap(adata, color=['CCL3','CCL4','CCL5'], frameon=False)
sc.pl.umap(adata, color=['XCL1','XCL2'], frameon=False)
# NK-ILC1
sc.pl.umap(adata, color=['EOMES','GATA3','TBX21'], frameon=False)
sc.pl.umap(adata, color=['TOX','FCGR3A'], frameon=False)

# ILC2
sc.pl.umap(adata, color=['AREG','IL2RA','KIT','IL7R'], frameon=False)
sc.pl.umap(adata, color=['GATA3','ID2','RORA','IL13'], frameon=False)
# ILC3
sc.pl.umap(adata, color=['IL22','IL23R','IL7R'], frameon=False)
sc.pl.umap(adata, color=['AHR','ID2','RORC'], frameon=False)

In [None]:
sc.settings.set_figure_params(dpi=150, figsize = (4, 3), fontsize=15)
# CD4+Treg
sc.pl.umap(adata, color=['FOXP3','BATF','TIGIT','CTLA4','TNFRSF4','TNFRSF18','ICA1'], frameon=False)
# CD4+Tfh
sc.pl.umap(adata, color=['PDCD1','CXCR3','CXCR5','ICOS','BATF','BCL6','IL21'], frameon=False)

In [None]:
sc.pl.umap(adata, color=['CD3D','CXCL8','CXCR3','IFNG'], frameon=False)

### ILCs, gammadelta T, Th2

In [None]:
# Cells of Leiden clusters 15, 5, 12 and 19, selected for sub-clustering.
Single_index = adata.obs.loc[adata.obs["leiden-all-2"].isin(['15','5','12','19']), :].index

# Explicit copy: the subset is re-clustered and relabelled below, and
# modifying an AnnData view triggers an implicit copy with a warning.
adata_c = adata[Single_index, :].copy()
adata_c

In [None]:
    # Re-cluster the subset at resolution 2. The new labels overwrite the
    # 'leiden-all-2' column that adata_c inherited from the parent object.
    print("Performing clustering with a resolution of 2")
    sc.tl.leiden(adata_c, resolution=2)
    adata_c.obs['leiden-all-2'] = adata_c.obs['leiden']

In [None]:
sc.settings.set_figure_params(dpi=400, figsize = (4, 4), fontsize=7)

sc.pl.umap(adata_c, color=['majority_voting'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')
sc.pl.umap(adata_c, color=['annotation'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')
sc.pl.umap(adata_c, color=['leiden-all-2'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')

In [None]:
# Cell identifiers per manually assigned cell type, selected by the
# sub-clustering labels of adata_c ('leiden-all-2').
# CD8+ GNLY+ T cells
T01_index = adata_c.obs.loc[adata_c.obs["leiden-all-2"].isin(['1']), :].index
# NK cells
T02_index = adata_c.obs.loc[adata_c.obs["leiden-all-2"].isin(['0','22','4','7']), :].index
# ILC1 (no explicit cluster list: ILC1 is the default label assigned to the
# remaining cells of adata_c in a later cell)
# T03_index = adata_c.obs.loc[adata_c.obs["leiden-all-2"].isin([]), :].index
# ILC2
T04_index = adata_c.obs.loc[adata_c.obs["leiden-all-2"].isin(['17','19']), :].index
# ILC3
T05_index = adata_c.obs.loc[adata_c.obs["leiden-all-2"].isin(['14','11']), :].index
# CD4+Th2
T06_index = adata_c.obs.loc[adata_c.obs["leiden-all-2"].isin(['9']), :].index

In [None]:
# Cycling T
T07_index = adata.obs.loc[adata.obs["leiden-all-2"].isin(['25']), :].index

In [None]:
# 'ILC1' is the default label; the listed index sets overwrite it.
adata_c.obs['annotation'] = 'ILC1'
# `.at` is a scalar accessor; `.loc` is the documented way to assign one value
# to many row labels at once.
adata_c.obs.loc[T01_index, 'annotation'] = 'CD8+ GNLY+ T cells'
adata_c.obs.loc[T02_index, 'annotation'] = 'NK cells'
# ILC1 needs no explicit assignment: it is the default set above.
adata_c.obs.loc[T04_index, 'annotation'] = 'ILC2'
adata_c.obs.loc[T05_index, 'annotation'] = 'ILC3'
adata_c.obs.loc[T06_index, 'annotation'] = 'CD4+Th2'

In [None]:
T03_index = adata_c.obs.loc[adata_c.obs["annotation"].isin(['ILC1']), :].index

In [None]:
sc.pl.umap(adata_c, color=['annotation'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False)

In [None]:
adata.obs['ann'] = adata.obs['majority_voting'].to_list()

In [None]:
# Transfer the sub-cluster labels to the parent object's 'ann' column.
# `.at` is a scalar accessor; `.loc` is the documented way to assign one value
# to many row labels at once.
adata.obs.loc[T01_index, 'ann'] = 'CD8+ GNLY+ T cells'
adata.obs.loc[T02_index, 'ann'] = 'NK cells'
adata.obs.loc[T03_index, 'ann'] = 'ILC1'
adata.obs.loc[T04_index, 'ann'] = 'ILC2'
adata.obs.loc[T05_index, 'ann'] = 'ILC3'
adata.obs.loc[T06_index, 'ann'] = 'CD4+Th2'
adata.obs.loc[T07_index, 'ann'] = 'Cycling T'

In [None]:
sc.pl.umap(adata, color=['ann'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False)

In [None]:
# Feature plots on the sub-clustered object (adata_c).
# Distinguish T cells from non-T cells by CD3D expression
sc.pl.umap(adata_c, color=['CD3D','CD4','CD8A','CD8B'], frameon=False)
# Distinguish ILC1 from gamma-delta T cells: ILC1 express these 4 markers but not CD3D; gamma-delta T cells express these markers together with CD3D
sc.pl.umap(adata_c, color=['KLRB1','TYROBP','FCER1G','TRDC'], frameon=False)

# NK
sc.pl.umap(adata_c, color=['GNLY','GZMA','GZMB','GZMK'], frameon=False)
sc.pl.umap(adata_c, color=['IFNG','PRF1','NKG7','EOMES'], frameon=False)

# ILC1
sc.pl.umap(adata_c, color=['CCL3','CCL4','CCL5'], frameon=False)
sc.pl.umap(adata_c, color=['XCL1','XCL2'], frameon=False)
# NK-ILC1
sc.pl.umap(adata_c, color=['EOMES','GATA3','TBX21'], frameon=False)
sc.pl.umap(adata_c, color=['TOX','FCGR3A'], frameon=False)

# ILC2
sc.pl.umap(adata_c, color=['AREG','IL2RA','KIT','IL7R'], frameon=False)
sc.pl.umap(adata_c, color=['GATA3','ID2','RORA','IL13'], frameon=False)
# ILC3
sc.pl.umap(adata_c, color=['IL22','IL23R','IL7R','LST1'], frameon=False)
sc.pl.umap(adata_c, color=['AHR','ID2','RORC','IL4I1','DLL1'], frameon=False)

### CD4+

In [None]:
sc.pl.umap(adata, color=['ann'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False)

In [None]:
sc.settings.set_figure_params(dpi=400, figsize = (4, 4), fontsize=7)
sc.pl.umap(adata, color=['leiden-all-2'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')

In [None]:
# Cells of Leiden clusters 2, 4, 6, 7 and 26, selected for CD4+ sub-clustering.
Single_index = adata.obs.loc[adata.obs["leiden-all-2"].isin(['2','4','6','7','26']), :].index

# Explicit copy: the subset is re-clustered and relabelled below, and
# modifying an AnnData view triggers an implicit copy with a warning.
adata_c = adata[Single_index, :].copy()
adata_c

In [None]:
    # Re-cluster the CD4+ subset at resolution 1 and keep the labels in their
    # own column so that the parent's 'leiden-all-2' labels stay available.
    print("Performing clustering with a resolution of 1")
    sc.tl.leiden(adata_c, resolution=1)
    adata_c.obs['leiden-all-1'] = adata_c.obs['leiden']

In [None]:
sc.settings.set_figure_params(dpi=400, figsize = (4, 4), fontsize=7)

sc.pl.umap(adata_c, color=['majority_voting'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')
sc.pl.umap(adata_c, color=['annotation'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')
sc.pl.umap(adata_c, color=['leiden-all-1'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')

In [None]:
sc.tl.dendrogram(adata_c, groupby='leiden-all-1')

In [None]:
# Curated marker genes per T-cell population, shown per sub-cluster
# ('leiden-all-1') of the CD4+ subset.
marker_genes_dict = {'Pan-marker': ['PTPRC','CD3D','CD3E'],
    'CD4/CD8': ['CD4','CD8A','CD8B'],                   
    'Naive/Tcm': ['TCF7','LEF1','RBAK','CCR7','KLF2','SELL','NOSIP','CD27'], 
    'CD4+Treg': ['FOXP3','BATF','TIGIT','CTLA4','TNFRSF4','TNFRSF18','ICA1'],
    'CD4+Type 1 cytokines Trm': ['IFNG','IL2','TNF','CXCR3','CCR9','CCL5'],
    'CD4+Type 3 cytokines Trm': ['IL17A','IL17F','IL22','CCR6','DPP4','IL23R','CCL20'], 
    'CD8+Cytotoxic Trm': ['IL7R','KLRB1','S100A4','MGAT4A','FKBP11','CD9','SPINK2'],
    'CD8+CTL Tem': ['GZMK','GZMB','GZMH','KLRG1','NKG7','CD44','CCL3','CCL4','CCL5','CST7'],
    'CD8+IEL': ['KLRD1','CD7','CCL5','HOPX','ENTPD1','KLRC1','KLRC2','KLRC3','NR4A1','NR4A2','NR4A3'],
}
sc.settings.set_figure_params(dpi=150, figsize = (4, 3), fontsize=18)
# return_fig=True yields the plot object so that it can be styled before
# being shown.
mp = sc.pl.dotplot(adata_c, 
              marker_genes_dict, 
              'leiden-all-1', 
              dendrogram=True, 
              #figsize=(3, 4),
              use_raw=True,
              cmap = 'Reds',
              var_group_rotation=45,
              #swap_axes=True,
              standard_scale='var',
              colorbar_title=None,
              return_fig=True
              # , save='_' + sample_name + '_fig11.png'
             ) 
mp.style(grid=True,cmap = 'Reds').show() # RdYlBu_r

In [None]:
# Cell identifiers per manually assigned CD4+ population, selected by the
# sub-clustering labels of adata_c ('leiden-all-1').
# CD4+ Naive T
T08_index = adata_c.obs.loc[adata_c.obs["leiden-all-1"].isin(['1','10']), :].index
# CD4+ Treg
T09_index = adata_c.obs.loc[adata_c.obs["leiden-all-1"].isin(['2']), :].index
# CD4+ Trm
T10_index = adata_c.obs.loc[adata_c.obs["leiden-all-1"].isin(['0','3','4','5','6','7','9','11']), :].index
# CD4+ Th17
T11_index = adata_c.obs.loc[adata_c.obs["leiden-all-1"].isin(['8']), :].index

In [None]:
# 'CD4+ Naive T' is the default label; the listed index sets overwrite it.
adata_c.obs['annotation'] = 'CD4+ Naive T'
# `.at` is a scalar accessor; `.loc` is the documented way to assign one value
# to many row labels at once.
adata_c.obs.loc[T09_index, 'annotation'] = 'CD4+ Treg'
adata_c.obs.loc[T10_index, 'annotation'] = 'CD4+ Trm'
adata_c.obs.loc[T11_index, 'annotation'] = 'CD4+ Th17'

In [None]:
sc.pl.umap(adata_c, color=['annotation'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False)

In [None]:
# Start from the CellTypist labels as plain strings (not categoricals) so that
# new label values can be assigned freely.
adata.obs['ann'] = adata.obs['majority_voting'].to_list()

# `.at` is a scalar accessor; `.loc` is the documented way to assign one value
# to many row labels at once.
adata.obs.loc[T01_index, 'ann'] = 'CD8+ GNLY+ T cells'
adata.obs.loc[T02_index, 'ann'] = 'NK cells'
adata.obs.loc[T03_index, 'ann'] = 'ILC1'
adata.obs.loc[T04_index, 'ann'] = 'ILC2'
adata.obs.loc[T05_index, 'ann'] = 'ILC3'
adata.obs.loc[T06_index, 'ann'] = 'CD4+Th2'
adata.obs.loc[T07_index, 'ann'] = 'Cycling T'
adata.obs.loc[T08_index, 'ann'] = 'CD4+ Naive T'
adata.obs.loc[T09_index, 'ann'] = 'CD4+ Treg'
adata.obs.loc[T10_index, 'ann'] = 'CD4+ Trm'
adata.obs.loc[T11_index, 'ann'] = 'CD4+ Th17'

In [None]:
sc.settings.set_figure_params(dpi=400, figsize = (4, 4), fontsize=7)
sc.pl.umap(adata, color=['ann'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False)

In [None]:
sc.pl.umap(adata, color=['CD3D','CD4','CD8A','CD8B'], frameon=False)
sc.pl.umap(adata, color=['pct_counts_mt','pct_counts_rp'], frameon=False)

In [None]:
sc.pl.umap(adata, color=['leiden-all-2'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')

In [None]:
adata

In [None]:
# Remove the cluster with excessive mitochondrial gene expression (cluster 18).
Single_index = adata.obs.loc[~adata.obs["leiden-all-2"].isin(['18']), :].index

# Explicit copy: the following cells write new obs columns, and modifying an
# AnnData view triggers an implicit copy with a warning.
adata = adata[Single_index, :].copy()
adata

In [None]:
# Final T/ILC labels. 'C02-T10-CD8+ T cells' is the default for every cell not
# covered by one of the index sets below.
adata.obs['ann'] = 'C02-T10-CD8+ T cells'

# `.at` is a scalar accessor; `.loc` is the documented way to assign one value
# to many row labels at once.
adata.obs.loc[T01_index, 'ann'] = 'C02-T11-CD8+ GNLY+ T cells'
adata.obs.loc[T02_index, 'ann'] = 'C02-T01-NK cells'
adata.obs.loc[T03_index, 'ann'] = 'C02-T02-ILC1'
adata.obs.loc[T04_index, 'ann'] = 'C02-T03-ILC2'
adata.obs.loc[T05_index, 'ann'] = 'C02-T04-ILC3'
adata.obs.loc[T06_index, 'ann'] = 'C02-T07-CD4+Th2'
adata.obs.loc[T07_index, 'ann'] = 'C02-T12-Cycling T'
adata.obs.loc[T08_index, 'ann'] = 'C02-T05-CD4+ Naive T'
adata.obs.loc[T09_index, 'ann'] = 'C02-T06-CD4+ Treg'
adata.obs.loc[T10_index, 'ann'] = 'C02-T09-CD4+ Trm'
adata.obs.loc[T11_index, 'ann'] = 'C02-T08-CD4+ Th17'

In [None]:
adata.obs['annotation'] = adata.obs['ann']

In [None]:
sc.pl.umap(adata, color=['annotation'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')

In [None]:
adata.write('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/T_annotation.h5ad')

## B cell

In [None]:
adata = sc.read('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/IMM.h5ad')
adata

In [None]:
adata.obs['annotation'].cat.categories

In [None]:
B_index = adata.obs.loc[adata.obs["annotation"].isin(['Memory B cells','Naive B cells','Plasma cells']), :].index
B_index

In [None]:
# Take an explicit copy: the following cells write to .obsm/.obs/.uns, and
# modifying an AnnData view triggers an implicit copy with a warning.
adata = adata[B_index, :].copy()
adata

In [None]:
def progress_pca_harmony_umap(adata, batch_key='batch_name', n_pcs=20, n_neighbors=10, max_iter_harmony=20):
    """Batch-correct an existing PCA with Harmony, then build neighbours and UMAP.

    PCA is not computed here; ``adata.obsm['X_pca']`` must already exist.
    The Harmony-corrected embedding is stored in ``adata.obsm['X_pca_harmony']``
    and used as the representation for the neighbour graph.

    Parameters
    ----------
    adata
        Annotated data matrix; updated in place.
    batch_key
        ``adata.obs`` column passed to Harmony as the variable to integrate over.
    n_pcs, n_neighbors
        Forwarded to ``sc.pp.neighbors``.
    max_iter_harmony
        Forwarded to ``hm.run_harmony``.

    Returns
    -------
    The same ``adata`` object.

    Raises
    ------
    KeyError
        If ``adata.obsm['X_pca']`` is missing.
    """
    if 'X_pca' not in adata.obsm:
        raise KeyError("adata.obsm['X_pca'] is missing; run sc.tl.pca(adata) before Harmony integration")

    # Harmony batch correction on the PCA coordinates.
    harmony_out = hm.run_harmony(
        adata.obsm['X_pca'],
        adata.obs,
        vars_use=[batch_key],
        max_iter_harmony=max_iter_harmony,
    )
    # The corrected matrix is transposed before being stored in .obsm.
    adata.obsm['X_pca_harmony'] = harmony_out.Z_corr.T
    sc.pp.neighbors(adata, n_neighbors=n_neighbors, use_rep='X_pca_harmony', n_pcs=n_pcs)

    # UMAP embedding from the Harmony-based neighbour graph.
    sc.tl.umap(adata)

    return adata

adata = progress_pca_harmony_umap(adata)

In [None]:
def add_leiden(adata, resolutions=(1,)):
    """Run Leiden clustering and keep each result under its own obs column.

    For every resolution ``r`` in ``resolutions`` the labels are copied from
    ``adata.obs['leiden']`` to ``adata.obs['leiden-all-<r>']``. After the call
    ``adata.obs['leiden']`` holds the labels of the last resolution.

    The log message is derived from the resolution actually used; it
    previously announced resolution 2 while clustering at resolution 1.

    Parameters
    ----------
    adata
        Annotated data matrix; updated in place.
    resolutions
        Iterable of Leiden resolutions. The default reproduces the original
        single run at resolution 1.

    Returns
    -------
    The same ``adata`` object.
    """
    for resolution in resolutions:
        print(f"Performing clustering with a resolution of {resolution}")
        sc.tl.leiden(adata, resolution=resolution)
        adata.obs[f'leiden-all-{resolution}'] = adata.obs['leiden']

    return adata

adata = add_leiden(adata)

In [None]:
sc.settings.set_figure_params(dpi=400, figsize = (4, 4), fontsize=7)
sc.pl.umap(adata, color=['tissue'], frameon=False, title='')
sc.pl.umap(adata, color=['majority_voting'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')
sc.pl.umap(adata, color=['annotation'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')
sc.pl.umap(adata, color=['leiden-all-1'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')

In [None]:
sc.pl.umap(adata, color=['pct_counts_mt','pct_counts_rp'], frameon=False)

In [None]:
import cosg as cosg
import time

# Marker-gene ranking per `annotation` group with COSG, timed for reference.
# time.clock() was removed in Python 3.8; perf_counter() is its replacement
# for measuring elapsed time.
t0 = time.perf_counter()
cosg.cosg(adata,
          key_added='cosg',
          mu=1,
          n_genes_user=50,
          groupby='annotation')
runtime_cosg = time.perf_counter() - t0

# Dot plot of the top 8 COSG genes per group, scaled per gene.
sc.settings.set_figure_params(dpi=200, figsize = (4, 4), fontsize=20)
sc.pl.rank_genes_groups_dotplot(adata,
                                groupby='annotation',
                                cmap='Spectral_r',
                                standard_scale='var',
                                n_genes=8,
                                key='cosg')

In [None]:
import cosg as cosg
import time

# Marker-gene ranking per Leiden cluster with COSG, timed for reference.
# time.clock() was removed in Python 3.8; perf_counter() is its replacement
# for measuring elapsed time.
t0 = time.perf_counter()
cosg.cosg(adata,
          key_added='cosg',
          mu=1,
          n_genes_user=50,
          groupby='leiden-all-1')
runtime_cosg = time.perf_counter() - t0

# Dot plot of the top 4 COSG genes per cluster, scaled per gene.
sc.settings.set_figure_params(dpi=200, figsize = (4, 4), fontsize=20)
sc.pl.rank_genes_groups_dotplot(adata,
                                groupby='leiden-all-1',
                                cmap='Spectral_r',
                                standard_scale='var',
                                n_genes=4,
                                key='cosg')

In [None]:
# Leiden cluster 12 is relabelled as cycling B cells.
B01_index = adata.obs.loc[adata.obs["leiden-all-1"].isin(['12']), :].index

# Plain strings (not categoricals) so that a new label value can be assigned.
adata.obs['ann'] = adata.obs['annotation'].to_list()

# `.at` is a scalar accessor; `.loc` is the documented way to assign one value
# to many row labels at once.
adata.obs.loc[B01_index, 'ann'] = 'Cycling B cells'

In [None]:
sc.settings.set_figure_params(dpi=400, figsize = (4, 4), fontsize=7)
sc.pl.umap(adata, color=['ann'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')

In [None]:
# Cell identifiers per B-cell population, taken from the current 'ann' labels.
# Cycling B
B01_index = adata.obs.loc[adata.obs["ann"].isin(['Cycling B cells']), :].index
# Naive B cells (the next cell uses the naive label as its default, so this
# index is not applied explicitly there)
B02_index = adata.obs.loc[adata.obs["ann"].isin(['Naive B cells']), :].index
# Memory B cells
B03_index = adata.obs.loc[adata.obs["ann"].isin(['Memory B cells']), :].index
# Plasma cells
B04_index = adata.obs.loc[adata.obs["ann"].isin(['Plasma cells']), :].index

In [None]:
# 'C03-B01-Naive B cells' is the default; the index sets below overwrite it.
adata.obs['ann'] = 'C03-B01-Naive B cells'

# `.at` is a scalar accessor; `.loc` is the documented way to assign one value
# to many row labels at once.
adata.obs.loc[B03_index, 'ann'] = 'C03-B02-Memory B cells'
adata.obs.loc[B01_index, 'ann'] = 'C03-B03-Cycling B'
adata.obs.loc[B04_index, 'ann'] = 'C03-B04-Plasma cells'

In [None]:
adata.obs['annotation'] = adata.obs['ann']

sc.settings.set_figure_params(dpi=400, figsize = (4, 4), fontsize=7)
sc.pl.umap(adata, color=['annotation'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')

In [None]:
adata.write('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/B_annotation.h5ad')

In [None]:
adata = sc.read('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/B_annotation.h5ad')
adata

In [None]:
sc.pl.umap(adata, color=['IGHA1','IGHG1','IGHGP','IGHD'], frameon=False)

In [None]:
sc.pl.umap(adata, color=['leiden-all-1'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')
sc.pl.umap(adata, color=['annotation'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')

In [None]:
# Cell identifiers per B-cell population, taken from the saved 'ann' labels,
# plus an IgG plasma set picked by Leiden cluster.
# Cycling B
B01_index = adata.obs.loc[adata.obs["ann"].isin(['C03-B03-Cycling B']), :].index
# Naive B cells
B02_index = adata.obs.loc[adata.obs["ann"].isin(['C03-B01-Naive B cells']), :].index
# Memory B cells
B03_index = adata.obs.loc[adata.obs["ann"].isin(['C03-B02-Memory B cells']), :].index
# IgA Plasma cells
B04_index = adata.obs.loc[adata.obs["ann"].isin(['C03-B04-Plasma cells']), :].index
# IgG Plasma (selected by cluster; applied after B04 in the next cell, so it
# overrides the IgA label wherever the two sets overlap)
B05_index = adata.obs.loc[adata.obs["leiden-all-1"].isin(['10','5','7']), :].index

In [None]:
# 'C03-B01-Naive B cells' is the default; the index sets below overwrite it.
adata.obs['ann'] = 'C03-B01-Naive B cells'

# `.at` is a scalar accessor; `.loc` is the documented way to assign one value
# to many row labels at once.
adata.obs.loc[B03_index, 'ann'] = 'C03-B02-Memory B cells'
adata.obs.loc[B01_index, 'ann'] = 'C03-B03-Cycling B'
adata.obs.loc[B04_index, 'ann'] = 'C03-B04-IgA Plasma cells'
# Applied last on purpose: IgG clusters override the IgA plasma label.
adata.obs.loc[B05_index, 'ann'] = 'C03-B05-IgG Plasma cells'

In [None]:
adata.obs['annotation'] = adata.obs['ann']

sc.settings.set_figure_params(dpi=400, figsize = (4, 4), fontsize=7)
sc.pl.umap(adata, color=['annotation'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')

In [None]:
adata.write('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/B_annotation.h5ad')

## Myeloid

In [None]:
adata = sc.read('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/IMM.h5ad')
adata

In [None]:
adata.obs['annotation'].cat.categories

In [None]:
# Cell identifiers of all macrophage, monocyte and dendritic-cell annotations;
# these define the myeloid subset analysed in this section.
M_index = adata.obs.loc[adata.obs["annotation"].isin(['Alveolar macrophages','Classical monocytes','DC1','DC2',
                                                      'Erythrophagocytic macrophages','Intermediate macrophages','Intestinal macrophages','Macrophages',
                                                      'Migratory DCs','pDC']), :].index
M_index

In [None]:
# Take an explicit copy: the following cells write to .obsm/.obs/.uns, and
# modifying an AnnData view triggers an implicit copy with a warning.
adata = adata[M_index, :].copy()
adata

In [None]:
def progress_pca_harmony_umap(adata, batch_key='batch_name', n_pcs=20, n_neighbors=10, max_iter_harmony=20):
    """Batch-correct an existing PCA with Harmony, then build neighbours and UMAP.

    PCA is not computed here; ``adata.obsm['X_pca']`` must already exist.
    The Harmony-corrected embedding is stored in ``adata.obsm['X_pca_harmony']``
    and used as the representation for the neighbour graph.

    Parameters
    ----------
    adata
        Annotated data matrix; updated in place.
    batch_key
        ``adata.obs`` column passed to Harmony as the variable to integrate over.
    n_pcs, n_neighbors
        Forwarded to ``sc.pp.neighbors``.
    max_iter_harmony
        Forwarded to ``hm.run_harmony``.

    Returns
    -------
    The same ``adata`` object.

    Raises
    ------
    KeyError
        If ``adata.obsm['X_pca']`` is missing.
    """
    if 'X_pca' not in adata.obsm:
        raise KeyError("adata.obsm['X_pca'] is missing; run sc.tl.pca(adata) before Harmony integration")

    # Harmony batch correction on the PCA coordinates.
    harmony_out = hm.run_harmony(
        adata.obsm['X_pca'],
        adata.obs,
        vars_use=[batch_key],
        max_iter_harmony=max_iter_harmony,
    )
    # The corrected matrix is transposed before being stored in .obsm.
    adata.obsm['X_pca_harmony'] = harmony_out.Z_corr.T
    sc.pp.neighbors(adata, n_neighbors=n_neighbors, use_rep='X_pca_harmony', n_pcs=n_pcs)

    # UMAP embedding from the Harmony-based neighbour graph.
    sc.tl.umap(adata)

    return adata

adata = progress_pca_harmony_umap(adata)

In [None]:
def add_leiden(adata, resolutions=(1,)):
    """Run Leiden clustering and keep each result under its own obs column.

    For every resolution ``r`` in ``resolutions`` the labels are copied from
    ``adata.obs['leiden']`` to ``adata.obs['leiden-all-<r>']``. After the call
    ``adata.obs['leiden']`` holds the labels of the last resolution.

    The log message is derived from the resolution actually used; it
    previously announced resolution 2 while clustering at resolution 1.

    Parameters
    ----------
    adata
        Annotated data matrix; updated in place.
    resolutions
        Iterable of Leiden resolutions. The default reproduces the original
        single run at resolution 1.

    Returns
    -------
    The same ``adata`` object.
    """
    for resolution in resolutions:
        print(f"Performing clustering with a resolution of {resolution}")
        sc.tl.leiden(adata, resolution=resolution)
        adata.obs[f'leiden-all-{resolution}'] = adata.obs['leiden']

    return adata

adata = add_leiden(adata)

In [None]:
sc.settings.set_figure_params(dpi=400, figsize = (4, 4), fontsize=7)
sc.pl.umap(adata, color=['tissue'], frameon=False, title='')
sc.pl.umap(adata, color=['majority_voting'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')
sc.pl.umap(adata, color=['annotation'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')
sc.pl.umap(adata, color=['leiden-all-1'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')

In [None]:
import cosg as cosg
import time

# Marker-gene ranking per `annotation` group with COSG, timed for reference.
# time.clock() was removed in Python 3.8; perf_counter() is its replacement
# for measuring elapsed time.
t0 = time.perf_counter()
cosg.cosg(adata,
          key_added='cosg',
          mu=1,
          n_genes_user=50,
          groupby='annotation')
runtime_cosg = time.perf_counter() - t0

# Dot plot of the top 8 COSG genes per group, scaled per gene.
sc.settings.set_figure_params(dpi=200, figsize = (4, 4), fontsize=20)
sc.pl.rank_genes_groups_dotplot(adata,
                                groupby='annotation',
                                cmap='Spectral_r',
                                standard_scale='var',
                                n_genes=8,
                                key='cosg')

In [None]:
# Cell identifiers per myeloid population. Each comment gives the final name;
# the isin() list gives the existing annotation label(s) it is built from.
# Macrophages
M01_index = adata.obs.loc[adata.obs["annotation"].isin(['Macrophages']), :].index
# CCL18+ Macrophages
M02_index = adata.obs.loc[adata.obs["annotation"].isin(['Erythrophagocytic macrophages']), :].index
# CCL13+ Macrophages
M03_index = adata.obs.loc[adata.obs["annotation"].isin(['Intestinal macrophages']), :].index
# CXCL8+ Macrophages
M04_index = adata.obs.loc[adata.obs["annotation"].isin(['Alveolar macrophages','Intermediate macrophages']), :].index

# Monocytes
M05_index = adata.obs.loc[adata.obs["annotation"].isin(['Classical monocytes']), :].index
# DC1
M06_index = adata.obs.loc[adata.obs["annotation"].isin(['DC1']), :].index
# DC2
M07_index = adata.obs.loc[adata.obs["annotation"].isin(['DC2']), :].index
# Migratory DCs
M08_index = adata.obs.loc[adata.obs["annotation"].isin(['Migratory DCs']), :].index
# pDC
M09_index = adata.obs.loc[adata.obs["annotation"].isin(['pDC']), :].index

In [None]:
# 'C04-M01-Macrophages' is the default; the index sets below overwrite it.
adata.obs['ann'] = 'C04-M01-Macrophages'

# `.at` is a scalar accessor; `.loc` is the documented way to assign one value
# to many row labels at once.
adata.obs.loc[M02_index, 'ann'] = 'C04-M02-CCL18+ Macrophages'
adata.obs.loc[M03_index, 'ann'] = 'C04-M03-CCL13+ Macrophages'
adata.obs.loc[M04_index, 'ann'] = 'C04-M04-CXCL8+ Macrophages'

adata.obs.loc[M05_index, 'ann'] = 'C04-M05-Monocytes'
adata.obs.loc[M06_index, 'ann'] = 'C04-M06-DC1'
adata.obs.loc[M07_index, 'ann'] = 'C04-M07-DC2'

adata.obs.loc[M08_index, 'ann'] = 'C04-M08-Migratory DCs'
adata.obs.loc[M09_index, 'ann'] = 'C04-M09-pDC'

In [None]:
adata.obs['annotation'] = adata.obs['ann']

sc.settings.set_figure_params(dpi=400, figsize = (4, 4), fontsize=7)
sc.pl.umap(adata, color=['annotation'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')

In [None]:
adata.write('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/M_annotation.h5ad')

## Mast

In [None]:
adata = sc.read('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/IMM.h5ad')
adata

In [None]:
adata.obs['annotation'].cat.categories

In [None]:
Ma_index = adata.obs.loc[adata.obs["annotation"].isin(['Mast cells']), :].index
Ma_index

In [None]:
# Take an explicit copy: the following cells write to .obsm/.obs/.uns, and
# modifying an AnnData view triggers an implicit copy with a warning.
adata = adata[Ma_index, :].copy()
adata

In [None]:
def progress_pca_harmony_umap(adata, n_pcs=20, batch_key='batch_name',
                              n_neighbors=10, max_iter_harmony=20):
    """Batch-correct an existing PCA with Harmony, then build neighbours and a UMAP.

    PCA is not recomputed here: the function reads ``adata.obsm['X_pca'] ``
    as-is, so that embedding must already exist on ``adata``.

    Parameters
    ----------
    adata
        Annotated data object; modified in place and also returned.
    n_pcs
        Number of corrected components passed to ``sc.pp.neighbors``.
    batch_key
        Column of ``adata.obs`` that Harmony integrates over.
    n_neighbors
        Neighbourhood size passed to ``sc.pp.neighbors``.
    max_iter_harmony
        Iteration cap passed to ``hm.run_harmony``.

    Returns
    -------
    The same ``adata``, with ``obsm['X_pca_harmony']`` added and the
    neighbour graph and UMAP computed on it.
    """
    # sc.tl.pca(adata, svd_solver='arpack')

    # Harmony batch-effect correction on the precomputed PCA.
    harmony_out = hm.run_harmony(
        adata.obsm['X_pca'],
        adata.obs,
        vars_use=[batch_key],
        max_iter_harmony=max_iter_harmony,
    )
    # Z_corr is transposed before storage -- presumably to get cells on the
    # rows; confirm against the installed harmonypy version.
    adata.obsm['X_pca_harmony'] = harmony_out.Z_corr.T
    sc.pp.neighbors(adata, n_neighbors=n_neighbors, use_rep='X_pca_harmony', n_pcs=n_pcs)

    # UMAP on the Harmony-corrected neighbour graph.
    sc.tl.umap(adata)
    # sc.tl.tsne(adata, n_jobs=6)

    return adata


adata = progress_pca_harmony_umap(adata)

In [None]:
def add_leiden(adata):
    """Run Leiden clustering at resolutions 0.5 and 1 and keep both results.

    Each run writes ``adata.obs['leiden']``; the result is then copied to
    ``'leiden-all-0.5'`` and ``'leiden-all-1'`` respectively. After the call,
    ``'leiden'`` holds the resolution-1 clustering. Returns the same ``adata``.
    """
    for resolution in (0.5, 1):
        print("Performing clustering with a resolution of {}".format(resolution))
        sc.tl.leiden(adata, resolution=resolution)
        adata.obs['leiden-all-{}'.format(resolution)] = adata.obs['leiden']

    return adata


adata = add_leiden(adata)

In [None]:
sc.settings.set_figure_params(dpi=400, figsize=(4, 4), fontsize=7)

# Tissue of origin first, without outline or on-data legend.
sc.pl.umap(adata, color=['tissue'], frameon=False, title='')

# Then one outlined UMAP per label column, all with the same layout.
for obs_key in ('majority_voting', 'annotation', 'leiden-all-0.5'):
    sc.pl.umap(
        adata,
        color=[obs_key],
        add_outline=True,
        outline_width=(0.2, 0.05),
        palette="tab20",
        frameon=False,
        legend_loc='on data',
    )

In [None]:
import cosg as cosg
import time

# Marker-gene search per 'leiden-all-0.5' cluster, timed.
# time.clock() was removed in Python 3.8; time.perf_counter() is the
# documented replacement for measuring elapsed time.
t0 = time.perf_counter()
cosg.cosg(
    adata,
    key_added='cosg',
    mu=1,
    n_genes_user=50,
    groupby='leiden-all-0.5',
)
runtime_cosg = time.perf_counter() - t0

# Dot plot of the top 8 genes per cluster, read from the 'cosg' results key.
sc.settings.set_figure_params(dpi=200, figsize=(4, 4), fontsize=20)
sc.pl.rank_genes_groups_dotplot(
    adata,
    groupby='leiden-all-0.5',
    cmap='Spectral_r',
    standard_scale='var',
    n_genes=8,
    key='cosg',
)

In [None]:
# get the Singles
Single_index = adata.obs.loc[adata.obs["leiden-all-0.5"].isin(['0','1','2','3','4','7']), :].index

adata = adata[Single_index, :]
adata

In [None]:
# Expression of four genes of interest on the mast-cell UMAP.
sc.pl.umap(adata, color=['GPR42','FFAR3','IL5','IL13'], frameon=False)

In [None]:
# Give every remaining cell the single label 'C05-Mast cell'.
adata.obs['annotation'] = 'C05-Mast cell'

In [None]:
# Save the mast-cell object; the Merge section reads it back as adata_Mast.
adata.write('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/Mast_annotation.h5ad')

# Merge

In [None]:
# Load the per-lineage annotated objects (T, B, M, Mast) whose labels are merged below.
adata_T = sc.read('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/T_annotation.h5ad')
adata_B = sc.read('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/B_annotation.h5ad')
adata_M = sc.read('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/M_annotation.h5ad')
adata_Mast = sc.read('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/Mast_annotation.h5ad')

In [None]:
# Reload IMM.h5ad as the object that receives the merged labels; the bare name displays its summary.
adata = sc.read('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/IMM.h5ad')
adata

In [None]:
# Barcodes per T/ILC label, taken from adata_T. The variable numbers
# (T01..T12) do not follow the numbers inside the labels; the pairing used
# here is the one the label-assignment cell relies on.
T01_index = adata_T.obs.index[adata_T.obs["annotation"].isin(['C02-T11-CD8+ GNLY+ T cells'])]
T02_index = adata_T.obs.index[adata_T.obs["annotation"].isin(['C02-T01-NK cells'])]
T03_index = adata_T.obs.index[adata_T.obs["annotation"].isin(['C02-T02-ILC1'])]
T04_index = adata_T.obs.index[adata_T.obs["annotation"].isin(['C02-T03-ILC2'])]
T05_index = adata_T.obs.index[adata_T.obs["annotation"].isin(['C02-T04-ILC3'])]
T06_index = adata_T.obs.index[adata_T.obs["annotation"].isin(['C02-T07-CD4+Th2'])]
T07_index = adata_T.obs.index[adata_T.obs["annotation"].isin(['C02-T12-Cycling T'])]
T08_index = adata_T.obs.index[adata_T.obs["annotation"].isin(['C02-T05-CD4+ Naive T'])]
T09_index = adata_T.obs.index[adata_T.obs["annotation"].isin(['C02-T06-CD4+ Treg'])]
T10_index = adata_T.obs.index[adata_T.obs["annotation"].isin(['C02-T09-CD4+ Trm'])]
T11_index = adata_T.obs.index[adata_T.obs["annotation"].isin(['C02-T08-CD4+ Th17'])]
T12_index = adata_T.obs.index[adata_T.obs["annotation"].isin(['C02-T10-CD8+ T cells'])]

In [None]:
# Barcodes per B-lineage label, taken from adata_B.
B01_index = adata_B.obs.index[adata_B.obs["annotation"].isin(['C03-B01-Naive B cells'])]
B02_index = adata_B.obs.index[adata_B.obs["annotation"].isin(['C03-B02-Memory B cells'])]
B03_index = adata_B.obs.index[adata_B.obs["annotation"].isin(['C03-B03-Cycling B'])]
B04_index = adata_B.obs.index[adata_B.obs["annotation"].isin(['C03-B04-Plasma cells'])]

In [None]:
# Barcodes per myeloid label, taken from adata_M.
M01_index = adata_M.obs.index[adata_M.obs["annotation"].isin(['C04-M01-Macrophages'])]
M02_index = adata_M.obs.index[adata_M.obs["annotation"].isin(['C04-M02-CCL18+ Macrophages'])]
M03_index = adata_M.obs.index[adata_M.obs["annotation"].isin(['C04-M03-CCL13+ Macrophages'])]
M04_index = adata_M.obs.index[adata_M.obs["annotation"].isin(['C04-M04-CXCL8+ Macrophages'])]
M05_index = adata_M.obs.index[adata_M.obs["annotation"].isin(['C04-M05-Monocytes'])]
M06_index = adata_M.obs.index[adata_M.obs["annotation"].isin(['C04-M06-DC1'])]
M07_index = adata_M.obs.index[adata_M.obs["annotation"].isin(['C04-M07-DC2'])]
M08_index = adata_M.obs.index[adata_M.obs["annotation"].isin(['C04-M08-Migratory DCs'])]
M09_index = adata_M.obs.index[adata_M.obs["annotation"].isin(['C04-M09-pDC'])]

In [None]:
# Barcodes of the mast cells, taken from adata_Mast.
MA_index = adata_Mast.obs.index[adata_Mast.obs["annotation"].isin(['C05-Mast cell'])]

In [None]:
# Cells not claimed by any per-lineage object keep the 'Undefined' label.
adata.obs['ann'] = 'Undefined'

# `.at` addresses one scalar cell and rejects an Index of labels on current
# pandas, so all bulk assignments below go through `.loc`.

# T cells / ILCs
adata.obs.loc[T01_index, 'ann'] = 'C02-T11-CD8+ GNLY+ T cells'
adata.obs.loc[T02_index, 'ann'] = 'C02-T01-NK cells'
adata.obs.loc[T03_index, 'ann'] = 'C02-T02-ILC1'
adata.obs.loc[T04_index, 'ann'] = 'C02-T03-ILC2'
adata.obs.loc[T05_index, 'ann'] = 'C02-T04-ILC3'
adata.obs.loc[T06_index, 'ann'] = 'C02-T07-CD4+Th2'
adata.obs.loc[T07_index, 'ann'] = 'C02-T12-Cycling T'
adata.obs.loc[T08_index, 'ann'] = 'C02-T05-CD4+ Naive T'
adata.obs.loc[T09_index, 'ann'] = 'C02-T06-CD4+ Treg'
adata.obs.loc[T10_index, 'ann'] = 'C02-T09-CD4+ Trm'
adata.obs.loc[T11_index, 'ann'] = 'C02-T08-CD4+ Th17'
adata.obs.loc[T12_index, 'ann'] = 'C02-T10-CD8+ T cells'

# B lineage
adata.obs.loc[B01_index, 'ann'] = 'C03-B01-Naive B cells'
adata.obs.loc[B02_index, 'ann'] = 'C03-B02-Memory B cells'
adata.obs.loc[B03_index, 'ann'] = 'C03-B03-Cycling B'
adata.obs.loc[B04_index, 'ann'] = 'C03-B04-Plasma cells'

# Myeloid
adata.obs.loc[M01_index, 'ann'] = 'C04-M01-Macrophages'
adata.obs.loc[M02_index, 'ann'] = 'C04-M02-CCL18+ Macrophages'
adata.obs.loc[M03_index, 'ann'] = 'C04-M03-CCL13+ Macrophages'
adata.obs.loc[M04_index, 'ann'] = 'C04-M04-CXCL8+ Macrophages'
adata.obs.loc[M05_index, 'ann'] = 'C04-M05-Monocytes'
adata.obs.loc[M06_index, 'ann'] = 'C04-M06-DC1'
adata.obs.loc[M07_index, 'ann'] = 'C04-M07-DC2'
adata.obs.loc[M08_index, 'ann'] = 'C04-M08-Migratory DCs'
adata.obs.loc[M09_index, 'ann'] = 'C04-M09-pDC'

# Mast cells
adata.obs.loc[MA_index, 'ann'] = 'C05-Mast cell'

In [None]:
sc.settings.set_figure_params(dpi=400, figsize=(4, 4), fontsize=7)

# Tissue of origin, then the merged labels ('ann') next to the previous
# 'annotation' column for comparison.
sc.pl.umap(adata, color=['tissue'], frameon=False, title='')
for obs_key in ('ann', 'annotation'):
    sc.pl.umap(
        adata,
        color=[obs_key],
        add_outline=True,
        outline_width=(0.2, 0.05),
        palette="tab20",
        frameon=False,
        title='',
    )

In [None]:
# Replace the previous annotation with the merged per-lineage labels.
adata.obs['annotation'] =  adata.obs['ann']

In [None]:
# Display the object summary.
adata

In [None]:
# get the Singles
Single_index = adata.obs.loc[~adata.obs["annotation"].isin(['Undefined']), :].index
adata = adata[Single_index, :]
adata

In [None]:
def progress_pca_bbknn_umap_tsne(adata, n_pcs=20, batch_key='batch_name'):
    """Build a batch-balanced neighbour graph with BBKNN and compute a UMAP on it.

    Despite the name, neither PCA nor t-SNE runs here (those calls were left
    disabled). BBKNN replaces the usual neighbours step and needs data that
    is already normalised and has a PCA.

    Parameters
    ----------
    adata
        Annotated data object; modified in place and also returned.
    n_pcs
        Number of principal components BBKNN uses.
    batch_key
        Column of ``adata.obs`` naming the batch of each cell.
    """
    # BBKNN replaces sc.pp.neighbors as the batch-aware graph construction.
    sc.external.pp.bbknn(
        adata,
        batch_key=batch_key,
        n_pcs=n_pcs,
        neighbors_within_batch=3,
    )

    # UMAP on the batch-balanced graph.
    sc.tl.umap(adata)

    return adata


adata = progress_pca_bbknn_umap_tsne(adata)

In [None]:
# Print the category order so the positional colour overrides below can be checked.
print(adata.obs['tissue'].cat.categories)

old_colors = np.array(adata.uns['tissue_colors'])
new_colors = old_colors  # same array object, not a second copy

# Positions are assumed to follow the printed category order.
new_colors[0] = '#ff0000'  # inferior turbinate
new_colors[1] = '#0077b2'  # middle turbinate
new_colors[2] = '#60b55c'  # polyp

adata.uns['tissue_colors'] = new_colors

sc.settings.set_figure_params(dpi=400, figsize=(4, 4), fontsize=7)
sc.pl.umap(adata, color=['tissue'], frameon=False, title='')
sc.pl.umap(
    adata,
    color=['annotation'],
    add_outline=True,
    outline_width=(0.2, 0.05),
    palette="tab20",
    frameon=False,
    title='',
)

In [None]:
# NOTE(review): this reloads IMM.h5ad from disk, so the merged labels and the
# BBKNN/UMAP computed in the preceding cells are only present if they were
# written to that file beforehand -- confirm the intended execution order.
adata = sc.read('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/IMM.h5ad')

# Convert to string dtype before assigning the new label values.
adata.obs['annotation'] = adata.obs['annotation'].astype('string')

# NOTE(review): B05_index is not defined in the Merge cells, and B04_index
# there selects all 'C03-B04-Plasma cells' -- both are presumably set by the
# B-cell section earlier in the notebook; confirm before running.
# `.at` is a scalar accessor; bulk label assignment needs `.loc`.
adata.obs.loc[B04_index, 'annotation'] = 'C03-B04-IgA Plasma cells'
adata.obs.loc[B05_index, 'annotation'] = 'C03-B05-IgG Plasma cells'

In [None]:
# First plot keeps the figure settings already in effect and uses the default legend.
sc.pl.umap(
    adata,
    color=['annotation'],
    add_outline=True,
    outline_width=(0.2, 0.05),
    palette="tab20",
    frameon=False,
    title='',
)

# Second plot: smaller font, with the labels drawn on the embedding.
sc.settings.set_figure_params(dpi=400, figsize=(4, 4), fontsize=4)
sc.pl.umap(
    adata,
    color=['annotation'],
    add_outline=True,
    outline_width=(0.2, 0.05),
    palette="tab20",
    frameon=False,
    legend_loc='on data',
)

In [None]:
# Compute a dendrogram over the annotation groups.
sc.tl.dendrogram(adata, groupby='annotation')

In [None]:
import cosg as cosg
import time

# Marker-gene search per annotation group, timed.
# time.clock() was removed in Python 3.8; time.perf_counter() is the
# documented replacement for measuring elapsed time.
t0 = time.perf_counter()
cosg.cosg(
    adata,
    key_added='cosg',
    mu=1,
    n_genes_user=50,
    groupby='annotation',
)
runtime_cosg = time.perf_counter() - t0

# Dot plot of the top 2 genes per annotation, read from the 'cosg' results key.
sc.settings.set_figure_params(dpi=200, figsize=(4, 4), fontsize=20)
sc.pl.rank_genes_groups_dotplot(
    adata,
    groupby='annotation',
    cmap='Spectral_r',
    standard_scale='var',
    n_genes=2,
    key='cosg',
)

**All**

In [None]:
# Tissue x annotation cell counts; margins=True appends an "All" row and column.
Groups_tab_1 = pd.crosstab(
    index=adata.obs['tissue'],
    columns=adata.obs['annotation'],
    margins=True,
)
# Divide every row by its "All" total to get per-tissue fractions.
MyTab_1 = Groups_tab_1.div(Groups_tab_1["All"], axis=0)
# Remove the margin column and row again.
MyTab2_1 = MyTab_1.drop(columns="All").drop(index="All")
MyTab2_1.T

In [None]:
# Stacked bar chart: per-tissue fraction of each annotation (all immune cells).
# MyTab2_1 arrives as a tissue x annotation table of row fractions.

# Put the tissues (rows) in a fixed display order.
order = ['inferior turbinate', 'middle turbinate', 'polyp']
MyTab2_1 = MyTab2_1.loc[order]

# Sort the annotation columns through a categorical index.
MyTab2_1.columns = pd.CategoricalIndex(MyTab2_1.columns.values)
MyTab2_1 = MyTab2_1.sort_index(axis=1)

ax = MyTab2_1.plot.bar(
    figsize=(3.7, 5),
    stacked=True,
    edgecolor='#000000',
    linewidth=0.4,
    width=0.8,
    fontsize=10,
)

ax.set_title("", fontsize=12)
ax.set_ylabel("Fraction of cells", fontsize=12)
ax.set_xlabel("", fontsize=12)
# `plt.ylim=1.0` rebound the pyplot function to a float instead of setting the
# limit (and broke any later plt.ylim(...) call); set the axis limit directly.
ax.set_ylim(0, 1.0)

# Reverse the legend so its entries read top-down like the stacked bars.
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles[::-1], labels[::-1], loc='center left', bbox_to_anchor=(1, 0.6), fontsize=8)
ax.grid(False)

plt.show()

**T/ILCs**

In [None]:
# Stacked bar chart: per-tissue fraction of each T/ILC annotation.

# Tissue x annotation counts; margins=True appends an "All" row and column.
Groups_tab_1 = pd.crosstab(
    index=adata_T.obs['tissue'],
    columns=adata_T.obs['annotation'],
    margins=True,
)
# Row fractions, then drop the margins again.
MyTab_1 = Groups_tab_1.div(Groups_tab_1["All"], axis=0)
MyTab2_1 = MyTab_1.drop(columns="All").drop(index="All")

# Put the tissues (rows) in a fixed display order.
order = ['inferior turbinate', 'middle turbinate', 'polyp']
MyTab2_1 = MyTab2_1.loc[order]

# Sort the annotation columns through a categorical index.
MyTab2_1.columns = pd.CategoricalIndex(MyTab2_1.columns.values)
MyTab2_1 = MyTab2_1.sort_index(axis=1)

ax = MyTab2_1.plot.bar(
    figsize=(3.7, 5),
    stacked=True,
    edgecolor='#000000',
    linewidth=0.4,
    width=0.8,
    fontsize=10,
)

ax.set_title("", fontsize=12)
ax.set_ylabel("Fraction of cells", fontsize=12)
ax.set_xlabel("", fontsize=12)
# `plt.ylim=1.0` rebound the pyplot function to a float instead of setting the
# limit (and broke any later plt.ylim(...) call); set the axis limit directly.
ax.set_ylim(0, 1.0)

# Reverse the legend so its entries read top-down like the stacked bars.
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles[::-1], labels[::-1], loc='center left', bbox_to_anchor=(1, 0.6), fontsize=8)
ax.grid(False)

plt.show()

**B**

In [None]:
# Stacked bar chart: per-tissue fraction of each B-lineage annotation.

# Tissue x annotation counts; margins=True appends an "All" row and column.
Groups_tab_1 = pd.crosstab(
    index=adata_B.obs['tissue'],
    columns=adata_B.obs['annotation'],
    margins=True,
)
# Row fractions, then drop the margins again.
MyTab_1 = Groups_tab_1.div(Groups_tab_1["All"], axis=0)
MyTab2_1 = MyTab_1.drop(columns="All").drop(index="All")

# Put the tissues (rows) in a fixed display order.
order = ['inferior turbinate', 'middle turbinate', 'polyp']
MyTab2_1 = MyTab2_1.loc[order]

# Sort the annotation columns through a categorical index.
MyTab2_1.columns = pd.CategoricalIndex(MyTab2_1.columns.values)
MyTab2_1 = MyTab2_1.sort_index(axis=1)

ax = MyTab2_1.plot.bar(
    figsize=(3.7, 5),
    stacked=True,
    edgecolor='#000000',
    linewidth=0.4,
    width=0.8,
    fontsize=10,
)

ax.set_title("", fontsize=12)
ax.set_ylabel("Fraction of cells", fontsize=12)
ax.set_xlabel("", fontsize=12)
# `plt.ylim=1.0` rebound the pyplot function to a float instead of setting the
# limit (and broke any later plt.ylim(...) call); set the axis limit directly.
ax.set_ylim(0, 1.0)

# Reverse the legend so its entries read top-down like the stacked bars.
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles[::-1], labels[::-1], loc='center left', bbox_to_anchor=(1, 0.6), fontsize=8)
ax.grid(False)

plt.show()

**Myeloid**

In [None]:
# Stacked bar chart: per-tissue fraction of each myeloid annotation.

# Tissue x annotation counts; margins=True appends an "All" row and column.
Groups_tab_1 = pd.crosstab(
    index=adata_M.obs['tissue'],
    columns=adata_M.obs['annotation'],
    margins=True,
)
# Row fractions, then drop the margins again.
MyTab_1 = Groups_tab_1.div(Groups_tab_1["All"], axis=0)
MyTab2_1 = MyTab_1.drop(columns="All").drop(index="All")

# Put the tissues (rows) in a fixed display order.
order = ['inferior turbinate', 'middle turbinate', 'polyp']
MyTab2_1 = MyTab2_1.loc[order]

# Sort the annotation columns through a categorical index.
MyTab2_1.columns = pd.CategoricalIndex(MyTab2_1.columns.values)
MyTab2_1 = MyTab2_1.sort_index(axis=1)

ax = MyTab2_1.plot.bar(
    figsize=(3.7, 5),
    stacked=True,
    edgecolor='#000000',
    linewidth=0.4,
    width=0.8,
    fontsize=10,
)

ax.set_title("", fontsize=12)
ax.set_ylabel("Fraction of cells", fontsize=12)
ax.set_xlabel("", fontsize=12)
# `plt.ylim=1.0` rebound the pyplot function to a float instead of setting the
# limit (and broke any later plt.ylim(...) call); set the axis limit directly.
ax.set_ylim(0, 1.0)

# Reverse the legend so its entries read top-down like the stacked bars.
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles[::-1], labels[::-1], loc='center left', bbox_to_anchor=(1, 0.6), fontsize=8)
ax.grid(False)

plt.show()

## Write

In [None]:
# Overwrites IMM.h5ad (the same path the notebook reads from) with the current adata.
adata.write('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/IMM.h5ad')

# 配受体

In [None]:
# Reload IMM.h5ad for the ligand/receptor expression panels; the bare name displays its summary.
adata = sc.read('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/IMM.h5ad')
adata

## 趋化因子

In [None]:
# 趋化因子受体 42

# Chemokine receptor symbols, grouped by family (42 listed in total).
R_CC = 'CCR1,CCR2,CCR3,CCR4,CCR5,CCR6,CCR7,CCR8,CCR9,CCR10'.split(',')
R_CXC = 'CXCR1,CXCR2,CXCR3,CXCR4,CXCR5,CXCR6'.split(',')
R_C = ['XCR1']
R_CX3C = ['CX3CR1']
R_Atypical = 'ACKR1,ACKR2,ACKR3,ACKR4,CCRL2'.split(',')
R_other = 'C5AR1,CMKLR1,FPR1,LRP6,GPR35,HRH4,DPP4,IDE,SLC7A1,CNR2,GPRC5D,VSIR,GPR101,FFAR2,GPR42,PGRMC2,GPR75,FCGR2A,KIR2DL3'.split(',')

R_Chemotaxis = R_CC+R_CXC+R_C+R_CX3C+R_Atypical+R_other
# Keep only the symbols present in the raw gene table.
R_Chemotaxis = [x for x in R_Chemotaxis if x in adata.raw.var_names]

In [None]:
# 趋化因子配体 54--45

# Chemokine ligand symbols, grouped by family (53 listed in total).
# NOTE(review): the original note read "54--45" -- presumably the count
# before and after filtering, with 54 including the empty entry fixed below.
L_CXC = 'CXCL1,CXCL2,CXCL3,CXCL4,CXCL5,CXCL6,CXCL7,CXCL8,CXCL9,CXCL10,CXCL11,CXCL12,CXCL13,CXCL14,CXCL15,CXCL16,CXCL17'.split(',')
L_C = 'XCL1,XCL2'.split(',')
# The literal used to end with a trailing comma, which made split(',') emit
# an empty-string "gene" at the end of the list.
L_CC = 'CCL1,CCL2,CCL3,CCL4,CCL5,CCL6,CCL7,CCL8,CCL9,CCL10,CCL11,CCL12,CCL13,CCL14,CCL15,CCL16,CCL17,CCL18,CCL19,CCL20,CCL21,CCL22,CCL23,CCL24,CCL25,CCL26,CCL27,CCL28'.split(',')
L_CX3C = ['CX3CL1']
L_other = 'C5,CKLF,CCL3L3,PF4,SPP1'.split(',')

L_Chemotaxis = L_CXC+L_C+L_CC+L_CX3C+L_other
# Keep only the symbols present in the raw gene table.
L_Chemotaxis = [x for x in L_Chemotaxis if x in adata.raw.var_names]

In [None]:
# 趋化因子受体
# Same dot-plot layout twice: chemokine receptors first, then chemokine ligands.
for gene_panel in (R_Chemotaxis, L_Chemotaxis):
    mp = sc.pl.dotplot(
        adata,
        gene_panel,
        'annotation',
        dendrogram=False,
        use_raw=True,
        cmap='RdYlBu_r',
        var_group_rotation=45,
        swap_axes=True,
        standard_scale='var',
        colorbar_title='Scaled expression in var',
        return_fig=True,
    )
    # Add per-group cell totals and grid lines, then render.
    mp.add_totals().style(grid=True).show()

## 细胞因子

In [None]:
# 细胞因子配体
# Cytokine ligand symbols, grouped by family.
IL_L = 'IL1A,IL1B,IL2,IL4,IL5,IL6,IL7,IL10,IL11,IL13,IL15,IL16,IL17A,IL17C,IL17F,IL18,IL19,IL20,IL21,IL22,IL23A,IL24,IL26,IL32,IL33,IL34,IL37'.split(',')
IFN_L = 'IFNE,IFNG,IFNL1'.split(',')
# NOTE(review): TNF_L and CSF_L carry exactly the symbols of EGF_L and NOTCH_L
# from the growth-factor section -- looks like a copy-paste; confirm the
# intended TNF / CSF family members.
TNF_L = 'EGF,NRG1,AREG,BTC,CNTF,COPA,EPGN,GRN,HBEGF,MIF,TGFA,TGFB1,EREG'.split(',')
CSF_L = 'DLL1,DLL3,DLL4,JAG1,JAG2,SCGB3A1,TNF,IL24,WNT4'.split(',')

# IL24 is listed in both IL_L and CSF_L, so it occurs twice in the combined list.
cytokines_L = IL_L+IFN_L+TNF_L+CSF_L
# Keep only the symbols present in the raw gene table.
cytokines_L = [x for x in cytokines_L if x in adata.raw.var_names]

In [None]:
# 细胞因子配体
# Dot plot of the cytokine ligands per annotation, from the raw layer,
# scaled per gene and drawn with genes on the rows.
mp = sc.pl.dotplot(
    adata,
    cytokines_L,
    'annotation',
    use_raw=True,
    standard_scale='var',
    swap_axes=True,
    dendrogram=False,
    cmap='RdYlBu_r',
    var_group_rotation=45,
    colorbar_title='Scaled expression in var',
    return_fig=True,
)
# Add per-group cell totals and grid lines, then render.
mp.add_totals().style(grid=True).show()

## 生长因子

In [None]:
# 生长因子受体
# Growth-factor receptor symbols, grouped by pathway.
WNT_R = 'FZD1,FZD2,FZD3,FZD4,FZD5,FZD6,FZD7,FZD8,FZD9,FZD10,CD36,ROR1,ROR2,RYK,LRP1,SMO,ANTXR1,EPHA7,PTPRK,LDLR,KLRG2,NOTCH1'.split(',')
# NOTE(review): 'BMR1A', 'BMR1B', 'ACR2A' and 'AVR2B' do not look like gene
# symbols (possibly protein entry names); if absent from var_names they are
# dropped silently by the filter below -- confirm the intended symbols.
BMP_R = 'BMPR1A,BMPR1B,BMR1A,BMR1B,BMPR2,ACR2A,AVR2B,ACVR1,SMO,PTPRK,SLAMF1,PLAUR'.split(',')
EGF_R = 'EGFR,NRG1'.split(',')
NOTCH_R = 'NOTCH1,NOTCH2,NOTCH3,NOTCH4'.split(',')

# SMO, PTPRK and NOTCH1 are listed in two families each, so the combined list has repeats.
GROW_R = WNT_R+BMP_R+EGF_R+NOTCH_R
# Keep only the symbols present in the raw gene table.
GROW_R = [x for x in GROW_R if x in adata.raw.var_names]

In [None]:
# 生长因子配体
# Growth-factor ligand symbols, grouped by pathway.
WNT_L = 'WNT1,WNT2,WNT2B,WNT3,WNT3A,WNT4,WNT5A,WNT5B,WNT7A,WNT7B,WNT11'.split(',')
BMP_L = 'BMP2,BMP3,BMP4,BMP5,BMP6,BMP7,BMP8A,BMP8B'.split(',')
EGF_L = 'EGF,NRG1,AREG,BTC,CNTF,COPA,EPGN,GRN,HBEGF,MIF,TGFA,TGFB1,EREG'.split(',')
NOTCH_L = 'DLL1,DLL3,DLL4,JAG1,JAG2,SCGB3A1,TNF,IL24,WNT4'.split(',')

# WNT4 is listed in both WNT_L and NOTCH_L, so the combined list has a repeat.
GROW_L = WNT_L+BMP_L+EGF_L+NOTCH_L
# Keep only the symbols present in the raw gene table.
GROW_L = [x for x in GROW_L if x in adata.raw.var_names]

# Others

In [None]:
# First plot keeps the figure settings already in effect and uses the default legend.
sc.pl.umap(
    adata,
    color=['annotation'],
    add_outline=True,
    outline_width=(0.2, 0.05),
    palette="tab20",
    frameon=False,
    title='',
)

# Second plot: smaller font, with the labels drawn on the embedding.
sc.settings.set_figure_params(dpi=400, figsize=(4, 4), fontsize=4)
sc.pl.umap(
    adata,
    color=['annotation'],
    add_outline=True,
    outline_width=(0.2, 0.05),
    palette="tab20",
    frameon=False,
    legend_loc='on data',
)

In [None]:
sc.settings.set_figure_params(dpi=300, figsize=(4, 4), fontsize=12)

import colorcet as cc
from matplotlib import cm, colors

# Expression colormap: 5 light-grey steps at the low end, followed by the
# upper part of colorcet's CET_L20 map (sampled from 0.2 to 1.0).
mymap = colors.LinearSegmentedColormap.from_list('my_colormap', cc.CET_L20)

colors3 = plt.cm.Greys_r(np.linspace(0.8, 0.9, 5))
colors2 = mymap(np.linspace(0.2, 1, 128))
colorsComb = np.vstack([colors3, colors2])
mymap = colors.LinearSegmentedColormap.from_list('my_colormap', colorsComb)

sc.pl.umap(
    adata,
    color=['ALOX5', 'ALOX5AP'],
    frameon=False,
    color_map=mymap,
)

In [None]:
sc.settings.set_figure_params(dpi=150, figsize=(4, 4), fontsize=12)

import colorcet as cc
from matplotlib import cm, colors

# Expression colormap: 5 light-grey steps at the low end, followed by the
# upper part of colorcet's CET_L20 map (sampled from 0.2 to 1.0).
mymap = colors.LinearSegmentedColormap.from_list('my_colormap', cc.CET_L20)

colors3 = plt.cm.Greys_r(np.linspace(0.8, 0.9, 5))
colors2 = mymap(np.linspace(0.2, 1, 128))
colorsComb = np.vstack([colors3, colors2])
mymap = colors.LinearSegmentedColormap.from_list('my_colormap', colorsComb)

# One row of four UMAPs per gene group, all with the same colormap.
for gene_group in (
    ['FFAR2', 'CLC', 'CEBPE', 'ALOX5'],
    ['IL1B', 'HCAR2', 'HCAR3', 'FCGR3B'],
    ['CXCR1', 'CXCR2', 'CXCR3', 'CCR1'],
):
    sc.pl.umap(adata, color=gene_group, frameon=False, color_map=mymap)

In [None]:
sc.settings.set_figure_params(dpi=150, figsize=(4, 4), fontsize=12)

# Four chemokine receptors per annotation, scaled per gene.
sc.pl.dotplot(
    adata,
    ['CXCR1', 'CXCR2', 'CCR1', 'CCR3'],
    groupby='annotation',
    standard_scale='var',
)

In [None]:
sc.settings.set_figure_params(dpi=150, figsize=(4, 4), fontsize=12)

# FFAR2 plus a panel of ALOX / leukotriene-related genes per annotation, scaled per gene.
sc.pl.dotplot(
    adata,
    [
        'FFAR2', 'ALOX15', 'ALOX5AP', 'ALOX5', 'LTA4H',
        'LTC4S', 'LTB4R', 'LTB4R2', 'CYSLTR1', 'CYSLTR2',
    ],
    groupby='annotation',
    standard_scale='var',
)

In [None]:
# Reload IMM.h5ad from disk before the next plots; the bare name displays its summary.
adata = sc.read('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/IMM.h5ad')
adata

In [None]:
sc.settings.set_figure_params(dpi=300, figsize=(4, 4), fontsize=12)

import colorcet as cc
from matplotlib import cm, colors

# Expression colormap: 5 light-grey steps at the low end, followed by the
# upper part of colorcet's CET_L20 map (sampled from 0.2 to 1.0).
mymap = colors.LinearSegmentedColormap.from_list('my_colormap', cc.CET_L20)

colors3 = plt.cm.Greys_r(np.linspace(0.8, 0.9, 5))
colors2 = mymap(np.linspace(0.2, 1, 128))
colorsComb = np.vstack([colors3, colors2])
mymap = colors.LinearSegmentedColormap.from_list('my_colormap', colorsComb)

sc.pl.umap(
    adata,
    color=['OXTR', 'OXT', 'AVP'],
    frameon=False,
    color_map=mymap,
)