In [None]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
import scanpy as sc
import anndata
import bbknn
import os
from scipy import sparse
import matplotlib.pyplot as plt
# from scanpy_base_moudle_update2 import *
import scrublet as scr
import harmonypy as hm

# Scanpy logging verbosity level.
sc.settings.verbosity = 3
#sc.logging.print_versions()
# Set the figure resolution and other default plot styling.
sc.settings.set_figure_params(dpi=150, figsize = (4, 3), fontsize=12)

# Print the names of the fonts Matplotlib can find, so that an installed family can be chosen below.
# NOTE(review): `get_fontconfig_fonts` is deprecated in recent Matplotlib releases - confirm it
# still exists in the version this notebook is pinned to.
import matplotlib.font_manager
flist = matplotlib.font_manager.get_fontconfig_fonts()
names = [matplotlib.font_manager.FontProperties(fname=fname).get_name() for fname in flist]
print(names)

# Global Matplotlib text settings.
params={
        #'font.style':'italic',
        'font.weight':'normal',    # or 'bold'
        }
plt.rcParams.update(params)

# Use Arial for all figure text.
plt.rcParams['font.family']='Arial'

# Add metadata

In [None]:
# Shared helper functions
    
def change_obs_index_v1(x):
    """
    Prefix a barcode with the current sample label, separated by a dot.

    The label is read from the module-level global ``barcode_name``, which the
    10x conversion helpers set before they rename the barcodes of a sample.
    """
    return '.'.join((barcode_name, x))

def change_obs_index_v2(x):
    """
    Restore a barcode name after samples or datasets were concatenated.

    Drops the last four characters of ``x`` (the batch suffix that was appended).
    """
    suffix_length = 4
    return x[:-suffix_length]


def concatenate_samples(file_name_list, file_output_h5ad, joint='outer'):
    """
    Merge the per-sample h5ad files of one dataset into a single AnnData.

    Parameters
    ----------
    file_name_list : list of str
        Sample identifiers; each sample is read from
        ``file_output_h5ad + sample_id + '.h5ad'``.
    file_output_h5ad : str
        Path prefix (normally a directory ending in '/') holding the per-sample files.
    joint : str
        Forwarded to ``AnnData.concatenate`` as ``join``.

    Returns
    -------
    The concatenated AnnData, with the batch suffix removed from the barcode names.
    """
    adata_list = [sc.read(file_output_h5ad + sample_id + '.h5ad')
                  for sample_id in file_name_list]

    # concatenate() appends '-<batch category>' to every barcode name. Zero-padded,
    # fixed-width categories give that suffix a known length so it can be cut off
    # again. The width is 3 as before ('001', '002', ...) and only grows once there
    # are 1000 or more samples, which previously produced mixed-width suffixes and
    # silently corrupted the barcode names.
    width = max(3, len(str(len(file_name_list))))
    batch_list = [str(number).zfill(width) for number in range(1, len(file_name_list) + 1)]

    adata = adata_list[0].concatenate(adata_list[1:], join=joint, batch_categories=batch_list)

    # Remove the trailing batch suffix from the barcode names.
    suffix_length = width + 1  # '-' plus the batch category
    adata.obs.rename(index=lambda name: name[:-suffix_length], inplace=True)

    return adata

# This module is for data in the 10x mtx format.
def file_name(file_path):
    """
    List the immediate sub-directories of ``file_path`` (one per sample).

    Prints the number of sub-directories and their names. Returns the list of
    sub-directory names, or None when ``os.walk`` yields nothing for the path.
    """
    top_level = next(os.walk(file_path), None)
    if top_level is None:
        return None

    sub_dirs = top_level[1]
    print('The dataset has '+str(len(sub_dirs))+' samples.')
    print('sub_dirs:', sub_dirs)  # all sub-directories directly under the given path
    return sub_dirs
    
def mtx_to_adata(file_name_list, # dirs
                 dataset_name,  # 'qilu_'
                 file_path, 
                 file_output_h5ad,
                 do_prefix = False):
    """
    Bulk-convert 10x single-cell matrices to per-sample h5ad files.

    For every sample the matrix is read with ``sc.read_10x_mtx`` from
    ``file_path + sample_id + '/'``, each barcode is renamed to
    ``<dataset_name><sample_id>.<barcode>``, and the result is written to
    ``file_output_h5ad + sample_id + '.h5ad'``.

    Parameters
    ----------
    file_name_list : list of str
        Sample directory names.
    dataset_name : str
        Prefix placed in front of the sample id in every barcode name.
    file_path : str
        Path prefix of the directory that contains the sample directories.
    file_output_h5ad : str
        Path prefix for the per-sample h5ad output files.
    do_prefix : bool
        When true, ``sample_id + '_'`` is passed as ``prefix`` to ``sc.read_10x_mtx``.

    Returns
    -------
    The AnnData of the last (or only) sample; used as the result when a dataset
    has a single sample.

    Raises
    ------
    ValueError
        If ``file_name_list`` is None or empty (previously this surfaced as an
        unrelated TypeError / UnboundLocalError).
    """
    if file_name_list is None or len(file_name_list) == 0:
        raise ValueError('file_name_list is empty: there is no 10x sample directory to convert.')

    # change_obs_index_v1 reads the sample label from this module-level global.
    global barcode_name

    for sample_id in file_name_list:
        prefix = sample_id + '_' if do_prefix else None

        adata = sc.read_10x_mtx(path=file_path+sample_id+'/', prefix=prefix)
        print('The anndata of '+sample_id+' is:')
        print(adata)
        barcode_name = dataset_name + sample_id
        adata.obs.rename(index=change_obs_index_v1, inplace=True)
        adata.write(file_output_h5ad + sample_id + '.h5ad')

    return adata

def unfiltered_mtx_to_adata(file_name_list, # dirs
                 dataset_name,  # 'qilu_'
                 file_path, 
                 file_output_h5ad,
                 do_prefix = False,
                 min_genes=250):
    """
    Bulk-convert unfiltered 10x single-cell matrices to per-sample h5ad files.

    Works like the filtered-matrix conversion, but first removes cells with
    fewer than ``min_genes`` detected genes via ``sc.pp.filter_cells``. Each
    barcode is renamed to ``<dataset_name><sample_id>.<barcode>`` and the sample
    is written to ``file_output_h5ad + sample_id + '.h5ad'``.

    Parameters
    ----------
    file_name_list : list of str
        Sample directory names.
    dataset_name : str
        Prefix placed in front of the sample id in every barcode name.
    file_path : str
        Path prefix of the directory that contains the sample directories.
    file_output_h5ad : str
        Path prefix for the per-sample h5ad output files.
    do_prefix : bool
        When true, ``sample_id + '_'`` is passed as ``prefix`` to ``sc.read_10x_mtx``.
    min_genes : int
        Minimum number of detected genes a cell needs to be kept (default 250).

    Returns
    -------
    The AnnData of the last (or only) sample; used as the result when a dataset
    has a single sample.

    Raises
    ------
    ValueError
        If ``file_name_list`` is None or empty (previously this surfaced as an
        unrelated TypeError / UnboundLocalError).
    """
    if file_name_list is None or len(file_name_list) == 0:
        raise ValueError('file_name_list is empty: there is no 10x sample directory to convert.')

    # change_obs_index_v1 reads the sample label from this module-level global.
    global barcode_name

    for sample_id in file_name_list:
        prefix = sample_id + '_' if do_prefix else None

        adata = sc.read_10x_mtx(path=file_path+sample_id+'/', prefix=prefix)
        sc.pp.filter_cells(adata, min_genes=min_genes)
        print('The anndata of '+sample_id+' is:')
        print(adata)
        barcode_name = dataset_name + sample_id
        adata.obs.rename(index=change_obs_index_v1, inplace=True)
        adata.write(file_output_h5ad + sample_id + '.h5ad')

    return adata

def solution_concatenate_10x(file_path, 
                             file_output_h5ad, 
                             dataset_name, 
                             dataset_output, 
                             adata_name, 
                             filtered = True, 
                             min_genes=250, 
                             do_prefix=False,
                             joint = 'outer'):
    """
    Batch-process the 10x CellRanger matrices of one dataset.

    Every sample directory under ``file_path`` is converted to its own h5ad
    file (stored under ``file_output_h5ad``), with dataset and sample
    information added to the barcode names. The samples are then merged into one
    AnnData, which is written to ``dataset_output + adata_name + '.h5ad'`` and
    returned. The expression matrix holds the original (raw) values.

    ``filtered`` defaults to True, meaning the matrices are the CellRanger
    filtered ones; otherwise cells are first filtered by ``min_genes``.
    """
    print('using the framework of scRNA-seq analysis developed by Yue Wang, qilu hospital of Shandong University')
    sample_ids = file_name(file_path)

    # Unfiltered matrices need the extra min_genes cell filter.
    if not filtered:
        last_sample = unfiltered_mtx_to_adata(sample_ids, dataset_name, file_path, file_output_h5ad,
                                              min_genes=min_genes, do_prefix=do_prefix)
    else:
        last_sample = mtx_to_adata(sample_ids, dataset_name, file_path, file_output_h5ad,
                                   do_prefix=do_prefix)

    # A single-sample dataset needs no merge: the converted sample is the result.
    merged = last_sample
    if len(sample_ids) > 1:
        merged = concatenate_samples(sample_ids, file_output_h5ad, joint=joint)
    merged.write(dataset_output + adata_name + '.h5ad')

    print('The final adata is: ')
    print(merged)

    return merged

In [None]:
# Input directory holding one 10x matrix sub-directory per sample.
file_path = '/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/Matrix_20220809/'
# Output location for the per-sample h5ad files.
file_output_h5ad = '/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/H5ad/'
# Prefix added to every barcode name.
dataset_name = 'Qilu_'
# File name (without extension) of the merged dataset.
adata_name = 'Qilu_Otorhinolaryngology'
# Output location for the merged dataset.
dataset_output = '/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/'
joint = 'outer' # the samples of this dataset each have a different number of genes

In [None]:
##  Integration the matrices of 10x format and rename the barcodes.

# AnnData object with n_obs × n_vars = 85162 × 27319
adata = solution_concatenate_10x(file_path, file_output_h5ad, dataset_name, dataset_output, adata_name, joint = 'outer')

In [None]:
adata.var = adata.var[[]]
adata

In [None]:
adata.write('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/Qilu_Otorhinolaryngology_outer_raw.h5ad')

In [None]:
file_name_list = file_name(file_path)

In [None]:
adata = concatenate_samples(file_name_list, file_output_h5ad)
adata.var = adata.var[[]]
adata

In [None]:
adata.obs

In [None]:
## Add the meta-data
## Planned fields: dataset source, species, sample batch, subject ID, organ, location,
## health status, disease state, developmental stage, single-nucleus sequencing or not
## Note: every term value should start with a capital letter

# Split every barcode name on '.'; the first piece is '<dataset>_<sample>'
obs_name_list = [name.split(".") for name in adata.obs_names.to_list()]

# dataset label (used later when changing gene symbols)
dataset_name_list = ['Qilu_Otorhinolaryngology'] * len(obs_name_list)
adata.obs['dataset'] = dataset_name_list


# batch information for batch correction
batch_name_list = [parts[0] for parts in obs_name_list]
adata.obs['batch'] = batch_name_list
adata.obs['batch_name'] = adata.obs['batch']

"""
# patient_id
id_name_list = []
for i in obs_name_list:
    j = 'Qilu_patient' + (i[0].split('_'))[1]
    id_name_list.append(j)
    
adata.obs['patient_id'] = id_name_list
"""

# Sample code of every cell: the text between the first '_' and the first '-' after it
sample_code_list = [parts[0].split('_')[1].split('-')[0] for parts in obs_name_list]

# Health
Health_name_list = ['healthy control' if code == '4144' else 'polyp'
                    for code in sample_code_list]
adata.obs['Health'] = Health_name_list

# Disease_state
Disease_name_list = [
    'polyp' if code in ['0222','3604','3610','4070','4073','4121','4142']
    else ('edematous' if code in ['3615'] else 'normal')
    for code in sample_code_list
]
adata.obs['Disease_state'] = Disease_name_list

# tissue
tissue_name_list = [
    'polyp' if code in ['0222','3604','3610','4070','4073','4121','4142']
    else ('middle turbinate' if code in ['0224','3615','4071'] else 'inferior turbinate')
    for code in sample_code_list
]
adata.obs['tissue'] = tissue_name_list

adata.obs

In [None]:
# Cross-tabulate the number of cells per sample batch (rows) and tissue (columns).
Groups_tab_1 = pd.crosstab(index=adata.obs['batch_name'],  # one row per sample batch
                columns=adata.obs['tissue'], margins=True)  # margins=True adds the row/column totals
Groups_tab_1

In [None]:
# h5ad file with the meta-data added
adata.write('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/Qilu_Otorhinolaryngology_outer_metadata.h5ad')
adata

# Doublet removal

In [None]:
# Function definitions
def change_obs_index_v2(x):
    """
    Restore a barcode name after samples or datasets were concatenated.

    Drops the last four characters of ``x`` (the batch suffix that was appended).
    """
    suffix_length = 4
    return x[:-suffix_length]

def scr_doublets_removal(adata_file, adata_file_output, min_cells=31,
                         expected_doublet_rate=0.1, fallback_threshold=0.4):
    """
    Run Scrublet on every sample of a dataset and write the singlet-only AnnData.

    Parameters
    ----------
    adata_file : str
        Path of the input h5ad file. Its ``obs`` must contain a categorical
        ``batch_name`` column identifying the sample of each cell.
    adata_file_output : str
        Path the doublet-free AnnData is written to.
    min_cells : int
        Samples with fewer cells than this are dropped before Scrublet runs
        (default 31, chosen because of Scrublet's nPC=30 setting).
    expected_doublet_rate : float
        Forwarded to ``scr.Scrublet``.
    fallback_threshold : float
        Doublet-score threshold used when Scrublet cannot determine one itself.

    Returns
    -------
    None. The result is written to ``adata_file_output``.

    Raises
    ------
    ValueError
        If no sample has at least ``min_cells`` cells.
    """
    ## Read the AnnData
    adata = sc.read(adata_file)
    print('adata before doublet remove')
    print(adata)

    ## Drop the samples that have too few cells for Scrublet
    sample_labels = adata.obs['batch_name']
    cells_per_sample = sample_labels.value_counts()
    few_cells_sample_list = cells_per_sample[cells_per_sample < min_cells].index.to_list()
    adata = adata[(~sample_labels.isin(few_cells_sample_list)).to_numpy(), :]
    print('adata after samples with few cells removed')
    print(adata)

    ## Scrublet, one sample at a time
    # Only iterate over samples that still have cells after the filter above.
    batch_name = adata.obs['batch_name'].cat.remove_unused_categories().cat.categories.to_list()
    if not batch_name:
        raise ValueError('No sample has at least ' + str(min_cells) + ' cells; nothing to process.')

    sample_after_scrublet_list = []

    for sample in batch_name:
        # Copy so that the new obs columns are written to a real object, not to a view.
        adata_single = adata[(adata.obs['batch_name'] == sample).to_numpy(), :].copy()

        print('doublets removal ...')
        scrub = scr.Scrublet(adata_single.X, expected_doublet_rate=expected_doublet_rate)
        scores, doublets = scrub.scrub_doublets(log_transform=True)

        # Scrublet cannot always determine a threshold automatically; when the
        # attribute is missing, call the doublets with the fallback threshold.
        if hasattr(scrub, 'threshold_'):
            print('Threshold found by scrublet')
        else:
            print('No threshold found, assigning ' + str(fallback_threshold) + ' to')
            doublets = scrub.call_doublets(threshold=fallback_threshold)

        adata_single.obs['Doublet_scrublet'] = ['doublet' if is_doublet else 'single'
                                                for is_doublet in doublets]
        adata_single.obs['Doublet_score_scrublet'] = scores
        sample_after_scrublet_list.append(adata_single)

    # Merge the per-sample objects back into one AnnData. concatenate() appends
    # '-<batch category>' to each barcode name; zero-padded fixed-width categories
    # make that suffix removable. The width is 3 as before and only grows for
    # 1000 or more samples.
    width = max(3, len(str(len(batch_name))))
    batch_list = [str(number).zfill(width) for number in range(1, len(batch_name) + 1)]

    adata = sample_after_scrublet_list[0].concatenate(sample_after_scrublet_list[1:],
                                                      batch_categories=batch_list)
    # Remove the trailing batch suffix from the barcode names.
    suffix_length = width + 1  # '-' plus the batch category
    adata.obs.rename(index=lambda name: name[:-suffix_length], inplace=True)

    # Keep only the cells called as singlets.
    singlet_mask = adata.obs['Doublet_scrublet'].isin(['single']).to_numpy()
    adata_after_doublet_removed = adata[singlet_mask, :]

    print('adata after doublet remove')
    print(adata_after_doublet_removed)

    adata_after_doublet_removed.write(adata_file_output)

    # return adata_after_doublet_removed

In [None]:
adata_file = '/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/Qilu_Otorhinolaryngology_outer_metadata.h5ad'
adata_out_file = '/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/Qilu_Otorhinolaryngology_doublet_removal.h5ad'

# AnnData object with n_obs × n_vars = 243709 × 53907 (5762 cells removed, 2.3%)
scr_doublets_removal(adata_file,adata_out_file)

# Quality control

In [None]:
adata = sc.read('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/Qilu_Otorhinolaryngology_doublet_removal.h5ad')
adata

In [None]:
# Drop cells with fewer than 250 detected genes.
sc.pp.filter_cells(adata, min_genes=250)
# Flag mitochondrial ('MT-') and ribosomal-protein ('RPS'/'RPL') genes by name prefix.
adata.var['mt'] = adata.var_names.str.startswith('MT-')
adata.var['rp'] = adata.var_names.str.startswith(("RPS","RPL"))
# A single call computes the QC metrics for both gene sets; calling it once per
# gene set recomputed all the general metrics over the whole matrix a second time.
sc.pp.calculate_qc_metrics(adata, qc_vars=['mt', 'rp'], percent_top=None, log1p=False, inplace=True)

In [None]:
adata

In [None]:
adata.obs

In [None]:
sc.pl.highest_expr_genes(adata, n_top=20, )
sc.pl.violin(adata, ['n_genes', 'total_counts', 'pct_counts_mt'],jitter=0.4, multi_panel=True)

In [None]:
def quality_control(adata):
    """
    Remove mitochondrial and ribosomal-protein genes, then rarely detected genes.

    Genes whose name starts with 'MT-', 'RPS' or 'RPL' are dropped; afterwards
    ``sc.pp.filter_genes`` removes genes detected in fewer than 20 cells.
    Returns the filtered AnnData.
    """
    # Name prefixes of the genes to exclude: mitochondrial and ribosomal protein.
    excluded_prefixes = ('MT-', 'RPS', 'RPL')
    kept_genes = [gene for gene in adata.var_names if not gene.startswith(excluded_prefixes)]
    adata = adata[:, kept_genes]
    # adata = adata[adata.obs.pct_counts_mt < 10, :]

    sc.pp.filter_genes(adata, min_cells=20)

    return adata

In [None]:
adata = quality_control(adata)
adata

In [None]:
adata.write('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/Qilu_Otorhinolaryngology_doublet_removal.h5ad')

In [None]:
# Doublet-free, quality-controlled dataset written by the previous step.
adata_file = '/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/Qilu_Otorhinolaryngology_doublet_removal.h5ad'

adata = sc.read(adata_file)
# Normalise every cell to a total of 10,000 counts, then log1p-transform.
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
# Keep the normalised, log-transformed matrix in .raw before any later gene subsetting or scaling.
adata.raw = adata
adata.write('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/Normalized_all.h5ad')

# CellTypist

In [None]:
adata

In [None]:
import celltypist
from celltypist import models

In [None]:
models.download_models(force_update = True)

In [None]:
models.models_path

In [None]:
model = models.Model.load(model = 'Human_Lung_Atlas.pkl')
#The model summary information.
model

In [None]:
#Examine cell types contained in the model.
model.cell_types

In [None]:
predictions = celltypist.annotate(adata, model = 'Human_Lung_Atlas.pkl', majority_voting = True, mode = 'best match')

In [None]:
predictions.predicted_labels

In [None]:
adata = predictions.to_adata()

In [None]:
adata

In [None]:
adata.obs

In [None]:
adata.write('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/Normalized_celltypist.h5ad')

# UMAP and CellTypist

In [None]:
def cycle_score(adata, cycle_gene_file='/mnt/data/project/qilu_singlecell_1/ref_geneset/regev_lab_cell_cycle_genes.txt'):
    """
    Score every cell for cell-cycle phase and plot a PCA of the cell-cycle genes.

    Parameters
    ----------
    adata : AnnData
        Object to score; ``sc.tl.score_genes_cell_cycle`` is applied to it in place.
    cycle_gene_file : str
        Text file with one gene symbol per line. The first 43 lines are taken
        as S-phase genes and the remaining lines as G2/M genes.

    Returns
    -------
    The same ``adata``, after scoring.
    """
    # Read the gene list with a context manager so the file handle is closed.
    with open(cycle_gene_file) as handle:
        cell_cycle_genes = [line.strip() for line in handle]

    # The split position refers to the file as written, so it is applied
    # before the genes missing from adata are filtered out.
    s_genes = cell_cycle_genes[:43]
    g2m_genes = cell_cycle_genes[43:]

    cell_cycle_genes = [x for x in cell_cycle_genes if x in adata.var_names]
    s_genes = [x for x in s_genes if x in adata.var_names]
    g2m_genes = [x for x in g2m_genes if x in adata.var_names]

    # sc.pp.scale(adata, zero_center=False) # scaling before scoring is not required
    sc.tl.score_genes_cell_cycle(adata, s_genes=s_genes, g2m_genes=g2m_genes)

    # PCA restricted to the cell-cycle genes, coloured by the assigned phase.
    adata_cc_genes = adata[:, cell_cycle_genes]
    sc.tl.pca(adata_cc_genes)
    sc.pl.pca_scatter(adata_cc_genes, color='phase')

    return adata

def EIS_score(adata):
    """
    Add a mean marker-expression score per compartment to ``adata.obs``.

    For each of the six compartments (epi, T_ILCs, B, MNPs, Mast, Stromal) the
    expression of the marker genes present in ``adata.var_names`` is summed per
    cell and divided by the number of those genes. The result is stored in
    ``adata.obs['mean_<compartment>_score']``.

    Returns
    -------
    adata
        The same object with the six score columns added.
    compartments_values_list : list of list of float
        One row per cell with the six scores in the order
        epi, T_ILCs, B, MNPs, Mast, Stromal.
    """
    # Note: the B gene set can cause follicular B cells to be assigned to the MNPs
    # compartment; distinguish them by CD79A expression.
    marker_sets = {
        'epi': 'KRT8,EPCAM,KRT18,KRT19,CLDN3,CLDN7,ELF3,S100A6,C15orf48,CLDN4,SMIM22,S100A14,SPINK2',
        'T_ILCs': 'TRAC,IL32,CD3D,CCL5,CD2,TRBC2,CD3E,CD3G,EVL,IL7R,CD7,HCST,KLRB1,LCK,FYB1,CXCR4,CORO1A,TRDC,GNLY,GZMA,IFNG,PRF1,TNF,CCL3,CCL4,XCL1,XCL2,NKG7,IL22,IL2RA,IL7R,IL23R',
        'B': 'CD79A,IGHA1,MZB1,DERL3,IGHA2,HERPUD1,SSR4,TNFRSF17,SEC11C,UBE2J1,PRDX4,GNG7,XBP1,EAF2,PLPP5,CD27,IGLC3,SSR3,TNFRSF13B',
        # 'DNASE1L3' was previously misspelled 'DNASE1E3', so the gene was never matched.
        'MNPs': 'HLA-DRA,CST3,HLA-DPB1,CD74,HLA-DPA1,AIF1,LYZ,HLA-DQA1,C1QA,MS4A6A,HLA-DMA,C1QC,C1QB,FCGRT,DNASE1L3,LST1,SELENOP,FGL2,HLA-DMB,CTSB,GRN',
        'Mast': 'TPSAB1,CPA3,TPSB2,CD9,HPGDS,ANXA1,NFKBIA,MS4A2,CD63,LAPTM4A,SRGN,LMNA,LTC4S,FCER1G,VWA5A,CTSG,KIT,CLU',
        'Stromal': 'IGFBP7,IFITM3,SPARC,A2M,CALD1,GSN,LGALS1,VIM,SPARCL1,CXCL14,COL1A2,COL3A1,COL6A2,TIMP1,C1S,S100A13,C1R,PLAT,MFAP4,RARRES2,COL1A1',
    }

    # Compute the compartment scores
    score_columns = []
    for compartment, gene_string in marker_sets.items():
        # De-duplicate (IL7R is listed twice for T_ILCs): the mask below selects each
        # gene once, so a duplicated name would inflate the divisor and lower the mean.
        wanted = dict.fromkeys(gene_string.split(','))
        present = [gene for gene in wanted if gene in adata.var_names]
        gene_mask = adata.var_names.isin(present)

        # A sparse .X sums to an (n, 1) matrix; flatten so a plain 1-D column is stored.
        summed = np.asarray(np.sum(adata[:, gene_mask].X, axis=1)).ravel()

        column = 'mean_' + compartment + '_score'
        adata.obs[column] = summed / len(present)
        score_columns.append(column)

    # Collect the compartment scores of every cell
    compartments_values_list = adata.obs.loc[:, score_columns].values.tolist()

    return adata, compartments_values_list

def hvg_regress_scale(adata):
    """
    Restrict ``adata`` to its highly variable genes and scale them.

    Highly variable genes are flagged with ``sc.pp.highly_variable_genes``; the
    subset is printed, scaled without zero-centering, and returned. The
    regression step is currently disabled.
    """
    # Note: HVG selection requires the input matrix to be log-transformed already.
    sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)
    hvg_mask = adata.var.highly_variable
    hvg_adata = adata[:, hvg_mask]
    print(hvg_adata)

    # sc.pp.regress_out(adata, ['S_score', 'G2M_score'])

    sc.pp.scale(hvg_adata, zero_center=False)

    return hvg_adata

def progress_pca_bbknn_umap_tsne(adata, n_pcs=20, batch_key='bbknn_batch'):
    """
    Run PCA, build a BBKNN neighbour graph and compute the UMAP embedding.

    ``adata.obs['bbknn_batch']`` is (re)written as '<dataset>-<batch_name>' for
    every cell before BBKNN runs with ``batch_key`` and ``n_pcs``. The t-SNE
    step is currently disabled. Returns the same ``adata``.
    """
    ## Dimensionality reduction by principal component analysis
    sc.tl.pca(adata, svd_solver='arpack')
    # Scatter plot in PCA coordinates
    sc.pl.pca(adata)
    # Contribution of each PC to the total variance; this informs how many PCs
    # to consider when computing the neighbourhood relations
    sc.pl.pca_variance_ratio(adata, log=True)

    # BBKNN handles the batch effect (it takes the place of the neighbors step);
    # it requires data that has already been normalised and run through PCA
    dataset_labels = adata.obs['dataset']
    sample_labels = adata.obs['batch_name']
    adata.obs['bbknn_batch'] = ['-'.join((str(dataset), str(sample)))
                                for dataset, sample in zip(dataset_labels, sample_labels)]
    sc.external.pp.bbknn(adata, batch_key=batch_key, n_pcs=n_pcs)

    # Compute the UMAP
    sc.tl.umap(adata)
    # sc.tl.tsne(adata, n_jobs=6)

    return adata

def progress_pca_harmony_umap(adata):
    """
    Run PCA, correct the PCs with Harmony and compute the UMAP embedding.

    Harmony is run on ``adata.obsm['X_pca']`` with the ``dataset`` and
    ``batch_name`` columns of ``adata.obs``; the corrected embedding is stored
    in ``adata.obsm['X_pca_harmony']`` and used for the neighbour graph.
    Returns the same ``adata``.
    """
    # PCA plus the two diagnostic plots
    sc.tl.pca(adata, svd_solver='arpack')
    sc.pl.pca(adata)
    sc.pl.pca_variance_ratio(adata, log=True)

    # Harmony handles the batch effect
    pca_embedding = adata.obsm['X_pca']
    harmony_out = hm.run_harmony(pca_embedding, adata.obs, vars_use = ['dataset', 'batch_name'])
    corrected_embedding = harmony_out.Z_corr.T
    adata.obsm['X_pca_harmony'] = corrected_embedding
    sc.pp.neighbors(adata, n_neighbors=10, use_rep='X_pca_harmony', n_pcs=20)

    # Compute the UMAP
    sc.tl.umap(adata)
    # sc.tl.tsne(adata, n_jobs=6)

    return adata

def score_compartments(adata, compartments_values_list):
    """
    Label every cell with the compartment that has the highest score.

    Parameters
    ----------
    adata : AnnData
        Object whose ``obs`` receives the new ``Compartments`` column.
    compartments_values_list : list of list of float
        One row per cell, holding the scores in the order
        epi, T_ILCs, B, MNPs, Mast, Stromal.

    Returns
    -------
    The same ``adata`` with ``adata.obs['Compartments']`` set. When several
    scores tie for the maximum, the first one in the order above wins.

    Raises
    ------
    IndexError
        If the highest score of a row sits at a position beyond the six known
        compartments (previously the label of the preceding cell was reused).
    """
    labels = ('epi', 'T_ILCs', 'B', 'MNPs', 'Mast', 'Stromal')

    # np.argmax returns the position of the first maximum, matching the old
    # "first index equal to the max" lookup without the broken if/elif chain.
    cell_compartments_list = [labels[int(np.argmax(scores))]
                              for scores in compartments_values_list]

    adata.obs['Compartments'] = cell_compartments_list

    return adata

def add_leiden(adata):
    """
    Run Leiden clustering at resolutions 0.5 and 1 and keep both results.

    Each run writes ``adata.obs['leiden']``; the result is copied to
    ``adata.obs['leiden-all-0.5']`` and ``adata.obs['leiden-all-1']``
    respectively. Returns the same ``adata``.
    """
    for resolution in (0.5, 1):
        print("Performing clustering with a resolution of " + str(resolution))
        sc.tl.leiden(adata, resolution=resolution)
        adata.obs['leiden-all-' + str(resolution)] = adata.obs['leiden']

    return adata

In [None]:
adata = sc.read('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/Normalized_celltypist.h5ad')
adata

In [None]:
adata = sc.AnnData(X=adata.raw.X, var=adata.raw.var, obs = adata.obs)
adata

In [None]:
adata = cycle_score(adata = adata)
adata, compartments_values_list = EIS_score(adata)
adata = hvg_regress_scale(adata)

In [None]:
adata = progress_pca_bbknn_umap_tsne(adata)

In [None]:
compartments_values_list = adata.obs.loc[:, ['mean_epi_score', 'mean_T_ILCs_score', 'mean_B_score','mean_MNPs_score','mean_Mast_score','mean_Stromal_score']].values.tolist()

adata = score_compartments(adata, compartments_values_list = compartments_values_list)

adata = add_leiden(adata)

In [None]:
sc.settings.set_figure_params(dpi=150, figsize = (4, 3), fontsize=12)
sc.pl.umap(adata, color=['Health'])
sc.pl.umap(adata, color=['Disease_state'])
sc.pl.umap(adata, color=['tissue'])
sc.pl.umap(adata, color=['Compartments'])
sc.pl.umap(adata, color=['phase'])

In [None]:
sc.settings.set_figure_params(dpi=150, figsize = (4, 3), fontsize=10)
sc.pl.umap(adata, color=['leiden-all-0.5'], add_outline=True,
           palette=sc.pl.palettes.vega_20_scanpy, outline_width = (0.2, 0.05), frameon=False, legend_loc='on data')
sc.pl.umap(adata, color=['leiden-all-1'], add_outline=True,
           palette=sc.pl.palettes.vega_20_scanpy, outline_width = (0.2, 0.05), frameon=False, legend_loc='on data')

In [None]:
sc.settings.set_figure_params(dpi=200, figsize = (4, 4), fontsize=7)
sc.pl.umap(adata, color=['Compartments'], add_outline=True,frameon=False, title='')
sc.pl.umap(adata, color=['majority_voting'], add_outline=True, palette="tab20", frameon=False, title='')

In [None]:
sc.pl.umap(adata, color=['majority_voting'], add_outline=True, outline_width = (0.2, 0.05), palette="tab20", frameon=False, legend_loc='on data')

In [None]:
import cosg as cosg
import time

# time.clock() was removed in Python 3.8; time.perf_counter() is the
# replacement for measuring elapsed time.
t0 = time.perf_counter()
# Rank marker genes per CellTypist majority-voting label; results go to adata.uns['cosg'].
cosg.cosg(adata,
          key_added='cosg',
          mu=1,
          n_genes_user=50,
          groupby='majority_voting')
runtime_cosg = time.perf_counter() - t0

# Dot plot of the top 4 COSG marker genes of every label.
sc.settings.set_figure_params(dpi=200, figsize = (4, 4), fontsize=20)
sc.pl.rank_genes_groups_dotplot(adata,
                                groupby='majority_voting',
                                cmap='Spectral_r',
                                standard_scale='var',
                                n_genes=4,
                                key='cosg')

In [None]:
adata_raw = sc.read('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/Normalized_celltypist.h5ad')
adata_raw = sc.AnnData(X=adata_raw.raw.X, var=adata_raw.raw.var, obs = adata_raw.obs)
adata_raw

In [None]:
adata.raw = adata_raw

In [None]:
adata.raw

In [None]:
adata

In [None]:
adata.write('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/Qilu_Otorhinolaryngology_bbknn.h5ad')

# Distinguishing cellular compartments

In [None]:
adata = sc.read('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/Qilu_Otorhinolaryngology_bbknn.h5ad')
adata

In [None]:
adata.obs['majority_voting'].cat.categories

In [None]:
adata_EPI_index = adata.obs.loc[adata.obs["majority_voting"].isin(['Basal resting','Suprabasal','Club (nasal)','Goblet (nasal)','Ionocyte','Multiciliated (non-nasal)','SMG duct','SMG mucous','SMG serous (nasal)']), :].index
adata_T_ILCs_index = adata.obs.loc[adata.obs["majority_voting"].isin(['CD4 T cells','CD8 T cells','NK cells','T cells proliferating']), :].index
adata_B_index = adata.obs.loc[adata.obs["majority_voting"].isin(['B cells','Plasma cells']), :].index
adata_MNPs_index = adata.obs.loc[adata.obs["majority_voting"].isin(['Classical monocytes','DC1','DC2','Interstitial Mφ perivascular','Migratory DCs','Plasmacytoid DCs']), :].index
adata_Mast_index = adata.obs.loc[adata.obs["majority_voting"].isin(['Mast cells']), :].index
adata_Stromal_index = adata.obs.loc[adata.obs["majority_voting"].isin(['Adventitial fibroblasts','Alveolar fibroblasts',
                                                                       'EC arterial','EC venous systemic','Lymphatic EC differentiating','Peribronchial fibroblasts','Pericytes','SM activated stress response','Smooth muscle']), :].index

In [None]:
adata_Imm_index = adata.obs.loc[adata.obs["majority_voting"].isin(['CD4 T cells','CD8 T cells','NK cells','T cells proliferating',
                                                                   'B cells','Plasma cells',
                                                                   'Classical monocytes','DC1','DC2','Interstitial Mφ perivascular','Migratory DCs','Plasmacytoid DCs',
                                                                   'Mast cells']), :].index

In [None]:
# Every cell starts as 'Epithelium'; the cells of the other compartments are then
# overwritten. `.loc` is used because `.at` is a scalar accessor and is not meant
# to take a whole Index of row labels.
adata.obs['Compartments'] = 'Epithelium'
adata.obs.loc[adata_T_ILCs_index, 'Compartments'] = 'T/ILCs'
adata.obs.loc[adata_B_index, 'Compartments'] = 'B/Plasma cells'
adata.obs.loc[adata_MNPs_index, 'Compartments'] = 'MNPs'
adata.obs.loc[adata_Mast_index, 'Compartments'] = 'Mast cells'
adata.obs.loc[adata_Stromal_index, 'Compartments'] = 'Stromal cells'

In [None]:
sc.settings.set_figure_params(dpi=200, figsize = (4, 4), fontsize=7)
sc.pl.umap(adata, color=['Compartments'], add_outline=True,frameon=False, title='')

In [None]:
# 82366
adata_EPI = adata[adata_EPI_index, :]
# 49530
adata_T_ILCs = adata[adata_T_ILCs_index, :]
# 12926
adata_B = adata[adata_B_index, :]
# 6859
adata_MNPs = adata[adata_MNPs_index, :]
# 4311
adata_Mast = adata[adata_Mast_index, :]
# 84422
adata_Stromal = adata[adata_Stromal_index, :]

In [None]:
adata_IMM = adata[adata_Imm_index, :]
adata_IMM

## Write

In [None]:
adata_EPI.write('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/EPI.h5ad')

adata_T_ILCs.write('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/T_ILCs.h5ad')

adata_B.write('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/B.h5ad')

adata_MNPs.write('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/MNPs.h5ad')

adata_Mast.write('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/Mast.h5ad')

adata_Stromal.write('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/Stromal.h5ad')

In [None]:
adata_IMM.write('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/IMM.h5ad')

In [None]:
adata.write('/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output/Qilu_Otorhinolaryngology_bbknn.h5ad')

In [None]:
# 20231016
adata = sc.read('/home/wangyue/basic-calculation_data/c_Project_outputs/qilu_CRSwNP/Qilu_Otorhinolaryngology_bbknn.h5ad')
adata

In [None]:
from matplotlib import cm, colors
import colorcet as cc

# Base colormap built from the colorcet CET_L20 colour list.
mymap = colors.LinearSegmentedColormap.from_list('my_colormap', cc.CET_L20)

colors2 = mymap(np.linspace(0.2, 1, 128)) # 128 colours sampled from the 0.2-1.0 range of the base map
colors3 = plt.cm.Greys_r(np.linspace(0.8,0.9,5)) # 5 greys placed at the low end of the final map
colorsComb = np.vstack([colors3, colors2])
# Final colormap: the greys first, followed by the CET_L20 colours.
mymap = colors.LinearSegmentedColormap.from_list('my_colormap', colorsComb)

In [None]:
sc.settings.set_figure_params(dpi=200, figsize = (5, 4.5), fontsize=15)
# Epithelium
sc.pl.umap(adata, color=['EPCAM','KRT8','KRT18','KRT19','PIGR'], add_outline=False, frameon=False,color_map = mymap)

In [None]:
# Jose Ordovas-Montane et al., Nature 2018
sc.settings.set_figure_params(dpi=200, figsize = (5, 4.5), fontsize=15)
# Basal
sc.pl.umap(adata, color=['S100A2','KRT5','KRT15','POSTN','MMP10','PERP','AQP3','EGR1'], add_outline=False, frameon=False,color_map = mymap)
# Apical
sc.pl.umap(adata, color=['SERPINB3','KRT19','S100A6','AGR2','ANXA1','SLPI','WFDC2','CLDN4'], add_outline=False, frameon=False,color_map = mymap)
# Glandular
sc.pl.umap(adata, color=['LYZ','SLPI','AZGP1','LTF'], add_outline=False, frameon=False,color_map = mymap)
# Ciliated
sc.pl.umap(adata, color=['CAPS','PIFO','TPPP3','SNTN'], add_outline=False, frameon=False,color_map = mymap)

In [None]:
# L Sikkema, D Strobl et al., BioRxiv 2022
sc.settings.set_figure_params(dpi=200, figsize = (5, 4.5), fontsize=15)
# Basal
sc.pl.umap(adata, color=['KRT15','KRT17','KRT5','DST'], add_outline=False, frameon=False,color_map = mymap)
# Suprabasal
sc.pl.umap(adata, color=['KRT5','SERPINB4','SERPINB13','IGFBP3'], add_outline=False, frameon=False,color_map = mymap)
# Deuterosomal
sc.pl.umap(adata, color=['CCNO','CDC20B','ZMYND10','KIF9'], add_outline=False, frameon=False,color_map = mymap)
# Multiciliated
sc.pl.umap(adata, color=['C20orf85','C9orf24','RSPH1','PIFO'], add_outline=False, frameon=False,color_map = mymap)

# Club
sc.pl.umap(adata, color=['ASRGL1','LYPD2','UGT2A1','TFCP2L1'], add_outline=False, frameon=False,color_map = mymap)
# Goblet
sc.pl.umap(adata, color=['LYPD2','PI3','MUC16','MUC5AC'], add_outline=False, frameon=False,color_map = mymap)
# Transitional Club-AT2
sc.pl.umap(adata, color=['SCGB3A2','MGP','SFTA1P','VIM'], add_outline=False, frameon=False,color_map = mymap)
# Ionocyte
sc.pl.umap(adata, color=['RARRES2','TMEM61','ASCL3','SCNN1B'], add_outline=False, frameon=False,color_map = mymap)

# Tuft
sc.pl.umap(adata, color=['STMN1','HES6','MARCKSL1','RASSF6'], add_outline=False, frameon=False,color_map = mymap)
# Neuroendocrine
sc.pl.umap(adata, color=['PCSK1N','GRP','CPE','CHGA'], add_outline=False, frameon=False,color_map = mymap)
# SMG serous
sc.pl.umap(adata, color=['LYZ','ZG16B','AZGP1','LTF'], add_outline=False, frameon=False,color_map = mymap)
# SMG mucous
sc.pl.umap(adata, color=['MUC5B','BPIFB2','AZGP1','FCGBP'], add_outline=False, frameon=False,color_map = mymap)
# SMG duct
sc.pl.umap(adata, color=['RARRES1','TCN1','SAA1','MIA'], add_outline=False, frameon=False,color_map = mymap)

In [None]:
sc.pl.umap(adata, color=['KRT14','MYLK','MEG3','GEM'], add_outline=False, frameon=False)

In [None]:
## Cell annotation
# Note: clusters 22 and 13 are defined as immature enterocytes; they appear to be secretory precursor cells

def _cells_in(cluster_ids):
    """Return the barcodes of the cells whose 'leiden-all-1' cluster is in cluster_ids."""
    in_clusters = adata.obs["leiden-all-1"].isin(cluster_ids)
    return adata.obs.index[in_clusters.to_numpy()]

# Basal
E01_index = _cells_in(['14'])
# Apical
E02_index = _cells_in(['9'])
# Glandular
E03_index = _cells_in(['5','7'])
# Ciliated
E04_index = _cells_in(['17'])
# T-ILCs
E05_index = _cells_in(['15','18','3','0','1'])
# B cells
E06_index = _cells_in(['10'])
# Plasma B cells
E07_index = _cells_in(['16'])
# MNPs
E08_index = _cells_in(['11'])
# Mast cells
E09_index = _cells_in(['19'])

# ACKR1+Endo
E10_index = _cells_in(['2'])
# ACKR1-Endo
E11_index = _cells_in(['6'])
# Glia
E12_index = _cells_in(['20'])
# Pericytes/SMC
E13_index = _cells_in(['4','13'])
# Fibroblast
E14_index = _cells_in(['8'])
# POSTN+ Fibroblast
E15_index = _cells_in(['12'])

In [None]:
############################################### Figure1A ######################################################
# Every cell starts with the basal-cell label; the cells of the other clusters are
# then overwritten. `.loc` is used because `.at` is a scalar accessor and is not
# meant to take a whole Index of row labels.
adata.obs['cluster_figure1A'] = 'C01-E01-Basal cells'
adata.obs.loc[E02_index, 'cluster_figure1A'] = 'C01-E02-Apical cells'
adata.obs.loc[E03_index, 'cluster_figure1A'] = 'C01-E03-Glandular cells'
adata.obs.loc[E04_index, 'cluster_figure1A'] = 'C01-E04-Ciliated cells'
adata.obs.loc[E05_index, 'cluster_figure1A'] = 'C02-T_ILCs'
adata.obs.loc[E06_index, 'cluster_figure1A'] = 'C03-B01-B cells'
adata.obs.loc[E07_index, 'cluster_figure1A'] = 'C03-B02-Plasma cells'
adata.obs.loc[E08_index, 'cluster_figure1A'] = 'C04-MNPs'
adata.obs.loc[E09_index, 'cluster_figure1A'] = 'C05-Mast cells'
adata.obs.loc[E10_index, 'cluster_figure1A'] = 'C06-S01-ACKR1+Endo'
adata.obs.loc[E11_index, 'cluster_figure1A'] = 'C06-S02-ACKR1-Endo'
adata.obs.loc[E12_index, 'cluster_figure1A'] = 'C06-S03-Glia cells'
adata.obs.loc[E13_index, 'cluster_figure1A'] = 'C06-S04-Pericytes/SMC'
adata.obs.loc[E14_index, 'cluster_figure1A'] = 'C06-S05-Fibroblast'
adata.obs.loc[E15_index, 'cluster_figure1A'] = 'C06-S06-POSTN+ Fibroblast'

In [None]:
sc.settings.set_figure_params(dpi=300, figsize = (4, 3), fontsize=7)
sc.pl.umap(adata, color=['cluster_figure1A'], add_outline=True, title='',
           palette=sc.pl.palettes.vega_20_scanpy, outline_width = (0.2, 0.05), frameon=False)

In [None]:
from matplotlib import cm, colors
sc.settings.set_figure_params(dpi=400, figsize=(4, 4), fontsize=15)

# Custom expression colormap `mymap`: 10 samples from a narrow slice (0.7-0.8)
# of the reversed grey map are stacked in front of 128 samples spanning the
# full plasma map, and the combined RGBA table is turned into one colormap.
colors3 = plt.cm.Greys_r(np.linspace(0.7, 0.8, 10))
colors2 = plt.cm.plasma(np.linspace(0, 1, 128))
colorsComb = np.vstack((colors3, colors2))
mymap = colors.LinearSegmentedColormap.from_list('my_colormap', colorsComb)

In [None]:
# Figure defaults for the plots that follow: 4x3 inch panels, 300 dpi, font size 15.
sc.settings.set_figure_params(fontsize=15, figsize=(4, 3), dpi=300)

In [None]:
# Expression of VDR, NR1H4 and NR1H3 on the UMAP, drawn with the custom `mymap` colormap.
sc.pl.umap(
    adata,
    color=['VDR', 'NR1H4', 'NR1H3'],
    color_map=mymap,
    add_outline=False,
    frameon=False,
)

In [None]:
# Expression of ALX1, GPR87, GSDMD and ADAMTS4 on the UMAP, drawn with the custom `mymap` colormap.
sc.pl.umap(
    adata,
    color=['ALX1', 'GPR87', 'GSDMD', 'ADAMTS4'],
    color_map=mymap,
    add_outline=False,
    frameon=False,
)

In [None]:
# Two gene-expression UMAP panels, both drawn with the custom `mymap` colormap.
sc.pl.umap(
    adata,
    color=['CD79A', 'CAMP', 'IGHA1'],
    color_map=mymap,
    add_outline=False,
    frameon=False,
)
sc.pl.umap(
    adata,
    color=['TPSAB1', 'GPR42', 'FFAR3'],
    color_map=mymap,
    add_outline=False,
    frameon=False,
)

In [None]:
# Notebook display: show the per-cell annotation table (adata.obs) as the cell output.
adata.obs

In [None]:
# Combine the cluster annotation and the tissue label of every cell into two
# composite labels: "<annotation>-<tissue>" and "<tissue>-<annotation>".
annotation = 'cluster_figure1A'
phenotype = 'tissue'

ann_list = adata.obs[annotation].to_list()
phenotype_list = adata.obs[phenotype].to_list()

# Walk both per-cell lists in lockstep; zip stops at the shorter one.
phe_Ann = [phe + '-' + ann for phe, ann in zip(phenotype_list, ann_list)]
Ann_phe = [ann + '-' + phe for phe, ann in zip(phenotype_list, ann_list)]

adata.obs['Ann_phe'] = Ann_phe
adata.obs['phe_Ann'] = phe_Ann

In [None]:
# Store the "<annotation>-<tissue>" labels as a categorical column, then
# display the resulting category names as the cell output.
adata.obs['Ann_phe'] = adata.obs['Ann_phe'].astype('category')
list(adata.obs['Ann_phe'].cat.categories)

In [None]:
# Store the "<tissue>-<annotation>" labels as a categorical column, then
# display the resulting category names as the cell output.
adata.obs['phe_Ann'] = adata.obs['phe_Ann'].astype('category')
list(adata.obs['phe_Ann'].cat.categories)

In [None]:
sc.settings.set_figure_params(fontsize=15)

# NOTE: despite its name this is a plain list of gene symbols, not a dict.
marker_gene_dict = [
    'GPR42', 'FFAR3', 'CAMP', 'NR1H4', 'NR1H3', 'VDR', 'WFDC2', 'LCN2', 'PLA2G2A',
]

# Dot plot of the genes above, grouped by the "<annotation>-<tissue>" label and
# scaled per gene (standard_scale='var'), using the values in adata.raw.
# Original note (translated): "epithelial cells, by intestinal segment,
# phenotype + type" -- NOTE(review): possibly carried over from another analysis.
# To save the figure, pass e.g. save='_' + sample_name + '_fig11.png'.
mp = sc.pl.dotplot(
    adata,
    var_names=marker_gene_dict,
    groupby='Ann_phe',
    use_raw=True,
    standard_scale='var',
    cmap='RdYlBu_r',
    colorbar_title='Scaled expression in var',
    figsize=(5, 15),
    var_group_rotation=45,
    dendrogram=False,
    swap_axes=False,
    return_fig=True,
)
# Add the per-group totals, switch the grid on, then render.
mp = mp.add_totals().style(grid=True)
mp.show()

In [None]:
sc.settings.set_figure_params(fontsize=15)

# NOTE: despite its name this is a plain list of gene symbols, not a dict.
marker_gene_dict = ['ALX1', 'GPR87', 'GSDMD', 'ADAMTS4']

# Dot plot of the genes above, grouped by the "<annotation>-<tissue>" label and
# scaled per gene (standard_scale='var'), using the values in adata.raw.
# Original note (translated): "epithelial cells, by intestinal segment,
# phenotype + type" -- NOTE(review): possibly carried over from another analysis.
# To save the figure, pass e.g. save='_' + sample_name + '_fig11.png'.
mp = sc.pl.dotplot(
    adata,
    var_names=marker_gene_dict,
    groupby='Ann_phe',
    use_raw=True,
    standard_scale='var',
    cmap='RdYlBu_r',
    colorbar_title='Scaled expression in var',
    figsize=(5, 15),
    var_group_rotation=45,
    dendrogram=False,
    swap_axes=False,
    return_fig=True,
)
# Add the per-group totals, switch the grid on, then render.
mp = mp.add_totals().style(grid=True)
mp.show()

In [None]:
sc.settings.set_figure_params(fontsize=15)

# NOTE: despite its name this is a plain list of gene symbols, not a dict.
marker_gene_dict = ['ALX1', 'GPR87', 'GSDMD', 'ADAMTS4']

# Same gene panel, but grouped by the "<tissue>-<annotation>" label so rows are
# ordered tissue-first; the figure size is left at the plotting default
# (an explicit figsize=(21, 10) was tried and disabled in the original).
# Original note (translated): "epithelial cells, by intestinal segment,
# phenotype + type" -- NOTE(review): possibly carried over from another analysis.
# To save the figure, pass e.g. save='_' + sample_name + '_fig11.png'.
mp = sc.pl.dotplot(
    adata,
    var_names=marker_gene_dict,
    groupby='phe_Ann',
    use_raw=True,
    standard_scale='var',
    cmap='RdYlBu_r',
    colorbar_title='Scaled expression in var',
    var_group_rotation=45,
    dendrogram=False,
    swap_axes=False,
    return_fig=True,
)
# Add the per-group totals, switch the grid on, then render.
mp = mp.add_totals().style(grid=True)
mp.show()

In [None]:
sc.settings.set_figure_params(fontsize=15)

# NOTE: despite its name this is a plain list of gene symbols, not a dict.
marker_gene_dict = ['ALX1', 'GPR87', 'GSDMD', 'ADAMTS4']

# Dot plot of the gene panel per Figure 1A cluster, with a dendrogram of the groups.
# To save the figure, pass e.g. save='_' + sample_name + '_fig11.png'.
sc.pl.dotplot(
    adata,
    var_names=marker_gene_dict,
    groupby='cluster_figure1A',
    figsize=(5, 8),
    dendrogram=True,
)

In [None]:
# UMAP coloured by the 'tissue' column, outlined, without frame or title.
sc.pl.umap(
    adata,
    color=['tissue'],
    palette=sc.pl.palettes.vega_20_scanpy,
    add_outline=True,
    outline_width=(0.2, 0.05),
    frameon=False,
    title='',
)

In [None]:
# Cell counts per tissue (rows) and Figure 1A cluster (columns); margins=True
# appends an "All" row and an "All" column holding the totals.
Groups_tab_1 = pd.crosstab(
    index=adata.obs['tissue'],
    columns=adata.obs['cluster_figure1A'],
    margins=True,
)

# Divide every row by its own total so each tissue's clusters become fractions.
MyTab_1 = Groups_tab_1.div(Groups_tab_1["All"], axis=0)

# Strip the helper totals again, leaving only tissue x cluster fractions.
MyTab2_1 = MyTab_1.drop(columns="All").drop(index="All")

# Notebook display: clusters as rows, tissues as columns.
MyTab2_1.T

In [None]:
# Stacked bar chart: one bar per tissue, segments are the fraction of that
# tissue's cells falling in each Figure 1A cluster (rows of MyTab2_1).

#categories = IMM_group[::-1]

# Re-wrap the column labels as a CategoricalIndex and sort the columns
# (axis=1) by that ordering so the stacking order is deterministic.
MyTab2_1.columns = pd.CategoricalIndex(MyTab2_1.columns.values)
MyTab2_1 = MyTab2_1.sort_index(axis=1)

ax = MyTab2_1.plot.bar(
            figsize=(3.7,5),
            stacked=True,
            edgecolor = '#000000',
            linewidth=0.4,
            width=0.8,
            fontsize=10,
            # color={"1-Duodenum": "#393b79","2-Jejunum": "#8ca252","3-Ileum": "#e7ba52", "4-Colon": "#e7969c", "5-Rectum": "#de9ed6"}
             )


plt.title("", fontsize=12)
plt.ylabel("Fraction of cells", fontsize=12)
plt.xlabel("", fontsize=12)
# Cap the y axis at 1.0. The limit is set on the Axes object: the earlier
# `plt.ylim=1.0` assignment replaced the pyplot function `ylim` with a float,
# which set no limit and made later `plt.ylim(...)` calls in the session fail.
ax.set_ylim(0, 1.0)

#plt.gca().get_legend().remove() #remove legend
# plt.legend(categories, loc='center left', bbox_to_anchor=(1, 0.6), fontsize=12)
# plt.savefig('Proportion of clusters accross organs.png')
# Remove ticks (disabled):
#plt.xticks([])
#plt.yticks([])
# ax.tick_params(bottom=False, top=False, left=False, right=False)

# Reverse the legend entries so they read top-to-bottom in the same order as
# the stacked segments, and place the legend to the right of the axes.
handles, labels = ax.get_legend_handles_labels()
ax.legend(reversed(handles), reversed(labels), loc='center left', bbox_to_anchor=(1, 0.6), fontsize=8)
plt.grid(False)

plt.show()

In [None]:
# One UMAP panel per lineage, each coloured by three marker genes.
for _marker_genes in (
    ['CD79A', 'IGHA1', 'MZB1'],     # B cells
    ['CD3D', 'CD3E', 'IL7R'],       # T cells/NKs
    ['LYZ', 'CD74', 'C1QC'],        # MNPs
    ['TPSAB1', 'KIT', 'CLU'],       # Mast cells
    ['EPCAM', 'KRT18', 'KRT8'],     # Epithelial cells
    ['COL1A1', 'CXCL14', 'MEIS3'],  # Fibroblasts
    ['PLVAP', 'CD320', 'LYVE1'],    # Endothelial cells / lymphatic endothelium
    ['S100B', 'CRYAB', 'NRXN1'],    # Glia cells
):
    sc.pl.umap(adata, color=_marker_genes)

In [None]:
# Barcodes of each major lineage, selected by "leiden-all-1" cluster id.
adata_EPI_index = adata.obs.index[adata.obs["leiden-all-1"].isin(['9', '14', '17', '5', '7'])]
adata_T_ILCs_index = adata.obs.index[adata.obs["leiden-all-1"].isin(['15', '18', '3', '0', '1'])]
adata_B_index = adata.obs.index[adata.obs["leiden-all-1"].isin(['10', '16'])]
adata_MNPs_index = adata.obs.index[adata.obs["leiden-all-1"].isin(['11'])]
adata_Mast_index = adata.obs.index[adata.obs["leiden-all-1"].isin(['19'])]
adata_Stromal_index = adata.obs.index[
    adata.obs["leiden-all-1"].isin(['2', '6', '20', '4', '13', '8', '12'])
]

# The stromal clusters again, split into three sub-groups.
adata_Fibro_index = adata.obs.index[adata.obs["leiden-all-1"].isin(['4', '8', '12', '13'])]
adata_Endo_index = adata.obs.index[adata.obs["leiden-all-1"].isin(['2', '6'])]
adata_Glia_index = adata.obs.index[adata.obs["leiden-all-1"].isin(['20'])]

In [None]:
# 17853
adata_EPI = adata[adata_EPI_index, :]
# 26081
adata_T_ILCs = adata[adata_T_ILCs_index, :]
# 4618
adata_B = adata[adata_B_index, :]
# 2981
adata_MNPs = adata[adata_MNPs_index, :]
# 1073
adata_Mast = adata[adata_Mast_index, :]
# 28155
adata_Stromal = adata[adata_Stromal_index, :]

# 15673
adata_Fibro = adata[adata_Fibro_index, :]
# 12076
adata_Endo = adata[adata_Endo_index, :]
# 406
adata_Glia = adata[adata_Glia_index, :]

In [None]:
# Write each major-lineage subset to its own .h5ad file.
# The destination directory is spelled out once so it can be changed in a
# single place; the resulting file paths are the same as before.
_subset_output_dir = '/mnt/data2/Datasets/Human_non_intestine_datasets/Qilu_Otorhinolaryngology_surgery_data/dataset_output'

for _subset, _file_name in (
    (adata_EPI, 'EPI.h5ad'),
    (adata_T_ILCs, 'T_ILCs.h5ad'),
    (adata_B, 'B.h5ad'),
    (adata_MNPs, 'MNPs.h5ad'),
    (adata_Mast, 'Mast.h5ad'),
    (adata_Stromal, 'Stromal.h5ad'),
):
    _subset.write(os.path.join(_subset_output_dir, _file_name))