### Human cells: doublet detection and removal with Scrublet

In [1]:
import numpy as np
import pandas as pd
import scanpy as sc
import anndata as ad
import matplotlib.pyplot as plt

In [2]:
# Logging verbosity levels: errors (0), warnings (1), info (2), hints (3).
sc.settings.verbosity = 3
# Print the versions of scanpy and its main dependencies for the record.
sc.logging.print_header()
# Figure defaults used by every plot below: white background at 80 dpi.
sc.settings.set_figure_params(facecolor='white', dpi=80)

  from .autonotebook import tqdm as notebook_tqdm


scanpy==1.8.2 anndata==0.8.0 umap==0.5.2 numpy==1.21.5 scipy==1.8.0 pandas==1.4.1 scikit-learn==1.0.2 statsmodels==0.13.2 python-igraph==0.9.9 pynndescent==0.5.6


In [3]:
# Input: the raw AnnData file read at the start of the analysis.
raw_file = 'write_LCA/h_LCA1-5_raw.h5ad'
# Output: the file that will store the analysis results (written at the end of the notebook).
qc_file = 'write_LCA/h_LCA1-5_qc.h5ad'

In [4]:
# Load the raw AnnData object; the bare name on the last line displays its summary.
# NOTE(review): loading triggers anndata's duplicate obs-name warning, so the
# barcodes are not unique across the file — consider whether
# `obs_names_make_unique()` is appropriate before relying on obs-name lookups.
adatas = sc.read_h5ad(filename=raw_file)
adatas

  utils.warn_names_duplicates("obs")


AnnData object with n_obs × n_vars = 85233 × 41861
    obs: 'Patient', 'Library', 'Most likely LM22 cell type', 'Major cell type', 'Minor subset'

In [5]:
# QC metrics (kept for a possible later regression step).
# Genes whose symbol starts with 'MT-' are flagged as mitochondrial in var['mt'],
# so that calculate_qc_metrics also reports per-cell mitochondrial counts/percentages.
is_mito = adatas.var_names.str.startswith('MT-')
adatas.var['mt'] = is_mito
sc.pp.calculate_qc_metrics(
    adatas,
    qc_vars=['mt'],
    percent_top=None,
    log1p=False,
    inplace=True,
)

In [6]:
# Display the AnnData summary to confirm the QC columns were added to obs and var.
adatas

AnnData object with n_obs × n_vars = 85233 × 41861
    obs: 'Patient', 'Library', 'Most likely LM22 cell type', 'Major cell type', 'Minor subset', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt'
    var: 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts'

In [7]:
%matplotlib inline
import scrublet
adatas_new=[]
names = ['p3t1','p3t2','p3t3','p4t1','p4t2','p4t3','p5t1','p5t2','p6t1','p6t2','p7t1','p7t2']
for name in names:
    # extract a single sample from the raw data
    adata = adatas[adatas.obs.Library == name, :] 
    # use scrublet to predict doublets in data, the expected_doublet_rate set as the threshold in paper
    sc.external.pp.scrublet(adata, expected_doublet_rate=0.025) 
    #
    # reassembele the sample adata annotated with doublets
    adatas_new.append(adata)
    # doublet validation
    sc.external.pl.scrublet_score_distribution(adata,save='_'+name)
    # create the dimension-reduction plot and show the detected doublets
    sc.pp.normalize_total(adata, target_sum=1e4)
    sc.pp.log1p(adata)
    sc.pp.highly_variable_genes(adata, min_mean=0.05, max_mean=8, min_disp=0.5) 
    adata = adata[:, adata.var.highly_variable]
    #sc.pp.regress_out(adata, ['total_counts', 'pct_counts_mt'])
    sc.pp.scale(adata, max_value=10)
    sc.tl.pca(adata, svd_solver='arpack')
    sc.pp.neighbors(adata)
    sc.tl.tsne(adata)
    predicted_list=list(adata.obs['predicted_doublet'])   
    predicted_list_new = []
    for i in predicted_list:
        predicted_list_new.append(str(i))
    adata.obs['predicted_doublet_n']=predicted_list_new
    sc.pl.tsne(adata, color = ['predicted_doublet_n'],save='_'+name+'_doublet',title='predicted_doublet_'+name)

Running Scrublet
filtered out 19542 genes that are detected in less than 3 cells
filtered out 3 cells that have less than 3 genes expressed
normalizing counts per cell
    finished (0:00:00)
extracting highly variable genes
    finished (0:00:03)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)


  view_to_actual(adata)


normalizing counts per cell
    finished (0:00:00)
normalizing counts per cell
    finished (0:00:00)
Embedding transcriptomes using PCA...
Automatically set threshold at doublet score = 0.43
Detected doublet rate = 0.2%
Estimated detectable doublet fraction = 7.7%
Overall doublet rate:
	Expected   = 2.5%
	Estimated  = 2.3%
    Scrublet finished (0:00:27)


  adata.obs['doublet_score'] = adata_obs.obs['doublet_score']


normalizing counts per cell
    finished (0:00:00)
extracting highly variable genes
    finished (0:00:05)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)


  view_to_actual(adata)


computing PCA
    on highly variable genes
    with n_comps=50
    finished (0:00:09)
computing neighbors
    using 'X_pca' with n_pcs = 50
    finished: added to `.uns['neighbors']`
    `.obsp['distances']`, distances for each pair of neighbors
    `.obsp['connectivities']`, weighted adjacency matrix (0:00:10)
computing tSNE
    using 'X_pca' with n_pcs = 50
    using sklearn.manifold.TSNE




    finished: added
    'X_tsne', tSNE coordinates (adata.obsm) (0:00:22)
Running Scrublet
filtered out 19191 genes that are detected in less than 3 cells
filtered out 3 cells that have less than 3 genes expressed
normalizing counts per cell
    finished (0:00:00)
extracting highly variable genes
    finished (0:00:03)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)


  view_to_actual(adata)


normalizing counts per cell
    finished (0:00:00)
normalizing counts per cell
    finished (0:00:00)
Embedding transcriptomes using PCA...
Automatically set threshold at doublet score = 0.44
Detected doublet rate = 0.2%
Estimated detectable doublet fraction = 7.3%
Overall doublet rate:
	Expected   = 2.5%
	Estimated  = 3.1%
    Scrublet finished (0:00:31)


  adata.obs['doublet_score'] = adata_obs.obs['doublet_score']


normalizing counts per cell




    finished (0:00:00)
extracting highly variable genes
    finished (0:00:05)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)


  view_to_actual(adata)


computing PCA
    on highly variable genes
    with n_comps=50
    finished (0:00:10)
computing neighbors
    using 'X_pca' with n_pcs = 50
    finished: added to `.uns['neighbors']`
    `.obsp['distances']`, distances for each pair of neighbors
    `.obsp['connectivities']`, weighted adjacency matrix (0:00:01)
computing tSNE
    using 'X_pca' with n_pcs = 50
    using sklearn.manifold.TSNE




    finished: added
    'X_tsne', tSNE coordinates (adata.obsm) (0:00:23)
Running Scrublet
filtered out 18841 genes that are detected in less than 3 cells
filtered out 5 cells that have less than 3 genes expressed
normalizing counts per cell
    finished (0:00:00)
extracting highly variable genes
    finished (0:00:02)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)


  view_to_actual(adata)


normalizing counts per cell
    finished (0:00:00)
normalizing counts per cell
    finished (0:00:00)
Embedding transcriptomes using PCA...
Automatically set threshold at doublet score = 0.45
Detected doublet rate = 0.1%
Estimated detectable doublet fraction = 3.7%
Overall doublet rate:
	Expected   = 2.5%
	Estimated  = 3.1%
    Scrublet finished (0:00:21)


  adata.obs['doublet_score'] = adata_obs.obs['doublet_score']


normalizing counts per cell
    finished (0:00:00)
extracting highly variable genes
    finished (0:00:04)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)


  view_to_actual(adata)


computing PCA
    on highly variable genes
    with n_comps=50
    finished (0:00:12)
computing neighbors
    using 'X_pca' with n_pcs = 50
    finished: added to `.uns['neighbors']`
    `.obsp['distances']`, distances for each pair of neighbors
    `.obsp['connectivities']`, weighted adjacency matrix (0:00:01)
computing tSNE
    using 'X_pca' with n_pcs = 50
    using sklearn.manifold.TSNE




    finished: added
    'X_tsne', tSNE coordinates (adata.obsm) (0:00:27)
Running Scrublet
filtered out 22769 genes that are detected in less than 3 cells
filtered out 11 cells that have less than 3 genes expressed
normalizing counts per cell
    finished (0:00:00)
extracting highly variable genes
    finished (0:00:01)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
normalizing counts per cell
    finished (0:00:00)
normalizing counts per cell
    finished (0:00:00)


  view_to_actual(adata)


Embedding transcriptomes using PCA...
Automatically set threshold at doublet score = 0.30
Detected doublet rate = 0.3%
Estimated detectable doublet fraction = 5.1%
Overall doublet rate:
	Expected   = 2.5%
	Estimated  = 5.8%
    Scrublet finished (0:00:07)


  adata.obs['doublet_score'] = adata_obs.obs['doublet_score']


normalizing counts per cell
    finished (0:00:00)
extracting highly variable genes
    finished (0:00:01)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)


  view_to_actual(adata)


computing PCA
    on highly variable genes
    with n_comps=50
    finished (0:00:03)
computing neighbors
    using 'X_pca' with n_pcs = 50
    finished: added to `.uns['neighbors']`
    `.obsp['distances']`, distances for each pair of neighbors
    `.obsp['connectivities']`, weighted adjacency matrix (0:00:00)
computing tSNE
    using 'X_pca' with n_pcs = 50
    using sklearn.manifold.TSNE




    finished: added
    'X_tsne', tSNE coordinates (adata.obsm) (0:00:07)
Running Scrublet
filtered out 22585 genes that are detected in less than 3 cells
filtered out 7 cells that have less than 3 genes expressed
normalizing counts per cell
    finished (0:00:00)
extracting highly variable genes
    finished (0:00:01)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
normalizing counts per cell
    finished (0:00:00)
normalizing counts per cell
    finished (0:00:00)


  view_to_actual(adata)


Embedding transcriptomes using PCA...
Automatically set threshold at doublet score = 0.33
Detected doublet rate = 0.1%
Estimated detectable doublet fraction = 2.5%
Overall doublet rate:
	Expected   = 2.5%
	Estimated  = 2.7%
    Scrublet finished (0:00:06)


  adata.obs['doublet_score'] = adata_obs.obs['doublet_score']


normalizing counts per cell
    finished (0:00:00)




extracting highly variable genes
    finished (0:00:02)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)


  view_to_actual(adata)


computing PCA
    on highly variable genes
    with n_comps=50
    finished (0:00:05)
computing neighbors
    using 'X_pca' with n_pcs = 50
    finished: added to `.uns['neighbors']`
    `.obsp['distances']`, distances for each pair of neighbors
    `.obsp['connectivities']`, weighted adjacency matrix (0:00:00)
computing tSNE
    using 'X_pca' with n_pcs = 50
    using sklearn.manifold.TSNE




    finished: added
    'X_tsne', tSNE coordinates (adata.obsm) (0:00:09)
Running Scrublet
filtered out 22209 genes that are detected in less than 3 cells
filtered out 11 cells that have less than 3 genes expressed
normalizing counts per cell
    finished (0:00:00)
extracting highly variable genes
    finished (0:00:01)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
normalizing counts per cell
    finished (0:00:00)
normalizing counts per cell
    finished (0:00:00)


  view_to_actual(adata)


Embedding transcriptomes using PCA...
Automatically set threshold at doublet score = 0.32
Detected doublet rate = 0.2%
Estimated detectable doublet fraction = 6.4%
Overall doublet rate:
	Expected   = 2.5%
	Estimated  = 3.1%
    Scrublet finished (0:00:08)


  adata.obs['doublet_score'] = adata_obs.obs['doublet_score']


normalizing counts per cell
    finished (0:00:00)




extracting highly variable genes
    finished (0:00:02)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)


  view_to_actual(adata)


computing PCA
    on highly variable genes
    with n_comps=50
    finished (0:00:03)
computing neighbors
    using 'X_pca' with n_pcs = 50
    finished: added to `.uns['neighbors']`
    `.obsp['distances']`, distances for each pair of neighbors
    `.obsp['connectivities']`, weighted adjacency matrix (0:00:00)
computing tSNE
    using 'X_pca' with n_pcs = 50
    using sklearn.manifold.TSNE




    finished: added
    'X_tsne', tSNE coordinates (adata.obsm) (0:00:08)
Running Scrublet
filtered out 21013 genes that are detected in less than 3 cells
filtered out 20 cells that have less than 3 genes expressed
normalizing counts per cell
    finished (0:00:00)
extracting highly variable genes
    finished (0:00:01)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
normalizing counts per cell
    finished (0:00:00)
normalizing counts per cell
    finished (0:00:00)


  view_to_actual(adata)


Embedding transcriptomes using PCA...
Automatically set threshold at doublet score = 0.42
Detected doublet rate = 0.1%
Estimated detectable doublet fraction = 3.2%
Overall doublet rate:
	Expected   = 2.5%
	Estimated  = 2.4%
    Scrublet finished (0:00:13)


  adata.obs['doublet_score'] = adata_obs.obs['doublet_score']


normalizing counts per cell
    finished (0:00:00)
extracting highly variable genes
    finished (0:00:03)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)


  view_to_actual(adata)


computing PCA
    on highly variable genes
    with n_comps=50
    finished (0:00:08)
computing neighbors
    using 'X_pca' with n_pcs = 50
    finished: added to `.uns['neighbors']`
    `.obsp['distances']`, distances for each pair of neighbors
    `.obsp['connectivities']`, weighted adjacency matrix (0:00:01)
computing tSNE
    using 'X_pca' with n_pcs = 50
    using sklearn.manifold.TSNE




    finished: added
    'X_tsne', tSNE coordinates (adata.obsm) (0:00:18)
Running Scrublet
filtered out 20606 genes that are detected in less than 3 cells
filtered out 3 cells that have less than 3 genes expressed
normalizing counts per cell
    finished (0:00:00)
extracting highly variable genes
    finished (0:00:01)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)


  view_to_actual(adata)


normalizing counts per cell
    finished (0:00:00)
normalizing counts per cell
    finished (0:00:00)
Embedding transcriptomes using PCA...
Automatically set threshold at doublet score = 0.38
Detected doublet rate = 0.4%
Estimated detectable doublet fraction = 13.3%
Overall doublet rate:
	Expected   = 2.5%
	Estimated  = 2.7%
    Scrublet finished (0:00:12)


  adata.obs['doublet_score'] = adata_obs.obs['doublet_score']


normalizing counts per cell
    finished (0:00:00)




extracting highly variable genes
    finished (0:00:02)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)


  view_to_actual(adata)


computing PCA
    on highly variable genes
    with n_comps=50
    finished (0:00:05)
computing neighbors
    using 'X_pca' with n_pcs = 50
    finished: added to `.uns['neighbors']`
    `.obsp['distances']`, distances for each pair of neighbors
    `.obsp['connectivities']`, weighted adjacency matrix (0:00:00)
computing tSNE
    using 'X_pca' with n_pcs = 50
    using sklearn.manifold.TSNE




    finished: added
    'X_tsne', tSNE coordinates (adata.obsm) (0:00:13)
Running Scrublet
filtered out 21017 genes that are detected in less than 3 cells
filtered out 3 cells that have less than 3 genes expressed
normalizing counts per cell
    finished (0:00:00)
extracting highly variable genes
    finished (0:00:01)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)


  view_to_actual(adata)


normalizing counts per cell
    finished (0:00:00)
normalizing counts per cell
    finished (0:00:00)
Embedding transcriptomes using PCA...
Automatically set threshold at doublet score = 0.38
Detected doublet rate = 0.2%
Estimated detectable doublet fraction = 9.2%
Overall doublet rate:
	Expected   = 2.5%
	Estimated  = 2.3%
    Scrublet finished (0:00:14)


  adata.obs['doublet_score'] = adata_obs.obs['doublet_score']


normalizing counts per cell
    finished (0:00:00)




extracting highly variable genes
    finished (0:00:03)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)


  view_to_actual(adata)


computing PCA
    on highly variable genes
    with n_comps=50
    finished (0:00:06)
computing neighbors
    using 'X_pca' with n_pcs = 50
    finished: added to `.uns['neighbors']`
    `.obsp['distances']`, distances for each pair of neighbors
    `.obsp['connectivities']`, weighted adjacency matrix (0:00:01)
computing tSNE
    using 'X_pca' with n_pcs = 50
    using sklearn.manifold.TSNE




    finished: added
    'X_tsne', tSNE coordinates (adata.obsm) (0:00:13)
Running Scrublet
filtered out 22486 genes that are detected in less than 3 cells
filtered out 8 cells that have less than 3 genes expressed
normalizing counts per cell
    finished (0:00:00)
extracting highly variable genes
    finished (0:00:01)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)


  view_to_actual(adata)


normalizing counts per cell
    finished (0:00:00)
normalizing counts per cell
    finished (0:00:00)
Embedding transcriptomes using PCA...
Automatically set threshold at doublet score = 0.34
Detected doublet rate = 0.6%
Estimated detectable doublet fraction = 15.7%
Overall doublet rate:
	Expected   = 2.5%
	Estimated  = 3.6%
    Scrublet finished (0:00:10)


  adata.obs['doublet_score'] = adata_obs.obs['doublet_score']


normalizing counts per cell
    finished (0:00:00)
extracting highly variable genes
    finished (0:00:02)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)


  view_to_actual(adata)


computing PCA
    on highly variable genes
    with n_comps=50
    finished (0:00:03)
computing neighbors
    using 'X_pca' with n_pcs = 50
    finished: added to `.uns['neighbors']`
    `.obsp['distances']`, distances for each pair of neighbors
    `.obsp['connectivities']`, weighted adjacency matrix (0:00:00)
computing tSNE
    using 'X_pca' with n_pcs = 50
    using sklearn.manifold.TSNE




    finished: added
    'X_tsne', tSNE coordinates (adata.obsm) (0:00:09)
Running Scrublet
filtered out 19178 genes that are detected in less than 3 cells
filtered out 9 cells that have less than 3 genes expressed
normalizing counts per cell
    finished (0:00:00)
extracting highly variable genes
    finished (0:00:02)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)


  view_to_actual(adata)


normalizing counts per cell
    finished (0:00:00)
normalizing counts per cell
    finished (0:00:00)
Embedding transcriptomes using PCA...
Automatically set threshold at doublet score = 0.43
Detected doublet rate = 0.1%
Estimated detectable doublet fraction = 5.9%
Overall doublet rate:
	Expected   = 2.5%
	Estimated  = 2.1%
    Scrublet finished (0:00:24)


  adata.obs['doublet_score'] = adata_obs.obs['doublet_score']


normalizing counts per cell




    finished (0:00:00)
extracting highly variable genes
    finished (0:00:05)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)


  view_to_actual(adata)


computing PCA
    on highly variable genes
    with n_comps=50
    finished (0:00:09)
computing neighbors
    using 'X_pca' with n_pcs = 50
    finished: added to `.uns['neighbors']`
    `.obsp['distances']`, distances for each pair of neighbors
    `.obsp['connectivities']`, weighted adjacency matrix (0:00:00)
computing tSNE
    using 'X_pca' with n_pcs = 50
    using sklearn.manifold.TSNE




    finished: added
    'X_tsne', tSNE coordinates (adata.obsm) (0:00:20)
Running Scrublet
filtered out 19536 genes that are detected in less than 3 cells
filtered out 5 cells that have less than 3 genes expressed
normalizing counts per cell
    finished (0:00:00)
extracting highly variable genes
    finished (0:00:02)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)


  view_to_actual(adata)


normalizing counts per cell
    finished (0:00:00)
normalizing counts per cell
    finished (0:00:00)
Embedding transcriptomes using PCA...
Automatically set threshold at doublet score = 0.43
Detected doublet rate = 0.1%
Estimated detectable doublet fraction = 3.4%
Overall doublet rate:
	Expected   = 2.5%
	Estimated  = 3.1%
    Scrublet finished (0:00:16)


  adata.obs['doublet_score'] = adata_obs.obs['doublet_score']


normalizing counts per cell
    finished (0:00:00)
extracting highly variable genes
    finished (0:00:04)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)


  view_to_actual(adata)


computing PCA
    on highly variable genes
    with n_comps=50
    finished (0:00:08)
computing neighbors
    using 'X_pca' with n_pcs = 50
    finished: added to `.uns['neighbors']`
    `.obsp['distances']`, distances for each pair of neighbors
    `.obsp['connectivities']`, weighted adjacency matrix (0:00:00)
computing tSNE
    using 'X_pca' with n_pcs = 50
    using sklearn.manifold.TSNE




    finished: added
    'X_tsne', tSNE coordinates (adata.obsm) (0:00:19)


In [8]:
# Stack the per-library AnnData objects back into a single object.
# merge="same" keeps only the annotations that are identical in every input.
# NOTE: this rebinds `adatas_new` from a list to an AnnData, so the cell cannot
# be re-run on its own without re-running the loop that builds the list.
adatas_new = ad.concat(adatas=adatas_new, merge="same")

  utils.warn_names_duplicates("obs")


In [24]:
# Store Scrublet's doublet calls as pandas strings ('True' / 'False') so the
# filter in the following cell can compare them against the literal 'False'.
predicted_as_text = adatas_new.obs['predicted_doublet'].astype('string')
adatas_new.obs['predicted_doublet'] = predicted_as_text

In [36]:
# Keep only the cells that Scrublet explicitly called singlets ('False').
# The calls are cast to plain Python strings first, so the comparison yields an
# ordinary NumPy boolean mask with no <NA> entries; cells without a Scrublet
# call (missing value) therefore compare unequal to 'False' and are dropped.
singlet_mask = (adatas_new.obs['predicted_doublet'].astype(str) == 'False').to_numpy()
# `.copy()` materialises the subset so that later in-place edits of obs act on
# a real AnnData rather than on a view of the unfiltered object.
adatas_new = adatas_new[singlet_mask, :].copy()

In [43]:
# Every remaining cell is a predicted singlet, so the column carries no
# information any more; remove it from the per-cell annotations.
del adatas_new.obs['predicted_doublet']

In [44]:
# Inspect the per-cell annotations after doublet removal; `doublet_score` is
# kept while `predicted_doublet` has been dropped.
adatas_new.obs

Unnamed: 0,Patient,Library,Most likely LM22 cell type,Major cell type,Minor subset,n_genes_by_counts,total_counts,total_counts_mt,pct_counts_mt,doublet_score
bcHNMG,p3,p3t1,Macrophages M0,tMoMacDC,tMac5,5035,23720.0,1470.0,6.197302,0.006243
bcGUOS,p3,p3t1,B cells memory,tB cells,tB,1033,6932.0,304.0,4.385458,0.008569
bcBAMM,p3,p3t1,Monocytes,Patient3-specific,Pt3A_TFF1/MUC5A,2233,5319.0,465.0,8.742245,0.002703
bcATNB,p3,p3t1,B cells memory,tB cells,tB,1504,4007.0,452.0,11.280259,0.019182
bcAEDB,p3,p3t1,Monocytes,Patient3-specific,Pt3E_CLDN4,1510,3499.0,592.0,16.919119,0.011535
...,...,...,...,...,...,...,...,...,...,...
bcFPAE,p7,p7t2,,,,60,66.0,5.0,7.575758,0.035714
bcBMXQ,p7,p7t2,Macrophages M0,tMoMacDC,tMac2,34,37.0,6.0,16.216215,0.033268
bcGQJH,p7,p7t2,,,,40,54.0,1.0,1.851852,0.014171
bcGEKE,p7,p7t2,,,,41,44.0,0.0,0.000000,0.020009


In [45]:
# Persist the doublet-filtered AnnData to the output path defined at the top of the notebook.
adatas_new.write_h5ad(qc_file)