In [None]:
import numpy as np
import doubletdetection
import scanpy as sc
import matplotlib.pyplot as plt
import scipy.io
import tarfile
import time
import anndata as ad

# Notebook-wide configuration for scanpy and matplotlib.
# Sets scanpy's global job-count setting to 8 (presumably sized for the machine
# this notebook was run on -- adjust for the current host).
sc.settings.n_jobs=8
# Apply scanpy's figure parameters using the library defaults.
sc.set_figure_params()
# IPython line magic selecting matplotlib's inline backend for this notebook.
%matplotlib inline

In [None]:
# Directory holding the per-experiment Matrix Market count files that
# doubletdetect() reads.
input_dir = '/home/blake/data/pod/blake_LTS/Atlas_V2/Raw_Counts'
# Directory receiving the diagnostic PDFs and doublet-call text files; scanpy's
# figure directory is pointed at the same location so its plots land there too.
sc.settings.figdir = output_dir = '/home/blake/data/pod/blake_LTS/Atlas_V2/DoubletDetection'

In [None]:
def doubletdetect(exps):
    """Run DoubletDetection on each experiment and save calls and diagnostics.

    For every experiment ID in ``exps`` this function:

    1. Reads ``<input_dir>/<ID>_10X-Dual_UMI_counts_EmptyBC_Filter.mtx``,
       transposes it and converts it to CSC format.
    2. Fits a ``doubletdetection.BoostClassifier`` on that matrix and predicts
       per-row doublet labels and doublet scores.
    3. Saves the classifier's convergence and threshold diagnostic plots as
       PDFs under ``output_dir``.
    4. Builds an ``AnnData`` object, runs a standard scanpy pipeline
       (normalize, log1p, HVG, PCA, neighbors, UMAP) and saves a UMAP colored
       by doublet label and doublet score through scanpy's figure saving.
    5. Writes the predicted labels, one per line and in matrix row order, to
       ``<output_dir>/<ID>_10X-Dual_DoubletDetection_doublets.txt``.

    The function relies on the module-level ``input_dir`` and ``output_dir``
    variables and produces its results purely as files on disk.

    Parameters
    ----------
    exps : iterable of str
        Experiment identifiers used to build the input and output file names.

    Returns
    -------
    None
    """
    # One set of thresholds shared by predict() and the convergence plot, so
    # the plot always describes the calls that are actually written out.
    p_thresh = 1e-7
    voter_thresh = 0.8

    for exp_id in exps:
        out_prefix = output_dir + '/' + exp_id

        # The matrix is transposed before fitting -- presumably the file is
        # stored genes x cells and the classifier wants cells x genes; TODO confirm.
        counts = scipy.io.mmread(
            input_dir + '/' + exp_id + '_10X-Dual_UMI_counts_EmptyBC_Filter.mtx'
        ).T.tocsc()

        clf = doubletdetection.BoostClassifier(
            n_iters=50,
            clustering_algorithm="phenograph",
            verbose=False,
            standard_scaling=False,
        )
        doublets = clf.fit(counts).predict(p_thresh=p_thresh, voter_thresh=voter_thresh)
        doublet_score = clf.doublet_score()

        # Diagnostic plots. The returned figures are closed immediately after
        # saving: this loop runs once per experiment, and unclosed figures
        # would otherwise pile up in memory until the cell finishes.
        convergence_fig = doubletdetection.plot.convergence(
            clf,
            save=(out_prefix + '_10X-Dual_convergence_test.pdf'),
            show=False,
            p_thresh=p_thresh,
            voter_thresh=voter_thresh,
        )
        plt.close(convergence_fig)
        threshold_fig = doubletdetection.plot.threshold(
            clf,
            save=(out_prefix + '_10X-Dual_threshold_test.pdf'),
            show=False,
            p_step=6,
        )
        plt.close(threshold_fig)

        # Wrap the counts in AnnData only after fitting; the scanpy steps below
        # transform the data in place and must not feed back into the classifier.
        # NOTE(review): newer anndata releases appear to deprecate the `dtype`
        # argument -- confirm against the installed version.
        adata = ad.AnnData(counts, dtype=counts.dtype)
        adata.obs["doublet"] = doublets
        adata.obs["doublet_score"] = doublet_score

        # Visualize doublets on a UMAP embedding.
        sc.pp.normalize_total(adata)
        sc.pp.log1p(adata)
        sc.pp.highly_variable_genes(adata)
        sc.tl.pca(adata)
        sc.pp.neighbors(adata)
        sc.tl.umap(adata)
        # NOTE(review): scanpy prepends the plot name to `save`, so the leading
        # '/' likely places this file in a subdirectory of sc.settings.figdir --
        # confirm the resulting path is the intended one.
        sc.pl.umap(
            adata,
            color=["doublet", "doublet_score"],
            show=False,
            save=('/' + exp_id + '_10X-Dual_UMAP.pdf'),
        )

        # Write out doublet status, one label per line.
        with open(out_prefix + '_10X-Dual_DoubletDetection_doublets.txt', 'w') as file_handler:
            file_handler.writelines("{}\n".format(item) for item in doublets)

In [None]:
exps = ["KPMP_20210421E","KPMP_20210421F","KPMP_20210421G","KPMP_20210421H","KPMP_20210804A","KPMP_20210804C","KPMP_20210901A","KPMP_20210901B",
"KPMP_20220427A","KPMP_20220427B","KPMP_20220602A","KPMP_20220602B","KPMP_20220602C","KPMP_20220602D","KPMP_20220606A","KPMP_20220606B",
"KPMP_20220606C","HuBMAP_20220825A","HuBMAP_20220825B","HuBMAP_20220825C","HuBMAP_20220825D","HuBMAP_20220825E","HuBMAP_20220825F",
"KPMP_20220908A","KPMP_20220908B",
       "KPMP_20220908C","KPMP_20220908D","HsKidAt_20220914G","HsKidAt_20221005G","HuBMAP_20221020B","HuBMAP_20221021A","HuBMAP_20221021B",
"HuBMAP_20221021C","HsKidAt_20221116A","HsKidAt_20221116B","HsKidAt_20221116C","HsKidAt_20221116D","HsKidAt_20221116E","HsKidAt_20221116F",
"HsKidAt_20221116G","HsKidAt_20221130A","HsKidAt_20221130B","HsKidAt_20221130C","HsKidAt_20221130D","HsKidAt_20221130E","HsKidAt_20221130G",
"HuBMAP_20221213A","HuBMAP_20221213B","HuBMAP_20221213C","HsKidAt_20221215A","HsKidAt_20221215B","HuBMAP_20221215A","HuBMAP_20221215B",
"HuBMAP_20221215C","HuBMAP_20221215D",
       ]

In [None]:
# Process every experiment in `exps`, in list order. doubletdetect() has no
# return value; its results are the PDF and text files written under output_dir.
doubletdetect(exps)