In [None]:
import numpy as np
import doubletdetection
import scanpy as sc
import matplotlib.pyplot as plt
import scipy.io
import tarfile
import time
import anndata as ad

# Set scanpy's global n_jobs setting to 8.
sc.settings.n_jobs=8
# Apply scanpy's figure parameters using its default arguments.
sc.set_figure_params()
# IPython magic: select the inline matplotlib backend for this notebook.
%matplotlib inline

In [None]:
# Directory from which doubletdetect() reads the per-experiment
# '<id>_10X-R_UMI_counts_EmptyBC_Filter.mtx' count matrices.
# NOTE(review): absolute, machine-specific paths; adjust before running elsewhere.
input_dir = '/home/blake/data/pod/blake_LTS/Atlas_V2/Raw_Counts'
# Directory that receives the diagnostic PDFs and doublet-label text files.
output_dir = '/home/blake/data/pod/blake_LTS/Atlas_V2/DoubletDetection'
# Point scanpy's figure output at the same directory, so plots saved through
# scanpy's `save=` argument are written relative to output_dir.
sc.settings.figdir = output_dir

In [None]:
def doubletdetect(exps):
    """Run DoubletDetection on each experiment and save its outputs.

    For every experiment ID in ``exps`` the function reads
    ``<input_dir>/<id>_10X-R_UMI_counts_EmptyBC_Filter.mtx``, transposes it,
    fits a ``doubletdetection.BoostClassifier`` and writes to ``output_dir``:

    * ``<id>_10X-R_convergence_test.pdf`` and ``<id>_10X-R_threshold_test.pdf``
      (DoubletDetection diagnostic plots),
    * a UMAP PDF coloured by ``doublet`` and ``doublet_score`` (saved by
      scanpy relative to ``sc.settings.figdir``),
    * ``<id>_10X-R_DoubletDetection_doublets.txt`` holding one predicted
      label per line, in the order returned by ``predict``.

    The module-level ``input_dir`` and ``output_dir`` must be defined before
    this function is called.

    Parameters
    ----------
    exps : iterable of str
        Experiment identifiers used as file-name prefixes.

    Returns
    -------
    None
        All results are written to disk.
    """
    # One definition of the calling thresholds so the predictions and the
    # convergence plot can never drift apart.
    p_thresh = 1e-7
    voter_thresh = 0.8

    for exp_id in exps:
        out_prefix = output_dir + '/' + exp_id + '_10X-R'

        # The matrix is transposed after loading.
        # NOTE(review): presumably genes x cells on disk, giving cells x genes
        # here -- confirm against the files.
        counts = scipy.io.mmread(
            input_dir + '/' + exp_id + '_10X-R_UMI_counts_EmptyBC_Filter.mtx'
        ).T.tocsc()

        clf = doubletdetection.BoostClassifier(
            n_iters=50,
            clustering_algorithm="phenograph",
            verbose=False,
            standard_scaling=False,
        )
        doublets = clf.fit(counts).predict(p_thresh=p_thresh, voter_thresh=voter_thresh)
        doublet_score = clf.doublet_score()

        # NOTE(review): the `dtype=` argument appears to be deprecated in newer
        # anndata releases -- confirm against the installed version.
        adata = ad.AnnData(counts, dtype=counts.dtype)
        adata.obs["doublet"] = doublets
        adata.obs["doublet_score"] = doublet_score

        # Diagnostic plots. The figures are closed as soon as they are saved:
        # otherwise two figures per experiment stay open for the whole loop
        # and memory grows with the number of experiments.
        convergence_fig = doubletdetection.plot.convergence(
            clf,
            save=out_prefix + '_convergence_test.pdf',
            show=False,
            p_thresh=p_thresh,
            voter_thresh=voter_thresh,
        )
        plt.close(convergence_fig)
        threshold_fig = doubletdetection.plot.threshold(
            clf,
            save=out_prefix + '_threshold_test.pdf',
            show=False,
            p_step=6,
        )
        plt.close(threshold_fig)

        # Visualize doublets on a UMAP embedding.
        sc.pp.normalize_total(adata)
        sc.pp.log1p(adata)
        sc.pp.highly_variable_genes(adata)
        sc.tl.pca(adata)
        sc.pp.neighbors(adata)
        sc.tl.umap(adata)
        # NOTE(review): scanpy appends `save` to its own "umap" prefix under
        # sc.settings.figdir, so the leading '/' likely puts the file in a
        # "umap" sub-folder -- confirm that folder exists.
        sc.pl.umap(
            adata,
            color=["doublet", "doublet_score"],
            show=False,
            save=('/' + exp_id + '_10X-R_UMAP.pdf'),
        )

        # Write out doublet status, one label per line.
        with open(out_prefix + '_DoubletDetection_doublets.txt', 'w') as file_handler:
            file_handler.writelines("{}\n".format(item) for item in doublets)

In [None]:
# Experiment IDs to process; each one is used as a file-name prefix by
# doubletdetect(). Order is preserved from the original list.
exps = [
    "KPMP_20191204A", "KPMP_20191204B", "KPMP_20191219A", "KPMP_20191219B",
    "KPMP_20191219C", "KPMP_20191219D", "KPMP_20200115A", "KPMP_20200115B",
    "KPMP_20200212A", "KPMP_20200212B", "KPMP_20200212C", "KPMP_20200212D",
    "KPMP_20200212E", "KPMP_20200212F", "KPMP_20190829C", "KPMP_20190829D",
    "KPMP_20190829E", "BUKMAP_20190529L", "BUKMAP_20190822F", "BUKMAP_20190829B",
    "BUKMAP_20191010", "BUKMAP_20191104A", "BUKMAP_20191009", "BUKMAP_20200205A",
    "BUKMAP_20200205D", "BUKMAP_20200205F", "BUKMAP_20200304B", "BUKMAP_20200304F",
    "KPMP_20190607I", "KPMP_20190607J", "KPMP_20190607K", "KPMP_20201202B",
    "KPMP_20201202C", "KPMP_20201202D", "KPMP_20201209A", "KPMP_20201209B",
    "KPMP_20201209C", "KPMP_20201209D", "KPMP_20201209E", "KPMP_20201209F",
    "KPMP_20201218A", "KPMP_20201218B", "KPMP_20201218C", "KPMP_20201218D",
    "KPMP_20201218E", "KPMP_20201218F", "KPMP_20210421A", "KPMP_20210421B",
    "KPMP_20210421C", "BUKMAP_20201009B", "BUKMAP_20201009C", "BUKMAP_20201009D",
    "BUKMAP_20201009E", "BUKMAP_20201009F", "BUKMAP_20201009G", "KPMP_20210811C",
    "KPMP_20210811D", "KPMP_20210825A", "KPMP_20210825B", "KPMP_20210825D",
    "KPMP_20210909A", "KPMP_20210909B", "KPMP_20210909C", "KPMP_20210909D",
    "KPMP_20210915A", "KPMP_20210915B", "KPMP_20210915C", "KPMP_20211013A",
    "KPMP_20211013B", "KPMP_20211013C", "KPMP_20211029A", "BUKMAP_20210929C",
    "BUKMAP_20210929E", "BUKMAP_20211029A", "BUKMAP_20211029B", "KPMP_20211208A",
    "KPMP_20211208D", "KPMP_20211208E", "KPMP_20220113A", "KPMP_20220113D",
    "KPMP_20220113E", "BUKMAP_20220113A", "BUKMAP_20220113B", "KPMP_20220217A",
    "KPMP_20220217B", "KPMP_20220217C", "KPMP_20220217D", "KPMP_20220217E",
    "KPMP_20220217G", "BUKMAP_20220217A", "BUKMAP_20220127A", "BUKMAP_20220127B",
    "BUKMAP_20220127C", "BUKMAP_20220127D", "BUKMAP_20220127E", "BUKMAP_20220127F",
    "BUKMAP_20220127G", "BUKMAP_20220127H", "KTRC_20220902D",
]

In [None]:
# Run doublet detection for every experiment ID listed in `exps`.
doubletdetect(exps)