In [None]:
import pandas as pd
import numpy as np
import scrublet as scr
import anndata
import scanpy as sc
import matplotlib.pyplot as plt
import csv
# Seed NumPy's global random number generator so that any NumPy-based
# randomness in the cells below is repeatable from one run to the next.
np.random.seed(111)

In [None]:
# Sample to process in this run. It is interpolated into every input and
# output filename below. The commented list is kept for reference
# (presumably the full set of sample IDs this notebook is run on -- confirm).
#sample_names = ['76-2b','76-2f','79-1d','79-1e','79-2e','79-2f']
sample_name = '79-2f'

In [None]:
# Load the per-sample AnnData file and take its main matrix as the input
# for Scrublet.
# NOTE(review): Scrublet is designed for raw (un-normalized) counts --
# confirm that adata.X in the "filtered" file has not been normalized or
# log-transformed.
input_path = f'{sample_name}_filtered_adata.h5ad'
adata = sc.read_h5ad(input_path)
counts_matrix = adata.X

In [None]:
# Set up the Scrublet object on the loaded matrix. The expected doublet
# rate and the simulated-doublet ratio are fixed here rather than left at
# the library defaults.
scrub = scr.Scrublet(
    counts_matrix,
    expected_doublet_rate=0.06,
    sim_doublet_ratio=2,
)

In [None]:
# Run Scrublet's scoring pipeline with explicit preprocessing settings.
# The calls returned here are not the ones that get saved: a later cell
# re-calls doublets with a manually chosen score threshold.
doublet_scores, predicted_doublets = scrub.scrub_doublets(
    min_counts=1,
    min_cells=1,
    min_gene_variability_pctl=85,
    n_prin_comps=30,
    log_transform=True,
)

In [None]:
# Re-call doublets with an explicit cutoff on the doublet score. The result
# (presumably one boolean per cell -- confirm) is what gets written to the
# CSV and to adata.obs further down, and the cutoff itself is recorded in
# the params CSV.
# NOTE(review): 0.2 is hard-coded while sample_name is configurable --
# confirm the threshold is still appropriate when switching samples.
score_thresh = 0.2
scrublet_bool=scrub.call_doublets(threshold=score_thresh)

In [None]:
# Show Scrublet's built-in histogram figure. This runs after call_doublets,
# so it presumably reflects the manual threshold set above -- confirm.
scrub.plot_histogram()
plt.show()

In [None]:
# Turn the doublet calls into a one-column table named 'is_doublet' and
# save it per sample. The rename is a no-op if this cell is re-run on an
# already-converted table. The CSV is written with the table's default
# index as its first column.
scrublet_bool = pd.DataFrame(scrublet_bool).rename(columns={0: 'is_doublet'})
out_path = f'{sample_name}_predicted_doublets.csv'
scrublet_bool.to_csv(out_path)

In [None]:
print('Running UMAP...')
# Compute a 2-D layout from Scrublet's manifold coordinates, then register
# it on the Scrublet object under the key 'UMAP' for plotting.
# The positional 10 is presumably the neighbor count -- confirm against
# scr.get_umap's signature.
umap_coords = scr.get_umap(scrub.manifold_obs_, 10, min_dist=0.3)
scrub.set_embedding('UMAP', umap_coords)
print('Done.')

In [None]:
# Draw the embedding that was registered under the key 'UMAP'.
# order_points=True presumably controls draw order so that high-scoring
# cells are not hidden -- confirm in the Scrublet docs.
scrub.plot_embedding('UMAP', order_points=True)
plt.show()

In [None]:
# Record the manually chosen Scrublet settings for this sample as a
# two-column CSV (parameter name, value), one row per parameter.
out_path = f'{sample_name}_scrublet_params.csv'
scrublet_params = {
    'doublet_score_thresh': score_thresh
}
# The context manager closes the file even if a write fails. newline=''
# is what the csv module requires so the writer controls line endings
# itself (otherwise Windows output gets doubled line breaks).
with open(out_path, "w", newline="") as a_file:
    writer = csv.writer(a_file)
    writer.writerows(scrublet_params.items())

In [None]:
# Attach the doublet calls to the AnnData object and save it.
#
# The calls are in the same row order as adata (they were computed from
# adata.X), so assign them by position. Going through .to_numpy() avoids
# pandas index alignment, which means adata.obs can keep its original index
# (the observation names, e.g. cell barcodes) instead of having it reset and
# discarded just to match the table's 0..n-1 index.
doublet_flags = scrublet_bool['is_doublet'].to_numpy()
assert len(doublet_flags) == adata.n_obs, (
    "number of doublet calls does not match the number of cells in adata"
)
adata.obs['is_doublet'] = doublet_flags

# NOTE: this is the same filename the data was loaded from, so the input
# file is overwritten in place with the annotated version.
out_path = f'{sample_name}_filtered_adata.h5ad'
adata.write(out_path)