In [None]:
import numpy as np
import scanpy as sc
import os
import scvi
import solo
import pandas as pd

Doublet detection with SOLO (scvi-tools), following the user guide at https://docs.scvi-tools.org/en/stable/user_guide/models/solo.html
API reference used: https://docs.scvi-tools.org/en/stable/api/reference/scvi.external.SOLO.html#scvi.external.SOLO.view_anndata_setup

In [None]:
#env: solo_env

In [None]:
# Run prefix: names the per-run model/table subfolders and the package-version file.
pre = "H06_01"
# Drive letter on which the project folder lives.
drive = "F"

# Every output location below shares the same "<drive>:\monkey_IZI\analysis" root.
analysis_root = os.path.join(drive + ":\\", "monkey_IZI", "analysis")
base_model_path = os.path.join(analysis_root, "models", pre)
base_package_version_path = os.path.join(analysis_root, "package_versions")
base_table_path = os.path.join(analysis_root, "tables", pre)

# Sample identifiers; the notebook processes them one at a time.
idents = [
    'Human1_6hr_S3',
    'Human1_24hr_S5',
    'Human1_TimeZero_S1',
    'Human2_6hr_S4',
    'Human2_24hr_S6',
    'Human2_TimeZero_S2',
]

Run SOLO separately for each sample.

In [None]:
# Per-sample doublet detection: for every sample in `idents`, load the filtered
# Cell Ranger matrix, restrict it to highly variable genes, pretrain an scVI model,
# train SOLO on top of it, then write the predictions (CSV) and the SOLO model to disk.

# Fixed seed so that model training is repeatable between runs.
SOLO_SEED = 0

# Loop-invariant root of the per-sample Cell Ranger "count" output folders.
cellranger_count_root = os.path.join(drive + ":\\","monkey_IZI","nextflow","outdir_human_ensemble","human","cellranger","count")

# Create the table folder up front: to_csv does not create missing directories,
# and failing here is much cheaper than failing after a full training run.
os.makedirs(base_table_path, exist_ok=True)

for ident in idents:
    # re-seed for every sample so each sample's result does not depend on loop order
    scvi.settings.seed = SOLO_SEED

    path_filtered_counts = os.path.join(cellranger_count_root,ident,"outs","filtered_feature_bc_matrix.h5")
    adata_filtered_bc = sc.read_10x_h5(path_filtered_counts)
    adata_filtered_bc.var_names_make_unique()

    #filter genes for solo scvi model
    sc.pp.filter_genes(adata_filtered_bc, min_counts=3)

    #log-transform a copy only; adata_filtered_bc.X itself is left untouched for the scvi model
    adata_filtered_bc_log = adata_filtered_bc.copy()
    sc.pp.log1p(adata_filtered_bc_log)

    #compute highly variable genes as input for scvi model
    sc.pp.highly_variable_genes(adata_filtered_bc_log, n_top_genes=1200, flavor="cell_ranger")

    #carry the hvg flag over to the untransformed object and keep only flagged genes
    adata_filtered_bc.var['highly_variable'] = adata_filtered_bc_log.var['highly_variable'].copy()
    mask = adata_filtered_bc.var['highly_variable']
    adata_filtered_bc = adata_filtered_bc[:,mask].copy()

    #ensure that no cell has zero expression in complete hvg gene space
    sc.pp.filter_cells(adata_filtered_bc, min_genes=1)

    #pretrain scvi model
    scvi.model.SCVI.setup_anndata(adata_filtered_bc)
    vae = scvi.model.SCVI(adata_filtered_bc)
    vae.train(max_epochs=400)

    #train solo model (named solo_model so the imported `solo` module is not overwritten)
    solo_model = scvi.external.SOLO.from_scvi_model(vae)
    solo_model.train()

    #do doublet prediction, once with soft=False and once with soft=True
    #NOTE(review): presumably class labels vs. per-class scores - confirm against the installed scvi-tools version
    predictions = solo_model.predict(soft=False)
    pred_prob = solo_model.predict(soft=True)
    pred_df = pd.DataFrame(predictions,columns = ['doublet_prediction'])
    pred_df['sample'] = ident
    out_df = pd.concat([pred_df,pred_prob],axis = 1)

    #save solo doublet prediction
    out_df.to_csv(os.path.join(base_table_path,ident + '_solo_doub_pred.csv'))

    #save solo model together with its anndata
    #NOTE(review): save() is called without an overwrite flag; presumably a re-run with an
    #existing model folder fails here - verify before re-running
    solo_model.save(dir_path=os.path.join(base_model_path,ident),save_anndata = True)

Save session information (package versions).

In [None]:
# Emit scanpy's logging header into the cell output so it is kept with the session record.
sc.logging.print_header()

In [None]:
# Write one "name==version" line per distribution in the active working set to
# <base_package_version_path>/<pre>_package_versions.txt and echo each line to the cell output.
# NOTE(review): pkg_resources is deprecated by setuptools; importlib.metadata is the
# standard-library replacement - consider migrating once the environment's Python version is confirmed.
import pkg_resources

# open(..., "w") does not create missing folders, so make sure the target folder exists first
os.makedirs(base_package_version_path, exist_ok=True)

with open(os.path.join(base_package_version_path, pre + '_package_versions.txt'), "w") as file:
    for package in pkg_resources.working_set:
        # build the line once so the file and the printed output cannot drift apart
        requirement = f"{package.key}=={package.version}"
        file.write(requirement + "\n")
        print(requirement)

In [None]:
# env: solo_env