In [None]:
import numpy as np
import scanpy as sc
import os
import pandas as pd
import matplotlib.pyplot as plt
import warnings

import celltypist
from celltypist import models

In [None]:
# conda env: celltypist_env

In [None]:
# --- Configuration: run prefix and output locations -------------------------
# `pre` names this notebook's outputs (sub-folder names and file-name prefix);
# `drive` is the Windows drive letter that holds the project tree.
pre = "H09_01"
drive = "F"

# Every output location hangs off one analysis root, so the root is built once
# and the other paths are derived from it (the resulting strings are unchanged).
base_analysis_path = os.path.join(drive + ":\\","monkey_IZI","analysis")
base_package_version_path = os.path.join(base_analysis_path,"package_versions")
base_table_path = os.path.join(base_analysis_path,"tables",pre)
base_plots_path = os.path.join(base_analysis_path,"plots")
plots_path = os.path.join(base_plots_path,pre)

# makedirs(exist_ok=True) creates missing parent folders too and is a no-op when
# the folder already exists, so the cell can be re-run safely on a fresh tree.
os.makedirs(plots_path, exist_ok=True)

In [None]:
#models.download_models(force_update = True)

In [None]:
# Bare expression: shows the value of celltypist's `models.models_path` as the
# cell output (presumably the folder where the model .pkl files are stored —
# verify against the celltypist docs).
models.models_path

In [None]:
# Show the result of `models.models_description()` as the cell output
# (presumably an overview of the available models — verify); useful for
# checking the model file names used further down.
models.models_description()

In [None]:
# CellTypist model files to apply to every sample. The file name without the
# ".pkl" extension is reused in the name of the CSV written for that model.
model_COVID19_HumanChallenge_Blood ='COVID19_HumanChallenge_Blood.pkl'
model_Healthy_COVID19_PBMC ='Healthy_COVID19_PBMC.pkl'
# Cell Ranger sample folders (one `count/<ident>/outs` directory each).
idents = ['Human1_6hr_S3','Human1_24hr_S5','Human1_TimeZero_S1','Human2_6hr_S4','Human2_24hr_S6','Human2_TimeZero_S2']


def load_normalized_counts(ident):
    """Read one sample's filtered Cell Ranger matrix and normalise it.

    Parameters
    ----------
    ident : str
        Sample folder name below the Cell Ranger ``count`` directory.

    Returns
    -------
    AnnData
        The object returned by ``sc.read_10x_h5`` after making var names
        unique, scaling every cell to 10,000 total counts and applying log1p.
    """
    path_filtered_counts = os.path.join(drive + ":\\","monkey_IZI","nextflow","outdir_human_ensemble","human","cellranger","count",ident,"outs","filtered_feature_bc_matrix.h5")
    adata = sc.read_10x_h5(path_filtered_counts)
    adata.var_names_make_unique()

    # `sc.pp.normalize_per_cell` is deprecated in favour of `normalize_total`.
    # The deprecated call also dropped cells with fewer than one count and stored
    # the per-cell totals in `obs["n_counts"]`; `filter_cells(min_counts=1)` keeps
    # both of those effects so the exported tables keep the same columns.
    sc.pp.filter_cells(adata, min_counts=1)
    sc.pp.normalize_total(adata, target_sum=1e4)  # normalize to 10,000 counts per cell
    sc.pp.log1p(adata)  # log-transform
    return adata


def annotate_and_save(adata_normalized, ident, model_file):
    """Annotate one sample with one CellTypist model and write the obs table.

    Parameters
    ----------
    adata_normalized : AnnData
        Log-normalised counts as produced by ``load_normalized_counts``.
    ident : str
        Sample name, used as the prefix of the output file name.
    model_file : str
        CellTypist model file name (e.g. ``'Healthy_COVID19_PBMC.pkl'``).

    Writes ``<ident>_anno_model_<model name>.csv`` into ``base_table_path``.
    """
    # A fresh copy is handed to every model, as in the original per-model copy,
    # so nothing annotate() may attach to the object carries over between models.
    predictions = celltypist.annotate(
        adata_normalized.copy(), model=model_file, majority_voting=True,
    )
    annotated = predictions.to_adata()

    model_name = os.path.splitext(model_file)[0]
    annotated.obs.to_csv(os.path.join(base_table_path, ident + '_anno_model_' + model_name + '.csv'))


# `to_csv` does not create folders, so make sure the table folder exists first.
os.makedirs(base_table_path, exist_ok=True)

for ident in idents:
    # Normalisation does not depend on the model, so it is done once per sample.
    adata_celltypist = load_normalized_counts(ident)
    for model_file in (model_COVID19_HumanChallenge_Blood, model_Healthy_COVID19_PBMC):
        annotate_and_save(adata_celltypist, ident, model_file)

In [None]:
# Print scanpy's logging header for provenance (presumably the versions of
# scanpy and its main dependencies — verify against the scanpy docs).
sc.logging.print_header()

In [None]:
# Record the version of every installed distribution for reproducibility.
# `pkg_resources` is deprecated; `importlib.metadata` is the stdlib replacement.
import re
from importlib import metadata

installed_versions = {}
for dist in metadata.distributions():
    dist_name = dist.metadata.get("Name")
    if not dist_name:
        # Broken or partial installs can lack a name; there is nothing useful to record.
        continue
    # Same normalisation pkg_resources used for `package.key`: runs of characters
    # other than letters, digits and "." become "-", and the result is lower-cased.
    key = re.sub(r"[^A-Za-z0-9.]+", "-", dist_name).lower()
    # The same distribution can be found more than once; keep the first hit,
    # which comes from the earliest search-path entry.
    installed_versions.setdefault(key, dist.version)

# open(..., "w") does not create folders, so make sure the target folder exists.
os.makedirs(base_package_version_path, exist_ok=True)
with open(os.path.join(base_package_version_path, pre + '_package_versions.txt'), "w", encoding="utf-8") as file:
    # Sorted so the file is stable between runs and easy to diff.
    for key in sorted(installed_versions):
        line = f"{key}=={installed_versions[key]}"
        file.write(line + "\n")
        print(line)