In [None]:
import numpy as np
import scanpy as sc
import seaborn as sns
import os
import pandas as pd
import matplotlib.pyplot as plt
import warnings
import sys
# Make the project-local helper module `helperVDF` importable.
# NOTE(review): this is an absolute, user-specific Windows path; the notebook
# only runs on a machine with exactly this directory layout.
path_helper = ["C:\\","Users","vfriedrich","projects","monkey_IZI","git_documentation","scRNAseq_cross_species_primate_human","analysis","helper"]
helper_dir = os.path.join(*path_helper)
# Guard the append so re-running this cell does not add the same entry again.
if helper_dir not in sys.path:
    sys.path.append(helper_dir)
import helperVDF as h
# Show which Python interpreter the kernel is running on.
print(sys.executable)

In [None]:
# Presumably prints the versions of the main packages for the record; the
# helper lives in helperVDF and is not visible here (TODO confirm what it lists).
h.print_main_versions()

In [None]:
#env: scArches_env

In [None]:
# Run configuration. `pre` prefixes the output file names written further down;
# `drive` and `species` are handed to the project helpers.
pre = "M23"
drive = 'F'
species = 'cyno'

# Base directories for this step, as returned by the project helper
# (judging by the names: models, tables, plots, AnnData objects).
(
    base_model_path,
    base_table_path,
    base_plots_path,
    base_anndata_objects,
) = h.return_local_paths(drive=drive, pre=pre, add_path=True)

# Table directories of the earlier steps "H21" and "M21"; only the second
# returned path is kept.
# NOTE(review): these two lookups were originally labelled "cyno" (H21) and
# "human" (M21), which looks swapped: the M21 directory is the one the cyno
# QC annotation is read from later in this notebook.
_, base_table_path_H21, _, _ = h.return_local_paths(drive=drive, pre="H21", add_path=False)
_, base_table_path_M21, _, _ = h.return_local_paths(drive=drive, pre="M21", add_path=False)

In [None]:
# cyno: load the background-corrected counts and mark mitochondrial genes.
species = 'cyno'
# NOTE(review): this silences every warning for the remainder of the session.
warnings.filterwarnings("ignore")

IDs = h.return_cellranger_IDs(species)
adata_all_cyno = h.read_bg_corrected_counts(
    drive,
    cellranger_IDs=IDs,
    pre_bg_correction='M05',
    species=species,
)

# Add MT nomenclature: the gzip-compressed Ensembl export is passed to the
# helper that returns the cyno mitochondrial genes, which are then handed to
# h.add_MT_prefix together with the AnnData object.
cynoanno = pd.read_csv(
    os.path.join(h.return_git_path_local(), "preprocessing", "ensembl", "mart_export.txt.gz"),
    compression='gzip',
)
mt_genes_cyno = h.return_mt_genes_cyno(cynoanno)
adata_all_cyno = h.add_MT_prefix(adata=adata_all_cyno, mt_genes=mt_genes_cyno)

In [None]:
# QC annotation table for the current species, read from the M21 table
# directory; the first CSV column becomes the index.
anno_cyno = pd.read_csv(
    os.path.join(base_table_path_M21, f"M21_{species}_anno_QC.csv"),
    index_col=0,
)

In [None]:
# Tab-separated cell annotation table (first column as index), then reduced
# to the rows whose 'species' column matches the current species.
# NOTE(review): the directory is hard-coded here rather than derived from
# `drive` like the other paths in this notebook.
path_s0115_cellano_azimuth = os.path.join('F:\\monkey_IZI\\analysisR', 's0115_cellano_azimuth.txt')
s0115_cellano_azimuth = pd.read_csv(
    path_s0115_cellano_azimuth,
    delimiter="\t",
    index_col=0,
)
s0115_cellano_azimuth_cyno = s0115_cellano_azimuth.loc[
    lambda table: table['species'] == species
]

In [None]:
adata_QC_cyno = h.add_anno_to_adata(adata=adata_all_cyno,
                  anno_df=anno_cyno,
                  anno_columns_to_add=anno_cyno.columns,
                  rsuffix='_R')

adata_QC_cyno = h.add_anno_to_adata(adata=adata_all_cyno,
                  anno_df=s0115_cellano_azimuth_cyno,
                  anno_columns_to_add=s0115_cellano_azimuth_cyno.columns,
                  rsuffix='_RR')


In [None]:
#standard filtering - remove cell with zero expression and genes appearing in less than 50 cells
adata_QC_cyno,nr_removed_cells,nr_removed_genes = h.standard_scanpy_filter(adata_QC_cyno,min_genes_per_cell=1,min_cells_per_gene=50)

print('nr_removed_cells : ' + str(nr_removed_cells))
print('nr_removed_genes : ' + str(nr_removed_genes))

In [None]:
adata_passedQC_cyno = h.filter_adata_obs(adata_all_cyno,col_name='QC_summary',val='passed_QC')
adata_passedQC_cyno.obs = adata_passedQC_cyno.obs.loc[:, ~adata_passedQC_cyno.obs.columns.duplicated()]

In [None]:
# Independent copy taken before integration; it goes through PCA, neighbors
# and UMAP without Harmony and is used for the "no batch correction" plot.
adata_passedQC_cyno_backup = adata_passedQC_cyno.copy()

### Integration via harmony
Using the Python port of Harmony, as described in the scanpy documentation: https://scanpy.readthedocs.io/en/stable/generated/scanpy.external.pp.harmony_integrate.html

In [None]:
# Compute PCA (not UMAP) -> creates key 'X_pca' in obsm.
# Run on both objects: the one that will be Harmony-integrated and the
# un-integrated copy used for comparison.
sc.pp.pca(adata_passedQC_cyno)
sc.pp.pca(adata_passedQC_cyno_backup)

In [None]:
# Harmony integration with obs column 'experiment' as the batch key. Per the
# scanpy documentation the corrected embedding is stored in
# obsm['X_pca_harmony'] by default, which is the key used for the neighbor graph.
sc.external.pp.harmony_integrate(adata_passedQC_cyno, 'experiment')

In [None]:
# Neighbor graph on the Harmony-corrected embedding for the integrated object ...
sc.pp.neighbors(adata_passedQC_cyno,use_rep = 'X_pca_harmony')
# ... and with scanpy's default representation for the un-integrated copy.
sc.pp.neighbors(adata_passedQC_cyno_backup)

In [None]:
# UMAP from each object's own neighbor graph (integrated vs. un-integrated).
sc.tl.umap(adata_passedQC_cyno)
sc.tl.umap(adata_passedQC_cyno_backup)

In [None]:
# Store a copy of the UMAP coordinates of the integrated object under an
# explicit key; 'X_umap' itself is left unchanged (presumably so the
# Harmony-based layout stays identifiable in the saved object).
adata_passedQC_cyno.obsm['X_umap_harmony'] = adata_passedQC_cyno.obsm['X_umap'].copy() 

In [None]:
#No batch correction
# Save the UMAP of the un-integrated object, coloured by 'experiment', as PDF.
with plt.rc_context():
    # sc.pl.umap opens its own figure; with show=False it is left open as the
    # current figure, so plt.savefig / plt.close below act on exactly that
    # figure. No extra plt.figure() call is made: it would only create an
    # empty figure that is neither saved nor closed.
    sc.pl.umap(adata_passedQC_cyno_backup,color = 'experiment',show = False,frameon=False)
    plt.savefig(os.path.join(base_plots_path,pre + '_umap_no_batch_correction_.pdf'),dpi=300, bbox_inches="tight")
    plt.close()
# Draw the same plot again so it is displayed in the notebook.
sc.pl.umap(adata_passedQC_cyno_backup,color = 'experiment',frameon=False)

In [None]:
#batch correction
# Save the UMAP of the Harmony-integrated object, coloured by 'experiment', as PDF.
with plt.rc_context():
    # sc.pl.umap opens its own figure; with show=False it is left open as the
    # current figure, so plt.savefig / plt.close below act on exactly that
    # figure. No extra plt.figure() call is made: it would only create an
    # empty figure that is neither saved nor closed.
    sc.pl.umap(adata_passedQC_cyno,color = 'experiment',show = False,frameon=False)
    plt.savefig(os.path.join(base_plots_path,pre + '_umap_batch_corrected_.pdf'),dpi=300, bbox_inches="tight")
    plt.close()
# Draw the same plot again so it is displayed in the notebook.
sc.pl.umap(adata_passedQC_cyno,color = 'experiment',frameon=False)



In [None]:
# Louvain clustering of the integrated object with scanpy's default settings;
# the labels end up in obs['louvain'], which is plotted further down.
sc.tl.louvain(adata_passedQC_cyno)

In [None]:
# Second Louvain run at resolution 2, stored separately in obs['louvain_res_2']
# so the default-resolution labels are kept.
sc.tl.louvain(adata_passedQC_cyno,resolution=2,key_added = 'louvain_res_2')

In [None]:
# Both Louvain clusterings on the UMAP of the batch-corrected object,
# with the cluster labels drawn on the data.
sc.pl.umap(
    adata_passedQC_cyno,
    color=['louvain', 'louvain_res_2'],
    legend_loc="on data",
)

In [None]:
# The three cell-type annotation columns on the same UMAP, labels on the data.
# (Column names are reproduced exactly as they occur in obs.)
sc.pl.umap(
    adata_passedQC_cyno,
    color=[
        'predicted.celltype.l1',
        'Healty_WholeBlood_maj',
        'COVID19_PBMC_maj',
    ],
    legend_loc="on data",
)

In [None]:
#compare cell types from automated cell type annotation per louvain cluster (resolution:2)

In [None]:
# Count, per Louvain cluster (resolution 2), how often each label of the three
# cell-type annotation columns occurs: one table per annotation column.
cells_by_cluster = adata_passedQC_cyno.obs.groupby('louvain_res_2')
tab1 = pd.DataFrame(cells_by_cluster['predicted.celltype.l1'].value_counts())
tab2 = pd.DataFrame(cells_by_cluster['COVID19_PBMC_maj'].value_counts())
tab3 = pd.DataFrame(cells_by_cluster['Healty_WholeBlood_maj'].value_counts())

# Save: all tables are computed first, then written as
# <pre>_<species>_ct_louvain_res_2_tabN.csv into the table directory.
for tab_name, tab in (('tab1', tab1), ('tab2', tab2), ('tab3', tab3)):
    tab.to_csv(os.path.join(base_table_path, f"{pre}_{species}_ct_louvain_res_2_{tab_name}.csv"))

### Save object

In [None]:
# Write the integrated, clustered object to
# <pre>_<species>_batch_corrected.h5ad in the AnnData output directory.
adata_passedQC_cyno.write(
    os.path.join(base_anndata_objects, f"{pre}_{species}_batch_corrected.h5ad")
)

### Save session

In [None]:
# Record the software environment of this run via the project helpers.
# NOTE(review): the helpers are not visible here; presumably the package
# versions are written under the returned path, tagged with `pre` — confirm.
base_package_version_path = h.return_package_version_local_path(drive=drive)
h.save_package_versions(base_package_version_path,pre,do_print = True)
h.print_main_versions()