In [1]:
# General modules
import sys
import os
import session_info
import warnings
from pyprojroot.here import here
import glob
import pandas as pd
import numpy as np
import session_info
import seaborn as sns
import matplotlib.pyplot as plt
import scienceplots
import pickle

# Specific modules
import scanpy as sc
import anndata as an
from matplotlib.backends.backend_pdf import PdfPages


# Setting some parameters
# --- Session setup ---------------------------------------------------------
# Silence every warning for the rest of the session.
warnings.filterwarnings("ignore")

# Put the project's `bin` directory on the module search path; this has to
# happen before the custom-function import right below.
sys.path.insert(1, str(here('bin')))

# Project-local helpers (resolved through the path entry added above)
from customPythonFunctions import generate_split_dir, composition_barplot

print(f"Main directory path: {here()}")

# Figure defaults: `dpi_fig_save` is the resolution handed to scanpy for saved figures.
#plt.style.use(['science','nature','no-latex'])
dpi_fig_save = 300
sc.set_figure_params(
    dpi=100,
    dpi_save=dpi_fig_save,
    vector_friendly=True,
)

Main directory path: /scratch_isilon/groups/singlecell/shared/projects/Inflammation-PBMCs-Atlas


**Setting parameters** 

In [3]:
# Overwrite switches for figures and data, both enabled.
# NOTE(review): neither flag is read by the cells visible in this notebook — confirm where they are used.
overwriteFigures, overwriteData = True, True

In [4]:
# Run parameters. These are template placeholders: `cellGroup` is interpolated
# into the input/output file names and `celltype_variable` is the grouping
# variable used for the marker computation, so both must be filled in per run.
cellGroup = "template"
celltype_variable = ""

# All result paths are built relative to the directory the notebook runs from.
workDir = os.getcwd()

**Load data**

In [None]:
# Read the AnnData object for this cell group from the results folder
input_h5ad_path = here("{}/results/03_{}_HVGsubset_scVI_UMAP_clinical_allGenes_with_markers.h5ad".format(workDir, cellGroup))
adata = sc.read_h5ad(input_h5ad_path)
#adataHVGint = sc.read_h5ad(here('{}/results/01_{}_HVGsubset_scVI.h5ad'.format(workDir, cellGroup)))

# Replace the 'log1p' entry of .uns with a dict that only carries base=None
adata.uns['log1p'] = {'base': None}  # only if logscale is computed

# Last expression of the cell: show the AnnData summary
adata

In [None]:
# Load the pickled table of clustering resolutions for this cell group.
# (pickle executes code on load, so this file must come from this pipeline.)
cluster_pickle_path = here('{}/results/05_{}_clustering_resolutions_to_explore.pkl'.format(workDir, cellGroup))
with open(cluster_pickle_path, 'rb') as pickle_handle:
    clusterDF = pickle.load(pickle_handle)

# Copy every column of the table into adata.obs under the same name.
# NOTE(review): presumably clusterDF is indexed like adata.obs (cell barcodes) — confirm.
for column_name in clusterDF.columns:
    adata.obs[column_name] = clusterDF[column_name]

adata

In [None]:
print("Computing markers for the defined celltypes")

# Fail early with a readable message: `celltype_variable` is empty in the
# template and has to name an existing `adata.obs` column before this cell runs.
if celltype_variable not in adata.obs.columns:
    raise ValueError(
        "celltype_variable={!r} is not a column of adata.obs; set it in the "
        "parameters cell before computing markers".format(celltype_variable)
    )

# (1) Output folder for the defined cell types
folder_for_immunologist = here('{}/results/figures/folder_for_immunologist/'.format(workDir))
# Only the parent folder is interpolated. The previous version also passed an
# undefined name `res` to format(), which raised NameError on a fresh kernel.
resolution_folder = here('{}/defined_celltypes/'.format(folder_for_immunologist))
os.makedirs(resolution_folder, exist_ok=True)

# (2) Markers: one-vs-rest markers for each group of `celltype_variable`
de_key = "de_res_{}".format(celltype_variable)
## (2.1) Compute markers (Wilcoxon test on the 'log1p_10e4_counts' layer);
##       the result is registered in `adata` under `de_key`.
sc.tl.rank_genes_groups(adata,
                        groupby=celltype_variable,
                        layer='log1p_10e4_counts',
                        method='wilcoxon',
                        key_added=de_key,
                        use_raw=False)

## (2.2) Save markers
# The CSV holds the complete ranking for all groups.
rank_genesDF = sc.get.rank_genes_groups_df(adata, group=None, key=de_key)
rank_genesDF.to_csv(here('{}/04_1_{}_marker_genes_{}.csv'.format(resolution_folder, cellGroup, celltype_variable)))

# The Excel workbook holds one sheet per group, restricted to genes with
# logfoldchanges > 0.2.
rank_genes_excel_path = here('{}/04_1_{}_marker_genes_{}.xlsx'.format(resolution_folder, cellGroup, celltype_variable))
unique_groups = rank_genesDF["group"].unique()
with pd.ExcelWriter(rank_genes_excel_path, engine="openpyxl") as writer:
    for group in unique_groups:
        group_rank_genesDF = rank_genesDF[rank_genesDF["group"] == group]
        group_rank_genesDF = group_rank_genesDF[group_rank_genesDF["logfoldchanges"] > 0.2]
        group_rank_genesDF.to_excel(writer, sheet_name=group, index=False)

In [None]:
# Display the AnnData summary (the cell's last expression is rendered as its output).
adata

In [None]:
# Persist the AnnData object, gzip-compressed. The target uses the same
# "03_..._with_markers.h5ad" path pattern the notebook loads `adata` from,
# so the input file on disk is overwritten.
annotated_h5ad_path = here("{}/results/03_{}_HVGsubset_scVI_UMAP_clinical_allGenes_with_markers.h5ad".format(workDir, cellGroup))
adata.write(annotated_h5ad_path, compression="gzip")