In [1]:
import os
import rpy2
import logging
import warnings
import anndata2ri
import pandas as pd
import scanpy as sc
import decoupler as dc
from anndata import AnnData
import rpy2.robjects as robjects
from rpy2.robjects import pandas2ri
from matplotlib.pyplot import rcParams
from sklearn.model_selection import train_test_split

In [2]:
# # Ignore R warning messages
#Note: this can be commented out to get more verbose R output
rpy2.rinterface_lib.callbacks.logger.setLevel(logging.ERROR)

warnings.filterwarnings("ignore", category=PendingDeprecationWarning)
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

# Automatically convert rpy2 outputs to pandas dataframes
pandas2ri.activate()
anndata2ri.activate()
%load_ext rpy2.ipython

#rcParams['figure.figsize']=(4,4) #rescale figures

sc.settings.verbosity = 3
#sc.set_figure_params(dpi=200, dpi_save=300)
sc.logging.print_versions()

-----
anndata     0.8.0
scanpy      1.9.3
-----
PIL                         9.5.0
anndata2ri                  1.1
appnope                     0.1.3
asttokens                   NA
backcall                    0.2.0
cffi                        1.15.1
comm                        0.1.3
cycler                      0.10.0
cython_runtime              NA
dateutil                    2.8.2
debugpy                     1.6.7
decorator                   5.1.1
decoupler                   1.4.0
executing                   1.2.0
google                      NA
h5py                        3.9.0
igraph                      0.10.4
ipykernel                   6.23.2
ipywidgets                  8.0.6
jedi                        0.18.2
jinja2                      3.1.2
joblib                      1.2.0
kiwisolver                  1.4.4
leidenalg                   0.9.1
llvmlite                    0.39.1
louvain                     0.8.0
markupsafe                  2.1.3
matplotlib                  3.7.1
mpl_t

In [3]:
%%R
# Attach every R package used by the notebook, in a fixed order (the order
# decides which package masks which function), without start-up banners.
# Attaching magrittr here is what makes `%>%` available in later R cells.
suppressPackageStartupMessages({
    invisible(lapply(
        c(
            "reticulate", "ggplot2", "tidyr", "dplyr", "purrr", "Seurat",
            "tibble", "magrittr", "forcats", "Matrix", "stats", "tester",
            "methods", "matrixStats", "edgeR", "DESeq2", "limma",
            "pbmcapply", "parallel", "lmerTest", "lme4", "glmmTMB", "blme"
        ),
        library,
        character.only = TRUE
    ))
})



    an issue that caused a segfault when used with rpy2:
    https://github.com/rstudio/reticulate/pull/1188
    Make sure that you use a version of that package that includes
    the fix.
    

1: package ‘DESeq2’ was built under R version 4.3.1 
2: package ‘S4Vectors’ was built under R version 4.3.1 
3: package ‘IRanges’ was built under R version 4.3.1 
4: package ‘GenomeInfoDb’ was built under R version 4.3.1 
5: package ‘MatrixGenerics’ was built under R version 4.3.1 
6: In checkMatrixPackageVersion() :
  Package version inconsistency detected.
TMB was built with Matrix version 1.6.0
Current Matrix version is 1.6.1.1
Please re-install 'TMB' from source using install.packages('TMB', type = 'source') or ask CRAN for a binary version of 'TMB' matching CRAN's 'Matrix' package
7: In checkDepPackageVersion(dep_pkg = "TMB") :
  Package version inconsistency detected.
glmmTMB was built with TMB version 1.9.3
Current TMB version is 1.9.6
Please re-install glmmTMB from source or restore original ‘TMB’ package (see '?reinstalling' for more information)


## **Prepare data**

Now, we load the preprocessed and annotated data for downstream analysis.

Please set `get_cell_types=True` if the `cell_type` column is absent, or if it contains cell-type annotations that are not one of the following:

- `Excitatory`, `Inhibitory`, `Astrocyte`, `Oligodendrocyte`, `OPC`, `Microglia`, `Endothelial`.

In [4]:
# Dataset selector; used to build every input and output path in this notebook.
save_prefix = 'leng_etc'
# When True, `cell_type` is (re)derived from the dataset's own annotation column.
get_cell_types = True

adata_annot = sc.read_h5ad(f'../data/raw/{save_prefix}/{save_prefix}_raw_anndata.h5ad')
adata_annot.obs_names_make_unique()
adata_annot.var_names_make_unique()

# If the file ships a `counts` layer, make it the main matrix so that `.X`
# holds the values stored under `counts`. When no such layer exists `.X` is
# used as-is; copying it into a new layer would be wasted work because all
# layers are discarded just below.
if 'counts' in adata_annot.layers:
    adata_annot.X = adata_annot.layers['counts'].copy()

# Drop everything that is not needed downstream (embeddings, layers, gene-level
# matrices, unstructured data, pairwise graphs); only X, obs and var remain.
del adata_annot.obsm, adata_annot.layers, adata_annot.varm, adata_annot.uns, adata_annot.obsp


In [5]:
# Canonical cell-type names used throughout the notebook.
celltypes = [
    "Excitatory",
    "Inhibitory",
    "Astrocyte",
    "Microglia",
    "Oligodendrocyte",
    "OPC",
    "Endothelial",
]

# Both Leng datasets use the same abbreviated labels.
_leng_labels = {
    'Exc': 'Excitatory',
    'Inh': 'Inhibitory',
    'Astro': 'Astrocyte',
    'Endo': 'Endothelial',
    'Micro': 'Microglia',
    'OPC': 'OPC',
    'Oligo': 'Oligodendrocyte',
}

# Per-dataset translation from the dataset's own labels to the canonical names.
# Each Leng entry gets its own copy so the tables stay independent objects;
# the SEA-AD labels already match the canonical names and map onto themselves.
mapping = {
    'leng_etc': dict(_leng_labels),
    'leng_sfg': dict(_leng_labels),
    'seaad_mtg': {name: name for name in celltypes},
}

# Name of the `.obs` column that carries each dataset's own cell-type labels.
cell_column = dict(
    leng_etc='clusterCellType',
    leng_sfg='clusterCellType',
    seaad_mtg='cell_type',
)

In [6]:
if get_cell_types:
    # Translate the dataset's own labels into the canonical names; labels that
    # are missing from the mapping become NaN.
    adata_annot.obs['cell_type'] = adata_annot.obs[cell_column[save_prefix]].map(mapping[save_prefix])
    # Keep only cells with a recognised cell type (this also drops the NaN rows).
    # Boolean subsetting returns a *view* of the parent object; `.copy()` turns
    # it into an independent AnnData so that later writes to `.obs` do not have
    # to go through an implicit view-to-copy conversion.
    adata_annot = adata_annot[adata_annot.obs.cell_type.isin(celltypes)].copy()

In [7]:
# Display a summary of the annotated object (dimensions and `.obs` columns).
adata_annot

View of AnnData object with n_obs × n_vars = 42528 × 33694
    obs: 'SampleID', 'PatientID', 'BrainRegion', 'BraakStage', 'SampleBatch', 'nUMI', 'nGene', 'initialClusterAssignments', 'seurat.clusters', 'clusterAssignment', 'clusterCellType', 'cell_type'

## **Additional Parameters**

Now we specify other related information

Specify the following:

- `metadata`: Path to metadata. Metadata must contain a column called `pathology.group` with the only unique groups being `no`, `early`, and `late`.

- `map_meta`: whether to map metadata to obtain `pathology.group`. If False, it will be assumed that `pathology.group` exist in `adata.obs`

- `test_names`: List of the different test names of interest.

- `save_prefix`: Prefix for saving critical files. preferably chosen to be in the format `{source name}_{brain region}`. e.g `mathys_pfc`

- `subject_id`: Name of the column containing Subject/Patient ID in metadata and `.obs`

- `covariates`: This should be a list of additional confounding covariates (not including `pathology.group`), e.g. `Sex`, `Sample Batch`, `Age`, and other factors that are not of interest themselves but might have an effect on the pathological status. If you do not want to include any confounders, please set `covariates = ['None']`.

- `filter_genes`: Specifies whether to filter genes using `gene_celltype_threshold` before performing differential expression tests.

        

In [8]:
# --- Switches ---------------------------------------------------------------
map_meta = True
filter_genes = "TRUE"

# Detection threshold applied per cell type when selecting genes; it is passed
# as `min_prop` to the pseudobulk gene filter and compared against the per-gene
# detection rate in the R cells.
gene_celltype_threshold = 0.10

# --- Study-specific settings --------------------------------------------------
# Name of the column that identifies the subject/patient in each study.
subject_ids_for_study = {
    'leng_sfg': 'PatientID',
    'leng_etc': 'PatientID',
    'seaad_mtg': 'Donor ID',
}
subject_id = subject_ids_for_study[save_prefix]

# Covariates to account for in the regression, per study. The single entry
# 'None' is the sentinel for "no covariates" (e.g. ['ageDeath.cat'] could be
# used for the Leng datasets instead).
covaraites_for_study = {
    study: ['None'] for study in ('leng_sfg', 'leng_etc', 'seaad_mtg')
}
covariates = covaraites_for_study[save_prefix]

# --- Contrasts and inputs -----------------------------------------------------
test_names = ['early_vs_no', 'late_vs_early', 'late_vs_no', 'ad_vs_no']
metadata = f'../data/raw/{save_prefix}/{save_prefix}_metadata.csv'  # path to metadata

# Which method families to run. The flags are R-style strings because the same
# object is read both by the R cells and by the Python cells that save results.
deg_methods_to_run = robjects.ListVector(
    dict(
        single_cell_methods="FALSE",
        pseudo_bulk_methods="TRUE",
        mixed_model_methods="FALSE",
    )
)



## **Methods**

Here we implement a modified version of the R package `Libra`, provided by [**Squair et al. 2021**](https://www.nature.com/articles/s41467-021-25960-2), which implements all methods for DE analysis discussed in the study within a consistent interface. 
The source package is available from [GitHub](https://github.com/neurorestore/Libra) and as Supplementary Software. 

The source package implements a total of `22 unique differential expression methods` that can all be accessed from a single function. These methods are listed below

By default the pipeline will use a pseudobulk approach, implementing the `edgeR` package with a likelihood ratio test (LRT) null hypothesis testing framework. Each of the 22 tests can be accessed through three key variables of the `run_de` function: `de_family`, `de_method`, and `de_type`. Their precise access arguments are summarized in the below table.

| Method | de_family | de_method | de_type |
|--------|-----------|-----------|---------|
Wilcoxon Rank-Sum test | singlecell | wilcox | |
Likelihood ratio test | singlecell | bimod | |
Student's t-test | singlecell | t | |
Negative binomial linear model | singlecell | negbinom | |
Logistic regression | singlecell | LR | |
MAST | singlecell | MAST | |
edgeR-LRT | pseudobulk | edgeR | LRT
edgeR-QLF | pseudobulk | edgeR | QLF
DESeq2-LRT | pseudobulk | DESeq2 | LRT
**DESeq2-Wald** | **pseudobulk** | **DESeq2** | **Wald**
limma-trend | pseudobulk | limma | trend
limma-voom | pseudobulk | limma | voom
Linear mixed model | mixedmodel | linear | Wald
Linear mixed model-LRT | mixedmodel | linear | LRT
Negative binomial generalized linear mixed model | mixedmodel | negbinom | Wald
Negative binomial generalized linear mixed model-LRT | mixedmodel | negbinom | LRT
Negative binomial generalized linear mixed model with offset | mixedmodel | negbinom_offset | Wald
Negative binomial generalized linear mixed model with offset-LRT | mixedmodel | negbinom_offset | LRT
Poisson generalized linear mixed model | mixedmodel | poisson | Wald
Poisson generalized linear mixed model-LRT | mixedmodel | poisson | LRT
Poisson generalized linear mixed model with offset | mixedmodel | poisson_offset | Wald
Poisson generalized linear mixed model with offset-LRT | mixedmodel | poisson_offset | LRT

In [9]:
# Methods to run per family. Pseudobulk and mixed-model entries are written as
# "<de_method>-<de_type>"; the R cells split them on the dash.
single_cell_methods = ["wilcox", "MAST"]
pseudo_bulk_methods = ["DESeq2-Wald"]
mixed_model_methods = ["poisson-LRT", "negbinom-LRT"]

## **Map Metadata**

In [10]:
if map_meta:
    # Every metadata column is read as text so that subject IDs compare as strings.
    meta = pd.read_csv(metadata, encoding_errors='ignore').astype(str)

    # Subject ID -> pathology group. A dedicated name is used so that the
    # cell-type `mapping` table defined earlier is not overwritten.
    subject_to_group = dict(zip(meta[subject_id], meta['pathology.group']))

    # The IDs in `.obs` are cast to text as well; otherwise non-string IDs
    # (e.g. integers) would never match the string keys and every cell would
    # silently end up without a group.
    adata_annot.obs['pathology.group'] = (
        adata_annot.obs[subject_id].astype(str).map(subject_to_group)
    )

    # Report cells whose subject is missing from the metadata. A plain print is
    # used because UserWarning is filtered out at the top of the notebook.
    n_unmapped = int(adata_annot.obs['pathology.group'].isna().sum())
    if n_unmapped:
        print(f'WARNING: {n_unmapped} cells have no `pathology.group` after mapping {metadata}')
elif 'pathology.group' not in adata_annot.obs:
    # With map_meta=False the column must already be present.
    raise KeyError("map_meta is False but `pathology.group` is missing from adata_annot.obs")

## **Loading data into memory**

In [11]:
# Sum the values in `.X` per (subject, cell type) pair. `min_cells=0` and
# `min_counts=0` are passed so that no profile is filtered out at this stage.
pseudobulk = dc.get_pseudobulk(
    adata_annot,
    sample_col=subject_id,
    groups_col='cell_type',
    layer=None,
    mode='sum',
    min_cells=0,
    min_counts=0,
)

# Cell types analysed from here on ('Endothelial' is not included).
celltypes = ['Excitatory', 'Inhibitory', 'Astrocyte', 'Microglia', 'Oligodendrocyte', 'OPC']

# For every cell type, collect the genes that pass the proportion filter on
# that cell type's pseudobulk profiles.
genes_to_keep = {}
for cell_type in celltypes:
    print(f'filtering genes in {cell_type}...')
    profiles_of_type = pseudobulk[pseudobulk.obs['cell_type'] == cell_type].copy()
    genes_to_keep[cell_type] = dc.filter_by_prop(profiles_of_type, min_prop=gene_celltype_threshold)

# Wrap the per-cell-type gene sets in an R list so they can be handed to R cells.
genes_to_keep_list = robjects.ListVector(genes_to_keep)

# The pseudobulk objects were only needed for gene selection.
del pseudobulk, profiles_of_type

filtering genes in Excitatory...
filtering genes in Inhibitory...
filtering genes in Astrocyte...
filtering genes in Microglia...
filtering genes in Oligodendrocyte...
filtering genes in OPC...


In [12]:
%%R -i adata_annot -i genes_to_keep_list -i deg_methods_to_run -i celltypes

# Show the object that was transferred from Python under the name `adata_annot`.
print(adata_annot)

# Confirmation that the inputs are now available to the following R cells.
print("loaded data into memory for recursive use")

class: SingleCellExperiment 
dim: 33694 42528 
metadata(0):
assays(1): X
rownames(33694): RP11-34P13.3 FAM138A ... AC213203.1 FAM231B
rowData names(0):
colnames(42528): EC2_AAACCTGAGGATGCGT EC2_AAACCTGAGTCAATAG ...
  EC10_TTTGTCATCTATCGCC EC10_TTTGTCATCTCTGCTG
colData names(13): SampleID PatientID ... cell_type pathology.group
reducedDimNames(0):
mainExpName: NULL
altExpNames(0):
[1] "loaded data into memory for recursive use"


#### **Pseudo-bulk Differential Expression Analysis with custom scripts adapted from [**Squair et al. 2021**](https://www.nature.com/articles/s41467-021-25960-2)**

In [13]:
%%R -i covariates -i test_names -i subject_id -i pseudo_bulk_methods -i gene_celltype_threshold -o pseudobulk_degs

library(scuttle)

# Load the project's DE helpers (modified Libra functions).
source('../scripts/functions/deg_functions/run_de.R')
source('../scripts/functions/deg_functions/pseudobulk_de.R')
source('../scripts/functions/deg_functions/check_inputs.R')
source('../scripts/functions/deg_functions/to_pseudobulk.R')
source('../scripts/functions/deg_functions/mixedmodel_de.R')
source('../scripts/functions/deg_functions/singlecell_de.R')

# 'None' is the sentinel for "no covariates".
if ('None' %in% covariates){
    latent_vars = NULL
} else {
    latent_vars = covariates
}

# Spaces in the subject-ID column name are replaced by dots
# (presumably because that is how the column is named once it reaches R -- TODO confirm).
replicate_col <- gsub(" ", ".", subject_id, fixed = TRUE)

# Every contrast is run with 'no' as the reference level of `pathology.group`.
ref_level <- 'no'

# The flag is stored as the string "TRUE"/"FALSE"; convert it explicitly
# instead of relying on implicit character-to-logical coercion in `if`.
if (isTRUE(as.logical(deg_methods_to_run[['pseudo_bulk_methods']]))){

    # Results are nested as [[celltype]][[test_name]][[de_method]].
    pseudobulk_degs <- list()
    for (celltype in celltypes){

        pseudobulk_degs[[celltype]] <- list()

        # Restrict to the cells of the current cell type
        print('--------------------------------')
        print(paste0('Estimating DEGs in ', celltype, '...'))
        sce_cell <- adata_annot[, adata_annot$cell_type == celltype]

        # Keep only the genes pre-selected for this cell type on the Python side
        sce_cell <- sce_cell[genes_to_keep_list[[celltype]], ]

        # Cell-level metadata with `pathology.group` as a factor whose reference
        # level is `ref_level`. This does not depend on the contrast, so it is
        # prepared once per cell type.
        meta <- colData(sce_cell)
        meta$pathology.group <- relevel(factor(meta$pathology.group), ref = ref_level)

        for (test_name in test_names){

            pseudobulk_degs[[celltype]][[test_name]] <- list()

            for (de_method in pseudo_bulk_methods){
                # Entries look like "<de_method>-<de_type>", e.g. "DESeq2-Wald".
                method_parts <- strsplit(de_method, "-")[[1]]

                print('----------------------------------')
                print(paste0('Obtaining statistics for ', toupper(test_name), ' with pseudo-bulk method ', toupper(de_method)))
                pseudobulk_degs[[celltype]][[test_name]][[de_method]] <- run_de(
                    sce_cell,
                    meta = meta,
                    replicate_col = replicate_col,
                    cell_type_col = 'cell_type',
                    label_col = 'pathology.group',
                    latent_vars = latent_vars,
                    test_name = test_name,
                    ref_level = ref_level,
                    min_cells = 3,
                    min_reps = 2,
                    min_features = 0,
                    de_family = 'pseudobulk',
                    de_method = method_parts[1],
                    de_type = method_parts[2],
                    n_threads = 2
                )
            }
        }

        print('........................')

    }
} else {
    print("Not evaluating DEGs with Pseudobulk Methods.")
    # The cell exports `pseudobulk_degs`, so it must exist even when this
    # method family is switched off.
    pseudobulk_degs <- NULL
}

[1] "--------------------------------"
[1] "Estimating DEGs in Excitatory..."
[1] "----------------------------------"
[1] "Obtaining statistics for EARLY_VS_NO with pseudo-bulk method DESEQ2-WALD"
[1] "Excitatory"
[1] "----------------------------------"
[1] "Obtaining statistics for LATE_VS_EARLY with pseudo-bulk method DESEQ2-WALD"
[1] "Excitatory"
[1] "----------------------------------"
[1] "Obtaining statistics for LATE_VS_NO with pseudo-bulk method DESEQ2-WALD"
[1] "Excitatory"
[1] "----------------------------------"
[1] "Obtaining statistics for AD_VS_NO with pseudo-bulk method DESEQ2-WALD"
[1] "Excitatory"
[1] "........................"
[1] "--------------------------------"
[1] "Estimating DEGs in Inhibitory..."
[1] "----------------------------------"
[1] "Obtaining statistics for EARLY_VS_NO with pseudo-bulk method DESEQ2-WALD"
[1] "Inhibitory"
[1] "----------------------------------"
[1] "Obtaining statistics for LATE_VS_EARLY with pseudo-bulk method DESEQ2-WALD"
[1] "Inh

converting counts to integer mode
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
converting counts to integer mode
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
converting counts to integer mode
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
converting counts to integer mode
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
converting counts to integer mode
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
converting counts to integer m

##### **Save Results**

In [14]:
# The run flags are stored as R-style strings ("TRUE"/"FALSE"); compare the
# text directly instead of passing it to eval().
run_flags = dict(zip(deg_methods_to_run.names, list(deg_methods_to_run)))

if str(run_flags['pseudo_bulk_methods'][0]).upper() == 'TRUE':
    for test_name in test_names:

        fig_dir = f'../results/{test_name}/{save_prefix}/DEG/'
        os.makedirs(fig_dir, exist_ok=True)

        # One workbook per method, one sheet per cell type.
        for deg_method in pseudo_bulk_methods:
            with pd.ExcelWriter(fig_dir + f'{deg_method}_degs.xlsx') as writer:
                for cell_type in celltypes:

                    # Most significant genes first.
                    df = pseudobulk_degs[cell_type][test_name][deg_method].sort_values(by='p_val_adj')
                    df['abs_logFC'] = df['avg_logFC'].abs()
                    # Anything that is not strictly positive (including 0 and NaN) is labelled "down".
                    df['direction'] = df['avg_logFC'].apply(lambda x: "up" if x > 0 else "down")
                    df.to_excel(writer, sheet_name=cell_type, na_rep='NA')
else:
    print('No result saved for Pseudobulk Methods')

### **Mixed Models**

In [15]:
%%R -i mixed_model_methods -o mixedmodel_degs

# NOTE: this cell relies on `covariates`, `subject_id`, `gene_celltype_threshold`
# and `run_de` already being present in the R session.

# 'None' is the sentinel for "no covariates".
if ('None' %in% covariates){
    latent_vars = NULL
} else {
    latent_vars = covariates
}

# The flag is stored as the string "TRUE"/"FALSE"; convert it explicitly
# instead of relying on implicit character-to-logical coercion in `if`.
if (isTRUE(as.logical(deg_methods_to_run[['mixed_model_methods']]))){

    # Spaces in the subject-ID column name are replaced by dots
    # (presumably because that is how the column is named once it reaches R -- TODO confirm).
    replicate_col <- gsub(" ", ".", subject_id, fixed = TRUE)

    # Results are nested as [[celltype]][[test_name]][[de_method]].
    mixedmodel_degs <- list()
    for (celltype in celltypes){

        mixedmodel_degs[[celltype]] <- list()

        # Restrict to the cells of the current cell type
        print('--------------------------------')
        print(paste0('Estimating DEGs in ', celltype, '...'))
        sce_cell <- adata_annot[, adata_annot$cell_type == celltype]

        # Expose the first assay under the name `counts` so that `counts()` works.
        assay(sce_cell, 'counts') <- assay(sce_cell)

        # Fraction of cells in which each gene is detected
        detection_rate <- rowMeans(counts(sce_cell) > 0)
        # Keep only genes whose detection rate exceeds the threshold
        sce_cell <- sce_cell[detection_rate > gene_celltype_threshold, ]

        for (test_name in test_names){
            sce <- sce_cell

            mixedmodel_degs[[celltype]][[test_name]] <- list()

            # "<group>_vs_<reference>": the last element is the reference level.
            groups <- strsplit(test_name, "_vs_")[[1]]

            if (test_name == 'ad_vs_no'){
                # Collapse every non-"no" group into a single "ad" group.
                sce$pathology.group <- ifelse(sce$pathology.group == "no", "no", "ad")
            } else if (test_name %in% c('late_vs_no', 'late_vs_early', 'early_vs_no')){
                # Keep only the cells that belong to the two groups being compared.
                sce <- sce[, colData(sce)$pathology.group %in% groups]
            } else {
                # Previously an unknown name silently reused the reference level
                # of the preceding contrast.
                stop(paste0('Unknown test name: ', test_name))
            }
            ref_level <- tail(groups, n = 1)

            # Take the metadata from the cells actually being tested, so that it
            # has one row per column of `sce`, carries the relabelled groups and
            # is not affected by the releveling done for earlier contrasts.
            meta <- colData(sce)
            meta$pathology.group <- relevel(factor(meta$pathology.group), ref = ref_level)

            for (de_method in mixed_model_methods){
                # Entries look like "<de_method>-<de_type>", e.g. "poisson-LRT".
                method_parts <- strsplit(de_method, "-")[[1]]

                print('----------------------------------')
                print(paste0('Obtaining statistics for ', toupper(test_name), ' with mixed-model method ', toupper(de_method)))
                mixedmodel_degs[[celltype]][[test_name]][[de_method]] <- run_de(
                    sce,
                    meta = meta,
                    replicate_col = replicate_col,
                    cell_type_col = 'cell_type',
                    label_col = 'pathology.group',
                    latent_vars = latent_vars,
                    min_cells = 3,
                    min_reps = 2,
                    min_features = 0,
                    de_family = 'mixedmodel',
                    de_method = method_parts[1],
                    de_type = method_parts[2],
                    n_threads = 2
                )
            }
        }
        print('........................')
    }
} else {

    print("Not evaluating DEGs with Mixed-model Methods.")
    # The cell exports `mixedmodel_degs`, so it must exist even when disabled.
    mixedmodel_degs = NULL
}

[1] "Not evaluating DEGs with Mixed-model Methods."


##### **Save Results**

In [16]:
# The run flags are stored as R-style strings ("TRUE"/"FALSE"); compare the
# text directly instead of passing it to eval().
run_flags = dict(zip(deg_methods_to_run.names, list(deg_methods_to_run)))

if str(run_flags['mixed_model_methods'][0]).upper() == 'TRUE':
    for test_name in test_names:

        fig_dir = f'../results/{test_name}/{save_prefix}/DEG/'
        os.makedirs(fig_dir, exist_ok=True)

        # Iterate over the mixed-model methods: the results are keyed by those
        # names, and looping over the pseudobulk list would both look up the
        # wrong keys and overwrite the pseudobulk workbook.
        for deg_method in mixed_model_methods:
            with pd.ExcelWriter(fig_dir + f'{deg_method}_degs.xlsx') as writer:
                for cell_type in celltypes:

                    # Most significant genes first.
                    df = mixedmodel_degs[cell_type][test_name][deg_method].sort_values(by='p_val_adj')
                    df['abs_logFC'] = df['avg_logFC'].abs()
                    # Anything that is not strictly positive (including 0 and NaN) is labelled "down".
                    df['direction'] = df['avg_logFC'].apply(lambda x: "up" if x > 0 else "down")
                    df.to_excel(writer, sheet_name=cell_type, na_rep='NA')
else:
    print('No result saved for Mixed-model Methods')

No result saved for Mixed-model Methods


### **Single-cell Models**

In [17]:
%%R  -i single_cell_methods -o singlecell_degs


# NOTE: this cell relies on `covariates`, `subject_id`, `gene_celltype_threshold`
# and `run_de` already being present in the R session.

# 'None' is the sentinel for "no covariates".
if ('None' %in% covariates){
    latent_vars = NULL
} else {
    latent_vars = covariates
}

# The flag is stored as the string "TRUE"/"FALSE"; convert it explicitly
# instead of relying on implicit character-to-logical coercion in `if`.
if (isTRUE(as.logical(deg_methods_to_run[['single_cell_methods']]))){

    # Spaces in the subject-ID column name are replaced by dots
    # (presumably because that is how the column is named once it reaches R -- TODO confirm).
    replicate_col <- gsub(" ", ".", subject_id, fixed = TRUE)

    # Results are nested as [[celltype]][[test_name]][[de_method]].
    singlecell_degs <- list()
    for (celltype in celltypes){

        singlecell_degs[[celltype]] <- list()

        # Restrict to the cells of the current cell type
        print('--------------------------------')
        print(paste0('Estimating DEGs in ', celltype, '...'))
        sce_cell <- adata_annot[, adata_annot$cell_type == celltype]

        # The transferred object only has an assay called `X`; expose it under
        # the name `counts`, otherwise `counts()` below fails.
        assay(sce_cell, 'counts') <- assay(sce_cell)

        # Fraction of cells in which each gene is detected
        detection_rate <- rowMeans(counts(sce_cell) > 0)
        # Keep only genes whose detection rate exceeds the threshold
        sce_cell <- sce_cell[detection_rate > gene_celltype_threshold, ]

        for (test_name in test_names){
            sce <- sce_cell

            singlecell_degs[[celltype]][[test_name]] <- list()

            # "<group>_vs_<reference>": the last element is the reference level.
            groups <- strsplit(test_name, "_vs_")[[1]]

            if (test_name == 'ad_vs_no'){
                # Collapse every non-"no" group into a single "ad" group.
                sce$pathology.group <- ifelse(sce$pathology.group == "no", "no", "ad")
            } else if (test_name %in% c('late_vs_no', 'late_vs_early', 'early_vs_no')){
                # Keep only the cells that belong to the two groups being compared.
                sce <- sce[, colData(sce)$pathology.group %in% groups]
            } else {
                # Previously an unknown name silently reused the reference level
                # of the preceding contrast.
                stop(paste0('Unknown test name: ', test_name))
            }
            ref_level <- tail(groups, n = 1)

            # Take the metadata from the cells actually being tested, so that it
            # has one row per column of `sce`, carries the relabelled groups and
            # is not affected by the releveling done for earlier contrasts.
            meta <- colData(sce)
            meta$pathology.group <- relevel(factor(meta$pathology.group), ref = ref_level)

            for (de_method in single_cell_methods){
                print('----------------------------------')
                print(paste0('Obtaining statistics for ', toupper(test_name), ' with single-cell method ', toupper(de_method)))
                singlecell_degs[[celltype]][[test_name]][[de_method]] <- run_de(
                    sce,
                    meta = meta,
                    replicate_col = replicate_col,
                    cell_type_col = 'cell_type',
                    label_col = 'pathology.group',
                    latent_vars = latent_vars,
                    min_cells = 3,
                    min_reps = 2,
                    min_features = 0,
                    de_family = 'singlecell',
                    de_method = de_method,
                    de_type = NULL,
                    n_threads = 2
                )
            }
        }

        print('........................')
    }

} else {

    print("Not evaluating DEGs with Single-cell Methods.")
    # The cell exports `singlecell_degs`, so it must exist even when disabled.
    singlecell_degs = NULL
}

[1] "Not evaluating DEGs with Single-cell Methods."


##### **Save Results**

In [18]:
# The run flags are stored as R-style strings ("TRUE"/"FALSE"); compare the
# text directly instead of passing it to eval().
run_flags = dict(zip(deg_methods_to_run.names, list(deg_methods_to_run)))

if str(run_flags['single_cell_methods'][0]).upper() == 'TRUE':
    for test_name in test_names:
        fig_dir = f'../results/{test_name}/{save_prefix}/DEG/'
        os.makedirs(fig_dir, exist_ok=True)

        # Iterate over the single-cell methods: the results are keyed by those
        # names, and looping over the pseudobulk list would both look up the
        # wrong keys and overwrite the pseudobulk workbook.
        for deg_method in single_cell_methods:
            with pd.ExcelWriter(fig_dir + f'{deg_method}_degs.xlsx') as writer:
                for cell_type in celltypes:

                    # Most significant genes first.
                    df = singlecell_degs[cell_type][test_name][deg_method].sort_values(by='p_val_adj')
                    df['abs_logFC'] = df['avg_logFC'].abs()
                    # Anything that is not strictly positive (including 0 and NaN) is labelled "down".
                    df['direction'] = df['avg_logFC'].apply(lambda x: "up" if x > 0 else "down")
                    df.to_excel(writer, sheet_name=cell_type, na_rep='NA')
else:
    print('No result saved for Single-cell Methods')

No result saved for Mixed-model Methods
