In [None]:
import warnings
from warnings import simplefilter
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings("ignore", category=PendingDeprecationWarning)
warnings.filterwarnings('ignore', category=FutureWarning, module='anndata')
warnings.filterwarnings('ignore', message='.*reticulate.*')

In [None]:
import os
import rpy2
import logging
import warnings
import anndata2ri
import numpy as np
import pandas as pd
import scanpy as sc
import anndata as ad
from itertools import chain
from datetime import datetime
import rpy2.robjects as robjects
from rpy2.robjects import pandas2ri
from scipy.sparse import csr_matrix
from joblib import Parallel, delayed
from functions import helper_functions
from matplotlib.pyplot import rcParams
from functions.run_nebula import run_de_tests, build_effect_size_anndata, get_standardized_effects




In [3]:
# Only let ERROR-level records through rpy2's R-console callback logger,
# which hides R warning messages.
# Note: this can be commented out to get more verbose R output
rpy2.rinterface_lib.callbacks.logger.setLevel(logging.ERROR)

# Activate the global pandas <-> R and AnnData <-> R converters.
# With these on, the later `%%R -i adata_merged` cell prints the imported
# object as a SingleCellExperiment.
pandas2ri.activate()
anndata2ri.activate()
# Register the %R / %%R magics used by the R cells of this notebook.
%load_ext rpy2.ipython

# scanpy logging level (3 is presumably the "hints" level; confirm against the scanpy settings docs).
sc.settings.verbosity = 3
#sc.set_figure_params(dpi=200, dpi_save=300)
# Print the versions of the loaded packages so the environment is recorded with the notebook.
sc.logging.print_versions()

# Silence pandas PerformanceWarning messages.
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

-----
anndata     0.11.1
scanpy      1.9.3
-----
PIL                         9.5.0
anndata2ri                  1.1
asttokens                   NA
backcall                    0.2.0
cffi                        1.15.1
comm                        0.1.3
cycler                      0.10.0
cython_runtime              NA
dateutil                    2.8.2
debugpy                     1.6.7
decorator                   5.1.1
decoupler                   1.4.0
dot_parser                  NA
exceptiongroup              1.1.1
executing                   1.2.0
functions                   NA
google                      NA
gsva_prep                   NA
h5py                        3.9.0
igraph                      0.10.4
ipykernel                   6.23.2
ipywidgets                  8.0.6
jedi                        0.18.2
jinja2                      3.1.2
joblib                      1.2.0
kiwisolver                  1.4.4
leidenalg                   0.9.1
llvmlite                    0.39.1
louvain      

In [4]:
%%R

# Attach every R package used by the R cells of this notebook, with package
# start-up banners suppressed. The order of the library() calls is left
# untouched: a package attached later masks same-named functions from
# packages attached earlier.
suppressPackageStartupMessages({
    library(Matrix)
    library(viridis)
    library(harmony)
    library(ggpubr)
    library(tictoc)
    library(RColorBrewer)
    library(Hmisc)
    library(corrplot)
    library(grid)
    library(gridExtra)
    library(igraph)
    library(ggrepel)
    library(readxl)
    library(conflicted)
    library(dplyr)
    library(parallel)
    library(stringr)
    library(tibble)
    library(BiocParallel)

    # single-cell analysis package
    library(Seurat)
    library(zellkonverter)   
    library(SingleCellExperiment)
    library(tidyr)
    # readxl is already attached above, so this repeated call is a no-op
    library(readxl)
    library(GSA)
    library(limma)

    # plotting and data science packages
    library(tidyverse)
    library(cowplot)
    library(patchwork)
    library(ggplot2)

    # co-expression network analysis packages:
    library(WGCNA)
    library(hdWGCNA)

    # gene enrichment packages
    library(enrichR)
    library(GeneOverlap)
    library(GSEABase)
    library(GSVA) 

    # cell-cell communication
    library(nichenetr)

# NOTE(review): stray note, originally "needs to be run every time you start R
# and want to use %>%". It presumably belonged next to library(dplyr) or
# library(tidyverse); confirm, then move or drop it.
})

# using the cowplot theme for ggplot
theme_set(theme_cowplot())

# set random seed for reproducibility (this seeds R's random number generator)
set.seed(12345)

# enable WGCNA multithreading; the thread count is hard-coded to 40 and the
# recorded output reports "up to 40 working processes"
enableWGCNAThreads(nThreads = 40)


    an issue that caused a segfault when used with rpy2:
    https://github.com/rstudio/reticulate/pull/1188
    Make sure that you use a version of that package that includes
    the fix.
    Allowing parallel execution with up to 40 working processes.


1: replacing previous import ‘GenomicRanges::intersect’ by ‘SeuratObject::intersect’ when loading ‘hdWGCNA’ 
2: replacing previous import ‘GenomicRanges::union’ by ‘dplyr::union’ when loading ‘hdWGCNA’ 
3: replacing previous import ‘GenomicRanges::setdiff’ by ‘dplyr::setdiff’ when loading ‘hdWGCNA’ 
4: replacing previous import ‘dplyr::as_data_frame’ by ‘igraph::as_data_frame’ when loading ‘hdWGCNA’ 
5: replacing previous import ‘Seurat::components’ by ‘igraph::components’ when loading ‘hdWGCNA’ 
6: replacing previous import ‘dplyr::groups’ by ‘igraph::groups’ when loading ‘hdWGCNA’ 
7: replacing previous import ‘dplyr::union’ by ‘igraph::union’ when loading ‘hdWGCNA’ 
8: replacing previous import ‘GenomicRanges::subtract’ by ‘magrittr::subtract’ when loading ‘hdWGCNA’ 
9: replacing previous import ‘Matrix::as.matrix’ by ‘proxy::as.matrix’ when loading ‘hdWGCNA’ 
10: replacing previous import ‘igraph::groups’ by ‘tidygraph::groups’ when loading ‘hdWGCNA’ 


## **Data Prep Parameters**

- `test_names`: List of the different test names of interest.

- `save_prefix`: Preferred prefix for saving critical files, ideally in the format `{source name}_{brain region}`, e.g., `mathys_pfc`.

- `subject_id`: Column name for Subject/Patient ID in both metadata and `.obs`

In [11]:
# --- Dataset identity ---------------------------------------------------------
save_prefix = 'seaad_mtg'                           # format: '{StudyName}_{ThreeLetterAcronymForBrainRegion}'
region_name = save_prefix.split('_')[-1].upper()    # last '_'-separated token, upper-cased ('MTG' here)

# --- Column names and model settings ------------------------------------------
subject_id = helper_functions.clean_strings('Donor ID')
cell_type_column = 'Subclass'                       # 'Supertype (non-expanded)', 'Subclass'
pseudobulk_level = 'Supertype'                      # level at which cells are aggregated into pseudobulks
# Tested factor, stored in cleaned form (alternative: pathology.group).
factor = helper_functions.clean_strings('Continuous Pseudo-progression Score', replace_hyphen=True)
test_names = ['late_vs_early']                      # test categories
# options: 'network', 'random', 'pseudobulk', 'smaller_network', 'standardbulk', 'metacell', 'blanchardbulk'
count_agg_strategy = 'metacell'

# --- Paths --------------------------------------------------------------------
# NOTE(review): absolute, machine-specific path; adjust it when running elsewhere.
# The trailing slash is kept because other cells append to this string directly.
data_dir = f'/media/tadeoye/Volume1/data/seq/SEA-AD/{region_name}/RNAseq/'
save_dir = '../results/'

# Create the results directory if needed; exist_ok makes re-running the cell safe.
os.makedirs(save_dir, exist_ok=True)

# --- Cell-type groups -> subclass labels --------------------------------------
subclass = {
    'excitatory': ['L5 IT', 'L2/3 IT', 'L4 IT', 'L6 IT', 'L6 IT Car3', 'L5/6 NP', 'L6b', 'L6 CT', 'L5 ET'],
    'inhibitory': ['Pvalb', 'Sst', 'Lamp5 Lhx6', 'Vip', 'Lamp5', 'Sncg', 'Chandelier', 'Sst Chodl', 'Pax6'],
    'astrocyte': ['Astrocyte'],
    'microglia': ['Microglia-PVM'],
    'opc': ['OPC'],
    'oligodendrocyte': ['Oligodendrocyte'],
    'endothelial': ['Endothelial'],
    'vlmc': ['VLMC'],
    }

# Flat list of every label in `subclass`, in dictionary order.
cell_supertypes = list(chain.from_iterable(subclass.values()))



# **Load Merged Pseudo-bulked Data**

We have merged the pseudobulked data for all cell supertypes (`02_merge_pseudobulks.ipynb`). Now, we load the merged data.

In [6]:
# Same group -> labels mapping as `subclass`, with each label passed through
# clean_strings (case preserved) and wrapped as an rpy2 ListVector.
clean_subclass = robjects.ListVector(dict(
    (group, [helper_functions.clean_strings(label, preserve_case=True) for label in labels])
    for group, labels in subclass.items()
))

# Flat list of cleaned labels, in the same order as `cell_supertypes`.
clean_cell_supertypes = list(map(
    lambda label: helper_functions.clean_strings(label, preserve_case=True),
    cell_supertypes,
))

In [7]:
# Merged pseudobulk counts for the selected aggregation strategy.
# The path is built with os.path.join (as for the annotation file below), so it
# stays valid whether or not `data_dir` ends with a slash.
adata_merged = sc.read_h5ad(
    os.path.join(data_dir, f'counts/anndata/all_subclass_{count_agg_strategy}_anndata.h5ad')
)
# Store the expression matrix in CSR sparse format.
adata_merged.X = csr_matrix(adata_merged.X)

# Annotated single-nucleus object; its `X` is replaced by the 'UMIs' layer.
adata_annot = sc.read_h5ad(
    os.path.join(data_dir, f'counts/anndata/{save_prefix.upper()}_RNAseq_final-nuclei.2024-02-13.h5ad')
)
adata_annot.X = adata_annot.layers['UMIs']

# rpy2 list version of the group -> labels table, passed to R via `%%R -i Subclass`.
Subclass = robjects.ListVector(subclass)

In [8]:
%%R -i adata_merged -i count_agg_strategy -i data_dir -i subject_id -i Subclass -i cell_supertypes -i clean_cell_supertypes -i clean_subclass

# The -i flags above copy the listed Python variables into the R session.
# Load the project's R helper functions (path is relative to the notebook's working directory).
source('../scripts/functions/helper_functions.r')

print('Merged data loaded')
# Auto-print a summary of the imported object; the recorded output shows it
# as a SingleCellExperiment.
adata_merged

[1] "Merged data loaded"
class: SingleCellExperiment 
dim: 36601 111258 
metadata(0):
assays(2): X logcounts
rownames(36601): MIR1302-2HG FAM138A ... AC007325.4 AC007325.2
rowData names(0):
colnames(111258): L5 IT_1#H19.33.004_1 L5 IT_1#H19.33.004_2 ...
  VLMC_1#H21.33.027_5 VLMC_1#H21.33.027_6
colData names(21): donor_id orig.ident ... pseudo_genes_detected
  pseudo_number_of_umis
reducedDimNames(0):
mainExpName: NULL
altExpNames(0):


In [9]:
# del adata_merged, summed_counts_per_celltype

In [13]:
# Number of joblib worker processes used by the differential-expression cell.
n_cores = 40

# Model covariates, written as raw column labels and cleaned in a single call.
# The first entry is the tested factor; the covariate formula is later built
# from covariates[1:].
covariates = helper_functions.clean_strings(
    [
        'Continuous Pseudo-progression Score',
        'Age at Death binned codes',
        'Sex',
        'APOE4_Status',
        'Pseudo Genes Detected',
        'Pseudo Number of UMIs',
        # 'PMI',
        'method',
        'Race (choice=White)',
    ],
    replace_hyphen=True,
)

In [None]:

# Master switch for the differential-expression run. No earlier cell defines
# it, so it is set here to keep the notebook runnable from a fresh kernel
# (Restart & Run All). The recorded log of this cell spans more than an hour;
# set this to False to skip the run.
run_differential_expression = True


def process_target(target, adata_merged, params, cell_type_column):
    """Run the DE tests for every `split_key` group found within one cell type.

    Parameters
    ----------
    target
        Value of `cell_type_column` selecting the cells of interest.
    adata_merged
        Merged AnnData object; `obs[params['split_key']]` must already be
        categorical, because the groups are read from `.cat.categories`.
    params : dict
        Settings forwarded positionally to `run_de_tests` (region, split key,
        covariates, random effect, formula, tests, layer, offset, save dir).
    cell_type_column : str
        `obs` column compared against `target`.

    Returns
    -------
    list
        One `run_de_tests` result per group that completed. Groups that raise
        are reported with `print` and left out, so a single failing group does
        not abort the remaining ones.
    """
    # NOTE(review): the subset is only used to enumerate the groups; the full
    # `adata_merged` is what gets passed to run_de_tests. Confirm that
    # run_de_tests restricts to `target`/`b` internally.
    adata = adata_merged[adata_merged.obs[cell_type_column] == target]
    clean_target = helper_functions.clean_strings(target, preserve_case=True, replace_hyphen=False)

    results = []
    for b in adata.obs[params['split_key']].cat.categories:
        try:
            result = run_de_tests(
                adata_merged,
                params['region'],
                clean_target,
                params['split_key'],
                params['outgroup_de'],
                b,
                params['covariates'],
                params['random_effect'],
                params['covariate_formula'],
                params['tests'],
                params['layer'],
                params['offset_variable'],
                params['group_cell'],
                params['save_dir']
            )
            results.append(result)
        except Exception as e:
            # Deliberate best effort: report the failure and move on to the next group.
            error_msg = str(e)
            if "Some predictors have zero variation or a zero vector" in error_msg:
                print(f"{datetime.now()} -- Skipping {b} because some predictors have zero variation or a zero vector")
            else:
                print(f"{datetime.now()} -- Error processing {b}: {error_msg}")

    return results


if run_differential_expression:
    # Set up parameters
    params = {
        'region': region_name,          # derived from save_prefix in the configuration cell
        'target': '',
        'split_key': pseudobulk_level,
        'outgroup_de': False,
        'outgroup': '',
        'covariates': covariates,
        'random_effect': subject_id,
        # covariates[0] is the tested factor, so only the remaining covariates go into the formula
        'covariate_formula': ' + '.join(covariates[1:]) + " + ",
        'tests': [factor],
        'layer': 'X',
        'offset_variable': helper_functions.clean_strings('Pseudo Number of UMIs'),
        'group_cell': False,
        'save_dir': os.path.join(data_dir, 'results', 'differential_gene_expression')
    }

    # process_target reads the groups from `.cat.categories`, so the split key must be categorical.
    adata_merged.obs[params['split_key']] = adata_merged.obs[params['split_key']].astype('category')

    os.makedirs(params['save_dir'], exist_ok=True)

    # Parallelize at target level
    targets = adata_merged.obs[cell_type_column].unique()
    results = Parallel(n_jobs=n_cores)(
        delayed(process_target)(target, adata_merged, params, cell_type_column)
        for target in targets
    )




    an issue that caused a segfault when used with rpy2:
    https://github.com/rstudio/reticulate/pull/1188
    Make sure that you use a version of that package that includes
    the fix.
    
    an issue that caused a segfault when used with rpy2:
    https://github.com/rstudio/reticulate/pull/1188
    Make sure that you use a version of that package that includes
    the fix.
    




    an issue that caused a segfault when used with rpy2:
    https://github.com/rstudio/reticulate/pull/1188
    Make sure that you use a version of that package that includes
    the fix.
    




    an issue that caused a segfault when used with rpy2:
    https://github.com/rstudio/reticulate/pull/1188
    Make sure that you use a version of that package that includes
    the fix.
    




    an issue that caused a segfault when used with rpy2:
    https://github.com/rstudio/reticulate/pull/1188
    Make sure that you use a version of that package that includes
    the fix.
    
    an issue that caused a segfault when used with rpy2:
    https://github.com/rstudio/reticulate/pull/1188
    Make sure that you use a version of that package that includes
    the fix.
    




    an issue that caused a segfault when used with rpy2:
    https://github.com/rstudio/reticulate/pull/1188
    Make sure that you use a version of that package that includes
    the fix.
    




    an issue that caused a segfault when used with rpy2:
    https://github.com/rstudio/reticulate/pull/1188
    Make sure that you use a version of that package that includes
    the fix.
    

R[write to console]: here() starts at /home/tadeoye/Documents/research_codes/cell_cell_communication



2025-01-21 13:27:48.718252 -- No outgroup specified, running analysis for all groups and tests

    an issue that caused a segfault when used with rpy2:
    https://github.com/rstudio/reticulate/pull/1188
    Make sure that you use a version of that package that includes
    the fix.
    

R[write to console]: here() starts at /home/tadeoye/Documents/research_codes/cell_cell_communication



2025-01-21 13:27:50.040343 -- No outgroup specified, running analysis for all groups and tests

    an issue that caused a segfault when used with rpy2:
    https://github.com/rstudio/reticulate/pull/1188
    Make sure that you use a version of that package that includes
    the fix.
    2025-01-21 13:27:50.713590 -- Removing 3697 features from L5_IT_1 for low numbers of counts per cell
2025-01-21 13:27:50.848823 -- Detected continuous_pseudo_progression_score as an integer or float, applying a min-max normalization
2025-01-21 13:27:50.850709 -- Detected age_at_death_binned_codes as an integer or float, applying a min-max normalization
2025-01-21 13:27:50.909977 -- Adding 3 pseudocounts to age_at_death_binned_codes=0.25 for 1476 genes in L5_IT_1 because all values are 0 for that covariate


R[write to console]: here() starts at /home/tadeoye/Documents/research_codes/cell_cell_communication



2025-01-21 13:27:51.736980 -- No outgroup specified, running analysis for all groups and tests

    an issue that caused a segfault when used with rpy2:
    https://github.com/rstudio/reticulate/pull/1188
    Make sure that you use a version of that package that includes
    the fix.
    
    an issue that caused a segfault when used with rpy2:
    https://github.com/rstudio/reticulate/pull/1188
    Make sure that you use a version of that package that includes
    the fix.
    2025-01-21 13:27:52.565351 -- Removing 4218 features from L4_IT_1 for low numbers of counts per cell
2025-01-21 13:27:52.706466 -- Detected continuous_pseudo_progression_score as an integer or float, applying a min-max normalization
2025-01-21 13:27:52.707825 -- Detected age_at_death_binned_codes as an integer or float, applying a min-max normalization
2025-01-21 13:27:52.759539 -- Adding 3 pseudocounts to age_at_death_binned_codes=0.25 for 885 genes in L4_IT_1 because all values are 0 for that covariate


R[write to console]: here() starts at /home/tadeoye/Documents/research_codes/cell_cell_communication



2025-01-21 13:27:53.079388 -- No outgroup specified, running analysis for all groups and tests
2025-01-21 13:27:53.134722 -- Removing 7505 features from L6_IT_Car3_1 for low numbers of counts per cell
2025-01-21 13:27:53.143022 -- Detected continuous_pseudo_progression_score as an integer or float, applying a min-max normalization
2025-01-21 13:27:53.159398 -- Adding 3 pseudocounts to continuous_pseudo_progression_score=0.0 for 4357 genes in L6_IT_Car3_1 because all values are 0 for that covariate

    an issue that caused a segfault when used with rpy2:
    https://github.com/rstudio/reticulate/pull/1188
    Make sure that you use a version of that package that includes
    the fix.
    
    an issue that caused a segfault when used with rpy2:
    https://github.com/rstudio/reticulate/pull/1188
    Make sure that you use a version of that package that includes
    the fix.
    2025-01-21 13:27:54.342556 -- No outgroup specified, running analysis for all groups and tests
2025-01-21 13:

R[write to console]: here() starts at /home/tadeoye/Documents/research_codes/cell_cell_communication

R[write to console]: here() starts at /home/tadeoye/Documents/research_codes/cell_cell_communication



2025-01-21 13:27:54.707096 -- Removing 4471 features from L5_6_NP_1 for low numbers of counts per cell
2025-01-21 13:27:54.739760 -- Removing 4169 features from L6_IT_1 for low numbers of counts per cell
2025-01-21 13:27:54.766883 -- Detected continuous_pseudo_progression_score as an integer or float, applying a min-max normalization
2025-01-21 13:27:54.768220 -- Detected age_at_death_binned_codes as an integer or float, applying a min-max normalization
2025-01-21 13:27:54.798369 -- Detected continuous_pseudo_progression_score as an integer or float, applying a min-max normalization
2025-01-21 13:27:54.800265 -- Detected age_at_death_binned_codes as an integer or float, applying a min-max normalization
2025-01-21 13:27:54.799585 -- Adding 3 pseudocounts to age_at_death_binned_codes=0.25 for 2053 genes in L5_6_NP_1 because all values are 0 for that covariate
2025-01-21 13:27:54.834197 -- Adding 3 pseudocounts to age_at_death_binned_codes=0.25 for 1528 genes in L6_IT_1 because all values

R[write to console]: here() starts at /home/tadeoye/Documents/research_codes/cell_cell_communication



2025-01-21 13:27:56.936526 -- No outgroup specified, running analysis for all groups and tests
2025-01-21 13:27:57.198544 -- Removing 5968 features from Pvalb_1 for low numbers of counts per cell
2025-01-21 13:27:57.243447 -- Detected continuous_pseudo_progression_score as an integer or float, applying a min-max normalization
2025-01-21 13:27:57.292248 -- Adding 3 pseudocounts to continuous_pseudo_progression_score=0.8095932638239676 for 2905 genes in Pvalb_1 because all values are 0 for that covariate


R[write to console]: here() starts at /home/tadeoye/Documents/research_codes/cell_cell_communication



2025-01-21 13:27:57.775866 -- No outgroup specified, running analysis for all groups and tests
2025-01-21 13:27:57.900479 -- Removing 4622 features from L6_CT_1 for low numbers of counts per cell
2025-01-21 13:27:57.915561 -- Detected continuous_pseudo_progression_score as an integer or float, applying a min-max normalization
2025-01-21 13:27:57.916952 -- Detected age_at_death_binned_codes as an integer or float, applying a min-max normalization
2025-01-21 13:27:57.935106 -- Adding 3 pseudocounts to age_at_death_binned_codes=0.25 for 2299 genes in L6_CT_1 because all values are 0 for that covariate


R[write to console]: here() starts at /home/tadeoye/Documents/research_codes/cell_cell_communication



2025-01-21 13:27:58.238245 -- No outgroup specified, running analysis for all groups and tests
2025-01-21 13:27:58.279042 -- Removing 11698 features from L6b_2 for low numbers of counts per cell
2025-01-21 13:27:58.282031 -- Detected continuous_pseudo_progression_score as an integer or float, applying a min-max normalization
2025-01-21 13:27:58.282619 -- Removing continuous_pseudo_progression_score from the covariate formula
2025-01-21 13:27:58.283791 -- Detected age_at_death_binned_codes as an integer or float, applying a min-max normalization
2025-01-21 13:27:58.284199 -- Removing age_at_death_binned_codes from the covariate formula
2025-01-21 13:27:58.284741 -- Removing sex from the covariate formula
2025-01-21 13:27:58.285022 -- Removing apoe4_status from the covariate formula
2025-01-21 13:27:58.286112 -- Detected pseudo_genes_detected as an integer or float, applying a min-max normalization
2025-01-21 13:27:58.299699 -- Adding 3 pseudocounts to pseudo_genes_detected=0.0 for 5399 

R[write to console]: here() starts at /home/tadeoye/Documents/research_codes/cell_cell_communication



2025-01-21 13:27:59.504843 -- No outgroup specified, running analysis for all groups and tests
2025-01-21 13:27:59.533555 -- Removing 16021 features from Sst_2 for low numbers of counts per cell
2025-01-21 13:27:59.536573 -- Detected continuous_pseudo_progression_score as an integer or float, applying a min-max normalization
2025-01-21 13:27:59.536959 -- Removing continuous_pseudo_progression_score from the covariate formula
2025-01-21 13:27:59.537990 -- Detected age_at_death_binned_codes as an integer or float, applying a min-max normalization
2025-01-21 13:27:59.538230 -- Removing age_at_death_binned_codes from the covariate formula
2025-01-21 13:27:59.538592 -- Removing sex from the covariate formula
2025-01-21 13:27:59.538835 -- Removing apoe4_status from the covariate formula
2025-01-21 13:27:59.539876 -- Detected pseudo_genes_detected as an integer or float, applying a min-max normalization
2025-01-21 13:27:59.553788 -- Adding 3 pseudocounts to pseudo_genes_detected=0.25402278069

R[write to console]: here() starts at /home/tadeoye/Documents/research_codes/cell_cell_communication



2025-01-21 13:28:00.520167 -- No outgroup specified, running analysis for all groups and tests


R[write to console]: here() starts at /home/tadeoye/Documents/research_codes/cell_cell_communication



2025-01-21 13:28:00.922758 -- No outgroup specified, running analysis for all groups and tests
2025-01-21 13:28:01.299262 -- Removing 5228 features from Vip_1 for low numbers of counts per cell
2025-01-21 13:28:01.366303 -- Detected continuous_pseudo_progression_score as an integer or float, applying a min-max normalization
2025-01-21 13:28:01.367593 -- Detected age_at_death_binned_codes as an integer or float, applying a min-max normalization
2025-01-21 13:28:01.393096 -- Adding 3 pseudocounts to age_at_death_binned_codes=0.25 for 1636 genes in Vip_1 because all values are 0 for that covariate
2025-01-21 13:28:01.501952 -- Removing 4728 features from Lamp5_Lhx6_1 for low numbers of counts per cell
2025-01-21 13:28:01.702774 -- Detected continuous_pseudo_progression_score as an integer or float, applying a min-max normalization
2025-01-21 13:28:01.704348 -- Detected age_at_death_binned_codes as an integer or float, applying a min-max normalization
2025-01-21 13:28:01.780744 -- Adding 3

R[write to console]: here() starts at /home/tadeoye/Documents/research_codes/cell_cell_communication

R[write to console]: here() starts at /home/tadeoye/Documents/research_codes/cell_cell_communication



2025-01-21 13:28:02.880584 -- No outgroup specified, running analysis for all groups and tests
2025-01-21 13:28:02.913042 -- Removing 11945 features from Sncg_1 for low numbers of counts per cell
2025-01-21 13:28:02.916973 -- Detected continuous_pseudo_progression_score as an integer or float, applying a min-max normalization
2025-01-21 13:28:02.930314 -- Adding 3 pseudocounts to continuous_pseudo_progression_score=0.7107407372495411 for 5044 genes in Sncg_1 because all values are 0 for that covariate
2025-01-21 13:28:03.065324 -- No outgroup specified, running analysis for all groups and tests
2025-01-21 13:28:03.112941 -- Removing 10503 features from Lamp5_1 for low numbers of counts per cell
2025-01-21 13:28:03.117155 -- Detected continuous_pseudo_progression_score as an integer or float, applying a min-max normalization
2025-01-21 13:28:03.131686 -- Adding 3 pseudocounts to continuous_pseudo_progression_score=0.0 for 7536 genes in Lamp5_1 because all values are 0 for that covariate

R[write to console]: here() starts at /home/tadeoye/Documents/research_codes/cell_cell_communication



2025-01-21 13:28:03.616681 -- No outgroup specified, running analysis for all groups and tests
2025-01-21 13:28:03.691375 -- Removing 8227 features from Pax6_1 for low numbers of counts per cell
2025-01-21 13:28:03.700126 -- Detected continuous_pseudo_progression_score as an integer or float, applying a min-max normalization
2025-01-21 13:28:03.716642 -- Adding 3 pseudocounts to continuous_pseudo_progression_score=0.103825014087543 for 7178 genes in Pax6_1 because all values are 0 for that covariate


R[write to console]: here() starts at /home/tadeoye/Documents/research_codes/cell_cell_communication



2025-01-21 13:28:05.181442 -- No outgroup specified, running analysis for all groups and tests
2025-01-21 13:28:05.370962 -- Removing 6615 features from Chandelier_1 for low numbers of counts per cell
2025-01-21 13:28:05.393980 -- Detected continuous_pseudo_progression_score as an integer or float, applying a min-max normalization
2025-01-21 13:28:05.395264 -- Detected age_at_death_binned_codes as an integer or float, applying a min-max normalization
2025-01-21 13:28:05.414968 -- Adding 3 pseudocounts to age_at_death_binned_codes=0.25 for 2566 genes in Chandelier_1 because all values are 0 for that covariate


R[write to console]: here() starts at /home/tadeoye/Documents/research_codes/cell_cell_communication

R[write to console]: here() starts at /home/tadeoye/Documents/research_codes/cell_cell_communication



2025-01-21 13:28:08.068388 -- No outgroup specified, running analysis for all groups and tests
2025-01-21 13:28:08.267820 -- No outgroup specified, running analysis for all groups and tests
2025-01-21 13:28:08.300938 -- Removing 19813 features from Endo_2 for low numbers of counts per cell
2025-01-21 13:28:08.303917 -- Detected continuous_pseudo_progression_score as an integer or float, applying a min-max normalization
2025-01-21 13:28:08.304327 -- Removing continuous_pseudo_progression_score from the covariate formula
2025-01-21 13:28:08.305423 -- Detected age_at_death_binned_codes as an integer or float, applying a min-max normalization
2025-01-21 13:28:08.305662 -- Removing age_at_death_binned_codes from the covariate formula
2025-01-21 13:28:08.306013 -- Removing sex from the covariate formula
2025-01-21 13:28:08.306230 -- Removing apoe4_status from the covariate formula
2025-01-21 13:28:08.307325 -- Detected pseudo_genes_detected as an integer or float, applying a min-max normaliz

R[write to console]: here() starts at /home/tadeoye/Documents/research_codes/cell_cell_communication



2025-01-21 13:28:09.082551 -- Adding 3 pseudocounts to continuous_pseudo_progression_score=0.2331134537922132 for 2817 genes in VLMC_1 because all values are 0 for that covariate
2025-01-21 13:28:12.906560 -- Detected continuous_pseudo_progression_score as an integer or float, applying a min-max normalization
2025-01-21 13:28:12.908974 -- Detected age_at_death_binned_codes as an integer or float, applying a min-max normalization
2025-01-21 13:28:13.717519 -- Adding 3 pseudocounts to age_at_death_binned_codes=0.25 for 9 genes in L2_3_IT_1 because all values are 0 for that covariate


R[write to console]: here() starts at /home/tadeoye/Documents/research_codes/cell_cell_communication



2025-01-21 13:28:14.710580 -- No outgroup specified, running analysis for all groups and tests
2025-01-21 13:28:14.897154 -- Removing 6155 features from Astro_1 for low numbers of counts per cell


R[write to console]: here() starts at /home/tadeoye/Documents/research_codes/cell_cell_communication



2025-01-21 13:28:14.927143 -- Detected continuous_pseudo_progression_score as an integer or float, applying a min-max normalization
2025-01-21 13:28:14.928847 -- Detected age_at_death_binned_codes as an integer or float, applying a min-max normalization
2025-01-21 13:28:14.932982 -- No outgroup specified, running analysis for all groups and tests
2025-01-21 13:28:14.969681 -- Adding 3 pseudocounts to age_at_death_binned_codes=0.25 for 1963 genes in Astro_1 because all values are 0 for that covariate
2025-01-21 13:28:16.848134 -- Removing 5840 features from OPC_2 for low numbers of counts per cell
2025-01-21 13:28:17.230761 -- Detected continuous_pseudo_progression_score as an integer or float, applying a min-max normalization
2025-01-21 13:28:17.232653 -- Detected age_at_death_binned_codes as an integer or float, applying a min-max normalization
2025-01-21 13:28:17.372132 -- Adding 3 pseudocounts to age_at_death_binned_codes=0.25 for 444 genes in OPC_2 because all values are 0 for that

R[write to console]: Error in nebula(counts, obs$donor_id, pred = df, offset = obs$pseudo_number_of_umis,  : 
  Some predictors have zero variation or a zero vector.



2025-01-21 13:31:10.046384 -- Adding 3 pseudocounts to continuous_pseudo_progression_score=0.0 for 3075 genes in Sncg_1 because all values are 0 for that covariate
2025-01-21 13:31:26.951521 -- Detected pseudo_number_of_umis as an integer or float, applying a min-max normalization
2025-01-21 13:31:26.991080 -- Removing method from the covariate formula
2025-01-21 13:31:26.991195 -- Removing race_choice_white from the covariate formula
2025-01-21 13:31:27.032148 -- Starting Sst_2 across continuous_pseudo_progression_score
The cells are already grouped.2025-01-21 13:31:27.098640 -- Skipping Sst_2 because some predictors have zero variation or a zero vector


R[write to console]: Error in nebula(counts, obs$donor_id, pred = df, offset = obs$pseudo_number_of_umis,  : 
  Some predictors have zero variation or a zero vector.



2025-01-21 13:31:28.230507 -- No outgroup specified, running analysis for all groups and tests
2025-01-21 13:31:28.532481 -- Removing 5772 features from Sst_3 for low numbers of counts per cell
2025-01-21 13:31:28.583692 -- Detected continuous_pseudo_progression_score as an integer or float, applying a min-max normalization
2025-01-21 13:31:28.585210 -- Detected age_at_death_binned_codes as an integer or float, applying a min-max normalization
2025-01-21 13:31:28.612314 -- Adding 3 pseudocounts to age_at_death_binned_codes=0.25 for 1686 genes in Sst_3 because all values are 0 for that covariate
2025-01-21 13:31:32.941771 -- Adding 3 pseudocounts to continuous_pseudo_progression_score=0.3327136446937277 for 8447 genes in Lamp5_1 because all values are 0 for that covariate
2025-01-21 13:31:38.433136 -- Detected pseudo_number_of_umis as an integer or float, applying a min-max normalization
2025-01-21 13:31:38.474615 -- Removing method from the covariate formula
2025-01-21 13:31:38.474744 

R[write to console]: Error in nebula(counts, obs$donor_id, pred = df, offset = obs$pseudo_number_of_umis,  : 
  Some predictors have zero variation or a zero vector.



2025-01-21 13:31:40.349449 -- No outgroup specified, running analysis for all groups and tests
2025-01-21 13:31:40.613446 -- Removing 4321 features from L6b_6 for low numbers of counts per cell
2025-01-21 13:31:40.655458 -- Detected continuous_pseudo_progression_score as an integer or float, applying a min-max normalization
2025-01-21 13:31:40.657016 -- Detected age_at_death_binned_codes as an integer or float, applying a min-max normalization
2025-01-21 13:31:40.692940 -- Adding 3 pseudocounts to age_at_death_binned_codes=0.0 for 1113 genes in L6b_6 because all values are 0 for that covariate
2025-01-21 13:32:16.497030 -- Adding 3 pseudocounts to age_at_death_binned_codes=0.0 for 36 genes in L2_3_IT_1 because all values are 0 for that covariate
2025-01-21 13:32:23.621066 -- Detected age_at_death_binned_codes as an integer or float, applying a min-max normalization
2025-01-21 13:32:23.719479 -- Detected pseudo_genes_detected as an integer or float, applying a min-max normalization
2025

R[write to console]: Error in nebula(counts, obs$donor_id, pred = df, offset = obs$pseudo_number_of_umis,  : 
  Some predictors have zero variation or a zero vector.



2025-01-21 14:21:58.030243 -- No outgroup specified, running analysis for all groups and tests
2025-01-21 14:21:58.057616 -- Removing 11316 features from Sncg_8 for low numbers of counts per cell
2025-01-21 14:21:58.060636 -- Detected continuous_pseudo_progression_score as an integer or float, applying a min-max normalization
2025-01-21 14:21:58.073602 -- Adding 3 pseudocounts to continuous_pseudo_progression_score=0.7142033482533316 for 5673 genes in Sncg_8 because all values are 0 for that covariate
2025-01-21 14:22:53.491066 -- Adding 3 pseudocounts to continuous_pseudo_progression_score=0.632943265190392 for 6894 genes in Pax6_1 because all values are 0 for that covariate
2025-01-21 14:23:46.137206 -- Adding 3 pseudocounts to race_choice_white=Unchecked for 7387 genes in Astro_1 because all values are 0 for that covariate
2025-01-21 14:24:04.019304 -- Adding 3 pseudocounts to continuous_pseudo_progression_score=0.27633850362408346 for 4919 genes in Sncg_8 because all values are 0 f

R[write to console]: In addition: 

R[write to console]: In nebula(counts, obs$donor_id, pred = df, offset = obs$pseudo_number_of_umis,  :
R[write to console]: 
 
R[write to console]:  Some predictors in the design matrix are collinear or linearly dependent. The effects of these predictors are not identifiable.



2025-01-21 14:29:36.038454 -- Sncg_8 along continuous_pseudo_progression_score was written to disk
2025-01-21 14:30:04.282567 -- Starting L6b_6 across continuous_pseudo_progression_score
The cells are already grouped.The average number of cells per subject ( 10.05 ) is less than 30. The 'method' is set as 'HL'.
Remove  1  genes having low expression.
Analyzing  32279  genes with  20  subjects and  201  cells.
2025-01-21 14:30:11.695864 -- Adding 3 pseudocounts to race_choice_white=Unchecked for 5343 genes in Vip_1 because all values are 0 for that covariate
2025-01-21 14:30:18.558800 -- Adding 3 pseudocounts to age_at_death_binned_codes=0.75 for 324 genes in Lamp5_Lhx6_1 because all values are 0 for that covariate
2025-01-21 14:31:00.906824 -- Adding 3 pseudocounts to continuous_pseudo_progression_score=0.6670367899703017 for 8394 genes in Pax6_1 because all values are 0 for that covariate
2025-01-21 14:34:11.023319 -- L6b_6 along continuous_pseudo_progression_score was written to disk

R[write to console]: Error in nebula(counts, obs$donor_id, pred = df, offset = obs$pseudo_number_of_umis,  : 
  Some predictors have zero variation or a zero vector.



2025-01-21 14:38:54.638028 -- Adding 3 pseudocounts to age_at_death_binned_codes=0.0 for 2732 genes in L6_IT_Car3_3 because all values are 0 for that covariate
2025-01-21 14:39:03.942511 -- Removing race_choice_white from the covariate formula
2025-01-21 14:39:06.735275 -- Skipping L6_IT_1 along continuous_pseudo_progression_score (already exists)
2025-01-21 14:39:07.689679 -- No outgroup specified, running analysis for all groups and tests
2025-01-21 14:39:09.892593 -- Adding 3 pseudocounts to race_choice_white=Unchecked for 4974 genes in L4_IT_1 because all values are 0 for that covariate
2025-01-21 14:39:10.165621 -- Removing 3536 features from L6_IT_2 for low numbers of counts per cell
2025-01-21 14:39:10.709512 -- Detected continuous_pseudo_progression_score as an integer or float, applying a min-max normalization
2025-01-21 14:39:10.710736 -- Detected age_at_death_binned_codes as an integer or float, applying a min-max normalization
2025-01-21 14:39:10.884571 -- Adding 3 pseudoco

R[write to console]: Error in nebula(counts, obs$donor_id, pred = df, offset = obs$pseudo_number_of_umis,  : 
  Some predictors have zero variation or a zero vector.



2025-01-21 15:39:57.646669 -- No outgroup specified, running analysis for all groups and tests
2025-01-21 15:39:57.739524 -- Removing 6693 features from Sst_13 for low numbers of counts per cell
2025-01-21 15:39:57.756032 -- Detected continuous_pseudo_progression_score as an integer or float, applying a min-max normalization
2025-01-21 15:39:57.757016 -- Detected age_at_death_binned_codes as an integer or float, applying a min-max normalization
2025-01-21 15:39:57.775453 -- Adding 3 pseudocounts to age_at_death_binned_codes=0.25 for 2935 genes in Sst_13 because all values are 0 for that covariate
2025-01-21 15:41:48.550832 -- Adding 3 pseudocounts to apoe4_status=Y for 14 genes in L6_IT_2 because all values are 0 for that covariate
2025-01-21 15:41:59.318426 -- Adding 3 pseudocounts to sex=Male for 258 genes in Vip_2 because all values are 0 for that covariate
2025-01-21 15:42:23.935158 -- Detected pseudo_genes_detected as an integer or float, applying a min-max normalization
2025-01-2

R[write to console]: In addition: 

R[write to console]: In nebula(counts, obs$donor_id, pred = df, offset = obs$pseudo_number_of_umis,  :
R[write to console]: 
 
R[write to console]:  Some predictors in the design matrix are collinear or linearly dependent. The effects of these predictors are not identifiable.



2025-01-21 17:51:28.779778 -- Sst_20 along continuous_pseudo_progression_score was written to disk
2025-01-21 17:51:29.746049 -- No outgroup specified, running analysis for all groups and tests
2025-01-21 17:51:29.780253 -- Removing 12305 features from Sst_23 for low numbers of counts per cell
2025-01-21 17:51:29.783878 -- Detected continuous_pseudo_progression_score as an integer or float, applying a min-max normalization
2025-01-21 17:51:29.795730 -- Adding 3 pseudocounts to continuous_pseudo_progression_score=0.3944035688622841 for 4315 genes in Sst_23 because all values are 0 for that covariate
2025-01-21 17:52:42.862800 -- Adding 3 pseudocounts to continuous_pseudo_progression_score=0.0 for 3436 genes in Sst_23 because all values are 0 for that covariate
2025-01-21 17:53:41.080438 -- Adding 3 pseudocounts to continuous_pseudo_progression_score=0.5400110102823146 for 4399 genes in Sst_23 because all values are 0 for that covariate
2025-01-21 17:55:01.150296 -- Detected age_at_death

R[write to console]: In addition: 

R[write to console]: In nebula(counts, obs$donor_id, pred = df, offset = obs$pseudo_number_of_umis,  :
R[write to console]: 
 
R[write to console]:  Some predictors in the design matrix are collinear or linearly dependent. The effects of these predictors are not identifiable.



2025-01-21 17:56:43.816906 -- Sst_23 along continuous_pseudo_progression_score was written to disk
2025-01-21 17:56:44.736710 -- No outgroup specified, running analysis for all groups and tests
2025-01-21 17:56:44.777621 -- Removing 9774 features from Sst_25 for low numbers of counts per cell
2025-01-21 17:56:44.784146 -- Detected continuous_pseudo_progression_score as an integer or float, applying a min-max normalization
2025-01-21 17:56:44.795557 -- Adding 3 pseudocounts to continuous_pseudo_progression_score=0.2381996399997909 for 4202 genes in Sst_25 because all values are 0 for that covariate
2025-01-21 17:58:47.858554 -- Adding 3 pseudocounts to continuous_pseudo_progression_score=0.7221901108281324 for 3026 genes in Sst_25 because all values are 0 for that covariate
2025-01-21 18:00:17.758875 -- Adding 3 pseudocounts to continuous_pseudo_progression_score=0.0 for 3422 genes in Sst_25 because all values are 0 for that covariate
2025-01-21 18:00:22.883192 -- Adding 3 pseudocounts 

R[write to console]: In addition: 

R[write to console]: In nebula(counts, obs$donor_id, pred = df, offset = obs$pseudo_number_of_umis,  :
R[write to console]: 
 
R[write to console]:  Some predictors in the design matrix are collinear or linearly dependent. The effects of these predictors are not identifiable.



2025-01-21 18:04:07.098163 -- Sst_25 along continuous_pseudo_progression_score was written to disk
2025-01-21 18:05:37.143941 -- Adding 3 pseudocounts to continuous_pseudo_progression_score=0.5116409139749925 for 3699 genes in Vip_5 because all values are 0 for that covariate
2025-01-21 18:07:09.576108 -- Adding 3 pseudocounts to continuous_pseudo_progression_score=0.6626252004619664 for 4394 genes in Vip_5 because all values are 0 for that covariate
2025-01-21 18:09:02.080120 -- Adding 3 pseudocounts to continuous_pseudo_progression_score=0.5446361465002717 for 5758 genes in Vip_5 because all values are 0 for that covariate
2025-01-21 18:11:34.693100 -- Detected age_at_death_binned_codes as an integer or float, applying a min-max normalization
2025-01-21 18:11:34.731747 -- Removing sex from the covariate formula
2025-01-21 18:11:34.756519 -- Detected pseudo_genes_detected as an integer or float, applying a min-max normalization
2025-01-21 18:11:34.757248 -- Detected pseudo_number_of_u

R[write to console]: Error in nebula(counts, obs$donor_id, pred = df, offset = obs$pseudo_number_of_umis,  : 
  Some predictors have zero variation or a zero vector.



2025-01-21 20:00:12.179688 -- No outgroup specified, running analysis for all groups and tests
2025-01-21 20:00:12.218427 -- Removing 9462 features from Astro_5 for low numbers of counts per cell
2025-01-21 20:00:12.225681 -- Detected continuous_pseudo_progression_score as an integer or float, applying a min-max normalization
2025-01-21 20:00:12.237025 -- Adding 3 pseudocounts to continuous_pseudo_progression_score=0.5772646320229491 for 6673 genes in Astro_5 because all values are 0 for that covariate
2025-01-21 20:00:34.062399 -- Adding 3 pseudocounts to age_at_death_binned_codes=0.5 for 2880 genes in L5_IT_3 because all values are 0 for that covariate
2025-01-21 20:03:29.392642 -- Adding 3 pseudocounts to continuous_pseudo_progression_score=0.7579101799462293 for 5258 genes in Astro_5 because all values are 0 for that covariate
2025-01-21 20:04:53.421954 -- Adding 3 pseudocounts to age_at_death_binned_codes=0.0 for 3045 genes in Vip_11 because all values are 0 for that covariate
202

R[write to console]: Error in nebula(counts, obs$donor_id, pred = df, offset = obs$pseudo_number_of_umis,  : 
  Some predictors have zero variation or a zero vector.



2025-01-21 23:08:53.681901 -- No outgroup specified, running analysis for all groups and tests
2025-01-21 23:08:53.808325 -- Removing 5156 features from Vip_19 for low numbers of counts per cell
2025-01-21 23:08:53.824934 -- Detected continuous_pseudo_progression_score as an integer or float, applying a min-max normalization
2025-01-21 23:08:53.825832 -- Detected age_at_death_binned_codes as an integer or float, applying a min-max normalization
2025-01-21 23:08:53.841966 -- Adding 3 pseudocounts to age_at_death_binned_codes=0.25 for 3727 genes in Vip_19 because all values are 0 for that covariate
2025-01-21 23:12:02.550272 -- Detected age_at_death_binned_codes as an integer or float, applying a min-max normalization
2025-01-21 23:12:03.146231 -- Detected pseudo_genes_detected as an integer or float, applying a min-max normalization
2025-01-21 23:12:03.147293 -- Detected pseudo_number_of_umis as an integer or float, applying a min-max normalization
2025-01-21 23:12:03.147691 -- Removing

R[write to console]: Error in nebula(counts, obs$donor_id, pred = df, offset = obs$pseudo_number_of_umis,  : 
  Some predictors have zero variation or a zero vector.



2025-01-21 23:22:35.698484 -- No outgroup specified, running analysis for all groups and tests
2025-01-21 23:22:36.183310 -- Removing 5620 features from Pvalb_9 for low numbers of counts per cell
2025-01-21 23:22:36.276298 -- Detected continuous_pseudo_progression_score as an integer or float, applying a min-max normalization
2025-01-21 23:22:36.325673 -- Adding 3 pseudocounts to continuous_pseudo_progression_score=0.3774144952899409 for 2427 genes in Pvalb_9 because all values are 0 for that covariate
2025-01-21 23:24:04.055271 -- Adding 3 pseudocounts to age_at_death_binned_codes=0.5 for 390 genes in L4_IT_4 because all values are 0 for that covariate
2025-01-21 23:29:15.849709 -- Adding 3 pseudocounts to age_at_death_binned_codes=0.75 for 3411 genes in Vip_19 because all values are 0 for that covariate
2025-01-21 23:29:54.366836 -- Adding 3 pseudocounts to age_at_death_binned_codes=1.0 for 56 genes in Oligo_4 because all values are 0 for that covariate
2025-01-21 23:31:21.004792 -- 

R[write to console]: In addition: 

R[write to console]: In nebula(counts, obs$donor_id, pred = df, offset = obs$pseudo_number_of_umis,  :
R[write to console]: 
 
R[write to console]:  Some predictors in the design matrix are collinear or linearly dependent. The effects of these predictors are not identifiable.



2025-01-22 02:02:18.459190 -- Pvalb_9 along continuous_pseudo_progression_score was written to disk
2025-01-22 02:02:19.352399 -- No outgroup specified, running analysis for all groups and tests
2025-01-22 02:02:19.487096 -- Removing 5899 features from Pvalb_10 for low numbers of counts per cell
2025-01-22 02:02:19.515304 -- Detected continuous_pseudo_progression_score as an integer or float, applying a min-max normalization
2025-01-22 02:02:19.542853 -- Adding 3 pseudocounts to continuous_pseudo_progression_score=0.707882151477128 for 2460 genes in Pvalb_10 because all values are 0 for that covariate
2025-01-22 02:11:04.181711 -- Adding 3 pseudocounts to continuous_pseudo_progression_score=0.0 for 779 genes in Pvalb_10 because all values are 0 for that covariate
2025-01-22 02:13:50.984269 -- Detected age_at_death_binned_codes as an integer or float, applying a min-max normalization
2025-01-22 02:13:51.049825 -- Removing sex from the covariate formula
2025-01-22 02:13:51.049937 -- Remo

R[write to console]: In addition: 

R[write to console]: In nebula(counts, obs$donor_id, pred = df, offset = obs$pseudo_number_of_umis,  :
R[write to console]: 
 
R[write to console]:  Some predictors in the design matrix are collinear or linearly dependent. The effects of these predictors are not identifiable.



2025-01-22 02:15:38.011543 -- Pvalb_10 along continuous_pseudo_progression_score was written to disk
2025-01-22 02:15:38.962814 -- No outgroup specified, running analysis for all groups and tests
2025-01-22 02:15:39.583940 -- Removing 5376 features from Pvalb_12 for low numbers of counts per cell
2025-01-22 02:15:39.706340 -- Detected continuous_pseudo_progression_score as an integer or float, applying a min-max normalization
2025-01-22 02:15:39.788640 -- Adding 3 pseudocounts to continuous_pseudo_progression_score=0.0 for 1667 genes in Pvalb_12 because all values are 0 for that covariate
2025-01-22 02:32:51.799059 -- Adding 3 pseudocounts to continuous_pseudo_progression_score=0.4145860536076113 for 1313 genes in Pvalb_12 because all values are 0 for that covariate
2025-01-22 02:46:22.137995 -- Adding 3 pseudocounts to continuous_pseudo_progression_score=0.6666314547467305 for 3811 genes in Pvalb_12 because all values are 0 for that covariate
2025-01-22 02:49:20.826863 -- Skipping L5_

R[write to console]: In addition: 

R[write to console]: In nebula(counts, obs$donor_id, pred = df, offset = obs$pseudo_number_of_umis,  :
R[write to console]: 
 
R[write to console]:  Some predictors in the design matrix are collinear or linearly dependent. The effects of these predictors are not identifiable.



2025-01-22 04:28:47.188093 -- Pvalb_14 along continuous_pseudo_progression_score was written to disk
2025-01-22 04:28:48.020374 -- No outgroup specified, running analysis for all groups and tests
2025-01-22 04:28:50.500477 -- Removing 4636 features from Pvalb_15 for low numbers of counts per cell
2025-01-22 04:28:50.981817 -- Detected continuous_pseudo_progression_score as an integer or float, applying a min-max normalization
2025-01-22 04:28:50.983095 -- Detected age_at_death_binned_codes as an integer or float, applying a min-max normalization
2025-01-22 04:28:51.334298 -- Adding 3 pseudocounts to age_at_death_binned_codes=0.75 for 75 genes in Pvalb_15 because all values are 0 for that covariate
2025-01-22 04:32:19.825550 -- Adding 3 pseudocounts to age_at_death_binned_codes=1.0 for 330 genes in Pvalb_15 because all values are 0 for that covariate
2025-01-22 04:47:43.233803 -- Adding 3 pseudocounts to age_at_death_binned_codes=0.25 for 734 genes in Pvalb_15 because all values are 0 f

In [173]:
# Genes excluded from the effect-size matrices: a fixed list of gene symbols
# plus every gene in adata_merged whose name starts with "MT-".
# NOTE(review): the fixed symbols look like Y-linked / mitochondrial-like
# genes -- confirm the intended exclusion criteria.
blacklisted_genes = [
    "MTRNR2L12", "TTTY14", "USP9Y", "NLGN4Y", "UTY", "DDX3Y", "KDM5D", "TTTY10",
]
blacklisted_genes.extend(
    adata_merged.var_names[adata_merged.var_names.str.startswith("MT-")].to_list()
)

# build_effect_size_anndata is imported from functions.run_nebula in the
# imports cell; that cell must be executed before this one.
# The results directory is assembled from path components so that it is
# correct whether or not data_dir ends with a path separator.
effect_sizes, pvalues, std_errors = build_effect_size_anndata(
    results_dir=os.path.join(data_dir, "results", "differential_gene_expression"),
    glob_pattern="*.csv",
    file_pattern="_across_continuous_pseudo_progression_score_DE.csv",
    test="continuous_pseudo_progression_score",
    adata=adata_merged,
    # NOTE(review): subclass and celltype both receive cell_type_column -- confirm intended.
    subclass=cell_type_column,
    celltype=cell_type_column,
    blacklisted_genes=blacklisted_genes,
)

NameError: name 'build_effect_size_anndata' is not defined

In [174]:
# Load the effect-size table from the supplementary "Nebula Results" folder
# and display it.
# NOTE(review): absolute, machine-specific path -- consider deriving it from a
# configurable data directory.
effect_size_table_path = '/media/tadeoye/Volume1/data/seq/SEA-AD/MTG/supplementary/Nebula Results/effect_size_table.csv'
effect_size_table = pd.read_csv(effect_size_table_path)
effect_size_table

Unnamed: 0.1,Unnamed: 0,Gene,Taxonomy Level,Population,Effect size across all of pseudoprogression,Effect size across early pseudoprogression,Effect size across late pseudoprogression,Mean expression (natural log UMIs per 10k plus 1)
0,0,MIR1302-2HG,Supertype,Lamp5_Lhx6_1,0.00,0.00,0.00,0.00
1,1,FAM138A,Supertype,Lamp5_Lhx6_1,0.00,0.00,0.00,0.00
2,2,OR4F5,Supertype,Lamp5_Lhx6_1,0.00,0.00,0.00,0.00
3,3,AL627309.1,Supertype,Lamp5_Lhx6_1,-2.17,-0.79,0.28,0.01
4,4,AL627309.3,Supertype,Lamp5_Lhx6_1,0.00,0.00,0.00,0.00
...,...,...,...,...,...,...,...,...
6072275,6072275,AC141272.1,Class,Non-neuronal and non-neural,0.00,0.00,0.00,0.00
6072276,6072276,AC023491.2,Class,Non-neuronal and non-neural,0.00,0.00,0.00,0.00
6072277,6072277,AC007325.1,Class,Non-neuronal and non-neural,0.03,0.05,0.04,0.00
6072278,6072278,AC007325.4,Class,Non-neuronal and non-neural,0.47,-0.04,0.29,0.02


In [None]:
# Path to the differential-expression CSV for one subclass/factor pair:
#   <data_dir>/results/differential_gene_expression/<Subclass>/<factor>/
#       <Subclass>_<Subclass>_across_<factor>_DE.csv
# Joined from components so the result is correct whether or not data_dir
# ends with a path separator. Despite its name, results_dir is a file path;
# the name is kept because other cells may refer to it.
subclass_label = subclass.capitalize()
results_dir = os.path.join(
    data_dir,
    'results',
    'differential_gene_expression',
    subclass_label,
    factor,
    f'{subclass_label}_{subclass_label}_across_{factor}_DE.csv',
)

# NOTE(review): this rebinds effect_sizes, which the earlier
# build_effect_size_anndata cell also assigns -- confirm that is intended.
# NOTE(review): the meaning of thr=0.01 is defined by get_standardized_effects
# and is not visible here.
effect_sizes = get_standardized_effects(results_dir, factor, filter_genes=False, thr=0.01)