In [1]:
# General modules
import sys
import os
import session_info
import warnings
from pyprojroot.here import here
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.lines as plines
import matplotlib.colors as mcolors
from IPython.display import display
from matplotlib.backends.backend_pdf import PdfPages
from matplotlib.lines import Line2D  # for legend handle

# Specific modules
import scanpy as sc
import decoupler as dc
import anndata as ad
import scienceplots

# Setting some parameters
# Ignore every warning category for the rest of the kernel session.
# NOTE(review): this blanket filter would presumably also hide any warnings
# emitted by the model fits further down — consider narrowing it.
warnings.filterwarnings("ignore")

# Import functions
# Insert the project's `bin` directory (resolved via pyprojroot's `here`) into
# the module search path so the two helper modules below can be imported.
# This must run before the imports that follow.
sys.path.insert(1, str(here('bin')))
# Wildcard import: the names it brings into the notebook namespace are not
# visible from this file.
from customPalette import *
from customPythonFunctions import mean_by_category, filter_low_represented_cell_group, RelativeDiff_mean_by_category, mscatter, aggregating_features

# --- Figure appearance ---
# Matplotlib style sheet 'nature' (registered by the `scienceplots` import above).
plt.style.use(['nature'])
# Resolution used when figures are written to disk; on-screen figures use dpi=100.
dpi_fig_save = 300
sc.set_figure_params(vector_friendly=True, dpi_save=dpi_fig_save, dpi=100)

# --- Output switches ---
overwriteFigures = overwriteData = True

# --- pandas display limits ---
pd.set_option("display.max_columns", 300)
pd.set_option("display.max_rows", 1000)

In [2]:
import statsmodels.api as sm
import statsmodels.formula.api as smf
import statsmodels.api as sm
import statsmodels.stats.multitest as smm
import patsy

In [3]:
# Show the project root that `here()` resolves to, as a sanity check on paths.
project_root = here()
print("Main directory path: {}".format(project_root))

Main directory path: /scratch_isilon/groups/singlecell/shared/projects/Inflammation-PBMCs-Atlas


In [4]:
# Minimum-observation threshold. Not referenced in the cells visible here —
# presumably consumed by a later filtering step (TODO confirm).
n_min_obs = 3

In [5]:
# Candidate method names. Not referenced in the cells visible here.
# NOTE(review): 'pinv' and 'qr' look like the `method` options of statsmodels'
# OLS fit — confirm the intended use.
methodList = ['pinv','qr']

# Evaluate covariates on healthy - Level1

Reference: [statsmodels API reference](https://www.statsmodels.org/stable/api.html#api-reference)

## Corrected

In [6]:
summary_UMLeval_list = []

for celltype in ["Mono"]:
    print(celltype)

    # Read the corrected (L1) estimates for this cell type and keep only the
    # observations annotated as healthy.
    anndata_path = here(
        '03_downstream_analysis/06_inflammation_signatures/results/DecoupleR_ulmestimates_{}_L1_Corr.h5ad'.format(celltype)
    )
    actsPS = ad.read_h5ad(anndata_path)
    healthy_mask = actsPS.obs["disease"].isin(["healthy"])
    actsPS = actsPS[healthy_mask]

    # Drop every feature whose name contains 'X-global-X', and restrict the
    # two .obsm tables to the same remaining feature names.
    kept_features = [name for name in actsPS.var_names if 'X-global-X' not in name]
    actsPS = actsPS[:, kept_features].copy()
    for obsm_key in ("ulm_estimate", "ulm_pvals"):
        actsPS.obsm[obsm_key] = actsPS.obsm[obsm_key][kept_features]

    # Build one table: a column per factor plus the covariate columns,
    # aligned on the observation index.
    factor_list = actsPS.var_names.to_list()
    actsPS_df = pd.DataFrame(actsPS.X, index=actsPS.obs.index, columns=factor_list)
    covariates = actsPS.obs.loc[:, ['studyID', 'disease', 'chemistry', 'sex', 'binned_age']]
    actsPS_covar_df = actsPS_df.merge(covariates, left_index=True, right_index=True)

    # Make the covariates categorical and discard levels that no longer occur
    # after the subsetting above.
    for covariate in ('disease', 'chemistry', 'sex', 'binned_age'):
        as_category = actsPS_covar_df[covariate].astype('category')
        actsPS_covar_df[covariate] = as_category.cat.remove_unused_categories()

    # Select the modelling input for a single factor. Only the first factor is
    # taken here; the mixed models themselves are fit in the following cells.
    factor = factor_list[0]
    print(factor)

    model_columns = ['disease', 'chemistry', 'studyID', 'sex', 'binned_age', factor]
    actsPS_covar_df_sub = actsPS_covar_df[model_columns]

Mono
39-X-Mono-X-IFNG_response


In [7]:
# Full covariate model for the selected factor: chemistry, sex and age bin are
# categorical fixed effects; observations are grouped by study.
formula = f"Q('{factor}') ~ C(chemistry) + C(sex) + C(binned_age)"
study_groups = actsPS_covar_df_sub['studyID']
mixed_lm = smf.mixedlm(formula, data=actsPS_covar_df_sub, groups=study_groups)
model = mixed_lm.fit()

print(model.summary())

                   Mixed Linear Model Regression Results
Model:            MixedLM Dependent Variable: Q('39-X-Mono-X-IFNG_response')
No. Observations: 270     Method:             REML                          
No. Groups:       12      Scale:              2.5647                        
Min. group size:  1       Log-Likelihood:     -513.8270                     
Max. group size:  133     Converged:          Yes                           
Mean group size:  22.5                                                      
-----------------------------------------------------------------------------
                              Coef.   Std.Err.    z     P>|z|  [0.025  0.975]
-----------------------------------------------------------------------------
Intercept                     -0.222     0.638  -0.348  0.728  -1.472   1.029
C(chemistry)[T.3_GEX_V3]      -0.305     0.974  -0.313  0.754  -2.213   1.603
C(chemistry)[T.5_GEX_V1]       0.528     1.162   0.454  0.650  -1.751   2.806
C(chemistry)[

In [12]:
# Reduced model: age bin is the only fixed effect; observations are grouped by study.
# NOTE(review): this cell's execution count (12) is later than the UnCorrected
# cells (9-11), which rebind `actsPS_covar_df_sub` and `factor`. The stored
# output may therefore come from the UnCorrected data — re-run the notebook
# top to bottom to confirm this result belongs to the Corrected section.
formula = f"Q('{factor}') ~ C(binned_age)"
study_groups = actsPS_covar_df_sub['studyID']
mixed_lm = smf.mixedlm(formula, data=actsPS_covar_df_sub, groups=study_groups)
model = mixed_lm.fit()

print(model.summary())

                   Mixed Linear Model Regression Results
Model:            MixedLM Dependent Variable: Q('39-X-Mono-X-IFNG_response')
No. Observations: 270     Method:             REML                          
No. Groups:       12      Scale:              2.4686                        
Min. group size:  1       Log-Likelihood:     -510.2445                     
Max. group size:  133     Converged:          Yes                           
Mean group size:  22.5                                                      
------------------------------------------------------------------------------
                          Coef.    Std.Err.     z      P>|z|   [0.025   0.975]
------------------------------------------------------------------------------
Intercept                 -0.199      0.348   -0.573   0.567   -0.881    0.482
C(binned_age)[T.31-40]    -0.373      0.288   -1.296   0.195   -0.936    0.191
C(binned_age)[T.41-50]    -1.211      0.353   -3.429   0.001   -1.903   -0.519
C(binne

## UnCorrected

In [9]:
# NOTE: this cell rebinds the same variable names used by the Corrected section
# (actsPS, actsPS_covar_df, actsPS_covar_df_sub, factor, ...), so any model cell
# run afterwards operates on the UnCorrected data.
for celltype in ["Mono"]:
    print(celltype)

    # Read the uncorrected (L1) estimates for this cell type and keep only the
    # observations annotated as healthy.
    anndata_path = here(
        '03_downstream_analysis/06_inflammation_signatures/results/DecoupleR_ulmestimates_{}_L1_UnCorr.h5ad'.format(celltype)
    )
    actsPS = ad.read_h5ad(anndata_path)
    healthy_mask = actsPS.obs["disease"].isin(["healthy"])
    actsPS = actsPS[healthy_mask]

    # Drop every feature whose name contains 'X-global-X', and restrict the
    # two .obsm tables to the same remaining feature names.
    kept_features = [name for name in actsPS.var_names if 'X-global-X' not in name]
    actsPS = actsPS[:, kept_features].copy()
    for obsm_key in ("ulm_estimate", "ulm_pvals"):
        actsPS.obsm[obsm_key] = actsPS.obsm[obsm_key][kept_features]

    # Build one table: a column per factor plus the covariate columns,
    # aligned on the observation index.
    factor_list = actsPS.var_names.to_list()
    actsPS_df = pd.DataFrame(actsPS.X, index=actsPS.obs.index, columns=factor_list)
    covariates = actsPS.obs.loc[:, ['studyID', 'disease', 'chemistry', 'sex', 'binned_age']]
    actsPS_covar_df = actsPS_df.merge(covariates, left_index=True, right_index=True)

    # Make the covariates categorical and discard levels that no longer occur
    # after the subsetting above.
    for covariate in ('disease', 'chemistry', 'sex', 'binned_age'):
        as_category = actsPS_covar_df[covariate].astype('category')
        actsPS_covar_df[covariate] = as_category.cat.remove_unused_categories()

    # Select the modelling input for a single factor. Only the first factor is
    # taken here; the mixed models themselves are fit in the following cells.
    factor = factor_list[0]
    print(factor)

    model_columns = ['disease', 'chemistry', 'studyID', 'sex', 'binned_age', factor]
    actsPS_covar_df_sub = actsPS_covar_df[model_columns]
    


Mono
39-X-Mono-X-IFNG_response


In [10]:
# Full covariate model for the selected factor: chemistry, sex and age bin are
# categorical fixed effects; observations are grouped by study.
formula = f"Q('{factor}') ~ C(chemistry) + C(sex) + C(binned_age)"
study_groups = actsPS_covar_df_sub['studyID']
mixed_lm = smf.mixedlm(formula, data=actsPS_covar_df_sub, groups=study_groups)
model = mixed_lm.fit()

print(model.summary())

                   Mixed Linear Model Regression Results
Model:            MixedLM Dependent Variable: Q('39-X-Mono-X-IFNG_response')
No. Observations: 270     Method:             REML                          
No. Groups:       12      Scale:              2.4657                        
Min. group size:  1       Log-Likelihood:     -507.6731                     
Max. group size:  133     Converged:          Yes                           
Mean group size:  22.5                                                      
-----------------------------------------------------------------------------
                              Coef.   Std.Err.    z     P>|z|  [0.025  0.975]
-----------------------------------------------------------------------------
Intercept                     -0.214     0.550  -0.389  0.697  -1.292   0.864
C(chemistry)[T.3_GEX_V3]       0.223     0.830   0.269  0.788  -1.403   1.849
C(chemistry)[T.5_GEX_V1]       0.319     0.991   0.322  0.747  -1.622   2.261
C(chemistry)[

In [11]:
# Reduced model: chemistry is the only fixed effect; observations are grouped by study.
formula = f"Q('{factor}') ~  C(chemistry)"
study_groups = actsPS_covar_df_sub['studyID']
mixed_lm = smf.mixedlm(formula, data=actsPS_covar_df_sub, groups=study_groups)
model = mixed_lm.fit()

print(model.summary())

                   Mixed Linear Model Regression Results
Model:            MixedLM Dependent Variable: Q('39-X-Mono-X-IFNG_response')
No. Observations: 270     Method:             REML                          
No. Groups:       12      Scale:              2.5874                        
Min. group size:  1       Log-Likelihood:     -517.2048                     
Max. group size:  133     Converged:          Yes                           
Mean group size:  22.5                                                      
-----------------------------------------------------------------------------
                              Coef.   Std.Err.    z     P>|z|  [0.025  0.975]
-----------------------------------------------------------------------------
Intercept                     -0.794     0.569  -1.394  0.163  -1.910   0.322
C(chemistry)[T.3_GEX_V3]      -0.020     0.905  -0.022  0.983  -1.794   1.755
C(chemistry)[T.5_GEX_V1]      -0.065     1.077  -0.060  0.952  -2.176   2.047
C(chemistry)[