In [1]:
# General modules
import sys
import os
import session_info
import warnings
from pyprojroot.here import here
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.lines as plines
import matplotlib.colors as mcolors
from IPython.display import display
from matplotlib.backends.backend_pdf import PdfPages
from matplotlib.lines import Line2D  # for legend handle

# Specific modules
import scanpy as sc
import decoupler as dc
import anndata as ad
import scienceplots

# Global behaviour: silence every Python warning for the rest of the session
warnings.filterwarnings("ignore")

# Project-local helpers live in <project root>/bin; make that directory importable first
sys.path.insert(1, str(here('bin')))
from customPalette import *
from customPythonFunctions import (
    mean_by_category,
    filter_low_represented_cell_group,
    RelativeDiff_mean_by_category,
    mscatter,
    aggregating_features,
)

# Figure styling and export resolution
plt.style.use(['nature'])
dpi_fig_save = 300
sc.set_figure_params(dpi=100, dpi_save=dpi_fig_save, vector_friendly=True)

# Switches meant to control whether existing outputs get overwritten
overwriteFigures = True
overwriteData = True

# Let pandas display wide / long tables without truncation
pd.set_option("display.max_columns", 300)
pd.set_option("display.max_rows", 1000)

In [2]:
import statsmodels.api as sm
import statsmodels.formula.api as smf
import statsmodels.stats.multitest as smm
import patsy

In [3]:
# Show which directory pyprojroot resolved as the project root
print(f"Main directory path: {here()}")

Main directory path: /scratch_isilon/groups/singlecell/shared/projects/Inflammation-PBMCs-Atlas


In [4]:
# Minimum number of observations a disease needs (within one Level2 cell type)
# to be kept in the mixed-model fits; diseases below this count are dropped.
n_min_obs = 3

In [5]:
# Optimisers handed to the mixed-model `fit(method=...)` call further down
methodList = [
    "BFGS",
    "Powell",
    "CG",
    "NM",
]

# Evaluate ULM results

Reference: https://www.statsmodels.org/stable/api.html#api-reference

## Level 2

In [6]:
def _disease_effects_table(model):
    """Return the disease fixed-effect estimates of a fitted mixed model.

    The values are read from the fitted result's own attributes (`params`,
    `bse`, `tvalues`, `pvalues`, `conf_int()`) instead of being parsed back
    from `model.summary()`. The summary table holds values already formatted
    as 3-decimal strings, so p-values taken from it are rounded before the
    multiple-testing correction.

    Parameters
    ----------
    model : fitted statsmodels mixed-model result
        Result of `smf.mixedlm(...).fit(...)`.

    Returns
    -------
    pandas.DataFrame
        One row per `C(disease, ...)` coefficient, with the same column names
        the summary table uses: 'Coef.', 'Std.Err.', 'z', 'P>|z|', '[0.025'
        and '0.975]'.
    """
    params = model.params
    conf_int = np.asarray(model.conf_int())
    effects = pd.DataFrame(
        {
            "Coef.": np.asarray(params),
            "Std.Err.": np.asarray(model.bse),
            "z": np.asarray(model.tvalues),
            "P>|z|": np.asarray(model.pvalues),
            "[0.025": conf_int[:, 0],
            "0.975]": conf_int[:, 1],
        },
        index=params.index,
    )
    # Keep only the disease contrasts (drops intercept, chemistry and variance terms)
    is_disease_term = effects.index.str.contains(r"C\(disease,", regex=True)
    return effects.loc[is_disease_term].copy()


summary_UMLeval_list = []

for celltype1 in ["B", "Plasma", "DC", "Mono", "T_CD4_NonNaive", "T_CD8_NonNaive", "ILC", "UTC"]:
    print(celltype1)

    # Load data
    anndata_path = here('03_downstream_analysis/06_inflammation_signatures/results/DecoupleR_ulmestimates_{}_L2_Corr.h5ad'.format(celltype1))
    actsPS = ad.read_h5ad(anndata_path)

    # Drop the 'X-global-X' factors, both from the variables and from the obsm tables
    keep_vars = [var for var in actsPS.var_names if 'X-global-X' not in var]
    actsPS = actsPS[:, keep_vars].copy()
    actsPS.obsm["ulm_estimate"] = actsPS.obsm["ulm_estimate"][keep_vars]
    actsPS.obsm["ulm_pvals"] = actsPS.obsm["ulm_pvals"][keep_vars]

    # Prepare data
    factor_list = actsPS.var_names.to_list()

    # Collects the per-factor results of every Level2 cell type of this compartment
    summary_table_list = []

    for celltype2 in actsPS.obs["Level2"].unique().tolist():
        print(celltype2)
        acts_sub = actsPS[actsPS.obs["Level2"] == celltype2].copy()

        actsPS_df = pd.DataFrame(acts_sub.X, index=acts_sub.obs.index, columns=factor_list)
        covariates = acts_sub.obs[['studyID', 'disease', 'chemistry', 'Level2']]#, 'sex', 'binned_age']]
        actsPS_covar_df = pd.merge(actsPS_df, covariates, left_index=True, right_index=True)

        # Encode categorical variables
        actsPS_covar_df['disease'] = actsPS_covar_df['disease'].astype('category')
        actsPS_covar_df['chemistry'] = actsPS_covar_df['chemistry'].astype('category')

        ## Check if there are at least n_min_obs observations for each disease
        disease_counts = actsPS_covar_df['disease'].value_counts()
        removeDisease = disease_counts[disease_counts < n_min_obs].index.tolist()
        if len(removeDisease) > 0:
            print(f"{', '.join(removeDisease)} include less than {n_min_obs}. They won't be considered")

        # Explicit copy so the category clean-up below edits an independent frame
        actsPS_covar_df_filt = actsPS_covar_df.query("disease not in @removeDisease").copy()
        actsPS_covar_df_filt['disease'] = actsPS_covar_df_filt['disease'].cat.remove_unused_categories()
        actsPS_covar_df_filt['chemistry'] = actsPS_covar_df_filt['chemistry'].cat.remove_unused_categories()

        # Fit Mixed Linear Model per each factor
        for factor in factor_list:
            formula = f'Q("{factor}")~ C(disease, Treatment(reference="healthy")) + C(chemistry)'

            try:
                model = smf.mixedlm(
                    formula,
                    actsPS_covar_df_filt,
                    groups=actsPS_covar_df_filt['studyID'],
                ).fit(method=methodList, maxiter=1000)

                summary_table = _disease_effects_table(model)

                summary_table["AnnotationLevel"] = "Level2"
                summary_table["CellType_Level1"] = celltype1
                summary_table["CellType_Level2"] = celltype2
                summary_table["FactorName"] = factor
                summary_table["disease"] = summary_table.index.str.extract(r'C\(disease, Treatment\(reference="healthy"\)\)\[T\.(.*)\]')[0].to_list()
                # Coefficients without a usable p-value cannot enter the FDR correction
                summary_table = summary_table.dropna(subset=['P>|z|'])

                summary_table_list.append(summary_table)

                print(f"{factor} done")

            except Exception as error:
                # Best effort: report the failing fit and move on to the next factor
                print(f"ERROR: Not computing mixedLM for {celltype2} and {factor} due to {error} error.")
                print("probably due to:")
                print(actsPS_covar_df_filt.value_counts(['disease','chemistry']).reset_index().sort_values('disease'))

            print()

    # Nothing could be fitted for this compartment: skip it instead of re-using
    # a result table left over from a previous compartment.
    if not summary_table_list:
        print(f"ERROR: Not concatenating mixedLM for {celltype1}: no model could be fitted.")
        continue

    summary_table_byCell = pd.concat(summary_table_list)
    if summary_table_byCell.empty:
        print(f"ERROR: Not correcting mixedLM p-values for {celltype1}: no valid p-value available.")
        continue

    # Correct for multiple testing.
    # NOTE(review): the correction spans every factor x disease x Level2 cell type
    # of this Level1 compartment, which is what the original loop effectively
    # produced. Confirm this is the intended scope.
    corrected_pvalues = smm.fdrcorrection(summary_table_byCell['P>|z|'], method='indep', alpha=0.05, is_sorted=False)[1]
    summary_table_byCell['Pval_adj'] = corrected_pvalues

    # Append results
    summary_UMLeval_list.append(summary_table_byCell)

summary_table_UMLeval = pd.concat(summary_UMLeval_list)
summary_table_UMLeval.to_csv(here('03_downstream_analysis/06_inflammation_signatures/results/DecoupleR_ulmestimates_mixedmlEval_L2_Corr.csv'))

# Filtering: keep only the disease effects significant after FDR correction
summary_table_UMLeval = summary_table_UMLeval[summary_table_UMLeval['Pval_adj'] < 0.05]
summary_table_UMLeval.to_csv(here('03_downstream_analysis/06_inflammation_signatures/results/DecoupleR_ulmestimates_mixedmlEval_L2_Corr_filt.csv'))

B
B_IFNresponder
HIV, COPD, RA, SLE, cirrhosis include less than 3. They won't be considered
10-X-B-X-cytokine_and_receptors_proinflammatory done

11-X-B-X-cytokine_andreceptors_antiinflammatory done

12-X-B-X-IFN_Type_1_2_Lambda done

13-X-B-X-IFN_response done

14-X-B-X-TNF_receptors_ligands done

15-X-B-X-adhesion_molecules done

16-X-B-X-antigen_presentation_molecules done

7-X-B-X-effector done

8-X-B-X-chemokines done

9-X-B-X-chemokine_receptors done

B_Memory_ITGAX
HNSCC, flu, COPD, BRCA include less than 3. They won't be considered
10-X-B-X-cytokine_and_receptors_proinflammatory done

11-X-B-X-cytokine_andreceptors_antiinflammatory done

12-X-B-X-IFN_Type_1_2_Lambda done

13-X-B-X-IFN_response done

14-X-B-X-TNF_receptors_ligands done

15-X-B-X-adhesion_molecules done

16-X-B-X-antigen_presentation_molecules done

7-X-B-X-effector done

8-X-B-X-chemokines done

9-X-B-X-chemokine_receptors done

B_Memory_switched
10-X-B-X-cytokine_and_receptors_proinflammatory done

11-X-B-X-cy



10-X-B-X-cytokine_and_receptors_proinflammatory done

11-X-B-X-cytokine_andreceptors_antiinflammatory done

12-X-B-X-IFN_Type_1_2_Lambda done

13-X-B-X-IFN_response done

14-X-B-X-TNF_receptors_ligands done

15-X-B-X-adhesion_molecules done

16-X-B-X-antigen_presentation_molecules done

7-X-B-X-effector done

8-X-B-X-chemokines done

9-X-B-X-chemokine_receptors done

B_Naive
10-X-B-X-cytokine_and_receptors_proinflammatory done

11-X-B-X-cytokine_andreceptors_antiinflammatory done

12-X-B-X-IFN_Type_1_2_Lambda done

13-X-B-X-IFN_response done

14-X-B-X-TNF_receptors_ligands done

15-X-B-X-adhesion_molecules done

16-X-B-X-antigen_presentation_molecules done

7-X-B-X-effector done

8-X-B-X-chemokines done

9-X-B-X-chemokine_receptors done

B_Naive_activated
flu include less than 3. They won't be considered
10-X-B-X-cytokine_and_receptors_proinflammatory done

11-X-B-X-cytokine_andreceptors_antiinflammatory done

12-X-B-X-IFN_Type_1_2_Lambda done

13-X-B-X-IFN_response done

14-X-B-X-TNF_



56-X-Plasma-X-IFN_response done

57-X-Plasma-X-TNF_receptors_ligands done

58-X-Plasma-X-adhesion_molecules done

59-X-Plasma-X-antigen_presentation_molecules done

Plasma_IGHA
BRCA, asthma, PSA, MS, COPD include less than 3. They won't be considered




51-X-Plasma-X-chemokines done

52-X-Plasma-X-chemokine_receptors done

53-X-Plasma-X-cytokine_and_receptors_proinflammatory done

54-X-Plasma-X-cytokine_andreceptors_antiinflammatory done

55-X-Plasma-X-IFN_Type_1_2_Lambda done





56-X-Plasma-X-IFN_response done

57-X-Plasma-X-TNF_receptors_ligands done

58-X-Plasma-X-adhesion_molecules done





59-X-Plasma-X-antigen_presentation_molecules done

Plasma_IGHG
CD, CRC, HBV, BRCA, PSA include less than 3. They won't be considered
51-X-Plasma-X-chemokines done

52-X-Plasma-X-chemokine_receptors done





53-X-Plasma-X-cytokine_and_receptors_proinflammatory done

54-X-Plasma-X-cytokine_andreceptors_antiinflammatory done

55-X-Plasma-X-IFN_Type_1_2_Lambda done

56-X-Plasma-X-IFN_response done

57-X-Plasma-X-TNF_receptors_ligands done

58-X-Plasma-X-adhesion_molecules done

59-X-Plasma-X-antigen_presentation_molecules done

DC
DC4
BRCA, HNSCC, NPC, asthma, sepsis, MS, UC include less than 3. They won't be considered
18-X-DC-X-DC_antigen-crosspresentation done

19-X-DC-X-chemokines done

20-X-DC-X-chemokine_receptors done

21-X-DC-X-cytokine_and_receptors_proinflammatory done

22-X-DC-X-cytokine_andreceptors_antiinflammatory done

23-X-DC-X-IFN_Type_1_2_Lambda done





24-X-DC-X-IFN_response done

25-X-DC-X-TNF_receptors_ligands done

26-X-DC-X-adhesion_molecules done

27-X-DC-X-antigen_presentation_molecules done

28-X-DC-X-global_all_TNF-via-NFkB_signaling done

cDC3
NPC, HIV include less than 3. They won't be considered
18-X-DC-X-DC_antigen-crosspresentation done

19-X-DC-X-chemokines done

20-X-DC-X-chemokine_receptors done

21-X-DC-X-cytokine_and_receptors_proinflammatory done

22-X-DC-X-cytokine_andreceptors_antiinflammatory done

23-X-DC-X-IFN_Type_1_2_Lambda done

24-X-DC-X-IFN_response done

25-X-DC-X-TNF_receptors_ligands done

26-X-DC-X-adhesion_molecules done

27-X-DC-X-antigen_presentation_molecules done

28-X-DC-X-global_all_TNF-via-NFkB_signaling done

cDC2
flu, HIV include less than 3. They won't be considered
18-X-DC-X-DC_antigen-crosspresentation done

19-X-DC-X-chemokines done

20-X-DC-X-chemokine_receptors done

21-X-DC-X-cytokine_and_receptors_proinflammatory done

22-X-DC-X-cytokine_andreceptors_antiinflammatory done

23-X-DC-X-



24-X-DC-X-IFN_response done

25-X-DC-X-TNF_receptors_ligands done

26-X-DC-X-adhesion_molecules done

27-X-DC-X-antigen_presentation_molecules done

28-X-DC-X-global_all_TNF-via-NFkB_signaling done

DC5
COVID, HBV, sepsis include less than 3. They won't be considered
18-X-DC-X-DC_antigen-crosspresentation done

19-X-DC-X-chemokines done

20-X-DC-X-chemokine_receptors done





21-X-DC-X-cytokine_and_receptors_proinflammatory done

22-X-DC-X-cytokine_andreceptors_antiinflammatory done

23-X-DC-X-IFN_Type_1_2_Lambda done

24-X-DC-X-IFN_response done

25-X-DC-X-TNF_receptors_ligands done

26-X-DC-X-adhesion_molecules done

27-X-DC-X-antigen_presentation_molecules done

28-X-DC-X-global_all_TNF-via-NFkB_signaling done

Mono
Mono_IFNresponse
HIV include less than 3. They won't be considered
39-X-Mono-X-IFNG_response done

40-X-Mono-X-IL4-IL13_response done

41-X-Mono-X-chemokines done

42-X-Mono-X-chemokine_receptors done

43-X-Mono-X-cytokine_and_receptors_proinflammatory done

44-X-Mono-X-cytokine_andreceptors_antiinflammatory done

45-X-Mono-X-IFN_Type_1_2_Lambda done

46-X-Mono-X-IFN_response done

47-X-Mono-X-TNF_receptors_ligands done

48-X-Mono-X-adhesion_molecules done

49-X-Mono-X-antigen_presentation_molecules done

50-X-Mono-X-global_all_TNF-via-NFkB_signaling done

Mono_classical
39-X-Mono-X-IFNG_response done

40-X-Mono-X-IL4-IL13_response done

41-X



39-X-Mono-X-IFNG_response done

40-X-Mono-X-IL4-IL13_response done

41-X-Mono-X-chemokines done

42-X-Mono-X-chemokine_receptors done

43-X-Mono-X-cytokine_and_receptors_proinflammatory done

44-X-Mono-X-cytokine_andreceptors_antiinflammatory done

45-X-Mono-X-IFN_Type_1_2_Lambda done

46-X-Mono-X-IFN_response done

47-X-Mono-X-TNF_receptors_ligands done

48-X-Mono-X-adhesion_molecules done

49-X-Mono-X-antigen_presentation_molecules done

50-X-Mono-X-global_all_TNF-via-NFkB_signaling done

Mono_regulatory
MS, NPC, HIV include less than 3. They won't be considered
39-X-Mono-X-IFNG_response done

40-X-Mono-X-IL4-IL13_response done

41-X-Mono-X-chemokines done

42-X-Mono-X-chemokine_receptors done

43-X-Mono-X-cytokine_and_receptors_proinflammatory done

44-X-Mono-X-cytokine_andreceptors_antiinflammatory done

45-X-Mono-X-IFN_Type_1_2_Lambda done

46-X-Mono-X-IFN_response done

47-X-Mono-X-TNF_receptors_ligands done

48-X-Mono-X-adhesion_molecules done

49-X-Mono-X-antigen_presentation_m



82-X-T_CD4_NonNaive-X-chemokine_receptors done

83-X-T_CD4_NonNaive-X-cytokine_and_receptors_proinflammatory done

84-X-T_CD4_NonNaive-X-cytokine_andreceptors_antiinflammatory done

85-X-T_CD4_NonNaive-X-IFN_Type_1_2_Lambda done

86-X-T_CD4_NonNaive-X-IFN_response done

87-X-T_CD4_NonNaive-X-TNF_receptors_ligands done

88-X-T_CD4_NonNaive-X-adhesion_molecules done

89-X-T_CD4_NonNaive-X-antigen_presentation_molecules done

Tregs
76-X-T_CD4_NonNaive-X-CD4T_TH17_UP done

77-X-T_CD4_NonNaive-X-CD4T_TH2_UP done

78-X-T_CD4_NonNaive-X-CD4T_TFH_UP done

79-X-T_CD4_NonNaive-X-CD4T_TH1_UP done

80-X-T_CD4_NonNaive-X-Tregs_FoxP3_stabilization done

81-X-T_CD4_NonNaive-X-chemokines done

82-X-T_CD4_NonNaive-X-chemokine_receptors done

83-X-T_CD4_NonNaive-X-cytokine_and_receptors_proinflammatory done

84-X-T_CD4_NonNaive-X-cytokine_andreceptors_antiinflammatory done

85-X-T_CD4_NonNaive-X-IFN_Type_1_2_Lambda done

86-X-T_CD4_NonNaive-X-IFN_response done

87-X-T_CD4_NonNaive-X-TNF_receptors_ligand



106-X-T_CD8_NonNaive-X-chemokine_receptors done

ERROR: Not computing mixedLM for T_CD8_arrested and 107-X-T_CD8_NonNaive-X-cytokine_and_receptors_proinflammatory due to Singular matrix error.
probably due to:
   disease chemistry  count
4       CD  3_GEX_V3      4
1      HBV  5_GEX_V1      6
0       RA  3_GEX_V3      8
3      SLE  3_GEX_V2      5
6      SLE  3_GEX_V3      2
2       UC  3_GEX_V3      6
5  healthy  5_GEX_V1      3

ERROR: Not computing mixedLM for T_CD8_arrested and 108-X-T_CD8_NonNaive-X-cytokine_andreceptors_antiinflammatory due to Singular matrix error.
probably due to:
   disease chemistry  count
4       CD  3_GEX_V3      4
1      HBV  5_GEX_V1      6
0       RA  3_GEX_V3      8
3      SLE  3_GEX_V2      5
6      SLE  3_GEX_V3      2
2       UC  3_GEX_V3      6
5  healthy  5_GEX_V1      3

ERROR: Not computing mixedLM for T_CD8_arrested and 109-X-T_CD8_NonNaive-X-IFN_Type_1_2_Lambda due to Singular matrix error.
probably due to:
   disease chemistry  count
4       C



120-X-UTC-X-IFN_response done

121-X-UTC-X-TNF_receptors_ligands done

122-X-UTC-X-adhesion_molecules done

123-X-UTC-X-antigen_presentation_molecules done



In [7]:
# Display the session information report so the run's environment is recorded with the notebook
session_info.show()