## Differential TF activity analysis between mild and severe cells in the cMono cell type

In [1]:
import tensorflow as tf
import anndata
import h5py
import numpy as np
import math
import scipy
import scipy.sparse as sparse
from scipy.sparse import csr_matrix
import scanpy as sc
import copy
import pandas as pd
from scipy.stats import mannwhitneyu
from statsmodels.stats.multitest import multipletests
import seaborn as sns
import matplotlib.pyplot as plt

In [5]:
# Directory that receives the result tables written by this notebook.
out_dir = "./"
# Base directory (relative to this notebook) under which the input files are read.
path = "../../"

In [6]:
# Load the TF-activity AnnData object. `raw` is left untouched; all label edits
# below are applied to the independent copy `obj`.
raw = sc.read_h5ad(f'{path}/2_TFactivity/activity_out/covid19_tfactivity.h5ad')
obj = raw.copy()
# Fold the 'CD163.cMono' label into 'cMono' so both are analysed as one cell type.
obj.obs['celltypeL0'] = obj.obs['celltypeL0'].replace(
    {'CD163.cMono': 'cMono'}
)
# obs x var table of the original matrix. `to_df()` yields the same frame as
# pd.DataFrame(raw.X, index=raw.obs_names, columns=raw.var_names) for a dense X
# and additionally densifies a sparse X, which the plain constructor does not handle.
df = raw.to_df()

In [7]:
def calculate_pvalues(obj, celltype, tf):
    """Compare one TF between mild and severe cells of one cell type.

    Parameters
    ----------
    obj
        AnnData-like object whose ``obs`` has the columns ``"celltypeL0"`` and
        ``"Severity"`` and whose ``var_names`` contain ``tf``.
    celltype
        Value of ``obs["celltypeL0"]`` selecting the cells to test.
    tf
        Variable name whose values are compared.

    Returns
    -------
    float
        Two-sided Mann-Whitney U p-value (severity ``"mild"`` vs ``"severe"``),
        or ``np.nan`` when either group has 3 or fewer cells.
    """
    in_celltype = (obj.obs["celltypeL0"] == celltype).to_numpy()
    # Slice down to the single requested column *before* converting to a
    # DataFrame; converting the whole subset for every TF densifies the full
    # matrix on each call.
    activity = obj[in_celltype, tf].to_df()[tf]
    severity = obj.obs.loc[in_celltype, "Severity"]
    mild = activity[(severity == "mild").to_numpy()]
    severe = activity[(severity == "severe").to_numpy()]

    # Require more than 3 cells per group; smaller groups are reported as NaN.
    if len(mild) > 3 and len(severe) > 3:
        _, p = mannwhitneyu(mild, severe, alternative="two-sided")
        return p
    return np.nan

In [8]:
celltypes = obj.obs["celltypeL0"].unique()
tfs = obj.var_names

# One p-value per (cell type, TF) pair; NaN marks pairs that were not tested.
results = [
    {
        "celltype": celltype,
        "tf": tf,
        "pval": calculate_pvalues(obj, celltype, tf),
    }
    for celltype in celltypes
    for tf in tfs
]

results_df = pd.DataFrame(results, columns=["celltype", "tf", "pval"])

# Benjamini-Hochberg correction, applied separately within each cell type.
results_df["pval_adj"] = np.nan
for celltype in celltypes:
    mask = results_df["celltype"] == celltype
    pvals = results_df.loc[mask, "pval"].dropna()

    if pvals.empty:
        continue

    _, pvals_adj, _, _ = multipletests(pvals, method="fdr_bh")
    # Write back through the index of the non-NaN p-values. Assigning through
    # `mask` would have a different length whenever some p-values of this cell
    # type are NaN, and the adjusted values would no longer line up with rows.
    results_df.loc[pvals.index, "pval_adj"] = pvals_adj

In [9]:
## Effect size: difference of group means (severe - mild) per cell type and TF
# `to_df()` gives the obs x var table and densifies a sparse X, which
# pd.DataFrame(obj.X, ...) does not handle.
df_zscore = obj.to_df()
df_zscore["celltypeL0"] = obj.obs["celltypeL0"]
df_zscore["Severity"] = obj.obs["Severity"]

# Mean of every TF column within each (cell type, severity) group.
grouped_mean = df_zscore.groupby(["celltypeL0", "Severity"]).mean()
mild_mean = grouped_mean.xs("mild", level="Severity")
severe_mean = grouped_mean.xs("severe", level="Severity")
# Positive values mean a higher group mean in severe than in mild cells.
effect_sizes = severe_mean - mild_mean

# Wide (cell type x TF) -> long format so it can be joined onto results_df.
es_long = effect_sizes.stack().reset_index()
es_long.columns = ['celltype', 'tf', 'mean_difference']

# Left join keeps every tested (cell type, TF) row even without an effect size.
results_df = results_df.merge(es_long, on=['celltype', 'tf'], how='left')

In [11]:
# Keep only the cMono rows and export them. The file goes to `out_dir`, the
# same output location used for the other table written by this notebook,
# instead of a hard-coded './'.
cMono_df = results_df[results_df['celltype'] == 'cMono']
cMono_df.to_csv(f'{out_dir}/DAresults_mildVSsevere_incMono.csv', index=False)

In [13]:
# Display the combined results table (all cell types, not only cMono).
results_df

Unnamed: 0,celltype,tf,pval,pval_adj,mean_difference
0,cMono,ALX1,6.411295e-38,8.709221e-38,-0.000132
1,cMono,ALX3,3.080985e-01,3.148992e-01,0.000399
2,cMono,ALX4,1.653847e-01,1.708387e-01,0.000413
3,cMono,ANHX,5.501032e-76,9.929828e-76,-0.000435
4,cMono,AR,1.114455e-96,2.631696e-96,-0.001193
...,...,...,...,...,...
9580,mDC,ZSCAN4,5.633650e-01,7.139757e-01,0.000455
9581,mDC,ZSCAN5,4.490377e-01,6.575762e-01,0.000315
9582,mDC,ZSCAN5C,7.337668e-06,4.551659e-04,0.001042
9583,mDC,ZSCAN9,4.922153e-01,6.670609e-01,0.003969


### Add the correlation between TF activity and TF expression in classical monocytes (cMono)

In [15]:
# Per-TF correlation table (Pearson, going by the file name); the columns used
# here are TF, Correlation and pval_adj.
mcoor = pd.read_csv(f'{path}/3_mono/1_cMono/pearsonr_TFactivity_TFexpr_incMono.csv')
# Rename to match cMono_df's key and to avoid clashing with its own 'pval_adj'.
corr_cols = mcoor[["TF", "Correlation", 'pval_adj']].rename(
    columns={"TF": "tf", 'pval_adj': 'corr_padj'}
)
# Left join: every cMono TF is kept, with NaN where no correlation row exists.
merged_df = pd.merge(left=cMono_df, right=corr_cols, on="tf", how="left")
print(merged_df.shape)
# TFs without correlation data are treated as "no evidence": adjusted p-value 1
# and correlation 0. Filling the adjusted p-value with 0 would instead make
# them look maximally significant.
merged_df["corr_padj"] = merged_df["corr_padj"].fillna(1.0).astype(float)
merged_df["Correlation"] = merged_df["Correlation"].fillna(0).astype(float)
# NOTE: the row index is written as the first (unnamed) CSV column.
merged_df.to_csv(f'{out_dir}/DAresults_mildVSsevere_incMono_add-corr.csv')

(1065, 7)
