In [None]:
import pandas as pd
import statsmodels.api as sts
from statsmodels.stats.multitest import multipletests as mp

from src.figures import boxplot
from src.col_palette import pal

In [None]:
# Load the sample annotation, keep the samples used in the ICU analysis and
# encode the model variables numerically.
samplesheet = pd.read_csv("../data/raw/SampleSheet.csv", index_col=0)

# Keep samples with a known ICU status whose Status mentions "COVID" but not "PL".
# na=False makes a missing Status count as "no match" instead of putting NaN into
# the boolean mask (which pandas refuses to index with).
has_icu_status = samplesheet["ICU"].isin(["non-ICU", "ICU"])
is_covid = samplesheet["Status"].str.contains("COVID", na=False)
is_pl = samplesheet["Status"].str.contains("PL", na=False)

# .copy() gives an independent frame, so the column assignments below do not
# trigger SettingWithCopyWarning.
samplesheet = samplesheet[has_icu_status & is_covid & ~is_pl].copy()

# .map yields a numeric column directly. Series.replace on string data relies on
# silent object -> int downcasting, which is deprecated since pandas 2.2; without
# it the columns stay object dtype and cannot be used as regression inputs.
# Any Sex code other than "F"/"M" becomes NaN.
samplesheet["Sex"] = samplesheet["Sex"].map({"F": 1, "M": 0})
samplesheet["ICU"] = samplesheet["ICU"].map({"non-ICU": 0, "ICU": 1})

# Constant column used as the intercept term of the regression design matrix.
samplesheet["intercept"] = 1
samplesheet

In [None]:
# Number of samples in each ICU category.
samplesheet["ICU"].value_counts()

In [None]:
# Read only the columns whose names are in the sample sheet index
# (presumably one column per sample - confirm against the parquet schema).
mynorm = pd.read_parquet(
    path="../data/processed/CorrectedMyNorms/mynorm.parquet",
    columns=samplesheet.index,
)

In [None]:
# The row labels of COVSpecificDMPs.csv are the CpGs to analyse; restrict the
# methylation matrix to those rows.
dmp_table = pd.read_csv("../Files/COVSpecificDMPs.csv", index_col=0)
cpgs = dmp_table.index
mynorm = mynorm.loc[cpgs]

In [None]:
# For each Status group separately, fit one logistic regression per CpG:
#     ICU ~ intercept + Sex + Age + CpG
# and keep the p-value of the CpG term. P-values are Benjamini-Hochberg
# corrected within each group, and the per-group tables are stacked in `results`.
covariate_cols = ["intercept", "Sex", "Age"]

frames = []
for group in samplesheet.Status.unique():

    samples = samplesheet[samplesheet.Status == group].index
    temp_mynorm = mynorm[samples].T  # rows: samples of this group, columns: CpGs

    # Outcome and covariates are identical for every CpG of the group, so they
    # are sliced once here rather than inside the inner loop.
    outcome = samplesheet.loc[samples, "ICU"]
    covariates = samplesheet.loc[samples, covariate_cols]

    records = []
    for cpg in cpgs:
        # Design matrix: the shared covariates plus this CpG's values.
        exog = pd.concat((covariates, temp_mynorm[cpg]), axis=1)

        # disp=False silences the optimiser report that fit() would otherwise
        # print for every single CpG x group model.
        fitted = sts.Logit(endog=outcome, exog=exog).fit(disp=False)

        records.append(
            {"CpG": cpg, "Group": group, "p-value": fitted.pvalues.loc[cpg]}
        )

    results_df = pd.DataFrame(records)
    # multipletests returns (reject, corrected p-values, ...); only the
    # corrected p-values are kept.
    _, results_df["FDR"], _, _ = mp(results_df["p-value"], method="fdr_bh")
    frames.append(results_df)

results = pd.concat(frames)

In [None]:
# Keep the CpGs that pass FDR <= 0.05 in every tested group. The required count
# is derived from the results table rather than hard-coded, so the selection
# stays correct if the number of Status groups in the sample sheet changes.
n_groups = results["Group"].nunique()

significant_counts = results[results["FDR"] <= 0.05].CpG.value_counts()
selected_cpgs = significant_counts[significant_counts == n_groups].index

In [None]:
# Long-format table for plotting: one row per (sample, CpG) pair holding the
# sample's ICU label and the value of that CpG.
icu_labels = {0: "non-ICU", 1: "ICU"}

df = (
    pd.concat([mynorm.loc[selected_cpgs].T, samplesheet[["ICU"]]], axis=1)
    .melt(id_vars="ICU", var_name="CpG", value_name="β-value")
    # Turn the 0/1 coding back into readable labels.
    .assign(ICU=lambda frame: frame["ICU"].replace(icu_labels))
)
df

In [None]:
# One facet per selected CpG, coloured by ICU label.
# NOTE(review): `boxplot` and `pal` come from src/; presumably the figure is
# written to `path` - confirm in src.figures.
boxplot(
    df,
    # data mapping
    y="β-value",
    y_range=[0, 1],
    facet_col="CpG",
    color_column="ICU",
    color_discrete_map=pal,
    # typography
    facet_font_size=20,
    tick_font_size=18,
    # output
    path="../Plots/ICUassociatedProbes.png",
)