In [None]:
# Visualisation of COVID-19 specific DMPs

In [None]:
import os

import numpy as np
import pandas as pd

from scipy import stats

from src.figures import *
from src.col_palette import pal

In [None]:
# Paths of the manifest and gene tables are taken from environment variables.
# Index os.environ directly (instead of .get) so that an unset variable raises
# a KeyError naming the variable, rather than handing None to read_parquet.
manifest = pd.read_parquet(os.environ["POETRY_EPIC"])
genes = pd.read_parquet(os.environ["POETRY_EPIC_genes"])

# Sample sheet, indexed by its first column; rows whose "ICU" value is "Home"
# are excluded from everything that follows.
samplesheet = pd.read_csv("../data/raw/SampleSheet.csv", index_col=0)
samplesheet = samplesheet[samplesheet["ICU"] != "Home"]

# Read only the columns named by the remaining sample-sheet index.
mynorm = pd.read_parquet(
    "../data/processed/CorrectedMyNorms/mynorm.parquet", columns=samplesheet.index
)

In [None]:
# Load the DMP table; the first CSV column becomes the index, which the
# following cells use to select rows of `mynorm`.
dmps_path = "../Files/COVSpecificDMPs.csv"
dmps = pd.read_csv(dmps_path, index_col=0)
dmps

In [None]:
# Keep only the rows labelled by the DMP index, then transpose so that those
# labels become the columns.
# NOTE: `mynorm` is overwritten here, so this cell is meant to be run once
# after the loading cell.
dmp_rows = mynorm.loc[dmps.index]
mynorm = dmp_rows.transpose()
mynorm

In [None]:
# Append the sample sheet's "Status" column alongside the existing columns
# (column-wise concatenation, aligned on the index).
status = samplesheet["Status"]
mynorm = pd.concat([mynorm, status], axis="columns")
mynorm

In [None]:
# Reshape wide -> long: one row per (Status, CpG, value) triple, with the DMP
# identifiers moved into a "CpG" column and their values into "β-values".
df = pd.melt(
    mynorm,
    id_vars="Status",
    value_vars=dmps.index,
    value_name="β-values",
    var_name="CpG",
)

In [None]:
# Build the facet label "<gene names> <CpG id>" once per distinct CpG and then
# map it onto the column, instead of recomputing and assigning it row by row.
cpg_labels = {}
for cpg in df["CpG"].unique():
    try:
        # The annotation is a ";"-separated string that may repeat a name.
        # dict.fromkeys removes repeats while keeping first-seen order; a set
        # would order the names differently from one interpreter run to the
        # next (string hash randomisation), making labels and the sort below
        # irreproducible.
        names = dmps.loc[cpg, "UCSC_RefGene_Name"].split(";")
        gene = " ".join(dict.fromkeys(names))
    except AttributeError:
        # The annotation has no .split (i.e. it is not a string, e.g. a
        # missing value): fall back to an empty gene part.
        gene = ""

    # Same "<gene> <cpg>" format as before (an empty gene part leaves a
    # leading space).
    cpg_labels[cpg] = f"{gene} {cpg}"

df["CpG"] = df["CpG"].map(cpg_labels)
df = df.sort_values(by="CpG")
df

In [None]:
# Explicit ordering of the "Status" categories handed to the plot.
status_order = [
    "COVID-19 PL",
    "COVID-19 ES",
    "COVID-19 USA 1",
    "COVID-19 USA 2",
    "Healthy controls",
    "Other-infections USA 1",
    "Other-infections USA 2",
]

# Rows containing any missing value are dropped before plotting.
plot_data = df.dropna()

# One facet per CpG label (4 per row), coloured by Status; the figure is
# written to `path`.
# NOTE(review): the file name says "noCorrectedData" while the values were read
# from the "CorrectedMyNorms" directory — confirm the intended output name.
boxplot(
    plot_data,
    y="β-values",
    color_column="Status",
    facet_col="CpG",
    facet_col_wrap=4,
    category_orders={"Status": status_order},
    width=1600,
    height=1000,
    marker_size=8,
    axis_title_font_size=20,
    facet_font_size=20,
    tick_font_size=14,
    path="../Plots/COV_specyfic_DMPs_noCorrectedData.png",
)