In [None]:
# Notebook: Analysis of association between expression and methylation
# Description: Notebook contains an analysis of gene expression [TPMs] and methylation [beta-values] for COVID-19 and non-COVID-19 patients [USA]
# Data collected from: GSE157103 [expression data] and  GSE174818 [methylation]

In [None]:
from IPython.display import display
import os

from sklearn.preprocessing import StandardScaler
import scipy.stats as sts
import seaborn as sns
import pandas as pd

from src.utils import extract, extract_probes_in_specific_region
from src.col_palette import pal
from src.figures import *

In [None]:
# Load mynorm

In [None]:
# Read the sample sheet, using the first CSV column as the index, then show
# the distinct values present in its "Status" column.
samplesheet = pd.read_csv(
    "../data/raw/SampleSheet.csv",
    index_col=0,
)
samplesheet["Status"].unique()

In [None]:
# Keep only the rows whose Status is exactly "COVID-19 USA 1".
samplesheet = samplesheet.loc[samplesheet["Status"] == "COVID-19 USA 1"]

In [None]:
# Load differentially methylated genes

In [None]:
# Location of the EPIC manifest (parquet) is supplied through the POETRY_EPIC
# environment variable. Fail early with an explicit message when it is unset
# instead of handing `None` to pandas and getting an obscure reader error.
epic_manifest_path = os.environ.get("POETRY_EPIC")
if not epic_manifest_path:
    raise RuntimeError(
        "Environment variable POETRY_EPIC is not set; "
        "it must point to the EPIC manifest parquet file."
    )

# Only two annotation columns are needed, so ask the parquet reader for just
# those rather than loading the whole manifest and slicing afterwards.
epic = pd.read_parquet(
    epic_manifest_path,
    columns=["UCSC_RefGene_Name", "UCSC_RefGene_Group"],
)

In [None]:
# Values of the "Gene" column of the DEG table, as a plain Python list.
genes_set = pd.read_csv(
    "../Files/DEGs_ICU_nonICU.csv",
    index_col=0,
)["Gene"].tolist()
genes_set

In [None]:
# Load expression data

In [None]:
# Read the TPM table and transpose it, so the file's columns become rows and
# its first column (used as index) becomes the column labels.
ttable = pd.read_table(
    "../Expression/GSE157103_genes.tpm.tsv",
    index_col=0,
).transpose()

# Restrict to the genes listed in `genes_set`; a gene missing from the table
# raises KeyError.
ttable = ttable.loc[:, genes_set]

In [None]:
# Convert sample names in mynorm

In [None]:
# From the GSE174818 annotation table, take the "description" column and keep
# the second comma-separated field of every entry (surrounding whitespace is
# left untouched).
sample_names = pd.read_csv("../data/Annotations/GSE174818.csv", index_col=0)[
    "description"
].map(lambda text: text.split(",")[1])

# Turn the Series into a plain {index label: extracted field} dictionary.
sample_names = {
    label: field for label, field in zip(sample_names.index, sample_names.values)
}

In [None]:
# Clinical supplement: skip the first spreadsheet row, index by the second
# column, and keep only the ICU flag, renamed to "TYPE".
clinical = pd.read_excel(
    "../data/Annotations/GSE174818_supplement.xlsx",
    skiprows=1,
    index_col=1,
)
clinical = clinical[["ICU (1=yes)"]].rename(columns={"ICU (1=yes)": "TYPE"})

# Align expression and ICU flag on the index; rows with any missing value
# (e.g. labels present in only one of the two tables) are dropped.
df = pd.concat([ttable, clinical], axis=1).dropna()

# Replace the numeric ICU flag with readable group labels.
df["TYPE"] = df["TYPE"].replace({0: "non-ICU", 1: "ICU"})

# Keep only rows whose index label starts with "C"
# (presumably the COVID-19 samples - TODO confirm against the naming scheme).
starts_with_c = [label.startswith("C") for label in df.index]
df = df.loc[starts_with_c]
df

In [None]:
# Reshape to long format: one row per (row, gene) pair, with "TYPE" kept as
# the identifier column. NOTE: this overwrites `df`, so the cell should be run
# only once per kernel session.
df = pd.melt(
    df,
    id_vars="TYPE",
    var_name="Gene",
    value_name="Expression levels [TPM]",
)
df

In [None]:
# Box plot of expression per gene, split by ICU status.
# `boxplot` comes from the star import of src.figures; the meaning of each
# keyword below is inferred from its name - confirm against that module.
boxplot(
    df,
    # data mapping
    y="Expression levels [TPM]",
    color_column="TYPE",
    facet_col="Gene",
    sharey=False,
    # appearance
    color_discrete_map=pal,
    facet_font_size=18,
    tick_font_size=18,
    spacing=0.05,
    width=1200,
    height=400,
    # output location (presumably where the figure is written)
    path="../Plots/Expression_ICU_NON_ICU.png",
)