In [None]:
# Visualization of methylation and expression between COVID-19 and non-COVID-19 patients

In [None]:
from IPython.display import display
import os

from plotly.subplots import make_subplots
from sklearn.preprocessing import StandardScaler


import plotly.graph_objects as go
import plotly.express as px
import scipy.stats as sts
import seaborn as sns
import pandas as pd

from src.utils import extract, extract_probes_in_specific_region
from src.col_palette import pal
from src.figures import *

In [None]:
# Load mynorm

In [None]:
# Read the sample sheet, using its first CSV column as the index.
samplesheet = pd.read_csv(
    "../data/raw/SampleSheet.csv",
    index_col=0,
)
# Display the distinct values found in the Status column.
samplesheet["Status"].unique()

In [None]:
# Keep only the samples whose Status is one of the two "USA 1" groups.
usa_statuses = ["COVID-19 USA 1", "Other respiratory infections USA 1"]
samplesheet = samplesheet[samplesheet["Status"].isin(usa_statuses)]

# Read only the columns named after the retained samples from the mynorm parquet file.
selected_samples = samplesheet.index.tolist()
usa_myNorm = pd.read_parquet(
    "../data/processed/CorrectedMyNorms/mynorm.parquet",
    columns=selected_samples,
)
usa_myNorm

In [None]:
# Load differentially methylated genes

In [None]:
# Path of the EPIC manifest parquet file is taken from the POETRY_EPIC
# environment variable. Fail early with an explicit message when it is not set,
# instead of handing None to the parquet reader.
epic_manifest_path = os.environ.get("POETRY_EPIC")
if not epic_manifest_path:
    raise RuntimeError(
        "Environment variable POETRY_EPIC is not set; "
        "it must point to the EPIC manifest parquet file."
    )

# Only the two gene-annotation columns are used, so read just those columns
# rather than loading the whole file and discarding the rest.
epic = pd.read_parquet(
    epic_manifest_path,
    columns=["UCSC_RefGene_Name", "UCSC_RefGene_Group"],
)

In [None]:
# Read the DEG table (first CSV column as index) and keep its "Gene" column
# as a plain Python list.
degs_table = pd.read_csv("../Files/DEGs_COV_OI.csv", index_col=0)
genes_set = degs_table["Gene"].tolist()
genes_set

In [None]:
# Load expression data

In [None]:
# Read the tab-separated TPM table (first column as index) and transpose it,
# so that the rows of the file become the columns of ttable.
ttable = pd.read_table(
    "../Expression/GSE157103_genes.tpm.tsv",
    index_col=0,
).transpose()

In [None]:
# Convert sample names in mynorm

In [None]:
# Build a mapping from the annotation table's index to the second
# comma-separated field of each row's "description" value.
annotations = pd.read_csv("../data/Annotations/GSE174818.csv", index_col=0)
second_field = annotations["description"].map(lambda text: text.split(",")[1])

sample_names = second_field.to_dict()

In [None]:
# Plot methylation vs expression

In [None]:
# Relabel the methylation columns using the sample_names mapping.
# Labels that have no entry in the mapping are kept unchanged. This makes the
# cell safe to re-run: once the columns have been converted, a second run leaves
# them as they are (as long as the converted names are not themselves keys of
# the mapping) instead of replacing every label with NaN.
usa_myNorm.columns = usa_myNorm.columns.map(
    lambda label: sample_names.get(label, label)
)
usa_myNorm

In [None]:
# Read the probe table (first CSV column as index), drop rows containing
# missing values and keep only the "Genes" column as a one-column frame.
cov_specific = pd.read_csv("../Files/COVSpecificGenes.csv", index_col=0).dropna()
probes = cov_specific[["Genes"]]

# Retain only the probes whose gene appears in genes_set.
in_gene_set = probes["Genes"].isin(genes_set)
probes = probes.loc[in_gene_set]
probes

In [None]:
# Combine expression and methylation into one frame with one row per sample.
#
# The selections below use a boolean mask / an Index rather than Python sets:
# pandas rejects sets as indexers in recent versions, and set iteration order
# would make the resulting column order arbitrary.

# Expression columns of ttable whose name is one of the selected genes.
is_selected_gene = ttable.columns.isin(probes.Genes)
# Each selected probe once, in the order it appears in the probes table.
probe_ids = probes.index.unique()

df = pd.concat(
    (
        ttable.loc[:, is_selected_gene],
        # Transposed so that samples are rows and probes are columns,
        # matching the orientation of ttable.
        usa_myNorm.loc[probe_ids, :].T,
    ),
    axis=1,
).dropna()  # keep only rows with no missing value in any column

# Add TYPE column with disease status, derived from the first letter of the
# row label ("C..." -> COVID-19, anything else -> other respiratory infections).
df["TYPE"] = [
    "COVID-19 USA 1" if name.startswith("C") else "Other respiratory infections USA 1"
    for name in df.index
]
df

In [None]:
# One title per row of probes: the gene name followed by the probe id,
# separated by a single space.
titles = [" ".join((gene, cpg)) for cpg, gene in probes.Genes.items()]
len(titles)

In [None]:
# Display the distinct disease-status labels present in df.
df["TYPE"].unique()

In [None]:
# Two-row figure with one column per (CpG probe, gene) pair:
#   row 1 - box plot of the gene's expression for each patient group
#   row 2 - scatter of the probe's methylation (x) against the gene's expression (y)

# Decide up-front which pairs can be drawn, so that the number of subplot
# columns and the column titles always match the panels that are actually
# filled (pairs whose gene or probe is missing from df are skipped).
plotted_pairs = [
    (cpg, gene)
    for cpg, gene in probes.Genes.items()
    if gene in df.columns and cpg in df.columns
]
if not plotted_pairs:
    raise ValueError(
        "None of the selected probes has both its gene and its probe column in df; "
        "nothing to plot."
    )

column_titles = [f"{gene} {cpg}" for cpg, gene in plotted_pairs]

fig = make_subplots(
    rows=2,
    cols=len(plotted_pairs),
    column_titles=column_titles,
    horizontal_spacing=0.027,
)

# The per-group subsets do not depend on the pair being drawn; compute them once.
data_by_type = {
    patient_type: df[df["TYPE"] == patient_type]
    for patient_type in df.TYPE.unique()
}

for col_idx, (cpg, gene) in enumerate(plotted_pairs, start=1):
    for patient_type, group_data in data_by_type.items():
        # Add expression box
        fig.add_trace(
            go.Box(
                x=[patient_type] * len(group_data[gene]),
                y=group_data[gene],
                boxmean=True,
                marker=dict(size=8),
                marker_color=pal.get(patient_type),
                showlegend=False,
            ),
            row=1,
            col=col_idx,
        )

        # Add scatter
        fig.add_trace(
            go.Scatter(
                x=group_data[cpg],
                y=group_data[gene],
                mode="markers",
                name=patient_type,
                marker=dict(size=8),
                marker_color=pal.get(patient_type),
                # Legend entries are emitted for the first column only, so each
                # patient group appears once in the legend.
                showlegend=(col_idx == 1),
            ),
            row=2,
            col=col_idx,
        )

fig.update_layout(
    height=750, width=1800, title_text="", legend_font_size=19, font=dict(size=16)
)

# Axis titles are set on the first column only.
fig.update_yaxes(title_text="Expression levels [TPM]", row=1, col=1)

fig.update_yaxes(title_text="Expression levels [TPM]", row=2, col=1)
fig.update_xaxes(title_text="Methylation levels [β-values]", row=2, col=1)

fig.for_each_xaxis(lambda axis: axis.title.update(font=dict(size=16)))
fig.for_each_yaxis(lambda axis: axis.title.update(font=dict(size=16)))

# Hide x tick labels and x titles on every box-plot panel (the whole first row).
fig.update_xaxes(title_text="", showticklabels=False, row=1)


fig.write_image("../Plots/ExpressionMethylationLevels_COV_VS_OI.png", scale=2)
fig.show(renderer="browser")