In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
from venn import venn

import os
import plotly.express as px
import matplotlib.pyplot as plt
from matplotlib.patches import Patch

from sklearn.manifold import TSNE

from src.figures import *
from src.col_palette import pal

In [None]:
# Load the sample sheet; the first CSV column is used as the row index.
sampleSheet = pd.read_csv(
    "../data/raw/SampleSheet.csv",
    index_col=0,
)

In [None]:
# Read the stored parquet table and show it as the cell output.
mynorm = pd.read_parquet("../data/processed/CorrectedMyNorms/mynorm.parquet")
mynorm

In [None]:
# Only the row labels of this table are kept; further down they are used to
# select columns of the transposed `mynorm` frame.
dmps = pd.read_csv(
    "../Files/ExtendedDMPS.csv",
    index_col=0,
).index
dmps

In [None]:
def tsne(df: pd.DataFrame, poi_column: str = "Status") -> pd.DataFrame:
    """Project the feature columns of ``df`` onto two t-SNE components.

    Every column except ``poi_column`` is passed to the embedding. The
    perplexity is the size of the smallest ``poi_column`` group, capped at
    ``n_samples - 1`` because scikit-learn rejects ``perplexity >= n_samples``
    (which happens when only a single group is present).

    Parameters
    ----------
    df : pd.DataFrame
        One row per sample; contains the feature columns plus ``poi_column``.
    poi_column : str, default "Status"
        Name of the label column. It is excluded from the embedding input and
        appended unchanged to the result.

    Returns
    -------
    pd.DataFrame
        Columns ``"t-SNE 1"``, ``"t-SNE 2"`` and ``poi_column``, indexed like
        ``df``.

    Raises
    ------
    KeyError
        If ``poi_column`` is not a column of ``df``.
    ValueError
        If ``df`` has fewer than two rows or ``poi_column`` holds no non-null
        label, so that no valid perplexity can be derived.
    """
    features = df.drop(columns=poi_column)
    group_sizes = df[poi_column].value_counts()
    n_samples = len(features)

    if n_samples < 2 or group_sizes.empty:
        raise ValueError(
            "t-SNE needs at least two samples and one non-null "
            f"'{poi_column}' label to derive a perplexity."
        )

    # Smallest group size, kept strictly below the number of samples.
    perplexity = min(group_sizes.min(), n_samples - 1)
    print("Perplexity: ", perplexity)

    # Named `model` so the estimator does not shadow this function's own name.
    model = TSNE(
        n_components=2,
        method="exact",
        random_state=101,  # fixed seed so repeated runs give the same layout
        perplexity=perplexity,
    )
    embedding = pd.DataFrame(
        model.fit_transform(features),
        index=df.index,
        columns=["t-SNE 1", "t-SNE 2"],
    )

    return pd.concat((embedding, df[poi_column]), axis=1)

In [None]:
# COVID-19 vs Other respiratory infections USA 1

In [None]:
# List the distinct labels found in the Status column.
sampleSheet["Status"].unique()

In [None]:
# Put the `dmps` columns of the transposed `mynorm` next to the Status labels
# of the two USA 1 groups, aligned on the index; rows with any missing value
# are then dropped.
df_ori_1 = pd.concat(
    [
        mynorm.T[dmps],
        sampleSheet.loc[
            sampleSheet["Status"].isin(
                ["COVID-19 USA 1", "Other respiratory infections USA 1"]
            ),
            "Status",
        ],
    ],
    axis=1,
).dropna()
df_ori_1.shape

In [None]:
# Scatter plot of the two t-SNE components computed from df_ori_1, coloured
# by the Status column.
scatterplot(
    tsne(df_ori_1),
    x="t-SNE 1",
    y="t-SNE 2",
    color_column="Status",
    color_discrete_map=pal,
    # The key must equal the Status value exactly. It previously read
    # "Other respiratory infection USA 1" (missing "s"), which is not one of
    # the Status values selected into df_ori_1.
    # NOTE(review): confirm that scatterplot applies `labels` to category values.
    labels={"Other respiratory infections USA 1": "Other resp. inf. USA 1"},
    path="../Plots/Cov_vs_nonCOV_extended_CpG.png",
    trendline=None,
)

In [None]:
# Cluster map of the USA 1 frame, annotated with the Status column.
# NOTE(review): `path` is presumably the output image location; confirm in the
# clustermap helper.
clustermap(
    df_ori_1,
    poi_columns=["Status"],
    colors_palette=pal,
    path="../Plots/Ext_USA1.png",
)

In [None]:
# COVID-19 vs Other respiratory infections USA 2

In [None]:
# Put the `dmps` columns of the transposed `mynorm` next to the Status labels
# of the two USA 2 groups, aligned on the index; rows with any missing value
# are then dropped.
df_ori_2 = pd.concat(
    (
        mynorm.T[dmps],
        sampleSheet[
            sampleSheet.Status.isin(
                [
                    "COVID-19 USA 2",
                    "Other respiratory infections USA 2",
                ]
            )
        ]["Status"],
    ),
    axis=1,
).dropna()
# Show the shape of the frame built in this cell (the cell used to display
# df_ori_1.shape, a copy-paste leftover).
df_ori_2.shape

In [None]:
# Scatter plot of the two t-SNE components computed from df_ori_2, coloured
# by the Status column.
scatterplot(
    tsne(df_ori_2),
    color_column="Status",
    color_discrete_map=pal,
    x="t-SNE 1",
    y="t-SNE 2",
    trendline=None,
    path="../Plots/Cov_vs_nonCOV_extendedCpGs.png",
)

In [None]:
# Cluster map of the USA 2 frame, annotated with the Status column.
# NOTE(review): `path` is presumably the output image location; confirm in the
# clustermap helper.
clustermap(
    df_ori_2,
    poi_columns=["Status"],
    colors_palette=pal,
    path="../Plots/Ext_USA2.png",
)