In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
from venn import venn

import os
import plotly.express as px
import matplotlib.pyplot as plt
from matplotlib.patches import Patch

from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from sklearn.metrics import fowlkes_mallows_score

from scipy.cluster.hierarchy import linkage, fcluster, dendrogram
import matplotlib.pyplot as plt

from src.figures import *
from src.col_palette import pal

In [None]:
# Read the sample annotations (first CSV column becomes the index) and keep
# only the rows whose ICU field is not "Home".
sampleSheet = pd.read_csv("../data/raw/SampleSheet.csv", index_col=0).loc[
    lambda sheet: sheet["ICU"] != "Home"
]

sampleSheet

In [None]:
# Load the corrected mynorm table, reading only the parquet columns whose
# names appear in the current sampleSheet index.
mynorm = pd.read_parquet(
    "../data/processed/CorrectedMyNorms/mynorm.parquet",
    columns=sampleSheet.index.tolist(),
)

mynorm

In [None]:
# Only the row labels (first CSV column) of the COV-vs-HC DMP table are kept;
# they are used below to select columns of mynorm.T.
dmps = pd.read_csv("../Files/DMPs_COV_vs_HC.csv", index_col=0).index
dmps

In [None]:
# The parquet location comes from the POETRY_EPIC environment variable.
# Index os.environ directly so that an unset variable raises a KeyError naming
# it, rather than passing None on to read_parquet.
epic = pd.read_parquet(os.environ["POETRY_EPIC"])

In [None]:
def tsne(df: pd.DataFrame, poi_column: str = "Status") -> pd.DataFrame:
    """Embed the rows of ``df`` in two dimensions with exact t-SNE.

    Parameters
    ----------
    df
        One row per sample. Every column except ``poi_column`` is passed to
        t-SNE as a feature.
    poi_column
        Name of the label column. It is excluded from the fit and appended
        unchanged to the result.

    Returns
    -------
    pd.DataFrame
        Indexed like ``df``, with columns ``"t-SNE 1"``, ``"t-SNE 2"`` and
        ``poi_column``.
    """
    # Perplexity follows the size of the smallest group in ``poi_column``.
    smallest_group = min(df[poi_column].value_counts())
    # scikit-learn requires perplexity < n_samples. A frame holding a single
    # group would otherwise request perplexity == n_samples and be rejected.
    perplexity = min(smallest_group, len(df) - 1)
    print("Perplexity: ", perplexity)

    # A fixed integer random_state keeps the embedding the same across runs.
    model = TSNE(n_components=2, method="exact", random_state=101, perplexity=perplexity)
    coords = model.fit_transform(df.drop(poi_column, axis=1))
    embedding = pd.DataFrame(coords, index=df.index, columns=["t-SNE 1", "t-SNE 2"])

    return pd.concat((embedding, df[poi_column]), axis=1)


def extract(df):
    """Return the rows with |"Delta mean"| > 0.05 and "Adj. p-value" <= 0.05."""
    large_effect = df["Delta mean"].abs().gt(0.05)
    significant = df["Adj. p-value"].le(0.05)
    return df.loc[large_effect & significant]

In [None]:
# All cohorts

In [None]:
# Join the DMP columns of the transposed mynorm table with the Status labels,
# then keep only the samples belonging to the cohorts listed below.
df = pd.concat((mynorm.T[dmps], sampleSheet[["Status"]]), axis=1).loc[
    lambda merged: merged["Status"].isin(
        [
            "COVID-19 PL",
            "COVID-19 ES",
            "COVID-19 USA 1",
            "Healthy controls",
            "COVID-19 USA 2",
        ]
    )
]

df["Status"].unique()

In [None]:
# Clustermap of the all-cohort frame, annotated by Status with the shared palette.
clustermap(
    df,
    path="../Plots/All_cov.png",
    poi_columns=["Status"],
    colors_palette=pal,
    cbar_pos=(0.02, 0.85, 0.05, 0.18),
    order_legend=[
        "COVID-19 PL",
        "COVID-19 ES",
        "COVID-19 USA 1",
        "COVID-19 USA 2",
        "Healthy controls",
    ],
)

In [None]:
# Scatter of the two t-SNE coordinates for the all-cohort frame, coloured by Status.
scatterplot(
    tsne(df),
    path="../Plots/All_CoV_vs_HC.png",
    x="t-SNE 1",
    y="t-SNE 2",
    trendline=None,
    color_column="Status",
    color_discrete_map=pal,
    category_orders={
        "Status": [
            "COVID-19 PL",
            "COVID-19 ES",
            "COVID-19 USA 1",
            "COVID-19 USA 2",
            "Healthy controls",
        ]
    },
)

In [None]:
# COVID-19 vs Other respiratory infections USA 1

In [None]:
# DMP columns plus Status, restricted to the samples whose Status is one of
# the two USA 1 groups (selected through the sample-sheet index).
df_ori_1 = pd.concat((mynorm.T[dmps], sampleSheet[["Status"]]), axis=1).loc[
    sampleSheet.loc[
        sampleSheet["Status"].isin(
            ["COVID-19 USA 1", "Other respiratory infections USA 1"]
        )
    ].index
]
df_ori_1.shape

In [None]:
# NOTE(review): the embedding is fitted with sample GSM5331997 included and the
# sample is only removed from the plotted points afterwards, whereas the
# clustermap cell for this cohort drops it before calling clustermap.
# Confirm this difference is intended.
scatterplot(
    tsne(df_ori_1).drop("GSM5331997"),  # remove this sample from the plotted points
    x="t-SNE 1",
    y="t-SNE 2",
    color_column="Status",
    color_discrete_map=pal,
    # Key spelled to match the Status value "Other respiratory infections USA 1";
    # the previous key lacked the plural "s" and matched no Status value.
    labels={"Other respiratory infections USA 1": "Other resp. inf. USA 1"},
    path="../Plots/Cov_vs_nonCOV_1.png",
    trendline=None,
)

In [None]:
# Clustermap of the USA 1 frame with sample GSM5331997 removed beforehand.
clustermap(
    df_ori_1.drop("GSM5331997"),
    path="../Plots/USA1_cov.png",
    poi_columns=["Status"],
    cbar_pos=(0.02, 0.85, 0.05, 0.18),
    colors_palette=pal,
)

In [None]:
# COVID-19 vs Other respiratory infections USA 2

In [None]:
# DMP columns plus Status, restricted to the samples whose Status is one of
# the two USA 2 groups (selected through the sample-sheet index).
df_ori_2 = pd.concat((mynorm.T[dmps], sampleSheet[["Status"]]), axis=1).loc[
    sampleSheet.loc[
        sampleSheet["Status"].isin(
            ["COVID-19 USA 2", "Other respiratory infections USA 2"]
        )
    ].index
]

In [None]:
# Number of samples in each of the two USA 2 groups.
sampleSheet.loc[
    sampleSheet["Status"].isin(
        ["COVID-19 USA 2", "Other respiratory infections USA 2"]
    ),
    "Status",
].value_counts()

In [None]:
# Scatter of the two t-SNE coordinates for the USA 2 frame, coloured by Status.
scatterplot(
    tsne(df_ori_2),
    path="../Plots/Cov_vs_nonCOV_2.png",
    x="t-SNE 1",
    y="t-SNE 2",
    trendline=None,
    color_column="Status",
    color_discrete_map=pal,
)

In [None]:
# Clustermap of the USA 2 frame, annotated by Status with the shared palette.
clustermap(
    df_ori_2,
    path="../Plots/USA2_cov.png",
    poi_columns=["Status"],
    cbar_pos=(0.02, 0.85, 0.05, 0.18),
    colors_palette=pal,
)