In [1]:
import pandas as pd

pd.set_option("display.precision", 3)
import warnings

warnings.filterwarnings("ignore")
from sklearn.cluster import KMeans
import numpy as np
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import DBSCAN

import math
import seaborn as sns
from IPython.core.display import display

sns.set_style("white")

In [2]:
# --- Configuration: result folders for the chosen covariate-handling variant ---
Cov = "_CovNotStandardized"
data_path = "data{}/".format(Cov)
# NOTE: data_path already ends with "/", so the derived paths below contain a
# doubled slash; they are kept byte-identical so previously written result files
# are still found.
score_path = "{}/Clustering_silhouette/".format(data_path)
cdf_path = "{}/CDF plots/".format(data_path)
tsne_path = "{}/TSNEplots/".format(data_path)

# --- Load the individual views (first CSV column is used as the row index) ---
# NOTE(review): the views are read from "data/" rather than from data_path --
# confirm this is intentional.
physio_view_original = pd.read_csv("data/PhysioView.csv", index_col=0)
physio_view = pd.read_csv("data/PhysioViewNormalImputed.csv", index_col=0)

contextual_view = pd.read_csv("data/ContextualViewZeroImputed.csv", index_col=0)
# Column-wise concatenation of the two views. `axis` must be passed by keyword:
# the positional form was deprecated and raises TypeError from pandas 2.0 on.
clinical_view = pd.concat([physio_view, contextual_view], axis=1)

# Episode-level table, restricted (and reordered) to the rows of clinical_view.
data_episode = pd.read_csv('../olinks/spss_mlcb_olink_episode.csv', index_col=0)
data_episode = data_episode.loc[clinical_view.index]

# Attach the two outcome columns, recoding 'no'/'yes' to 0/1.
clinical_view[["death.30.bc", "picu"]] = data_episode[["death.30.bc", "picu"]].replace(['no', 'yes'], [0, 1])

In [3]:
def get_characteristic(clinical_view, assignment):
    """Display per-cluster means of selected clinical characteristics.

    The cluster labels in ``assignment["assignment"]`` are aligned to
    ``clinical_view`` by index. For every cluster the mean of each summary
    column is computed, together with the cluster size; an extra ``"overall"``
    entry holds the means and row count over all rows of ``clinical_view``.
    The transposed table (characteristics as rows, clusters as columns) is
    shown with ``display``.

    Parameters
    ----------
    clinical_view : pandas.DataFrame
        Must contain the ``cons05.*`` columns listed below plus
        ``cons05.score``, ``ccc.summary``, ``age.at.bc``, ``sex``,
        ``death.30.bc`` and ``picu``. It is NOT modified.
    assignment : pandas.DataFrame
        Must contain an ``"assignment"`` column holding the cluster label of
        each row, indexed like ``clinical_view``.

    Returns
    -------
    None
        The table is displayed as a side effect. Nothing is returned so that
        a notebook cell ending with this call does not render the table twice.
    """
    organ_columns = [
        "cons05.resp",
        "cons05.cvs",
        "cons05.cns",
        "cons05.ren",
        "cons05.hep",
        "cons05.hem",
    ]
    summary_columns = organ_columns + [
        "cons05.score",
        "ccc.summary",
        "age.at.bc",
        "sex",
        "death.30.bc",
        "picu",
    ]

    # Work on a copy restricted to the summary columns: the caller's frame is
    # left untouched, and unrelated (possibly non-numeric) columns can no
    # longer break the group-wise mean.
    data = clinical_view[summary_columns].copy()
    data["cluster"] = assignment["assignment"]

    by_cluster = data.groupby("cluster")
    report = by_cluster[summary_columns].mean()
    # Sizes are stored as strings so they are not rendered as floats.
    report["size"] = by_cluster.size().astype(str)
    # The overall entry covers every row, including rows without a cluster label.
    report.loc["overall"] = data[summary_columns].mean().tolist() + [str(len(data))]

    # Limit the precision only while rendering instead of changing it globally.
    with pd.option_context("display.precision", 3):
        display(report.T)

# Clinical view

In [7]:
# Load the ConsensusKMeans assignments saved for the clinical view with
# KCC_space = 4 and tabulate the per-cluster characteristics.
KCC_space = 4
view = "clinical"
assignment_csv = "{}/ConsensusKMeans_{}_view_KCC_{}_assignments.csv".format(
    score_path, view, KCC_space
)
assignment = pd.read_csv(assignment_csv, index_col=0)

get_characteristic(clinical_view, assignment)

cluster,1,2,3,4,overall
cons05.resp,0.013,0.233,0.96,0.148,0.222
cons05.cvs,0.069,0.086,0.56,0.197,0.158
cons05.cns,0.019,0.078,0.4,0.049,0.09
cons05.ren,0.013,0.034,0.12,0.016,0.034
cons05.hep,0.062,0.043,0.14,0.115,0.075
cons05.hem,0.175,0.069,0.32,0.393,0.196
cons05.score,0.35,0.543,2.5,0.918,0.775
ccc.summary,0.631,0.509,1.26,1.361,0.791
age.at.bc,1783.931,1100.784,586.8,1594.934,1394.705
sex,0.606,0.647,0.64,0.623,0.625


# Contextual view

In [8]:
# Load the ConsensusKMeans assignments saved for the contextual view with
# KCC_space = 5 and tabulate the per-cluster characteristics.
KCC_space = 5
view = "contextual"
assignment_csv = "{}/ConsensusKMeans_{}_view_KCC_{}_assignments.csv".format(
    score_path, view, KCC_space
)
assignment = pd.read_csv(assignment_csv, index_col=0)

get_characteristic(clinical_view, assignment)

cluster,1,2,3,4,5,overall
cons05.resp,0.231,0.062,0.023,0.419,0.256,0.222
cons05.cvs,0.209,0.012,0.045,0.118,0.359,0.158
cons05.cns,0.066,0.049,0.0,0.086,0.218,0.09
cons05.ren,0.022,0.0,0.068,0.065,0.026,0.034
cons05.hep,0.088,0.025,0.045,0.086,0.115,0.075
cons05.hem,0.176,0.062,0.0,0.28,0.372,0.196
cons05.score,0.791,0.21,0.182,1.054,1.346,0.775
ccc.summary,1.0,0.333,0.295,1.043,1.0,0.791
age.at.bc,807.286,2296.457,670.25,1215.495,1765.936,1394.705
sex,0.626,0.58,0.705,0.602,0.654,0.625


# Physio view

In [None]:
# Load the DBSCAN assignments for the physio view with KCC_space = 3 and
# tabulate the per-cluster characteristics.
# NOTE(review): this cell sits under the physio heading but previously used
# view = "contextual" (apparently copied from the contextual cell); it now
# targets the physio view -- confirm the assignments file exists under this name.
KCC_space = 3
view = "physio"
assignment = pd.read_csv(
    "{}/DBSCAN_{}_view_KCC_{}_assignments.csv".format(
        score_path, view, KCC_space
    ),
    index_col=0,
)

get_characteristic(clinical_view, assignment)

# Proteome view

In [4]:
# Load the ConsensusKMeans assignments saved for the proteome view with
# KCC_space = 3 and tabulate the per-cluster characteristics.
KCC_space = 3
view = "proteome"
assignment_csv = "{}/ConsensusKMeans_{}_view_KCC_{}_assignments.csv".format(
    score_path, view, KCC_space
)
assignment = pd.read_csv(assignment_csv, index_col=0)

get_characteristic(clinical_view, assignment)

cluster,1,2,3,overall
cons05.resp,0.186,0.137,0.39,0.222
cons05.cvs,0.128,0.115,0.26,0.158
cons05.cns,0.045,0.069,0.19,0.09
cons05.ren,0.038,0.015,0.05,0.034
cons05.hep,0.109,0.023,0.09,0.075
cons05.hem,0.314,0.053,0.2,0.196
cons05.score,0.821,0.412,1.18,0.775
ccc.summary,0.737,0.71,0.98,0.791
age.at.bc,1672.667,1365.0,1000.0,1394.705
sex,0.622,0.641,0.61,0.625


In [5]:
# Load the ConsensusKMeans assignments saved for the proteome view with
# KCC_space = 4 and tabulate the per-cluster characteristics.
KCC_space = 4
view = "proteome"
assignment_csv = "{}/ConsensusKMeans_{}_view_KCC_{}_assignments.csv".format(
    score_path, view, KCC_space
)
assignment = pd.read_csv(assignment_csv, index_col=0)

get_characteristic(clinical_view, assignment)

cluster,1,2,3,4,overall
cons05.resp,0.297,0.158,0.337,0.09,0.222
cons05.cvs,0.132,0.142,0.235,0.115,0.158
cons05.cns,0.066,0.083,0.163,0.038,0.09
cons05.ren,0.044,0.008,0.071,0.013,0.034
cons05.hep,0.066,0.025,0.071,0.167,0.075
cons05.hem,0.11,0.05,0.173,0.551,0.196
cons05.score,0.714,0.467,1.051,0.974,0.775
ccc.summary,0.659,0.758,0.888,0.872,0.791
age.at.bc,1060.0,1234.05,1167.449,2317.885,1394.705
sex,0.637,0.608,0.643,0.615,0.625
