In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import colors
from IPython.display import display
import warnings

warnings.filterwarnings("ignore")

In [3]:
def background_gradient(s, m=None, M=None, cmap='Blues', low=0, high=0.5):
    if m is None:
        m = s.min().min()
    if M is None:
        M = s.max().max()
    rng = M - m
    norm = colors.Normalize(m - (rng * low),
                            M + (rng * high))
    normed = s.apply(norm)

    cm = plt.cm.get_cmap(cmap)
    c = normed.applymap(lambda x: colors.rgb2hex(cm(x)))
    ret = c.applymap(lambda x: 'background-color: %s' % x)
    return ret

def get_characteristic(clinical_view, assignment):
    data = clinical_view.copy()
    data["cluster"] = assignment["assignment"]
    clinical_view["cluster"] = assignment["assignment"].astype(int)
    if -1 in assignment["assignment"].tolist():
        clinical_view = clinical_view[clinical_view['cluster'] != -1]

    cons = [
        "cons05.resp",
        "cons05.cvs",
        "cons05.cns",
        "cons05.ren",
        "cons05.hep",
        "cons05.hem",
    ]
    overall_mean = clinical_view[
        cons
        + ["cons05.score", "ccc.summary", "age.at.bc", "sex", "death.30.bc", "picu"]
    ].mean()

    report = clinical_view.groupby("cluster").mean()[
        cons
        + ["cons05.score", "ccc.summary", "age.at.bc", "sex", "death.30.bc", "picu"]
    ]
    report["size"] = clinical_view.groupby("cluster").size().astype(str)
    report.loc["overall"] = overall_mean.tolist() + [str(len(clinical_view))]
    pd.set_option("display.precision", 3)
    display(report.T)

In [4]:

data_path = "data/"
score_path = "{}/Clustering_silhouette/".format(data_path)
cdf_path = "{}/CDF plots/".format(data_path)
tsne_path = "{}/TSNEplots/".format(data_path)
KCC_path = "{}/KCC/".format(data_path)

physio_view_original = pd.read_csv("data/PhysioView.csv", index_col=0)
physio_view = pd.read_csv("data/PhysioViewNormalImputed.csv", index_col=0)

contextual_view = pd.read_csv("data/ContextualViewZeroImputed.csv", index_col=0)
clinical_view = pd.concat([physio_view, contextual_view], 1)

data_episode = pd.read_csv('../olinks/spss_mlcb_olink_episode.csv', index_col=0)
data_episode = data_episode.loc[clinical_view.index]

clinical_view[["death.30.bc", "picu"]] = data_episode[["death.30.bc", "picu"]].replace(['no', 'yes'], [0, 1])

In [5]:
configs = [
    ["clinical", 4, "ConsensusKMeans"],
    ["contextual", 5, "ConsensusKMeans"],
    ["physio", 3, "DBSCAN"],
    ["proteome", 3, "ConsensusKMeans"],
    ["proteome", 4, "ConsensusKMeans"],
]


# cluster overlapping

In [30]:
for i in range(len(configs)):
    for j in range(len(configs)):
        if j > i:
            view, KCC_space, method1 = configs[i]
            assignment1 = pd.read_csv(
                "{}/{}_{}_view_KCC_{}_assignments.csv".format(
                    score_path, method1, view, KCC_space
                ),
                index_col=0,
            )
            if method1 == "DBSCAN":
                assignment1["assignment"] = assignment1["assignment"] + 1
                assignment1 = assignment1[assignment1["assignment"] != 0]

            view, KCC_space, method2 = configs[j]
            assignment2 = pd.read_csv(
                "{}/{}_{}_view_KCC_{}_assignments.csv".format(
                    score_path, method2, view, KCC_space
                ),
                index_col=0,
            )
            if method2 == "DBSCAN":
                assignment2["assignment"] = assignment2["assignment"] + 1
                assignment2 = assignment2[assignment2["assignment"] != 0]

            agreement_mat = pd.DataFrame(
                index=[
                    "{} {}".format(method1, i)
                    for i in sorted(assignment1["assignment"].unique())
                ],
                columns=[
                    "{} {}".format(method2, i)
                    for i in sorted(assignment2["assignment"].unique())
                ],
            )
            print(
                "{} view, KCC {}, {} / {} view, KCC {}, {}".format(
                    *(configs[i] + configs[j])
                )
            )

            for cluster1 in sorted(assignment1["assignment"].unique()):
                for cluster2 in sorted(assignment2["assignment"].unique()):
                    index_1 = assignment1[assignment1["assignment"] == cluster1].index
                    index_2 = assignment2[assignment2["assignment"] == cluster2].index
                    len_overlap = len(list(set(index_1).intersection(set(index_2))))
                    len_union = len(set(index_1).union(set(index_2)))
                    agreement_mat.loc[
                        "{} {}".format(method1, cluster1), "{} {}".format(method2, cluster2)
                    ] = (len_overlap / len_union)
            agreement_mat = agreement_mat.astype(float)
            display(
                agreement_mat.style.set_precision(3).apply(
                    background_gradient, axis=None
                )
            )
            print("-" * 80)

clinical view, KCC 4, ConsensusKMeans / contextual view, KCC 5, ConsensusKMeans


Unnamed: 0,ConsensusKMeans 1,ConsensusKMeans 2,ConsensusKMeans 3,ConsensusKMeans 4,ConsensusKMeans 5
ConsensusKMeans 1,0.136,0.289,0.159,0.059,0.167
ConsensusKMeans 2,0.163,0.113,0.096,0.188,0.115
ConsensusKMeans 3,0.137,0.031,0.011,0.083,0.153
ConsensusKMeans 4,0.109,0.022,0.01,0.294,0.053


--------------------------------------------------------------------------------
clinical view, KCC 4, ConsensusKMeans / physio view, KCC 3, DBSCAN


Unnamed: 0,DBSCAN 1,DBSCAN 2,DBSCAN 3,DBSCAN 4
ConsensusKMeans 1,0.158,0.37,0.17,0.01
ConsensusKMeans 2,0.367,0.137,0.077,0.031
ConsensusKMeans 3,0.019,0.0,0.016,0.772
ConsensusKMeans 4,0.062,0.114,0.268,0.0


--------------------------------------------------------------------------------
clinical view, KCC 4, ConsensusKMeans / proteome view, KCC 3, ConsensusKMeans


Unnamed: 0,ConsensusKMeans 1,ConsensusKMeans 2,ConsensusKMeans 3
ConsensusKMeans 1,0.239,0.293,0.145
ConsensusKMeans 2,0.209,0.199,0.149
ConsensusKMeans 3,0.079,0.058,0.2
ConsensusKMeans 4,0.179,0.079,0.095


--------------------------------------------------------------------------------
clinical view, KCC 4, ConsensusKMeans / proteome view, KCC 4, ConsensusKMeans


Unnamed: 0,ConsensusKMeans 1,ConsensusKMeans 2,ConsensusKMeans 3,ConsensusKMeans 4
ConsensusKMeans 1,0.131,0.261,0.157,0.19
ConsensusKMeans 2,0.225,0.186,0.151,0.072
ConsensusKMeans 3,0.093,0.076,0.175,0.032
ConsensusKMeans 4,0.086,0.077,0.089,0.198


--------------------------------------------------------------------------------
contextual view, KCC 5, ConsensusKMeans / physio view, KCC 3, DBSCAN


Unnamed: 0,DBSCAN 1,DBSCAN 2,DBSCAN 3,DBSCAN 4
ConsensusKMeans 1,0.141,0.149,0.132,0.127
ConsensusKMeans 2,0.097,0.287,0.059,0.039
ConsensusKMeans 3,0.157,0.127,0.008,0.011
ConsensusKMeans 4,0.14,0.095,0.192,0.116
ConsensusKMeans 5,0.139,0.068,0.169,0.122


--------------------------------------------------------------------------------
contextual view, KCC 5, ConsensusKMeans / proteome view, KCC 3, ConsensusKMeans


Unnamed: 0,ConsensusKMeans 1,ConsensusKMeans 2,ConsensusKMeans 3
ConsensusKMeans 1,0.118,0.168,0.209
ConsensusKMeans 2,0.191,0.122,0.124
ConsensusKMeans 3,0.075,0.182,0.021
ConsensusKMeans 4,0.251,0.098,0.135
ConsensusKMeans 5,0.136,0.161,0.134


--------------------------------------------------------------------------------
contextual view, KCC 5, ConsensusKMeans / proteome view, KCC 4, ConsensusKMeans


Unnamed: 0,ConsensusKMeans 1,ConsensusKMeans 2,ConsensusKMeans 3,ConsensusKMeans 4
ConsensusKMeans 1,0.103,0.172,0.204,0.07
ConsensusKMeans 2,0.154,0.123,0.133,0.104
ConsensusKMeans 3,0.08,0.163,0.029,0.061
ConsensusKMeans 4,0.165,0.098,0.117,0.196
ConsensusKMeans 5,0.097,0.145,0.135,0.122


--------------------------------------------------------------------------------
physio view, KCC 3, DBSCAN / proteome view, KCC 3, ConsensusKMeans


Unnamed: 0,ConsensusKMeans 1,ConsensusKMeans 2,ConsensusKMeans 3
DBSCAN 1,0.192,0.216,0.134
DBSCAN 2,0.171,0.293,0.131
DBSCAN 3,0.267,0.06,0.117
DBSCAN 4,0.089,0.058,0.189


--------------------------------------------------------------------------------
physio view, KCC 3, DBSCAN / proteome view, KCC 4, ConsensusKMeans


Unnamed: 0,ConsensusKMeans 1,ConsensusKMeans 2,ConsensusKMeans 3,ConsensusKMeans 4
DBSCAN 1,0.232,0.197,0.155,0.038
DBSCAN 2,0.131,0.263,0.149,0.109
DBSCAN 3,0.062,0.052,0.105,0.383
DBSCAN 4,0.101,0.075,0.164,0.04


--------------------------------------------------------------------------------
proteome view, KCC 3, ConsensusKMeans / proteome view, KCC 4, ConsensusKMeans


Unnamed: 0,ConsensusKMeans 1,ConsensusKMeans 2,ConsensusKMeans 3,ConsensusKMeans 4
ConsensusKMeans 1,0.534,0.004,0.004,0.41
ConsensusKMeans 2,0.005,0.832,0.046,0.03
ConsensusKMeans 3,0.021,0.023,0.784,0.023


--------------------------------------------------------------------------------


In [26]:
view, KCC_space, method1 = configs[2]
assignment1 = pd.read_csv(
    "{}/{}_{}_view_KCC_{}_assignments.csv".format(
        score_path, method1, view, KCC_space
    ),
    index_col=0,
)
if method == "DBSCAN":
    assignment1["assignment"] = assignment1["assignment"] + 1
    assignment1 = assignment1[assignment1["assignment"] != 0]


In [29]:
assignment1[assignment1["assignment"] != 0]

Unnamed: 0_level_0,assignment
sample.id,Unnamed: 1_level_1
BE-003,1
BE-004,2
BE-005,1
BE-007,1
BE-008,2
...,...
ZH-244,3
ZH-246,2
ZH-255,3
ZH-257,1


# cluster characteristics

In [20]:
configs = [
    ["clinical", 4, "ConsensusKMeans"],
    ["contextual", 5, "ConsensusKMeans"],
    ["physio", 3, "DBSCAN"],
    ["proteome", 3, "ConsensusKMeans"],
    ["proteome", 4, "ConsensusKMeans"],
]
for i in range(len(configs)):
    view, KCC_space, method = configs[i]
    
    assignment = pd.read_csv(
        "{}/{}_{}_view_KCC_{}_assignments.csv".format(
            score_path, method, view, KCC_space
        ),
        index_col=0,
    )
    if method == "DBSCAN":
        assignment["assignment"] = assignment["assignment"] + 1
        assignment = assignment[assignment["assignment"] != 1]
    assignment['assignment'] = assignment['assignment'].astype(int)
    print('{} view, KCC {}, {}'.format(*configs[i]))
    get_characteristic(clinical_view, assignment)
    print("-" * 80)

clinical view, KCC 4, ConsensusKMeans


cluster,1,2,3,4,overall
cons05.resp,0.013,0.233,0.96,0.148,0.222
cons05.cvs,0.069,0.086,0.56,0.197,0.158
cons05.cns,0.019,0.078,0.4,0.049,0.09
cons05.ren,0.013,0.034,0.12,0.016,0.034
cons05.hep,0.062,0.043,0.14,0.115,0.075
cons05.hem,0.175,0.069,0.32,0.393,0.196
cons05.score,0.35,0.543,2.5,0.918,0.775
ccc.summary,0.631,0.509,1.26,1.361,0.791
age.at.bc,1783.931,1100.784,586.8,1594.934,1394.705
sex,0.606,0.647,0.64,0.623,0.625


--------------------------------------------------------------------------------
contextual view, KCC 5, ConsensusKMeans


cluster,1,2,3,4,5,overall
cons05.resp,0.231,0.062,0.023,0.419,0.256,0.222
cons05.cvs,0.209,0.012,0.045,0.118,0.359,0.158
cons05.cns,0.066,0.049,0.0,0.086,0.218,0.09
cons05.ren,0.022,0.0,0.068,0.065,0.026,0.034
cons05.hep,0.088,0.025,0.045,0.086,0.115,0.075
cons05.hem,0.176,0.062,0.0,0.28,0.372,0.196
cons05.score,0.791,0.21,0.182,1.054,1.346,0.775
ccc.summary,1.0,0.333,0.295,1.043,1.0,0.791
age.at.bc,807.286,2296.457,670.25,1215.495,1765.936,1394.705
sex,0.626,0.58,0.705,0.602,0.654,0.625


--------------------------------------------------------------------------------
physio view, KCC 3, DBSCAN


cluster,0.0,2.0,3.0,4.0,overall
cons05.resp,0.316,0.04,0.099,0.941,0.222
cons05.cvs,0.158,0.024,0.136,0.529,0.158
cons05.cns,0.053,0.008,0.025,0.412,0.09
cons05.ren,0.0,0.008,0.0,0.137,0.034
cons05.hep,0.053,0.04,0.16,0.098,0.075
cons05.hem,0.158,0.056,0.556,0.235,0.196
cons05.score,0.737,0.176,0.975,2.353,0.775
ccc.summary,1.0,0.432,1.358,1.157,0.791
age.at.bc,1569.263,1830.84,1840.333,582.647,1394.705
sex,0.316,0.688,0.58,0.647,0.625


--------------------------------------------------------------------------------
proteome view, KCC 3, ConsensusKMeans


cluster,1,2,3,overall
cons05.resp,0.186,0.137,0.39,0.222
cons05.cvs,0.128,0.115,0.26,0.158
cons05.cns,0.045,0.069,0.19,0.09
cons05.ren,0.038,0.015,0.05,0.034
cons05.hep,0.109,0.023,0.09,0.075
cons05.hem,0.314,0.053,0.2,0.196
cons05.score,0.821,0.412,1.18,0.775
ccc.summary,0.737,0.71,0.98,0.791
age.at.bc,1672.667,1365.0,1000.0,1394.705
sex,0.622,0.641,0.61,0.625


--------------------------------------------------------------------------------
proteome view, KCC 4, ConsensusKMeans


cluster,1,2,3,4,overall
cons05.resp,0.297,0.158,0.337,0.09,0.222
cons05.cvs,0.132,0.142,0.235,0.115,0.158
cons05.cns,0.066,0.083,0.163,0.038,0.09
cons05.ren,0.044,0.008,0.071,0.013,0.034
cons05.hep,0.066,0.025,0.071,0.167,0.075
cons05.hem,0.11,0.05,0.173,0.551,0.196
cons05.score,0.714,0.467,1.051,0.974,0.775
ccc.summary,0.659,0.758,0.888,0.872,0.791
age.at.bc,1060.0,1234.05,1167.449,2317.885,1394.705
sex,0.637,0.608,0.643,0.615,0.625


--------------------------------------------------------------------------------
