In [1]:
import pandas as pd

pd.set_option("display.precision", 3)
import os
import warnings

warnings.filterwarnings("ignore")
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from ConsensusClusteringSingleView import ConsensusCluster
import scipy.stats as sps
import copy
from tqdm import tqdm
from sklearn.manifold import TSNE
from sklearn.metrics import silhouette_score
from sklearn.cluster import DBSCAN
from tqdm import tqdm

from pathlib import Path
from itertools import product
import matplotlib.cm as cm
import matplotlib.lines as mlines
from sklearn.cluster import AgglomerativeClustering
from scipy.cluster.hierarchy import dendrogram, linkage

In [2]:
# Root folder that holds every input and output of this notebook.
data_path = "data/"

# Each sub-folder path is rendered as "<data_path>/<name>/". Because data_path
# already ends with "/", the rendered strings contain a doubled separator
# (e.g. "data//KCC/").
(
    kcc_path,
    score_path,
    cdf_path,
    tsne_path,
    Hierachical_plot_path,
) = (
    template.format(data_path)
    for template in (
        "{}/KCC/",
        "{}/ClusteringResults/",
        "{}/CDF_plots/",
        "{}/TSNE_plots/",
        "{}/HierachicalClustering_plots/",
    )
)

# Standardized contextual view, read with its first column as the index.
# NOTE(review): presumably kept as the reference set of sample ids - confirm.
refer_id = pd.read_csv("data/ContextualViewStandardized.csv", index_col=0)


In [3]:
def plot_embedding(TSNE_emb, labels, savename, savepath, title=None):
    """Scatter-plot a 2-D embedding coloured by cluster label and save it to disk.

    Parameters
    ----------
    TSNE_emb : pandas.DataFrame
        Embedding to draw. Its first two columns (read positionally through
        ``.iloc``) are used as the x and y coordinates.
    labels : sequence
        One cluster label per row of ``TSNE_emb``. The labels are ranked with
        ``np.unique``; the k-th smallest distinct label is drawn in the k-th
        colour and listed in the legend as "Cluster k". The labels therefore
        do not have to be the contiguous integers ``0..k-1``.
    savename : str
        File name of the image.
    savepath : str
        Prefix the file name is appended to (``savepath + savename``), so a
        directory must end with a path separator. The directory part of the
        resulting path is created when it does not exist yet.
    title : str, optional
        Figure title. When omitted, the title is built from the module-level
        variables ``view`` and ``KCC_space``, which must then be defined
        before the call.
    """
    # Map every label to its rank among the distinct labels. Indexing the
    # colour table with the raw label values breaks for 1-based or
    # non-contiguous labels and silently wraps around for negative ones.
    unique_labels, label_rank = np.unique(
        np.asarray(labels).ravel(), return_inverse=True
    )
    n_clusters = len(unique_labels)
    palette = cm.rainbow(np.linspace(0, 1, n_clusters))
    colors = palette[label_rank]

    # One legend entry per cluster, numbered from 1 in rank order.
    legends = [
        mlines.Line2D(
            [],
            [],
            color=palette[rank],
            marker=".",
            linestyle="None",
            markersize=10,
            label="Cluster {}".format(rank + 1),
        )
        for rank in range(n_clusters)
    ]

    if title is None:
        # Default title; reads the notebook-level `view` and `KCC_space`.
        title = "Hierarchical: {} view, KCC space = {}".format(view, KCC_space)

    # savefig does not create missing folders, so make sure the target exists.
    target = savepath + savename
    target_dir = os.path.dirname(target)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    fig, ax = plt.subplots(figsize=(12, 9))
    try:
        ax.scatter(
            TSNE_emb.iloc[:, 0],
            TSNE_emb.iloc[:, 1],
            color=colors,
            s=10,
            marker="*",
        )
        ax.set_title(title, fontsize=20)
        ax.legend(handles=legends, fontsize=10)
        fig.savefig(target, dpi=200)
    finally:
        # Always release the figure, even when drawing or saving fails.
        plt.close(fig)



In [31]:
# Contextual view, DBSCAN result for KCC space = 6: load the stored t-SNE
# embedding and the ranked assignments, then plot the embedding by cluster.
view = "contextual"
KCC_space = 6
method = "DBSCAN"

file_parts = (score_path, method, view, KCC_space)
TSNE_emb = pd.read_csv(
    "{}/{}_{}_view_KCC_{}_TSNE_embeddings.csv".format(*file_parts),
    index_col=0,
)
assignments = pd.read_csv(
    "{}/{}_{}_view_KCC_{}_assignments_ranked.csv".format(*file_parts),
    index_col=0,
)

# Drop the rows assigned to -1 from both frames (presumably DBSCAN's noise
# label - confirm). The mask is aligned to TSNE_emb through the shared index.
is_clustered = assignments["assignment"] != -1
TSNE_emb = TSNE_emb[is_clustered]
assignments = assignments[is_clustered]

cluster_labels = assignments["assignment"].tolist()
num_clsuster = len(np.unique(cluster_labels))
savename = "{}_{}_view_cluster_{}.png".format(method, view, num_clsuster)

plot_embedding(TSNE_emb, cluster_labels, savename, Hierachical_plot_path)

In [32]:
# Re-cluster the current t-SNE embedding (`TSNE_emb`, together with `method`
# and `view`, must already be defined by an earlier cell) with Ward-linkage
# agglomerative clustering for several cluster counts, saving one plot each.
# NOTE(review): the file name reuses `method`, so a hierarchical plot can
# overwrite an earlier plot saved with the same method/view/cluster count.
for num_clsuster in [3, 4, 5, 6]:
    # The `affinity` keyword was deprecated in scikit-learn 1.2 and removed in
    # 1.4. Ward linkage only works with Euclidean distances, which is the
    # default, so the argument is omitted to stay compatible with all versions.
    model = AgglomerativeClustering(n_clusters=num_clsuster, linkage="ward")
    labels = model.fit_predict(TSNE_emb.values)
    savename = "{}_{}_view_cluster_{}.png".format(method, view, num_clsuster)
    plot_embedding(TSNE_emb, labels, savename, Hierachical_plot_path)

In [34]:
# Clinical view, DBSCAN result for KCC space = 4: load the stored t-SNE
# embedding and the ranked assignments, then plot the embedding by cluster.
view = "clinical"
KCC_space = 4
method = "DBSCAN"

file_parts = (score_path, method, view, KCC_space)
TSNE_emb = pd.read_csv(
    "{}/{}_{}_view_KCC_{}_TSNE_embeddings.csv".format(*file_parts),
    index_col=0,
)
assignments = pd.read_csv(
    "{}/{}_{}_view_KCC_{}_assignments_ranked.csv".format(*file_parts),
    index_col=0,
)

# Drop the rows assigned to -1 from both frames (presumably DBSCAN's noise
# label - confirm). The mask is aligned to TSNE_emb through the shared index.
is_clustered = assignments["assignment"] != -1
TSNE_emb = TSNE_emb[is_clustered]
assignments = assignments[is_clustered]

cluster_labels = assignments["assignment"].tolist()
num_clsuster = len(np.unique(cluster_labels))
savename = "{}_{}_view_cluster_{}.png".format(method, view, num_clsuster)

plot_embedding(TSNE_emb, cluster_labels, savename, Hierachical_plot_path)

# Ward-linkage agglomerative clustering of the filtered clinical t-SNE
# embedding for 3 and 4 clusters; one plot is saved per cluster count.
# NOTE(review): the file name reuses `method`, so a hierarchical plot can
# overwrite an earlier plot saved with the same method/view/cluster count.
for num_clsuster in [3, 4]:
    # The `affinity` keyword was deprecated in scikit-learn 1.2 and removed in
    # 1.4. Ward linkage only works with Euclidean distances, which is the
    # default, so the argument is omitted to stay compatible with all versions.
    model = AgglomerativeClustering(n_clusters=num_clsuster, linkage="ward")
    labels = model.fit_predict(TSNE_emb.values)
    savename = "{}_{}_view_cluster_{}.png".format(method, view, num_clsuster)
    plot_embedding(TSNE_emb, labels, savename, Hierachical_plot_path)

In [43]:
# Proteome view, DBSCAN result for KCC space = 5: load the stored t-SNE
# embedding and the ranked assignments, then plot the embedding by cluster.
view = "proteome"
KCC_space = 5
method = "DBSCAN"

file_parts = (score_path, method, view, KCC_space)
TSNE_emb = pd.read_csv(
    "{}/{}_{}_view_KCC_{}_TSNE_embeddings.csv".format(*file_parts),
    index_col=0,
)
assignments = pd.read_csv(
    "{}/{}_{}_view_KCC_{}_assignments_ranked.csv".format(*file_parts),
    index_col=0,
)

# Drop the rows assigned to -1 from both frames (presumably DBSCAN's noise
# label - confirm). The mask is aligned to TSNE_emb through the shared index.
is_clustered = assignments["assignment"] != -1
TSNE_emb = TSNE_emb[is_clustered]
assignments = assignments[is_clustered]

cluster_labels = assignments["assignment"].tolist()
num_clsuster = len(np.unique(cluster_labels))
savename = "{}_{}_view_cluster_{}.png".format(method, view, num_clsuster)

plot_embedding(TSNE_emb, cluster_labels, savename, Hierachical_plot_path)

In [4]:
# Physio view, ConsensusKMeans result for KCC space = 4: load the stored t-SNE
# embedding and the ranked assignments, then plot the embedding by cluster.
view = "physio"
KCC_space = 4
method = "ConsensusKMeans"

file_parts = (score_path, method, view, KCC_space)
TSNE_emb = pd.read_csv(
    "{}/{}_{}_view_KCC_{}_TSNE_embeddings.csv".format(*file_parts),
    index_col=0,
)
assignments = pd.read_csv(
    "{}/{}_{}_view_KCC_{}_assignments_ranked.csv".format(*file_parts),
    index_col=0,
)

# Keep only the rows whose assignment is not -1, in both frames. The mask is
# aligned to TSNE_emb through the shared index.
is_clustered = assignments["assignment"] != -1
TSNE_emb = TSNE_emb[is_clustered]

# Shift the remaining labels down by one.
# NOTE(review): presumably the stored labels are 1-based and plot_embedding
# expects them to start at 0 - confirm.
assignments = assignments[is_clustered].assign(
    assignment=lambda frame: frame["assignment"] - 1
)

cluster_labels = assignments["assignment"].tolist()
num_clsuster = len(np.unique(cluster_labels))
savename = "{}_{}_view_cluster_{}.png".format(method, view, num_clsuster)

plot_embedding(TSNE_emb, cluster_labels, savename, Hierachical_plot_path)

# Ward-linkage agglomerative clustering of the filtered physio t-SNE
# embedding for 3 and 4 clusters; one plot is saved per cluster count.
# NOTE(review): the file name reuses `method`, so a hierarchical plot can
# overwrite an earlier plot saved with the same method/view/cluster count.
for num_clsuster in [3, 4]:
    # The `affinity` keyword was deprecated in scikit-learn 1.2 and removed in
    # 1.4. Ward linkage only works with Euclidean distances, which is the
    # default, so the argument is omitted to stay compatible with all versions.
    model = AgglomerativeClustering(n_clusters=num_clsuster, linkage="ward")
    labels = model.fit_predict(TSNE_emb.values)
    savename = "{}_{}_view_cluster_{}.png".format(method, view, num_clsuster)
    plot_embedding(TSNE_emb, labels, savename, Hierachical_plot_path)