In [None]:
import pandas as pd
pd.set_option("display.precision", 3)
import warnings
warnings.filterwarnings("ignore")
import pickle
from mvlearn.cluster import MultiviewKMeans
from sklearn.cluster import KMeans
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from IPython.display import display
import math
import seaborn as sns
sns.set_style('white')
from ConsensusClusteringSingleView import ConsensusCluster

In [None]:
# `os` is needed for chdir below but is not part of the notebook's import cell,
# so it is imported here to keep this cell runnable on a fresh kernel.
import os

# Switch to the project directory so that the relative 'data/...' and
# 'CDF plots/...' paths used by the following cells resolve against it.
# NOTE(review): absolute, cluster-specific path — adjust when running elsewhere.
wd = '/cluster/work/borgw/SPSS/MultiOmicsAnalysis/ConsensusClustering/'
os.chdir(wd)

In [None]:
def Consensus(name, ks=(2, 3, 4, 5, 6)):
    """Run single-view consensus k-means for one view and plot the consensus CDFs.

    For every ``k`` in ``ks`` the file ``data/TwoViewsKCC_{name}_K_{k}.csv`` is
    read (first column used as index), a ``ConsensusCluster`` wrapping
    ``KMeans`` is fitted on it, and the labels returned by ``predict_data``
    (incremented by 1) are stored in column ``k`` of the result.  Afterwards
    the empirical CDF of the entries of each consensus matrix is drawn, one
    curve per ``k``, and the figure is written to
    ``CDF plots/CDF {name} view.png`` and shown.

    Parameters
    ----------
    name : str
        View name; it is substituted into the input file names, the plot
        title and the output figure file name.
    ks : iterable of int, optional
        Cluster counts to evaluate.  Defaults to ``(2, 3, 4, 5, 6)``, the
        values that were previously hard-coded.  An input CSV must exist for
        every value.

    Returns
    -------
    pandas.DataFrame
        One column per ``k`` holding the cluster labels (+1) for that ``k``.

    Raises
    ------
    AssertionError
        If the fitted ``ConsensusCluster`` does not hold exactly one consensus
        matrix in ``Mk``.

    Notes
    -----
    The area under each CDF used to be accumulated in a local list that was
    never returned or read; that dead computation (which relied on the
    deprecated ``np.sum(<generator>)``) has been dropped.
    """
    ks = list(ks)
    assignments = pd.DataFrame(columns=ks)
    consensus_matrices = []

    for k in ks:
        data = pd.read_csv("data/TwoViewsKCC_{}_K_{}.csv".format(name, k), index_col=0)

        # NOTE(review): (k, k+1) presumably is a half-open range of cluster
        # counts so that only this k is evaluated, and 100 presumably the
        # number of resampling rounds — confirm against ConsensusCluster.
        cons = ConsensusCluster(KMeans, k, k+1, 100, resample_proportion=0.8)
        cons.fit(data)
        # Exactly one consensus matrix is expected for a single k.
        assert cons.Mk.shape[0] == 1
        consensus_matrices.append(cons.Mk[0])
        assignments[k] = cons.predict_data(data) + 1

    fig, ax = plt.subplots(figsize=(12, 9))

    # Pair every k with its own matrix instead of indexing by ``k - 2``, which
    # was only correct for consecutive k values starting at 2.
    for k, mk in zip(ks, consensus_matrices):
        hist, bins = np.histogram(mk.ravel(), bins=50)
        cdf = np.cumsum(hist / hist.sum())
        # CDF values are plotted against the right edge of each bin.
        ax.plot(bins[1:], cdf, label=k)

    ax.legend(fontsize=16)
    ax.set_title('CDF: {} view'.format(name), fontsize=25)
    ax.set_ylim(-0.05, 1.05)
    fig.tight_layout()
    fig.savefig('CDF plots/CDF {} view.png'.format(name), dpi=300)
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)
    return assignments

In [None]:
# Single-view runs: cluster each view on its own and write the per-k labels
# to data/<View>ViewAssignments.csv (view name capitalised).
for name in ('clinical', 'proteome'):
    assignments = Consensus(name)
    assignments.to_csv(
        'data/{}ViewAssignments.csv'.format(name.capitalize())
    )

# Two views (clinical + proteome)

In [None]:
# Joint consensus clustering on both views with MultiviewKMeans.
# For every k the two per-view tables are loaded, a ConsensusCluster is fitted
# on the pair, the labels (+1) are stored in column k of `assignments`, and the
# single consensus matrix is kept in `Mks` for the CDF plot below.
ks = [2, 3, 4, 5, 6]
Mks = []
assignments = pd.DataFrame(columns=ks)
for k in ks:
    clinical_view = pd.read_csv('data/TwoViewsKCC_clinical_K_{}.csv'.format(k), index_col=0)
    proteome_view = pd.read_csv("data/TwoViewsKCC_proteome_K_{}.csv".format(k), index_col=0)

    Xs = [clinical_view, proteome_view]
    # NOTE(review): (k, k+1) presumably restricts the run to this single k and
    # 100 is presumably the number of resampling rounds — confirm against
    # ConsensusCluster.
    cons = ConsensusCluster(MultiviewKMeans, k, k+1, 100, resample_proportion=0.8)
    cons.fit(Xs)
    assignments[k] = cons.predict_data(Xs) + 1
    # Exactly one consensus matrix is expected for a single k.
    assert cons.Mk.shape[0] == 1
    Mks.append(cons.Mk[0])
assignments.to_csv('data/ClinicalProteomeViewsAssignments.csv')

# Empirical CDF of the consensus-matrix entries per k, plus the area under
# each CDF (collected in `Aks`, in the same order as `ks`).
Aks = []
plt.figure(figsize=(12, 9))

# zip keeps each k paired with its own matrix; the former `Mks[k-2]` lookup
# only worked for consecutive k values starting at 2.
for k, Mk in zip(ks, Mks):
    hist, bins = np.histogram(Mk.ravel(), bins=50)
    pdf = hist / hist.sum()
    cdf = np.cumsum(pdf)
    # Sum of cdf * bin width. Vectorised: np.sum() on a generator is
    # deprecated and only worked via a silent fallback to the builtin sum.
    acdf = np.sum(cdf * np.diff(bins))
    Aks.append(acdf)
    # CDF values are plotted against the right edge of each bin.
    plt.plot(bins[1:], cdf, label=k)

plt.legend(fontsize=16)
plt.title('CDF: ClinicalProteomeViews', fontsize=25)
plt.ylim(-0.05, 1.05)
plt.tight_layout()
plt.savefig('CDF plots/CDF ClinicalProteomeViews.png', dpi=300)
plt.show()