In [1]:
import pandas as pd
import numpy as np
import json
import os

from tqdm import tqdm
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.cluster import SpectralClustering
from sklearn.metrics import silhouette_score, adjusted_rand_score
from sklearn.preprocessing import LabelEncoder

from myclass.CleanMergeDataset import Clean_Merge_Dataset
from myclass.BonferroniTtest import Bonferroni_Ttest

In [2]:
if os.path.exists('final_dataset_common.json') is False:

    data_normal = pd.read_pickle('./data-ready/RNA_dataframe_normal').replace('/', '\\')
    data_tumor = pd.read_pickle('./data-ready/RNA_dataframe').replace('/', '\\')
    dataset_RNA, y_RNA, cases_id_RNA = Clean_Merge_Dataset(name='RNA').transform(data_normal, data_tumor)
    df_RNA = pd.concat([dataset_RNA, cases_id_RNA], axis=1)

    data_normal = pd.read_pickle('./data-ready/miRNA_dataframe_normal').replace('/', '\\')
    data_tumor = pd.read_pickle('./data-ready/miRNA_dataframe').replace('/', '\\')
    dataset_miRNA, y_miRNA, cases_id_miRNA= Clean_Merge_Dataset(name='miRNA').transform(data_normal, data_tumor)
    df_miRNA = pd.concat([dataset_miRNA, cases_id_miRNA], axis=1)

    data_normal = pd.read_pickle('./data-ready/illumina-27-450-normal').replace('/', '\\')
    data_tumor = pd.read_pickle('./data-ready/illumina450-27-tumor').replace('/', '\\')
    dataset_illumina, y_illumina, cases_id_illumina= Clean_Merge_Dataset(name='illumina').transform(data_normal, data_tumor)
    df_illumina = pd.concat([dataset_illumina, cases_id_illumina], axis=1)

    dataset_RNA = Bonferroni_Ttest(label_case_id_into_X=True, alpha=0.05).fit_transform(pd.concat([df_RNA, y_RNA], axis=1), y_RNA)
    dataset_miRNA = Bonferroni_Ttest(label_case_id_into_X=True, alpha=0.05).fit_transform(pd.concat([df_miRNA, y_miRNA], axis=1), y_miRNA)
    dataset_illumina = Bonferroni_Ttest(label_case_id_into_X=True, alpha=0.05).fit_transform(pd.concat([df_illumina, y_illumina], axis=1), y_illumina)

    cases_id = set(dataset_illumina['case_id']) & set(dataset_miRNA['case_id']) & set(dataset_RNA['case_id'])
    df_final_illumina = dataset_illumina.loc[dataset_illumina['case_id'].isin(cases_id)]
    df_final_rna = dataset_RNA.loc[dataset_RNA['case_id'].isin(cases_id)]
    df_final_mirna = dataset_miRNA.loc[dataset_miRNA['case_id'].isin(cases_id)]

    print(df_final_illumina.shape)
    print(df_final_rna.shape)
    print(df_final_mirna.shape)
    
    df_final_illumina.to_pickle('illumina_pickle.pkl')
    df_final_rna.to_pickle('rna_pickle.pkl')
    df_final_mirna.to_pickle('miRNA_pickle.pkl')
    
    my_dict = {
        'miRNA': df_final_mirna.to_dict(),
        'RNA': df_final_rna.to_dict(),
        'illumina': df_final_illumina.to_dict()
    }
    with open('final_dataset_common.json', 'w') as outfile:
        json.dump(my_dict, outfile)
    
    df_illumina = df_final_illumina.copy()
    df_mirna = df_final_mirna.copy()
    df_rna = df_final_rna.copy()
    
    del my_dict
    del df_final_illumina
    del df_final_rna
    del df_final_mirna
    del dataset_illumina
    del dataset_RNA
    del dataset_miRNA
else:
    df_illumina = pd.read_pickle('illumina_pickle.pkl')
    df_mirna = pd.read_pickle('miRNA_pickle.pkl')
    df_rna = pd.read_pickle('rna_pickle.pkl')

In [10]:
import numpy as np
import pandas as pd

from scipy.spatial.distance import pdist, squareform
from copy import deepcopy
from sklearn.preprocessing import StandardScaler

class SimilarityNetworkFusion:
    def __init__(self, df_mirna, df_rna, df_illumina, k=3, mu=0.3):
        
        self.cases_id = df_rna.loc[:, 'case_id']
        self.rna = df_rna.copy()
        self.mirna = df_mirna.copy()
        self.illumina = df_illumina.copy()
        
        self.k = k
        self.mu = mu
        self.check_columns()
    
    def calculate_matrix(self):
        if hasattr(self, 'w_rna') is False:
            self.w_rna = self.__weights__(self.rna, 'RNA')
            self.w_mirna = self.__weights__(self.mirna, 'miRNA')
            self.w_illumina = self.__weights__(self.illumina, 'Illumina')
        
        if hasattr(self, 'p_rna') is False:
            self.p_rna = self.P_matrix(self.w_rna.to_numpy().tolist(), self.cases_id.shape[0], 'RNA')
            self.p_mirna = self.P_matrix(self.w_mirna.to_numpy().tolist(), self.cases_id.shape[0], 'miRNA')
            self.p_illumina = self.P_matrix(self.w_illumina.to_numpy().tolist(), self.cases_id.shape[0], 'Illumina')

        self.s_rna = self.S_matrix(self.w_rna.to_numpy().tolist(), self.cases_id.shape[0], 'RNA')
        self.s_mirna = self.S_matrix(self.w_mirna.to_numpy().tolist(), self.cases_id.shape[0], 'miRNA')
        self.s_illumina = self.S_matrix(self.w_illumina.to_numpy().tolist(), self.cases_id.shape[0], 'Illumina')
        
        return self
        

    def __weights__(self, dataset, name):
        print('Calculating weights for {}...'.format(name))
        df = pd.DataFrame(columns=self.cases_id, data=dataset.T.values)
        
        #calculate euclidean distance
        dist = pdist(dataset, 'euclidean')
        self.df_dist = pd.DataFrame(columns=self.cases_id, index=self.cases_id, data=squareform(dist))
        weights = pd.DataFrame(columns=self.cases_id, index=self.cases_id, data=[])
                
        for i, patient_i in enumerate(tqdm(self.cases_id)):
            for patient_j in self.cases_id.iloc[i:]:
                    tokK_mean_i = np.sort(self.df_dist.loc[patient_i, :].to_numpy())[:self.k].mean()
                    topK_mean_j = np.sort(self.df_dist.loc[patient_j, :].to_numpy())[:self.k].mean()
                    
                    eps = (tokK_mean_i + tokK_mean_i + self.df_dist.loc[patient_i, patient_j])/3

                    weights.loc[patient_i, patient_j] = np.exp(-(self.df_dist.loc[patient_i, patient_j]**2/(eps*self.mu)))
                    weights.loc[patient_j, patient_i] = np.exp(-(self.df_dist.loc[patient_j, patient_i]**2/(eps*self.mu)))
                    
        return weights       
    
    def check_columns(self):
        scaler = MinMaxScaler()
        if 'label' in self.mirna.columns:
            self.mirna.drop(['label'], axis=1, inplace=True)
        if 'case_id' in self.mirna.columns:
            self.mirna.drop(['case_id'], axis=1, inplace=True)
            
        if 'label' in self.rna.columns:
            self.rna.drop(['label'], axis=1, inplace=True)
        if 'case_id' in self.rna.columns:
            self.rna.drop(['case_id'], axis=1, inplace=True)
            
        if 'label' in self.illumina.columns:
            self.illumina.drop(['label'], axis=1, inplace=True)
        if 'case_id' in self.illumina.columns:
            self.illumina.drop(['case_id'], axis=1, inplace=True)
            
        self.mirna = pd.DataFrame(scaler.fit_transform(self.mirna))
        self.rna = pd.DataFrame(scaler.fit_transform(self.rna))
        self.illumina = pd.DataFrame(scaler.fit_transform(self.illumina))

        return


    def find_k_neighbors(self, row, i, k=None): 
        row=deepcopy(row)
        #case of P matrix
        if k==None:
            del row[i]  #delete element of the same column of row index
            return row

        #case of S (find k elements with minimum distance value of W[i][j])
        else:
            k_neighbors_index=[]
            neigh = 0
            max_value = max(row)
            for j in range(0, len(row)):
                if j!=i:
                    min_index = row.index(min(row))
                    k_neighbors_index.append(min_index)
                    neigh+=1
                    row[min_index] = max_value
                    if neigh == k:
                        return k_neighbors_index


    def P_matrix(self, W, n_case_id, name):
        print('Calculating P matrix for {}...'.format(name))
        P=[]
        for i in tqdm(range(0, n_case_id)):
            row=[]
            for j in range(0,n_case_id):
                if i==j:
                    row.append(1/2)

                else:
                    k_neighbors = self.find_k_neighbors(W[i], i)
                    denominator = 2*sum(k_neighbors)
                    row.append(W[i][j]/denominator)
            P.append(row)
        print(np.array(P))
        return np.array(P)

    def S_matrix(self, W, n_case_id, name):
        print('Calculating S matrix for {}...'.format(name))
        S=[]
        for i in tqdm(range(0, n_case_id)):
            S_row=[]
            neighbors_indeces = self.find_k_neighbors(self.df_dist.iloc[i,:].to_numpy().tolist(), i, self.k)
            for j in range(0,n_case_id):
                if j not in neighbors_indeces:
                    S_row.append(0)

                else:
                    np_row = np.array(W[i])
                    denominator = sum(np_row[neighbors_indeces])
                    S_row.append(W[i][j]/denominator)
            S.append(S_row)
        print(np.array(S))
        return np.array(S)
    
    def product_matrix(self, S_matrix, P_matrix):
        result = np.dot(S_matrix, P_matrix)
        result = np.dot(result, S_matrix.T)
        return result
    
    def sum_matrix_P(self, P1, P2):
        return np.add(P1,P2)/2
    
    def fit(self, num_iter=None):
        if num_iter is not None:
            for i in range(0, num_iter):
                self.p_rna_t1 = self.product_matrix(self.s_rna, self.sum_matrix_P(self.p_mirna, self.p_illumina))
                self.p_mirna_t1 = self.product_matrix(self.s_mirna, self.sum_matrix_P(self.p_rna, self.p_illumina))
                self.p_illumina_t1 = self.product_matrix(self.s_illumina, self.sum_matrix_P(self.p_mirna, self.p_rna))
                print(self.p_rna_t1)
                print(self.p_mirna_t1)
                print(self.p_illumina_t1)
                self.p_rna = self.p_rna_t1
                self.p_mirna = self.p_mirna_t1
                self.p_illumina = self.p_illumina_t1
        else:
            print('ciao')

        return self
    
    def clean(self):
        del self.p_rna
        del self.p_mirna
        del self.p_illumina
        
        del self.p_rna_t1
        del self.p_mirna_t1
        del self.p_illumina_t1
        
        
        del self.w_rna
        del self.w_mirna
        del self.w_illumina
        
        return self

In [30]:
df_mirna.sort_values(by='case_id', inplace=True)
df_rna.sort_values(by='case_id', inplace=True)
df_illumina.sort_values(by='case_id', inplace=True)

sm = SimilarityNetworkFusion(df_mirna.drop_duplicates(subset='case_id'),
                            df_rna.drop_duplicates(subset='case_id'),
                            df_illumina.drop_duplicates(subset='case_id'), k=100).calculate_matrix()

Calculating weights for RNA...


100%|██████████| 449/449 [04:48<00:00,  1.55it/s]
  0%|          | 0/449 [00:00<?, ?it/s]

Calculating weights for miRNA...


100%|██████████| 449/449 [02:11<00:00,  3.42it/s]


Calculating weights for Illumina...


100%|██████████| 449/449 [01:59<00:00,  3.75it/s]
  0%|          | 0/449 [00:00<?, ?it/s]

Calculating P matrix for RNA...


100%|██████████| 449/449 [04:50<00:00,  1.55it/s]
  0%|          | 0/449 [00:00<?, ?it/s]

[[5.00000000e-01 5.57150677e-16 1.39763524e-16 ... 5.28993010e-15
  5.55991034e-28 3.28982971e-06]
 [1.39388110e-12 5.00000000e-01 1.96092903e-13 ... 1.52924545e-15
  3.37501257e-14 1.28255930e-18]
 [3.35451522e-11 1.88124228e-11 5.00000000e-01 ... 2.40455260e-13
  1.42527669e-24 9.31355444e-13]
 ...
 [1.11154138e-13 1.28439863e-17 2.10510645e-17 ... 5.00000000e-01
  1.66602793e-27 2.03632237e-17]
 [3.15614858e-17 7.65794035e-07 3.37095467e-19 ... 4.50086750e-18
  5.00000000e-01 3.35465634e-18]
 [8.77390304e-14 1.36723700e-29 1.03490134e-25 ... 2.58458157e-26
  1.57607833e-36 5.00000000e-01]]
Calculating P matrix for miRNA...


100%|██████████| 449/449 [04:27<00:00,  1.68it/s]
  0%|          | 0/449 [00:00<?, ?it/s]

[[5.00000000e-01 1.65126674e-06 1.43105641e-03 ... 1.26894040e-03
  2.70502585e-07 1.45364865e-03]
 [2.37509594e-05 5.00000000e-01 4.17742784e-03 ... 1.03010617e-04
  1.37146412e-04 1.83278401e-04]
 [4.88520300e-04 9.91450179e-05 5.00000000e-01 ... 2.18685770e-04
  8.82187938e-08 2.42800831e-03]
 ...
 [4.56052919e-04 2.57390242e-06 2.30233585e-04 ... 5.00000000e-01
  8.25579787e-09 1.64637505e-03]
 [5.61553777e-05 1.97943142e-03 5.36482038e-05 ... 4.76875455e-06
  5.00000000e-01 1.35609995e-03]
 [3.19187096e-04 2.79790678e-06 1.56174521e-03 ... 1.00586718e-03
  1.43435828e-06 5.00000000e-01]]
Calculating P matrix for Illumina...


100%|██████████| 449/449 [04:35<00:00,  1.63it/s]
  0%|          | 0/449 [00:00<?, ?it/s]

[[5.00000000e-001 1.94525455e-030 2.03100094e-102 ... 3.20702797e-045
  7.59048158e-046 2.14527554e-044]
 [1.02921975e-017 5.00000000e-001 3.62030681e-091 ... 2.13744681e-029
  1.18709179e-013 4.18967537e-040]
 [2.91678766e-084 9.82671580e-086 5.00000000e-001 ... 4.78968266e-041
  2.23250501e-069 3.06567305e-099]
 ...
 [5.99139186e-029 7.54724804e-026 6.23070269e-043 ... 5.00000000e-001
  3.44979403e-021 1.21278750e-053]
 [9.32387809e-029 2.75600428e-009 1.90952351e-070 ... 2.26827364e-020
  5.00000000e-001 6.03391227e-047]
 [2.14150432e-041 7.90468518e-050 2.13091940e-114 ... 6.48030743e-067
  4.90351615e-061 5.00000000e-001]]
Calculating S matrix for RNA...


100%|██████████| 449/449 [00:06<00:00, 68.44it/s]
  1%|          | 5/449 [00:00<00:09, 46.23it/s]

[[1.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [0.00000000e+00 1.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 1.00000000e+00 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 ...
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 1.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [0.00000000e+00 3.61447414e-31 0.00000000e+00 ... 0.00000000e+00
  1.00000000e+00 0.00000000e+00]
 [8.81446815e-20 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  0.00000000e+00 9.99999771e-01]]
Calculating S matrix for miRNA...


100%|██████████| 449/449 [00:09<00:00, 49.63it/s]
  2%|▏         | 9/449 [00:00<00:05, 78.89it/s]

[[9.53624077e-01 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [0.00000000e+00 9.92785206e-01 0.00000000e+00 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 8.89281659e-01 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 ...
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 8.27827171e-01
  0.00000000e+00 0.00000000e+00]
 [0.00000000e+00 3.84302822e-06 0.00000000e+00 ... 0.00000000e+00
  9.99298906e-01 0.00000000e+00]
 [4.53047009e-04 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  0.00000000e+00 7.72727170e-01]]
Calculating S matrix for Illumina...


100%|██████████| 449/449 [00:06<00:00, 66.62it/s]

[[1.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [0.00000000e+00 1.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 1.00000000e+00 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 ...
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 1.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [0.00000000e+00 1.69620492e-35 0.00000000e+00 ... 0.00000000e+00
  1.00000000e+00 0.00000000e+00]
 [1.62184284e-53 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  0.00000000e+00 1.00000000e+00]]





In [31]:
sm.fit(num_iter=100)

[[5.00000000e-01 8.25633369e-07 7.15528204e-04 ... 6.34470201e-04
  1.35251292e-07 7.26824931e-04]
 [1.18754797e-05 5.00000000e-01 2.08871392e-03 ... 5.15053084e-05
  6.85732060e-05 9.16392303e-05]
 [2.44260150e-04 4.95725089e-05 5.00000000e-01 ... 1.09342885e-04
  4.41093969e-08 1.21400411e-03]
 ...
 [2.28026460e-04 1.28695121e-06 1.15116793e-04 ... 5.00000000e-01
  4.12789893e-09 8.23187354e-04]
 [2.80776889e-05 9.89717087e-04 2.68241019e-05 ... 2.38437728e-06
  5.00000000e-01 6.78049830e-04]
 [1.59593812e-04 1.39895833e-06 7.80872907e-04 ... 5.02933504e-04
  7.17180741e-07 4.99999781e-01]]
[[4.55179803e-01 1.76498731e-06 1.84978943e-04 ... 1.24834719e-05
  1.43710348e-06 1.76593685e-03]
 [7.47071483e-05 4.93055324e-01 1.13063207e-08 ... 2.62355134e-05
  3.62177844e-06 1.63835448e-04]
 [5.16663786e-06 1.03928356e-07 3.96126431e-01 ... 4.02644102e-05
  2.07129074e-08 2.05656236e-05]
 ...
 [1.39219342e-05 6.16473053e-07 5.71525351e-05 ... 3.60452441e-01
  7.53628317e-09 3.08356998e-05]

[[4.23900109e-01 1.28728485e-05 7.57710266e-04 ... 5.73643600e-04
  2.17401022e-06 2.70613127e-03]
 [1.50297971e-04 4.87624318e-01 1.20088104e-03 ... 9.40315765e-05
  5.11116469e-05 4.22698497e-04]
 [2.14579139e-04 3.33681286e-05 3.36165733e-01 ... 3.63789896e-04
  1.20901009e-07 9.19007110e-04]
 ...
 [2.25110796e-04 5.01014607e-06 3.87031545e-04 ... 2.81154366e-01
  8.31605000e-08 5.43499982e-04]
 [5.96316948e-05 7.46830801e-04 1.57167332e-05 ... 6.04733600e-06
  4.98766636e-01 3.66335527e-04]
 [1.40967171e-03 2.74879619e-05 6.50380394e-04 ... 3.86442818e-04
  7.78358214e-07 2.17458278e-01]]
[[3.98422817e-01 2.16360080e-05 8.71788244e-04 ... 7.06607926e-04
  2.89567860e-06 3.31127409e-03]
 [2.08525651e-04 4.83106618e-01 1.17777759e-03 ... 1.20310740e-04
  5.54462513e-05 5.82312027e-04]
 [2.51885348e-04 3.54258735e-05 2.88898782e-01 ... 5.63410011e-04
  1.75233360e-07 9.83503172e-04]
 ...
 [2.85881567e-04 7.85534798e-06 5.87867510e-04 ... 2.23437544e-01
  1.36261680e-07 6.03695504e-04]

[[3.52394599e-01 4.91972492e-05 1.03811037e-03 ... 1.00552652e-03
  4.67781569e-06 4.32321815e-03]
 [3.42233864e-04 4.74053015e-01 1.02237612e-03 ... 1.67518684e-04
  5.86606052e-05 9.34513775e-04]
 [3.08788298e-04 3.55865170e-05 2.17047158e-01 ... 1.08955718e-03
  2.85029047e-07 9.79136758e-04]
 ...
 [4.38242918e-04 1.57251183e-05 1.09936366e-03 ... 1.47093713e-01
  2.77548910e-07 7.64680436e-04]
 [1.04959144e-04 8.22674932e-04 1.41833540e-05 ... 1.40513491e-05
  4.97389032e-01 3.34704741e-04]
 [2.61795643e-03 1.03564223e-04 7.35279485e-04 ... 6.41099146e-04
  2.09370295e-06 8.81643458e-02]]
[[3.31047621e-01 6.54058000e-05 1.12137599e-03 ... 1.16156913e-03
  5.58985331e-06 4.73648855e-03]
 [4.11095273e-04 4.69590781e-01 9.65264686e-04 ... 1.92697448e-04
  6.09963118e-05 1.11007636e-03]
 [3.45166505e-04 3.63941389e-05 1.86811166e-01 ... 1.36938086e-03
  3.44770904e-07 9.89653454e-04]
 ...
 [5.24031919e-04 2.04841655e-05 1.37045937e-03 ... 1.17297988e-01
  3.66192455e-07 8.63196412e-04]

[[2.93093262e-01 1.05502414e-04 1.27246195e-03 ... 1.46591680e-03
  7.58272155e-06 5.35515099e-03]
 [5.56116254e-04 4.60808646e-01 8.64823274e-04 ... 2.45180728e-04
  6.56053083e-05 1.45224655e-03]
 [4.30517707e-04 3.86889107e-05 1.41180693e-01 ... 1.94179115e-03
  4.74424989e-07 1.01441411e-03]
 ...
 [7.12924576e-04 3.26097596e-05 1.92297459e-03 ... 7.81674825e-02
  6.07642708e-07 1.08266539e-03]
 [1.51019381e-04 8.67749281e-04 1.33466922e-05 ... 2.46896529e-05
  4.96012090e-01 3.61912857e-04]
 [3.58529488e-03 2.14670744e-04 7.95408791e-04 ... 9.74872651e-04
  4.51421847e-06 3.90705831e-02]]
[[2.75406378e-01 1.27568242e-04 1.34433753e-03 ... 1.61153062e-03
  8.62809289e-06 5.61525411e-03]
 [6.30152750e-04 4.56461110e-01 8.19250422e-04 ... 2.72128872e-04
  6.79254452e-05 1.61762363e-03]
 [4.76862457e-04 4.01406246e-05 1.21848884e-01 ... 2.22022317e-03
  5.43582923e-07 1.03023188e-03]
 ...
 [8.09982413e-04 3.94598410e-05 2.19182421e-03 ... 6.28798585e-02
  7.50643890e-07 1.19426121e-03]

[[2.51522511e-01 1.64705821e-04 1.44496241e-03 ... 1.81238114e-03
  1.03060102e-05 5.91652399e-03]
 [7.43792440e-04 4.50035563e-01 7.60598578e-04 ... 3.14056847e-04
  7.14130119e-05 1.85047438e-03]
 [5.53471212e-04 4.31019298e-05 9.92745843e-02 ... 2.60818304e-03
  6.60273265e-07 1.06214805e-03]
 ...
 [9.58043798e-04 5.14670671e-05 2.56720873e-03 ... 4.71962306e-02
  1.01937003e-06 1.36035723e-03]
 [1.90313999e-04 8.94836084e-04 1.35345476e-05 ... 3.54913367e-05
  4.94867104e-01 4.09766364e-04]
 [4.22526478e-03 3.22191245e-04 8.63223831e-04 ... 1.26123424e-03
  7.28344222e-06 2.23662896e-02]]
[[2.36401673e-01 1.90637159e-04 1.51001985e-03 ... 1.93977089e-03
  1.14606833e-05 6.09444983e-03]
 [8.20102672e-04 4.45783646e-01 7.24639576e-04 ... 3.42492810e-04
  7.37402915e-05 1.99992714e-03]
 [6.06529225e-04 4.53853188e-05 8.59199407e-02 ... 2.85339201e-03
  7.43943566e-07 1.08629393e-03]
 ...
 [1.05651352e-03 6.00387331e-05 2.80490351e-03 ... 3.84325445e-02
  1.21830601e-06 1.46910544e-03]

[[2.09519695e-01 2.47231178e-04 1.63194844e-03 ... 2.16571759e-03
  1.39290557e-05 6.36395955e-03]
 [9.74314736e-04 4.37422136e-01 6.66412303e-04 ... 4.01429398e-04
  7.84020060e-05 2.27227415e-03]
 [7.20217344e-04 5.14858630e-05 6.56939876e-02 ... 3.27898222e-03
  9.44073385e-07 1.14677745e-03]
 ...
 [1.25008651e-03 7.96687278e-05 3.21959401e-03 ... 2.69170813e-02
  1.70995948e-06 1.67521858e-03]
 [2.38228350e-04 9.19558038e-04 1.51185908e-05 ... 5.04081804e-05
  4.93496151e-01 4.79602367e-04]
 [4.83209006e-03 4.58793421e-04 9.71757455e-04 ... 1.58596593e-03
  1.13464468e-05 1.39573603e-02]]
[[1.96991977e-01 2.76738330e-04 1.69079365e-03 ... 2.27039283e-03
  1.52094687e-05 6.47875366e-03]
 [1.05156760e-03 4.33283790e-01 6.41220451e-04 ... 4.31467743e-04
  8.07342208e-05 2.40042541e-03]
 [7.78815135e-04 5.50485905e-05 5.71146096e-02 ... 3.47133268e-03
  1.05654560e-06 1.18034204e-03]
 ...
 [1.34509923e-03 9.02009323e-05 3.40776222e-03 ... 2.24110375e-02
  1.98562004e-06 1.77413266e-03]

[[1.80075669e-01 3.23346296e-04 1.77482989e-03 ... 2.41024194e-03
  1.72295286e-05 6.61444130e-03]
 [1.16726885e-03 4.27169916e-01 6.11774022e-04 ... 4.77685935e-04
  8.42343968e-05 2.57533444e-03]
 [8.69767120e-04 6.16069066e-05 4.70832090e-02 ... 3.71540867e-03
  1.25758413e-06 1.23725345e-03]
 ...
 [1.48287116e-03 1.07529172e-04 3.64822118e-03 ... 1.77696363e-02
  2.46771723e-06 1.91293059e-03]
 [2.78327614e-04 9.35936053e-04 1.77699031e-05 ... 6.41744014e-05
  4.92356240e-01 5.39426748e-04]
 [5.23080653e-03 5.73746352e-04 1.08178053e-03 ... 1.83254488e-03
  1.52239149e-05 1.10638653e-02]]
[[1.69366325e-01 3.55075047e-04 1.82961102e-03 ... 2.49829901e-03
  1.86080205e-05 6.69535393e-03]
 [1.24416426e-03 4.23124967e-01 5.94772573e-04 ... 5.08841758e-04
  8.65680445e-05 2.68643908e-03]
 [9.31117723e-04 6.64090602e-05 4.11468385e-02 ... 3.86403109e-03
  1.40421368e-06 1.27705704e-03]
 ...
 [1.57290718e-03 1.19564883e-04 3.79523331e-03 ... 1.51690479e-02
  2.81273786e-06 2.00224365e-03]

[[1.50328598e-01 4.21104732e-04 1.93404956e-03 ... 2.65318280e-03
  2.15028609e-05 6.82061686e-03]
 [1.39637180e-03 4.15173481e-01 5.72050894e-04 ... 5.72542565e-04
  9.12350012e-05 2.88518392e-03]
 [1.05617213e-03 7.80063794e-05 3.21498394e-02 ... 4.10208077e-03
  1.76020427e-06 1.36392625e-03]
 ...
 [1.74451849e-03 1.45712882e-04 4.03327359e-03 ... 1.17227473e-02
  3.61073475e-06 2.16687885e-03]
 [3.26026651e-04 9.52316161e-04 2.27181469e-05 ... 8.19577369e-05
  4.90991435e-01 6.06976623e-04]
 [5.60943062e-03 7.09196731e-04 1.23097516e-03 ... 2.09779935e-03
  2.03307563e-05 9.56655898e-03]]
[[1.41456906e-01 4.54746195e-04 1.98490719e-03 ... 2.72505628e-03
  2.29895406e-05 6.87455415e-03]
 [1.47181310e-03 4.11238872e-01 5.63853048e-04 ... 6.04747324e-04
  9.35679182e-05 2.97805132e-03]
 [1.11908641e-03 8.44218666e-05 2.83322830e-02 ... 4.20511923e-03
  1.95932867e-06 1.40911367e-03]
 ...
 [1.82770931e-03 1.59369939e-04 4.13711262e-03 ... 1.03654094e-02
  4.04265285e-06 2.24523480e-03]

[[1.25686420e-01 5.23728683e-04 2.08253634e-03 ... 2.85253562e-03
  2.60871275e-05 6.95846819e-03]
 [1.62002201e-03 4.03505496e-01 5.57314672e-04 ... 6.70151245e-04
  9.82306742e-05 3.14382728e-03]
 [1.24541762e-03 9.93364581e-05 2.25441032e-02 ... 4.36382289e-03
  2.43441464e-06 1.50415874e-03]
 ...
 [1.98544388e-03 1.88504150e-04 4.29961572e-03 ... 8.55018600e-03
  5.01697375e-06 2.38968666e-03]
 [3.72673330e-04 9.66324523e-04 2.96702570e-05 ... 1.00796941e-04
  4.89630029e-01 6.66865727e-04]
 [5.90328751e-03 8.39844378e-04 1.39226929e-03 ... 2.33204015e-03
  2.58154130e-05 8.93821186e-03]]
[[1.18337384e-01 5.58614132e-04 2.13025764e-03 ... 2.91208387e-03
  2.76710469e-05 6.99460212e-03]
 [1.69319249e-03 3.99679136e-01 5.56783386e-04 ... 7.03093425e-04
  1.00560772e-04 3.22127884e-03]
 [1.30851144e-03 1.07422703e-04 2.00872566e-02 ... 4.43084259e-03
  2.69692353e-06 1.55273644e-03]
 ...
 [2.06177326e-03 2.03578121e-04 4.36904555e-03 ... 7.83059185e-03
  5.53738716e-06 2.45855108e-03]

[[1.05273820e-01 6.29391333e-04 2.22239917e-03 ... 3.01904743e-03
  3.09494285e-05 7.05051271e-03]
 [1.83610233e-03 3.92159486e-01 5.64187621e-04 ... 7.69612863e-04
  1.05215757e-04 3.35967133e-03]
 [1.43392206e-03 1.25679572e-04 1.63591968e-02 ... 4.52922461e-03
  3.31023561e-06 1.65249909e-03]
 ...
 [2.20627488e-03 2.35299731e-04 4.47354130e-03 ... 6.85504656e-03
  6.68876185e-06 2.58603773e-03]
 [4.17781524e-04 9.78746959e-04 3.86047886e-05 ... 1.20397875e-04
  4.88272049e-01 7.18224717e-04]
 [6.13182690e-03 9.64807390e-04 1.55993552e-03 ... 2.53936619e-03
  3.15832025e-05 8.63423958e-03]]
[[9.91862394e-02 6.64990744e-04 2.26757954e-03 ... 3.06942772e-03
  3.26197015e-05 7.07440254e-03]
 [1.90643966e-03 3.88439135e-01 5.70221843e-04 ... 8.03012646e-04
  1.07541407e-04 3.42441272e-03]
 [1.49624361e-03 1.35425709e-04 1.47757591e-02 ... 4.56936991e-03
  3.64485937e-06 1.70289876e-03]
 ...
 [2.27618813e-03 2.51596521e-04 4.51701169e-03 ... 6.46462368e-03
  7.29742645e-06 2.64700811e-03]

[[8.58437068e-02 7.54600792e-04 2.37676874e-03 ... 3.18304157e-03
  3.69307484e-05 7.11811263e-03]
 [2.07675595e-03 3.79320242e-01 5.95238060e-04 ... 8.86949798e-04
  1.13345774e-04 3.56763324e-03]
 [1.64970504e-03 1.62608822e-04 1.18579386e-02 ... 4.63445240e-03
  4.61769774e-06 1.83026902e-03]
 ...
 [2.44065088e-03 2.94211263e-04 4.59118633e-03 ... 5.81544652e-03
  8.97139647e-06 2.78778291e-03]
 [4.68023857e-04 9.91864605e-04 5.13909065e-05 ... 1.43901467e-04
  4.86692103e-01 7.67991519e-04]
 [6.33507545e-03 1.10327550e-03 1.75793577e-03 ... 2.75285611e-03
  3.85776825e-05 8.42462413e-03]]
[[8.09568434e-02 7.90515246e-04 2.41953649e-03 ... 3.22571615e-03
  3.86886696e-05 7.13202087e-03]
 [2.14333907e-03 3.75720596e-01 6.07640111e-04 ... 9.20586555e-04
  1.15664630e-04 3.62050791e-03]
 [1.71033651e-03 1.74211927e-04 1.09062530e-02 ... 4.65280477e-03
  5.04454127e-06 1.88132965e-03]
 ...
 [2.50384737e-03 3.11714208e-04 4.61327057e-03 ... 5.61587867e-03
  9.68107646e-06 2.84131532e-03]

  4.47425369e-05 8.28864300e-03]]
[[7.02457837e-02 8.80246253e-04 2.52336069e-03 ... 3.32371677e-03
  4.31989787e-05 7.15552259e-03]
 [2.30382247e-03 3.66898543e-01 6.46660535e-04 ... 1.00471606e-03
  1.21449909e-04 3.73813486e-03]
 [1.85869957e-03 2.05847498e-04 9.14667821e-03 ... 4.67578734e-03
  6.25724425e-06 2.00856384e-03]
 ...
 [2.65278913e-03 3.57026538e-04 4.64558520e-03 ... 5.27215161e-03
  1.16029848e-05 2.96588570e-03]
 [5.15410283e-04 1.00392859e-03 6.64708597e-05 ... 1.67791278e-04
  4.85116891e-01 8.08410465e-04]
 [6.48602301e-03 1.23427149e-03 1.95402287e-03 ... 2.94134517e-03
  4.57822461e-05 8.26775991e-03]]
[[6.63226476e-02 9.16055768e-04 2.56412794e-03 ... 3.36095220e-03
  4.50315523e-05 7.16235853e-03]
 [2.36639149e-03 3.63416305e-01 6.64184359e-04 ... 1.03833573e-03
  1.23760637e-04 3.78174289e-03]
 [1.91711084e-03 2.19173572e-04 8.57108198e-03 ... 4.68011707e-03
  6.78194289e-06 2.05917426e-03]
 ...
 [2.71010559e-03 3.75530160e-04 4.65357918e-03 ... 5.16356053e-0

[[5.93485569e-02 9.87300085e-04 2.64375051e-03 ... 3.43090765e-03
  4.87669927e-05 7.17008133e-03]
 [2.48713958e-03 3.56574890e-01 7.03816665e-04 ... 1.10539286e-03
  1.28372519e-04 3.86075274e-03]
 [2.03128278e-03 2.47541488e-04 7.68941977e-03 ... 4.67846155e-03
  7.93883621e-06 2.15926615e-03]
 ...
 [2.81906489e-03 4.13464320e-04 4.65893596e-03 ... 5.00216081e-03
  1.41269655e-05 3.10334272e-03]
 [5.53653050e-04 1.01363838e-03 8.09854957e-05 ... 1.88390097e-04
  4.83770498e-01 8.37031526e-04]
 [6.58319654e-03 1.34110455e-03 2.11816989e-03 ... 3.08662353e-03
  5.20808368e-05 8.14852321e-03]]
[[5.60984995e-02 1.02277196e-03 2.68301864e-03 ... 3.46471010e-03
  5.06542184e-05 7.17238804e-03]
 [2.54619263e-03 3.53190622e-01 7.24863958e-04 ... 1.13884270e-03
  1.30675459e-04 3.89805693e-03]
 [2.08754028e-03 2.62215970e-04 7.31234166e-03 ... 4.67519822e-03
  8.54979303e-06 2.20888633e-03]
 ...
 [2.87194305e-03 4.32686543e-04 4.65905513e-03 ... 4.93360520e-03
  1.50151258e-05 3.14679816e-03]

[[4.89743131e-02 1.11059998e-03 2.77876301e-03 ... 3.54440914e-03
  5.54562005e-05 7.17177883e-03]
 [2.68768583e-03 3.44897671e-01 7.82594533e-04 ... 1.22200288e-03
  1.36418272e-04 3.98211795e-03]
 [2.22420984e-03 3.01071154e-04 6.60611912e-03 ... 4.65820259e-03
  1.02293238e-05 2.33061803e-03]
 ...
 [2.99710606e-03 4.81832708e-04 4.64985227e-03 ... 4.80462992e-03
  1.73721075e-05 3.24916328e-03]
 [5.95493253e-04 1.02438080e-03 9.94829612e-05 ... 2.12402526e-04
  4.82204150e-01 8.64913913e-04]
 [6.66716366e-03 1.45999651e-03 2.30330773e-03 ... 3.24041824e-03
  5.95353368e-05 8.01836001e-03]]
[[4.63647337e-02 1.14546736e-03 2.81643865e-03 ... 3.57516815e-03
  5.73975004e-05 7.17005851e-03]
 [2.74265900e-03 3.41624575e-01 8.06885854e-04 ... 1.25511922e-03
  1.38711387e-04 4.01356195e-03]
 [2.27779525e-03 3.17158368e-04 6.37260215e-03 ... 4.64965289e-03
  1.09417006e-05 2.37861660e-03]
 ...
 [3.04541039e-03 5.01756042e-04 4.64422597e-03 ... 4.76128643e-03
  1.83505006e-05 3.28858294e-03]

[[4.06439945e-02 1.23152443e-03 2.90836738e-03 ... 3.64837906e-03
  6.23205361e-05 7.16096699e-03]
 [2.87409170e-03 3.33604517e-01 8.71502578e-04 ... 1.33727521e-03
  1.44428680e-04 4.08494497e-03]
 [2.40764884e-03 3.59284409e-04 5.92986735e-03 ... 4.62365400e-03
  1.28739770e-05 2.49577283e-03]
 ...
 [3.15999236e-03 5.52460991e-04 4.62478046e-03 ... 4.67570383e-03
  2.09265595e-05 3.38190810e-03]
 [6.34416627e-04 1.03459743e-03 1.19361129e-04 ... 2.36268696e-04
  4.80642607e-01 8.88224442e-04]
 [6.72613390e-03 1.57330603e-03 2.48044010e-03 ... 3.38006166e-03
  6.70736988e-05 7.89558228e-03]]
[[3.85483568e-02 1.26562562e-03 2.94454989e-03 ... 3.67678543e-03
  6.43067324e-05 7.15621970e-03]
 [2.92509147e-03 3.30439238e-01 8.98249683e-04 ... 1.36995131e-03
  1.46711366e-04 4.11178059e-03]
 [2.45848777e-03 3.76611044e-04 5.78203539e-03 ... 4.61241612e-03
  1.36869452e-05 2.54183389e-03]
 ...
 [3.20427302e-03 5.72960206e-04 4.61596080e-03 ... 4.64606558e-03
  2.19908386e-05 3.41795578e-03]

[[3.39537656e-02 1.34959049e-03 3.03283664e-03 ... 3.74484588e-03
  6.93297480e-05 7.14074952e-03]
 [3.04682500e-03 3.22683728e-01 9.67992242e-04 ... 1.45088626e-03
  1.52401871e-04 4.17315549e-03]
 [2.58145274e-03 4.21582941e-04 5.49696308e-03 ... 4.58242795e-03
  1.58677947e-05 2.65385032e-03]
 ...
 [3.30950870e-03 6.24941110e-04 4.59127288e-03 ... 4.58519669e-03
  2.47750805e-05 3.50364456e-03]
 [6.70552675e-04 1.04436367e-03 1.40326259e-04 ... 2.59897813e-04
  4.79085884e-01 9.08087460e-04]
 [6.76549288e-03 1.68158751e-03 2.64892751e-03 ... 3.50779629e-03
  7.46714130e-05 7.77980899e-03]]
[[3.22705177e-02 1.38281634e-03 3.06758022e-03 ... 3.77135092e-03
  7.13528901e-05 7.13372944e-03]
 [3.09401643e-03 3.19622945e-01 9.96543791e-04 ... 1.48304701e-03
  1.54673679e-04 4.19634307e-03]
 [2.62954128e-03 4.39982300e-04 5.40052544e-03 ... 4.57017938e-03
  1.67792046e-05 2.69779903e-03]
 ...
 [3.35022537e-03 6.45910331e-04 4.58095248e-03 ... 4.56363924e-03
  2.59209001e-05 3.53682374e-03]

[[2.92771825e-02 1.44826939e-03 3.13555186e-03 ... 3.82258161e-03
  7.54330501e-05 7.11774362e-03]
 [3.18448047e-03 3.13610659e-01 1.05513240e-03 ... 1.54677189e-03
  1.59205366e-04 4.23943330e-03]
 [2.72280314e-03 4.77801664e-04 5.24492791e-03 ... 4.54552066e-03
  1.87050660e-05 2.78336036e-03]
 ...
 [3.42816331e-03 6.88276893e-04 4.55966182e-03 ... 4.52664277e-03
  2.82964073e-05 3.60043579e-03]
 [6.99429580e-04 1.05241753e-03 1.58963356e-04 ... 2.79917556e-04
  4.77755398e-01 9.23016641e-04]
 [6.78679833e-03 1.77075525e-03 2.78628004e-03 ... 3.60913195e-03
  8.12165360e-05 7.68611695e-03]]
[[2.78819028e-02 1.48068847e-03 3.16908637e-03 ... 3.84769869e-03
  7.74823804e-05 7.10924485e-03]
 [3.22856730e-03 3.10636837e-01 1.08480300e-03 ... 1.57844918e-03
  1.61467593e-04 4.26008870e-03]
 [2.76856825e-03 4.96996053e-04 5.17604164e-03 ... 4.53326381e-03
  1.96984214e-05 2.82543550e-03]
 ...
 [3.46613488e-03 7.09576525e-04 4.54895065e-03 ... 4.50967934e-03
  2.95087184e-05 3.63146927e-03]

<__main__.SimilarityNetworkFusion at 0x7f8818004400>

In [13]:
sm.p_rna

array([[0.00727313, 0.00697518, 0.00727846, ..., 0.00767245, 0.00737386,
        0.00728349],
       [0.00697907, 0.00812065, 0.0070112 , ..., 0.0076397 , 0.00709155,
        0.00700686],
       [0.00724265, 0.00697465, 0.00728557, ..., 0.00782442, 0.00741567,
        0.00730123],
       ...,
       [0.00723322, 0.00723688, 0.00743928, ..., 0.02232095, 0.0082115 ,
        0.00779819],
       [0.00734297, 0.00706106, 0.00742194, ..., 0.00865569, 0.00848779,
        0.00748583],
       [0.0072495 , 0.0069726 , 0.00730439, ..., 0.00818436, 0.00749522,
        0.0073428 ]])

In [15]:
sm.p_mirna

array([[0.00725825, 0.00698381, 0.00726324, ..., 0.00769989, 0.00736615,
        0.00727008],
       [0.00698737, 0.00798822, 0.00701627, ..., 0.00767483, 0.00710588,
        0.00701514],
       [0.00722935, 0.00698155, 0.0072683 , ..., 0.00784196, 0.00740165,
        0.00728495],
       ...,
       [0.00726631, 0.00727513, 0.00745961, ..., 0.02170023, 0.00821215,
        0.00780294],
       [0.0073352 , 0.00707537, 0.0074058 , ..., 0.0086476 , 0.00836591,
        0.00746676],
       [0.00723791, 0.0069826 , 0.0072879 , ..., 0.00818676, 0.00747681,
        0.00732598]])

In [16]:
sm.p_illumina

array([[0.00727313, 0.00697518, 0.00727846, ..., 0.00767245, 0.00737386,
        0.00728349],
       [0.00697907, 0.00812065, 0.0070112 , ..., 0.0076397 , 0.00709155,
        0.00700686],
       [0.00724265, 0.00697465, 0.00728557, ..., 0.00782442, 0.00741567,
        0.00730123],
       ...,
       [0.00723322, 0.00723688, 0.00743928, ..., 0.02232095, 0.0082115 ,
        0.00779819],
       [0.00734297, 0.00706106, 0.00742194, ..., 0.00865569, 0.00848779,
        0.00748583],
       [0.0072495 , 0.0069726 , 0.00730439, ..., 0.00818436, 0.00749522,
        0.0073428 ]])

# Loading the label

In [32]:
y_illumina = LabelEncoder().fit_transform(df_illumina.drop_duplicates(subset='case_id').loc[:, 'label'].transform(lambda x: str(x)))
y_mirna = LabelEncoder().fit_transform(df_mirna.drop_duplicates(subset='case_id').loc[:, 'label'].transform(lambda x:  str(x)))
y_rna = LabelEncoder().fit_transform(df_rna.drop_duplicates(subset='case_id').loc[:, 'label'].transform(lambda x: str(x)))

In [33]:
y_pred = SpectralClustering(n_clusters=3).fit_predict(sm.p_mirna)



In [34]:
print('illumina', adjusted_rand_score(y_illumina, y_pred))
print('Mirna', adjusted_rand_score(y_mirna, y_pred))
print('RNA:', adjusted_rand_score(y_rna, y_pred))

illumina 0.5158054886960569
Mirna 0.48859281382620695
RNA: 0.529657406271893
