In [1]:
import sys
sys.path.insert(0, '../../')
import ccal
%matplotlib inline
%config InlineBackend.figure_formats = {'svg',}

import numpy as np
import pandas as pd

from sklearn.cluster import AgglomerativeClustering
from scipy.cluster.hierarchy import linkage, fcluster, cophenet
from scipy.spatial.distance import pdist

<20:14:41> Checking dependencies ...
<20:14:41> Using the following packages:
<20:14:41> 	matplotlib (v1.5.1)
<20:14:41> 	numpy (v1.10.4)
<20:14:41> 	pandas (v0.18.0)
<20:14:41> 	rpy2 (v2.7.9)
<20:14:41> 	scikit-learn (v0.17.1)
<20:14:41> 	scipy (v0.17.0)
<20:14:41> 	seaborn (v0.7.0)


In [2]:
# Cluster counts to evaluate: kmin, kmin + kincrement, ... up to and including kmax.
kmin, kmax, kincrement = 1, 5, 1

# Number of clustering runs combined into one consensus per cluster count.
nclustering = 3

# Input matrix read from a GCT file (the file name suggests an H matrix for k=9).
h_matrix = ccal.support.read_gct(
    '../../../hematopoietic_cancer/result/ccle_hema/k9/ccle_hema_k_9_H.gct'
)

In [12]:
# Local aliases used by compare_matrices below:
# - information_coefficient is its default comparison function;
# - print_log is its progress logger (plain print in this notebook).
information_coefficient = ccal.information.information_coefficient
print_log = print

def compare_matrices(matrix1, matrix2, axis=0, function=information_coefficient, is_distance=False):
    """
    Compare every vector of `matrix1` against every vector of `matrix2`.

    :param matrix1: pandas DataFrame
    :param matrix2: pandas DataFrame
    :param axis: int; 0 compares rows, 1 compares columns (both matrices are
        transposed before the comparison)
    :param function: callable taking two pandas Series and returning a number
    :param is_distance: bool; if True, return `1 - association` instead of the
        raw association
    :return: pandas DataFrame of floats; its index holds the compared vectors of
        `matrix1` and its columns the compared vectors of `matrix2`
    """
    # Use `==`, not `is`: identity comparison against an int literal is an
    # interpreter detail and is False for e.g. numpy integers.
    if axis == 1:
        matrix1 = matrix1.T
        matrix2 = matrix2.T

    nrow = matrix1.shape[0]
    ncol = matrix2.shape[0]

    # Fill a positional float buffer and wrap it once at the end; this avoids the
    # `.ix` indexer (removed from pandas) and slow cell-by-cell DataFrame writes.
    values = np.empty((nrow, ncol), dtype=float)
    for i, (i1, r1) in enumerate(matrix1.iterrows()):
        print_log('Comparing {} ({}/{}) vs. ...'.format(i1, i + 1, nrow))
        for j, (_, r2) in enumerate(matrix2.iterrows()):
            values[i, j] = function(r1, r2)
    compared_matrix = pd.DataFrame(values, index=matrix1.index, columns=matrix2.index)

    if is_distance:
        print_log('Converting association to is_distance (is_distance = 1 - association) ...')
        compared_matrix = 1 - compared_matrix

    return compared_matrix

In [13]:
# For each cluster count k: cluster the samples (columns of h_matrix) nclustering
# times, turn the runs into a co-occurrence matrix, cluster that hierarchically,
# and record the final labels together with the cophenetic correlation.
sample_names = list(h_matrix.columns)
ks = range(kmin, kmax + 1, kincrement)
results = pd.DataFrame(index=ks, columns=sample_names + ['cophenetic_correlation'])

standardized_h_matrix = ccal.support.standardize_pandas_object(h_matrix)
standardized_and_clipped_h_matrix = standardized_h_matrix.clip(-3, 3)
# axis=1: compare columns (samples) against each other
compared_matrix = compare_matrices(standardized_and_clipped_h_matrix, standardized_and_clipped_h_matrix, axis=1)

nsample = compared_matrix.shape[0]
for k in ks:
    # One row of cluster labels per clustering run.
    # NOTE(review): AgglomerativeClustering is fit on identical data each time and
    # appears to have no random component, so all runs presumably agree and the
    # co-occurrence matrix is binary. Consider resampling per run. TODO confirm.
    clustering_labels = np.empty((nclustering, nsample), dtype=int)
    for i in range(nclustering):
        ward = AgglomerativeClustering(n_clusters=k)
        ward.fit(compared_matrix)
        clustering_labels[i, :] = ward.labels_

    # Fraction of runs in which each pair of samples shares a cluster. The outer
    # equality replaces the per-cell Python loop (and its `is` / `.ix` usage).
    cooccurence_counts = np.zeros((nsample, nsample))
    for run_labels in clustering_labels:
        cooccurence_counts += run_labels[:, None] == run_labels[None, :]
    cooccurences = pd.DataFrame(cooccurence_counts / nclustering,
                                index=compared_matrix.index,
                                columns=compared_matrix.columns)

    distances = 1 - cooccurences
    # NOTE(review): `distances` is a square matrix; scipy's linkage/pdist treat a
    # 2-D input as observation vectors, not as precomputed distances. Kept as in
    # the original; pass squareform(distances) if precomputed distances were
    # intended. TODO confirm.
    link = linkage(distances, method='ward')
    labels = fcluster(link, k, criterion='maxclust')
    cophenetic_correlation = cophenet(link, pdist(distances))[0]
    results.loc[k, sample_names] = labels
    results.loc[k, 'cophenetic_correlation'] = cophenetic_correlation

results

Comparing A3KAW_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE (1/166) vs. ...
Comparing A4FUK_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE (2/166) vs. ...
Comparing ALLSIL_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE (3/166) vs. ...
Comparing AML193_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE (4/166) vs. ...
Comparing AMO1_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE (5/166) vs. ...
Comparing BCP1_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE (6/166) vs. ...
Comparing BDCM_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE (7/166) vs. ...
Comparing BL41_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE (8/166) vs. ...
Comparing BL70_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE (9/166) vs. ...
Comparing BV173_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE (10/166) vs. ...
Comparing CA46_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE (11/166) vs. ...
Comparing CI1_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE (12/166) vs. ...
Comparing CMK_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE (13/166) vs. ...
Comparing CMLT1_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE (14/166) vs. ...
Comparing DAUDI_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE (15/166) vs. .

Unnamed: 0,A3KAW_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,A4FUK_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,ALLSIL_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,AML193_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,AMO1_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,BCP1_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,BDCM_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,BL41_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,BL70_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,BV173_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,...,SUPT11_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,SUPT1_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,TALL1_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,TF1_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,THP1_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,TO175T_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,TOLEDO_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,U266B1_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,U937_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,WSUDLCL2_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE
A3KAW_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,0.890544,0.786964,-0.163344,-0.175238,0.668492,0.869617,-0.211889,-0.178949,-0.202694,-0.178307,...,0.210828,-0.146586,-0.166222,-0.212229,-0.188871,0.288938,-0.274548,0.748788,-0.200004,-0.254445
A4FUK_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,0.786964,0.817255,-0.156255,-0.132414,0.642574,0.722474,-0.119722,-0.182060,-0.187354,-0.159538,...,-0.156386,-0.142303,-0.161957,-0.168078,-0.170595,-0.136060,-0.254481,0.678573,-0.150485,-0.233603
ALLSIL_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,-0.163344,-0.156255,0.754743,0.123304,-0.144550,-0.159965,-0.132696,-0.162177,0.247002,-0.179169,...,0.588236,0.661440,0.682985,-0.140683,-0.175488,-0.133060,-0.183835,-0.151221,0.120236,-0.150427
AML193_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,-0.175238,-0.132414,0.123304,0.788557,-0.152896,-0.224802,-0.126899,-0.209823,-0.225861,-0.134439,...,-0.187444,-0.155448,-0.145684,-0.127299,0.622999,-0.162680,-0.269394,-0.178782,0.740799,-0.231925
AMO1_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,0.668492,0.642574,-0.144550,-0.152896,0.768953,0.707732,-0.119336,0.129728,-0.135188,-0.156899,...,0.177389,-0.130232,-0.151643,-0.168631,-0.195647,0.177749,-0.180471,0.782846,-0.167985,-0.206238
BCP1_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,0.869617,0.722474,-0.159965,-0.224802,0.707732,0.921583,-0.243404,-0.193887,-0.223186,-0.177182,...,0.359290,0.158913,0.147367,-0.236080,-0.208029,0.503738,-0.282586,0.815901,-0.227683,-0.253689
BDCM_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,-0.211889,-0.119722,-0.132696,-0.126899,-0.119336,-0.243404,0.703614,-0.188265,-0.185047,-0.142129,...,-0.250067,-0.172875,-0.159343,-0.150608,-0.195750,-0.147596,-0.172802,-0.128717,-0.129965,-0.150059
BL41_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,-0.178949,-0.182060,-0.162177,-0.209823,0.129728,-0.193887,-0.188265,0.748371,0.676684,-0.110583,...,0.142313,0.128546,-0.145370,-0.158004,-0.205447,-0.128624,0.602885,-0.153591,-0.187396,0.607194
BL70_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,-0.202694,-0.187354,0.247002,-0.225861,-0.135188,-0.223186,-0.185047,0.676684,0.849124,-0.162809,...,0.268318,0.279121,0.270454,-0.162267,-0.213946,-0.142469,0.636792,-0.165869,-0.204305,0.718200
BV173_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,-0.178307,-0.159538,-0.179169,-0.134439,-0.156899,-0.177182,-0.142129,-0.110583,-0.162809,0.655264,...,-0.162384,-0.150669,-0.150490,-0.134542,-0.174724,0.148864,0.546415,-0.149287,-0.144355,-0.180107
