In [3]:
import matplotlib.pyplot as plt
import numpy as np
import json
from sklearn.metrics import adjusted_rand_score, normalized_mutual_info_score

from src.clustering.kernel_kmeans import KernelKMeans
from src.clustering.ward import Ward
from src.graph.graph_generator import GraphGenerator
from src.kernel.kernel import get_all_kernels
from src.dataset.dataset import football, polbooks, polblogs, zachary, news

# Benchmark every kernel returned by get_all_kernels() on each dataset:
# cluster every kernel matrix with KernelKMeans and Ward, score each
# partition with ARI and NMI, print the mean score per kernel, and store the
# per-kernel score lists as JSON (one file per algorithm/metric/dataset).
#
# NOTE(review): KernelKMeans may be randomly initialised and no seed is set
# in this cell -- confirm whether reruns are expected to reproduce the
# numbers below exactly.
all_data = [football, polbooks, zachary]
# all_data += news

RESULTS_DIR = 'src/results/my/datasets/'

for data in all_data:
    graph, info = data
    print('dataset ' + info['name'] + ' started!')
    print('n_clusters = ' + str(info['k']))

    # These depend only on the dataset, so read them once per dataset rather
    # than once per kernel.
    # `nodes` is passed to the sklearn metrics as the reference labelling --
    # presumably the ground-truth community of each node; confirm against
    # the dataset loader.
    nodes = graph.nodes
    edges = graph.edges

    # kernel name -> list of scores, one score per matrix from get_Ks().
    kmeans_results_ari = {}
    kmeans_results_nmi = {}
    ward_results_ari = {}
    ward_results_nmi = {}

    for kernel_class in get_all_kernels():
        kmeans_ari_s = []
        kmeans_nmi_s = []
        ward_ari_s = []
        ward_nmi_s = []

        kernel = kernel_class(edges)
        for K in kernel.get_Ks():
            kmeans_prediction = KernelKMeans(n_clusters=info['k']).fit_predict(K)
            kmeans_ari_s.append(adjusted_rand_score(nodes, kmeans_prediction))
            kmeans_nmi_s.append(normalized_mutual_info_score(nodes, kmeans_prediction))

            ward_prediction = Ward(n_clusters=info['k']).fit_predict(K)
            ward_ari_s.append(adjusted_rand_score(nodes, ward_prediction))
            ward_nmi_s.append(normalized_mutual_info_score(nodes, ward_prediction))

        kmeans_results_ari[kernel_class.name] = kmeans_ari_s
        kmeans_results_nmi[kernel_class.name] = kmeans_nmi_s
        ward_results_ari[kernel_class.name] = ward_ari_s
        ward_results_nmi[kernel_class.name] = ward_nmi_s

        # Summary lines, in the same order and format as before:
        # "<dataset> <algorithm> <metric> <kernel> <mean score>".
        for algorithm, metric, scores in (
            ('K-means', 'ari', kmeans_ari_s),
            ('K-means', 'nmi', kmeans_nmi_s),
            ('Ward', 'ari', ward_ari_s),
            ('Ward', 'nmi', ward_nmi_s),
        ):
            print(info['name'] + ' ' + algorithm + ' ' + metric + ' '
                  + kernel_class.name + ' ' + str(np.mean(scores)))

    # One JSON file per (metric, algorithm), written in the original order:
    # <RESULTS_DIR><metric>/<algorithm>_<dataset>_<metric>.txt
    for metric, algorithm, results in (
        ('ari', 'K_means', kmeans_results_ari),
        ('ari', 'Ward', ward_results_ari),
        ('nmi', 'K_means', kmeans_results_nmi),
        ('nmi', 'Ward', ward_results_nmi),
    ):
        path = (RESULTS_DIR + metric + '/' + algorithm + '_'
                + info['name'] + '_' + metric + '.txt')
        with open(path, 'w') as file:
            json.dump(results, file)

    print('dataset ' + info['name'] + ' finished!')

dataset football started!
n_clusters = 12


football K-means ari PlainWalk -0.0011242255411
football K-means nmi PlainWalk 0.244899469058
football Ward ari PlainWalk 0.898433931748
football Ward nmi PlainWalk 0.925414357428


football K-means ari LogPlainWalk 0.679992293874
football K-means nmi LogPlainWalk 0.772912925118
football Ward ari LogPlainWalk 0.896650009791
football Ward nmi LogPlainWalk 0.92419578687


football K-means ari Communicability 0.332443421502
football K-means nmi Communicability 0.53117871166
football Ward ari Communicability 0.808187623316
football Ward nmi Communicability 0.874302487254


football K-means ari LogCommunicability 0.416881078113
football K-means nmi LogCommunicability 0.593152516114
football Ward ari LogCommunicability 0.808187623316
football Ward nmi LogCommunicability 0.874302487254


football K-means ari Forest 0.00110986642525
football K-means nmi Forest 0.24804301595
football Ward ari Forest 0.89448553996
football Ward nmi Forest 0.922945168692


football K-means ari LogForest 0.00298002824236
football K-means nmi LogForest 0.248529125352
football Ward ari LogForest 0.89448553996
football Ward nmi LogForest 0.922945168692


football K-means ari Heat 0.34812906057
football K-means nmi Heat 0.533529250297
football Ward ari Heat 0.841575399192
football Ward nmi Heat 0.893600303894


football K-means ari LogHeat 0.289783128822
football K-means nmi LogHeat 0.446300304348
football Ward ari LogHeat 0.841575399192
football Ward nmi LogHeat 0.893600303894


football K-means ari SigmoidCommuteTime 0.144293993568
football K-means nmi SigmoidCommuteTime 0.368653813779
football Ward ari SigmoidCommuteTime 0.896650009791
football Ward nmi SigmoidCommuteTime 0.92419578687
dataset football finished!
dataset polbooks started!
n_clusters = 3


polbooks K-means ari PlainWalk 0.0331940911566
polbooks K-means nmi PlainWalk 0.0645720813427
polbooks Ward ari PlainWalk 0.437153872107
polbooks Ward nmi PlainWalk 0.444334976393


polbooks K-means ari LogPlainWalk 0.621438567793
polbooks K-means nmi LogPlainWalk 0.547344951514
polbooks Ward ari LogPlainWalk 0.70288048497
polbooks Ward nmi LogPlainWalk 0.605314795408


polbooks K-means ari Communicability 0.158883522085
polbooks K-means nmi Communicability 0.284265162894
polbooks Ward ari Communicability 0.324358090428
polbooks Ward nmi Communicability 0.39008244405


polbooks K-means ari LogCommunicability 0.153077075139
polbooks K-means nmi LogCommunicability 0.270879233115
polbooks Ward ari LogCommunicability 0.324358090428
polbooks Ward nmi LogCommunicability 0.39008244405


polbooks K-means ari Forest 0.259668227868
polbooks K-means nmi Forest 0.235893562697
polbooks Ward ari Forest 0.625136066334
polbooks Ward nmi Forest 0.560194730108


polbooks K-means ari LogForest 0.26184994569
polbooks K-means nmi LogForest 0.240862005946
polbooks Ward ari LogForest 0.625136066334
polbooks Ward nmi LogForest 0.560194730108


polbooks K-means ari Heat 0.45684089062
polbooks K-means nmi Heat 0.40358227787
polbooks Ward ari Heat 0.659769431356
polbooks Ward nmi Heat 0.583850552505


polbooks K-means ari LogHeat 0.457871231844
polbooks K-means nmi LogHeat 0.399748130691
polbooks Ward ari LogHeat 0.659769431356
polbooks Ward nmi LogHeat 0.583850552505


polbooks K-means ari SigmoidCommuteTime 0.629591635137
polbooks K-means nmi SigmoidCommuteTime 0.552265327919
polbooks Ward ari SigmoidCommuteTime 0.659846736512
polbooks Ward nmi SigmoidCommuteTime 0.5664190318
dataset polbooks finished!
dataset zachary started!
n_clusters = 2


zachary K-means ari PlainWalk 0.00634022316262
zachary K-means nmi PlainWalk 0.0275992991591
zachary Ward ari PlainWalk 0.472484613554
zachary Ward nmi PlainWalk 0.509664003718


zachary K-means ari LogPlainWalk 0.84658129246
zachary K-means nmi LogPlainWalk 0.830870765443
zachary Ward ari LogPlainWalk 0.976566857093
zachary Ward nmi LogPlainWalk 0.97982072783


zachary K-means ari Communicability 0.147559200456
zachary K-means nmi Communicability 0.16024002945
zachary Ward ari Communicability 0.469781533323
zachary Ward nmi Communicability 0.479491903522


zachary K-means ari LogCommunicability 0.1571909914
zachary K-means nmi LogCommunicability 0.166661097981
zachary Ward ari LogCommunicability 0.469781533323
zachary Ward nmi LogCommunicability 0.479491903522


zachary K-means ari Forest 0.078740550755
zachary K-means nmi Forest 0.0965924324517
zachary Ward ari Forest 0.119943275417
zachary Ward nmi Forest 0.242497336648


zachary K-means ari LogForest 0.0673860504609
zachary K-means nmi LogForest 0.0841026060842
zachary Ward ari LogForest 0.119943275417
zachary Ward nmi LogForest 0.242497336648


zachary K-means ari Heat 0.307382283837
zachary K-means nmi Heat 0.310525897861
zachary Ward ari Heat 0.298531833802
zachary Ward nmi Heat 0.394456946089


zachary K-means ari LogHeat 0.318302703381
zachary K-means nmi LogHeat 0.315144911692
zachary Ward ari LogHeat 0.298531833802
zachary Ward nmi LogHeat 0.394456946089


zachary K-means ari SigmoidCommuteTime 0.56235600135
zachary K-means nmi SigmoidCommuteTime 0.564452565009
zachary Ward ari SigmoidCommuteTime 0.72689327858
zachary Ward nmi SigmoidCommuteTime 0.76508959548
dataset zachary finished!


In [9]:
from src.kernel.kernel import Kernel

# Plot the stored K-means NMI scores for the polbooks dataset: one curve per
# kernel, drawn against Kernel.default_params on the x-axis.
with open('src/results/my/datasets/nmi/K_means_polbooks_nmi.txt', 'r') as file:
    results = json.load(file)

# The file is only needed for parsing, so everything below runs after it has
# been closed.
print(results['LogPlainWalk'])

fig, ax = plt.subplots(figsize=(10, 10))
for kernel_name, scores in results.items():
    ax.plot(Kernel.default_params, scores, label=kernel_name)

# The title is spelled out here instead of being read from `kernel_class`:
# that name exists only as a leftover loop variable of the benchmark cell
# (NameError on a fresh kernel) and would name a single kernel even though
# the figure shows all of them.
ax.set_title('K-means NMI on polbooks')
ax.set_xlabel('kernel parameter')
ax.set_ylabel('NMI')
ax.legend()
plt.show()

[0.5743019767841789, 0.5328509885596608, 0.5556612695352728, 0.5725050109575065, 0.5434606061016177, 0.541043494249718, 0.5231588812045306, 0.5527834181253501, 0.5809479415564024, 0.541043494249718, 0.541043494249718, 0.5341861009737661, 0.5341861009737661, 0.5332563534780259, 0.541043494249718, 0.5296399216841381, 0.513198625549913, 0.541043494249718, 0.5725050109575065, 0.5827448728791333, 0.5278553576273091, 0.5827448728791333, 0.5472400895493607, 0.5277141556414882, 0.5550974100832172, 0.5683585183745067, 0.5827448728791333, 0.581444703800529, 0.5245149599570552, 0.5245149599570553, 0.5169760859761391, 0.597596747530739, 0.5416809295324339, 0.5245149599570552, 0.5416809295324339, 0.5136657322781998, 0.5365371004601733, 0.5636212209383146]
