In [1]:
import matplotlib.pyplot as plt
import numpy as np
import json
from sklearn.metrics import adjusted_rand_score, normalized_mutual_info_score

from src.clustering.kernel_kmeans import KernelKMeans
from src.clustering.ward import Ward
from src.graph.graph_generator import GraphGenerator
from src.kernel.kernel import get_all_kernels
from src.dataset.dataset import football, polbooks, polblogs, zachary, news

all_data = [football, polbooks, zachary]
# all_data += news
# all_data = [zachary]

for data in all_data:
    graph, info = data
    print('dataset ' + info['name'] + ' started!')
    print('n_clusters = ' + str(info['k']))
    kmeans_results_ari = {}
    # kmeans_results_nmi = {}
    ward_results_ari = {}
    # ward_results_nmi = {}
    for kernel_class in get_all_kernels():
        kmeans_results_ari[kernel_class.name] = {}
        # kmeans_results_nmi[kernel_class.name] = {}
        ward_results_ari[kernel_class.name] = {}
        # ward_results_nmi[kernel_class.name] = {}
        nodes = graph.nodes
        edges = graph.edges
        kmeans_ari_s = []
        # kmeans_nmi_s = []
        ward_ari_s = []
        # ward_nmi_s = []
        kernel = kernel_class(edges)
        for K in kernel.get_Ks():
            kmeans_prediction = KernelKMeans(n_clusters=info['k'], random_state=0).fit_predict(K)
            kmeans_ari = adjusted_rand_score(nodes, kmeans_prediction)
            kmeans_ari_s.append(kmeans_ari)
            # kmeans_nmi = normalized_mutual_info_score(nodes, kmeans_prediction)
            # kmeans_nmi_s.append(kmeans_nmi)

            ward_prediction = Ward(n_clusters=info['k']).fit_predict(K)
            ward_ari = adjusted_rand_score(nodes, ward_prediction)
            ward_ari_s.append(ward_ari)
            # ward_nmi = normalized_mutual_info_score(nodes, ward_prediction)
            # ward_nmi_s.append(ward_nmi)

        kmeans_results_ari[kernel_class.name] = kmeans_ari_s
        # kmeans_results_nmi[kernel_class.name] = kmeans_nmi_s
        ward_results_ari[kernel_class.name] = ward_ari_s
        # ward_results_nmi[kernel_class.name] = ward_nmi_s
        print(info['name'] + ' K-means ari ' + kernel_class.name + ' ' 
              + str(np.mean(kmeans_results_ari[kernel_class.name])))
        # print(info['name'] + ' K-means nmi ' + kernel_class.name + ' ' 
        #       + str(np.mean(kmeans_results_nmi[kernel_class.name])))
        print(info['name'] + ' Ward ari ' + kernel_class.name + ' ' 
              + str(np.mean(ward_results_ari[kernel_class.name])))
        # print(info['name'] + ' Ward nmi ' + kernel_class.name + ' ' 
        #       + str(np.mean(ward_results_nmi[kernel_class.name])))
    with open('src/results/my/datasets/ari/' + 'K_means_' + info['name'] + '_ari.txt', 'w') as file:
        file.write(json.dumps(kmeans_results_ari))
    with open('src/results/my/datasets/ari/' + 'Ward_' + info['name'] + '_ari.txt', 'w') as file:
        file.write(json.dumps(ward_results_ari))
    # with open('src/results/my/datasets/nmi/' + 'K_means_' + info['name'] + '_nmi.txt', 'w') as file:
    #     file.write(json.dumps(kmeans_results_nmi))
    # with open('src/results/my/datasets/nmi/' + 'Ward_' + info['name'] + '_nmi.txt', 'w') as file:
        # file.write(json.dumps(ward_results_nmi))
    print('dataset ' + info['name'] + ' finished!')

dataset football started!
n_clusters = 12


football K-means ari PlainWalk -0.0140042750261
football Ward ari PlainWalk 0.898433931748


football K-means ari LogPlainWalk 0.650043664517
football Ward ari LogPlainWalk 0.896650009791


football K-means ari Communicability 0.258494910251
football Ward ari Communicability 0.808187623316


football K-means ari LogCommunicability 0.258494910251
football Ward ari LogCommunicability 0.808187623316


football K-means ari Forest 0.00214699312187
football Ward ari Forest 0.89448553996


football K-means ari LogForest 0.00214699312187
football Ward ari LogForest 0.89448553996


football K-means ari Heat 0.292455853023
football Ward ari Heat 0.841575399192


football K-means ari LogHeat 0.292455853023
football Ward ari LogHeat 0.841575399192


football K-means ari SigmoidCommuteTime 0.181941217566
football Ward ari SigmoidCommuteTime 0.896650009791
dataset football finished!
dataset polbooks started!
n_clusters = 3


polbooks K-means ari PlainWalk -0.00519776108604
polbooks Ward ari PlainWalk 0.437153872107


polbooks K-means ari LogPlainWalk 0.564095379817
polbooks Ward ari LogPlainWalk 0.70288048497


polbooks K-means ari Communicability 0.106894412127
polbooks Ward ari Communicability 0.324358090428


polbooks K-means ari LogCommunicability 0.106894412127
polbooks Ward ari LogCommunicability 0.324358090428


polbooks K-means ari Forest 0.186359912778
polbooks Ward ari Forest 0.625136066334


polbooks K-means ari LogForest 0.186359912778
polbooks Ward ari LogForest 0.625136066334


polbooks K-means ari Heat 0.450394325539
polbooks Ward ari Heat 0.659769431356


polbooks K-means ari LogHeat 0.450394325539
polbooks Ward ari LogHeat 0.659769431356


polbooks K-means ari SigmoidCommuteTime 0.681502327056
polbooks Ward ari SigmoidCommuteTime 0.659846736512
dataset polbooks finished!
dataset zachary started!
n_clusters = 2


zachary K-means ari PlainWalk -0.00667219024888
zachary Ward ari PlainWalk 0.472484613554


zachary K-means ari LogPlainWalk 0.97521211397
zachary Ward ari LogPlainWalk 0.976566857093


zachary K-means ari Communicability 0.344328483447
zachary Ward ari Communicability 0.469781533323


zachary K-means ari LogCommunicability 0.344328483447
zachary Ward ari LogCommunicability 0.469781533323


zachary K-means ari Forest 0.098289016226
zachary Ward ari Forest 0.119943275417


zachary K-means ari LogForest 0.098289016226
zachary Ward ari LogForest 0.119943275417


zachary K-means ari Heat 0.36614452661
zachary Ward ari Heat 0.298531833802


zachary K-means ari LogHeat 0.36614452661
zachary Ward ari LogHeat 0.298531833802


zachary K-means ari SigmoidCommuteTime 0.827657939297
zachary Ward ari SigmoidCommuteTime 0.72689327858
dataset zachary finished!


In [5]:
from src.kernel.kernel import Kernel
import json
import matplotlib.pyplot as plt

Ward_name = 'Ward'
Kmeans_name = 'K_means'
with open('src/results/my/datasets/ari/' + Kmeans_name +'_football_ari.txt', 'r') as file:
    results = json.loads(file.read())
    print(results)
    plt.figure(figsize=(10,10)) 
    for transformation_name, resulting_score in results.items():
        # buf = np.sort(resulting_score, kind='mergesort').tolist()
        # buf.reverse()
        plt.plot(Kernel.default_params, resulting_score,
                 label=transformation_name)
    plt.title('Title')
    plt.legend()
    plt.savefig('src/results/my/datasets/img/' + Kmeans_name + '_football_ari' + '.png')
    # plt.show()

{'PlainWalk': [0.031209326810705224, 0.008007451551282279, -0.008481855861564974, -0.011084129004621905, 0.007869845735078897, -0.017590420399125204, 0.0034865097922978107, -0.0030663057475008347, 0.027565783803665606, 0.016561362317620773, -9.823883978350951e-05, 0.007720982775715777, -0.008853133292428294, -0.007414516195133953, -0.012521819446488894, 0.011358505085188784, -0.006945195614553472, 0.009397825660322793, 0.0014728967310356598, -0.00410840538100442, 0.019152767277643763, 0.013983323123149182, 0.0042118248833109264, 0.0011413649170717653, -0.0010186009888851696, 0.015079617371549432, -0.0024949050267984714, 0.019447766428832152, 0.009378130758124192, 0.021000564064737274, 0.0063065277223926285, 0.007497233040331507, -0.007421267361070235, 0.0208449005245046, -0.00042790522261950564, -0.001079618673797379, -2.1406123525154384e-05, -0.0184173147681282], 'LogPlainWalk': [0.6880099933511978, 0.7481866364587949, 0.6244518036729573, 0.6890775929564003, 0.8966500097910517, 0.7558

In [None]:
files = ['football', 'polbooks', 'zachary']
