In [6]:
import matplotlib.pyplot as plt
import numpy as np
import json
from sklearn.metrics import adjusted_rand_score

from src.clustering.ward import Ward
from src.clustering.kernel_kmeans import KernelKMeans
from src.graph.graph_generator import GraphGenerator
from src.kernel.kernel import get_all_kernels

# Kernel k-means sweep: for every kernel class, cluster each matrix yielded by
# kernel.get_Ks() on freshly generated graphs and plot the averaged ARI.
n_clusters = 3
generator = GraphGenerator(100, n_clusters, 0.3, 0.1)

for kernel_class in get_all_kernels():
    # One row per generated graph, one entry per matrix yielded by get_Ks().
    per_graph_scores = []
    for graph_idx in range(0, 20):
        graph = generator.generate_graph()
        nodes = graph.nodes
        edges = graph.edges
        kernel = kernel_class(edges)
        per_graph_scores.append([
            adjusted_rand_score(nodes, KernelKMeans(n_clusters).fit_predict(K))
            for K in kernel.get_Ks()
        ])

    # Transpose so each column gathers the scores of one kernel matrix across
    # graphs, then keep the absolute value of the column mean.
    score_columns = [list(column) for column in zip(*per_graph_scores)]
    mean_scores = [abs(np.mean(column)) for column in score_columns]
    results = {kernel_class.name: mean_scores}

    # Saving the results is currently disabled:
    # with open('src/results/my/generated_graphs/' + 'Kmeans_'
    #           + kernel_class.name + '.txt', 'w') as file:
    #     file.write(json.dumps(results))

    plt.figure(figsize=(10, 10))
    kernel_name = kernel_class.name
    print(kernel_name, 'best result: ', np.max(mean_scores),
          'mean result: ', np.mean(mean_scores))
    plt.plot(kernel_class.default_params, mean_scores, label=kernel_name)
    plt.title('Kmeans ' + kernel_class.name + ', n_clusters=' + str(n_clusters))
    plt.legend()
    plt.show()

PlainWalk best result:  0.00673930032385 mean result:  0.00265168740324


KeyboardInterrupt: 

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import json
from sklearn.metrics import adjusted_rand_score

from src.clustering.ward import Ward
from src.clustering.kernel_kmeans import KernelKMeans
from src.graph.graph_generator import GraphGenerator
from src.kernel.kernel import get_all_kernels

# Ward sweep: for every kernel class, cluster each matrix yielded by
# kernel.get_Ks() on freshly generated graphs and plot the averaged ARI.
n_clusters = 3
generator = GraphGenerator(100, n_clusters, 0.3, 0.05)

for kernel_class in get_all_kernels():
    # One row per generated graph, one entry per matrix yielded by get_Ks().
    per_graph_scores = []
    for graph_idx in range(0, 10):
        graph = generator.generate_graph()
        nodes = graph.nodes
        edges = graph.edges
        kernel = kernel_class(edges)
        per_graph_scores.append([
            adjusted_rand_score(nodes, Ward(n_clusters).fit_predict(K))
            for K in kernel.get_Ks()
        ])

    # Transpose to per-matrix columns and average each column over the graphs.
    score_columns = [list(column) for column in zip(*per_graph_scores)]
    mean_scores = [abs(np.mean(column)) for column in score_columns]
    results = {kernel_class.name: mean_scores}

    # Saving the results is currently disabled:
    # with open('src/results/my/generated_graphs/' + 'Kmeans_'
    #           + kernel_class.name + '.txt', 'w') as file:
    #     file.write(json.dumps(results))

    plt.figure(figsize=(10, 10))
    kernel_name = kernel_class.name
    print(kernel_name, 'best result: ', np.max(mean_scores),
          'mean result: ', np.mean(mean_scores))
    plt.plot(kernel_class.default_params, mean_scores, label=kernel_name)
    plt.title('Ward ' + kernel_class.name + ', n_clusters=' + str(n_clusters))
    plt.legend()
    plt.show()

PlainWalk best result:  0.972866729887 mean result:  0.9527858446


LogPlainWalk best result:  0.984850756344 mean result:  0.976602647077


Communicability best result:  0.997015482757 mean result:  0.631347738062


LogCommunicability best result:  0.996970049397 mean result:  0.906577448737


KeyboardInterrupt: 

In [5]:
import matplotlib.pyplot as plt
import numpy as np
import json
from sklearn.metrics import adjusted_rand_score

from sklearn.cluster.k_means_ import k_means
from sklearn.cluster import SpectralClustering
# from sklearn.cluster import AgglomerativeClustering
# from src.clustering.spectral_clustering import SpectralClustering
# from src.clustering.vanilla_kernel_kmeans import VanillaKernelKMeans
from src.clustering.ward import Ward
from src.clustering.kernel_kmeans import KernelKMeans
from src.graph.graph_generator import GraphGenerator
from src.kernel.kernel import get_all_kernels

# sklearn k_means sweep over the non-logarithmic kernels: every matrix yielded
# by kernel.get_Ks() is clustered on freshly generated graphs and the averaged
# ARI is plotted against kernel_class.default_params.

# Kernel names that are skipped in this run.
log_kernels = ['LogPlainWalk', 'LogCommunicability', 'LogForest', 'LogHeat', 'LogSigmoidCommuteTime']

n_clusters = 3
generator = GraphGenerator(100, n_clusters, 0.3, 0.1)
graphs_num = 10

for kernel_class in get_all_kernels():
    if kernel_class.name in log_kernels:
        continue

    # One row per generated graph, one entry per matrix yielded by get_Ks().
    all_scores = []
    for graph_idx in range(graphs_num):
        graph = generator.generate_graph()
        nodes = graph.nodes
        edges = graph.edges
        kernel = kernel_class(edges)
        scores = []
        for K in kernel.get_Ks():
            # Alternative that was tried here (kept for reference):
            # prediction = SpectralClustering(n_clusters, random_state=0,
            #                                 affinity='precomputed').fit_predict(K)
            # NOTE(review): `precompute_distances` is only accepted by older
            # scikit-learn releases - confirm against the pinned version.
            _, prediction, _ = k_means(K, n_clusters,
                                       precompute_distances=True, random_state=0)
            scores.append(adjusted_rand_score(nodes, prediction))
        all_scores.append(scores)

    # Transpose to per-matrix columns and keep |mean| of each column.
    all_scores = [list(column) for column in zip(*all_scores)]
    resulting_score = [abs(np.mean(column)) for column in all_scores]
    results = {kernel_class.name: resulting_score}

    # Saving the results is currently disabled:
    # with open('src/results/my/generated_graphs/' + 'Kmeans_'
    #           + kernel_class.name + '.txt', 'w') as file:
    #     file.write(json.dumps(results))

    plt.figure(figsize=(10, 10))
    print(kernel_class.name, 'best result: ', np.max(resulting_score),
          'mean result: ', np.mean(resulting_score))
    plt.plot(kernel_class.default_params, resulting_score, label=kernel_class.name)
    # The labels come from k_means (the SpectralClustering call above is
    # commented out), so the title must name k-means, not spectral clustering.
    plt.title('KMeans ' + kernel_class.name + ', n_clusters=' + str(n_clusters))
    plt.legend()
    plt.show()



PlainWalk best result:  0.982091946275 mean result:  0.980588473568


Communicability best result:  0.99403191548 mean result:  0.518810696993


Forest best result:  0.98815282668 mean result:  0.987998286526


Heat best result:  0.987877303388 mean result:  0.86979000437


SigmoidCommuteTime best result:  0.979429335491 mean result:  0.979429335491


In [2]:
import matplotlib.pyplot as plt
import numpy as np
import json
from sklearn.metrics import adjusted_rand_score

from src.clustering.ward import Ward
from src.clustering.kernel_kmeans import KernelKMeans
from src.graph.graph_generator import GraphGenerator
from src.kernel.kernel import get_all_kernels

# Root folder for the per-experiment result files and images.
RESULTS_DIR = 'src/results/my/generated_graphs/'

# Number of graphs generated per kernel class in every experiment.
graphs_num = 50

# (experiment name, n_nodes, n_clusters, pin, pout). The name is used as the
# results sub-folder and in the start/finish progress messages, so the two
# can no longer drift apart.
EXPERIMENTS = [
    ('100_3_03_01', 100, 3, 0.3, 0.1),
    ('100_2_025_005', 100, 2, 0.25, 0.05),
    ('200_2_03_005', 200, 2, 0.3, 0.05),
]


def run_experiment(experiment_name, n_nodes, n_clusters, pin, pout, graphs_num):
    """Compare Ward and kernel k-means for every kernel on generated graphs.

    For each kernel class returned by ``get_all_kernels()``, ``graphs_num``
    graphs are generated. Every matrix yielded by ``kernel.get_Ks()`` is
    clustered with ``Ward`` and with ``KernelKMeans(random_state=0)``, and the
    prediction is scored with the adjusted Rand index against ``graph.nodes``.
    Scores are averaged over the graphs (absolute value of the mean) per
    matrix position.

    Side effects, per kernel class:
      * writes ``Ward_<kernel>.txt`` and ``KMeans_<kernel>.txt`` (JSON mapping
        the kernel name to the list of averaged scores) into
        ``RESULTS_DIR + experiment_name + '/'``;
      * prints the best averaged score of each algorithm;
      * saves a figure with both score curves to ``img/<kernel>.png`` in that
        folder. The folders are assumed to exist already.

    Args:
        experiment_name: results sub-folder name, also used in the messages.
        n_nodes, n_clusters, pin, pout: forwarded to ``GraphGenerator`` in
            this order.
        graphs_num: number of graphs generated per kernel class.
    """
    folder = RESULTS_DIR + experiment_name + '/'
    generator = GraphGenerator(n_nodes, n_clusters, pin, pout)

    print(experiment_name + ' is starting...')
    for kernel_class in get_all_kernels():
        # One row per generated graph, one entry per matrix from get_Ks().
        ward_all_scores = []
        kmeans_all_scores = []
        for _ in range(graphs_num):
            graph = generator.generate_graph()
            nodes = graph.nodes
            edges = graph.edges
            kernel = kernel_class(edges)
            ward_scores = []
            kmeans_scores = []
            for K in kernel.get_Ks():
                prediction = Ward(n_clusters).fit_predict(K)
                ward_scores.append(adjusted_rand_score(nodes, prediction))

                prediction = KernelKMeans(n_clusters, random_state=0).fit_predict(K)
                kmeans_scores.append(adjusted_rand_score(nodes, prediction))
            ward_all_scores.append(ward_scores)
            kmeans_all_scores.append(kmeans_scores)

        # zip(*rows) transposes to per-matrix columns; keep |mean| per column.
        ward_resulting_score = [abs(np.mean(column))
                                for column in zip(*ward_all_scores)]
        kmeans_resulting_score = [abs(np.mean(column))
                                  for column in zip(*kmeans_all_scores)]

        with open(folder + 'Ward_' + kernel_class.name + '.txt', 'w') as file:
            file.write(json.dumps({kernel_class.name: ward_resulting_score}))
        with open(folder + 'KMeans_' + kernel_class.name + '.txt', 'w') as file:
            file.write(json.dumps({kernel_class.name: kmeans_resulting_score}))

        plt.figure(figsize=(10, 10))
        print('Ward: ', kernel_class.name, 'best result: ',
              np.max(ward_resulting_score))
        plt.plot(kernel_class.default_params, ward_resulting_score, label='Ward')
        print('KMeans: ', kernel_class.name, 'best result: ',
              np.max(kmeans_resulting_score))
        plt.plot(kernel_class.default_params, kmeans_resulting_score,
                 label='Kernel K-means')
        plt.title(kernel_class.name + ', n_clusters=' + str(n_clusters))
        plt.legend()
        plt.savefig(folder + 'img/' + kernel_class.name + '.png')
        # plt.show()
    # Previously two of the three "finished" messages named the wrong
    # experiment; the message is now built from the same name as "starting".
    print(experiment_name + ' has finished!!!')


for experiment_name, n_nodes, n_clusters, pin, pout in EXPERIMENTS:
    run_experiment(experiment_name, n_nodes, n_clusters, pin, pout, graphs_num)

100_3_03_01 is starting...


Ward:  PlainWalk best result:  0.795003938758
KMeans:  PlainWalk best result:  0.0100807155925


Ward:  LogPlainWalk best result:  0.806059467134
KMeans:  LogPlainWalk best result:  0.773163975428


Ward:  Communicability best result:  0.871260602122
KMeans:  Communicability best result:  0.449949373586


Ward:  LogCommunicability best result:  0.937063231645
KMeans:  LogCommunicability best result:  0.809669561343


Ward:  Forest best result:  0.079873641986
KMeans:  Forest best result:  0.00589751759459


Ward:  LogForest best result:  0.796080342748
KMeans:  LogForest best result:  0.716060417643


Ward:  Heat best result:  0.37020053588
KMeans:  Heat best result:  0.140673308301


Ward:  LogHeat best result:  0.885618736725
KMeans:  LogHeat best result:  0.768198807178


Ward:  SigmoidCommuteTime best result:  0.121237764763
KMeans:  SigmoidCommuteTime best result:  0.00169859911234


Ward:  LogSigmoidCommuteTime best result:  0.152983934516
KMeans:  LogSigmoidCommuteTime best result:  0.000474877171193


100_3_03_005 has finished!!!
100_2_025_005 is starting...


Ward:  PlainWalk best result:  0.976878562748
KMeans:  PlainWalk best result:  0.081531259869


Ward:  LogPlainWalk best result:  0.980830432659
KMeans:  LogPlainWalk best result:  0.99199921633


Ward:  Communicability best result:  0.997599764899
KMeans:  Communicability best result:  0.98166294654


Ward:  LogCommunicability best result:  1.0
KMeans:  LogCommunicability best result:  0.988031189684


Ward:  Forest best result:  0.900886483172
KMeans:  Forest best result:  0.0352875921829


Ward:  LogForest best result:  0.991199137963
KMeans:  LogForest best result:  0.985728123767


Ward:  Heat best result:  0.98
KMeans:  Heat best result:  0.8487044873


Ward:  LogHeat best result:  1.0
KMeans:  LogHeat best result:  0.988031163824


Ward:  SigmoidCommuteTime best result:  0.942979174404
KMeans:  SigmoidCommuteTime best result:  0.0293993177696


Ward:  LogSigmoidCommuteTime best result:  0.963600750724
KMeans:  LogSigmoidCommuteTime best result:  0.026267170163


100_2_025_005 has finished!!!
200_2_03_005 is starting...




Ward:  PlainWalk best result:  0.999599990101
KMeans:  PlainWalk best result:  0.000816726860239


Ward:  LogPlainWalk best result:  0.999199980202
KMeans:  LogPlainWalk best result:  1.0


Ward:  Communicability best result:  1.0
KMeans:  Communicability best result:  1.0


Ward:  LogCommunicability best result:  1.0
KMeans:  LogCommunicability best result:  1.0


Ward:  Forest best result:  0.987708300246
KMeans:  Forest best result:  0.0395382482944


Ward:  LogForest best result:  1.0
KMeans:  LogForest best result:  1.0


Ward:  Heat best result:  1.0
KMeans:  Heat best result:  1.0


Ward:  LogHeat best result:  1.0
KMeans:  LogHeat best result:  1.0


Ward:  SigmoidCommuteTime best result:  0.995215966836
KMeans:  SigmoidCommuteTime best result:  0.000141333261953


Ward:  LogSigmoidCommuteTime best result:  0.992072203726
KMeans:  LogSigmoidCommuteTime best result:  0.00040541393666


100_2_025_005 has finished!!!


In [5]:
import matplotlib.pyplot as plt
import numpy as np
import json
from sklearn.metrics import adjusted_rand_score

from src.clustering.ward import Ward
from src.clustering.kernel_kmeans import KernelKMeans
from src.graph.graph_generator import GraphGenerator
from src.kernel.kernel import get_all_kernels

# Baseline check: cluster graph.edges directly (no kernel applied) with Ward
# and kernel k-means and print the ARI of each prediction per graph.
folder_name = '100_3_03_005/'  # not referenced anywhere else in this cell

n_clusters = 4
n_nodes = 100
pin = 0.2
pout = 0.05
generator = GraphGenerator(n_nodes, n_clusters, pin, pout)

graphs_num = 10
for graph_idx in range(graphs_num):
    graph = generator.generate_graph()
    nodes = graph.nodes
    edges = graph.edges
    # Fit both models first, then report, so the output order stays Ward, KMeans.
    labelled_predictions = [
        ('Ward: ', Ward(n_clusters).fit_predict(edges)),
        ('KMeans: ', KernelKMeans(n_clusters).fit_predict(edges)),
    ]
    for label, prediction in labelled_predictions:
        print(label, adjusted_rand_score(nodes, prediction))

Ward:  0.604319149986
KMeans:  0.264132313009


Ward:  0.559538254681
KMeans:  0.107361370516


Ward:  0.471243042672
KMeans:  0.151679306609


Ward:  0.160349987088
KMeans:  0.136301478772


Ward:  0.326571333555
KMeans:  0.217849543948
Ward:  0.386435554716
KMeans:  0.0230280962456


Ward:  0.41291385913
KMeans:  0.258800231042
Ward:  0.509659044674
KMeans:  0.437222895668


Ward:  0.45662591249
KMeans:  0.0977369305267
Ward:  0.559565539373
KMeans:  0.209140840192


In [4]:
from sklearn.cluster import SpectralClustering

import matplotlib.pyplot as plt
import numpy as np
import json
from sklearn.metrics import adjusted_rand_score

from src.clustering.ward import Ward
from src.clustering.kernel_kmeans import KernelKMeans
from src.graph.graph_generator import GraphGenerator
from src.kernel.kernel import get_all_kernels

# Baseline check: cluster graph.edges directly (no kernel applied) with Ward,
# kernel k-means and sklearn SpectralClustering, printing each ARI per graph.
folder_name = '100_3_03_005/'  # not referenced anywhere else in this cell

n_clusters = 4
n_nodes = 100
pin = 0.2
pout = 0.05
generator = GraphGenerator(n_nodes, n_clusters, pin, pout)

graphs_num = 10
for graph_idx in range(graphs_num):
    graph = generator.generate_graph()
    nodes = graph.nodes
    edges = graph.edges
    # All three models are fitted before anything is printed.
    spectral = SpectralClustering(n_clusters, random_state=0,
                                  affinity='precomputed')
    labelled_predictions = [
        ('Ward: ', Ward(n_clusters).fit_predict(edges)),
        ('KKMeans: ', KernelKMeans(n_clusters, random_state=0).fit_predict(edges)),
        ('SpectralClustering: ', spectral.fit_predict(edges)),
    ]
    for label, prediction in labelled_predictions:
        print(label, adjusted_rand_score(nodes, prediction))



Ward:  0.371374635318
KKMeans:  0.176944760398
SpectralClustering:  0.919790903643


Ward:  0.445829701146
KKMeans:  0.684197387035
SpectralClustering:  0.893940150872


Ward:  0.361543155915
KKMeans:  0.216998997438
SpectralClustering:  0.919264375468


Ward:  0.368809771802
KKMeans:  0.262458634237
SpectralClustering:  0.920272589433


Ward:  0.391836213229
KKMeans:  0.177866328564
SpectralClustering:  0.919790903643


Ward:  0.444986519451
KKMeans:  0.202237150744
SpectralClustering:  0.867936434134


Ward:  0.310013138899
KKMeans:  0.102321205535
SpectralClustering:  0.723883418539


Ward:  0.517698470503
KKMeans:  0.488910648715
SpectralClustering:  0.893880067314


Ward:  0.556214183675
KKMeans:  0.280168748774
SpectralClustering:  0.863706480452


Ward:  0.363566555421
KKMeans:  0.37067427756
SpectralClustering:  0.867065630025


In [1]:
import matplotlib.pyplot as plt
import numpy as np
import json
from sklearn.metrics import adjusted_rand_score

from sklearn.cluster.k_means_ import k_means
from sklearn.cluster import AgglomerativeClustering, SpectralClustering
from src.graph.graph_generator import GraphGenerator
from src.kernel.kernel import get_all_kernels

# sklearn Ward (AgglomerativeClustering) vs sklearn k_means on every kernel:
# for each kernel class, every matrix yielded by kernel.get_Ks() is clustered
# on generated graphs, the ARI is averaged over the graphs, and the curves are
# written as JSON and saved as a figure.

# Single source for the folder name and the progress messages, so the
# "finished" message can no longer name a different experiment.
experiment_name = '100_3_03_01'
folder_name = experiment_name + '/'
n_clusters = 3
n_nodes = 100
pin = 0.3
pout = 0.1
generator = GraphGenerator(n_nodes, n_clusters, pin, pout)

graphs_num = 100
# The target folder (and its img/ sub-folder) is assumed to exist already.
results_dir = 'src/results/my/generated_graphs/sklearn/' + folder_name

print(experiment_name + ' is starting...')
for kernel_class in get_all_kernels():
    # One row per generated graph, one entry per matrix yielded by get_Ks().
    ward_all_scores = []
    kmeans_all_scores = []
    for graph_idx in range(0, graphs_num):
        graph = generator.generate_graph()
        nodes = graph.nodes
        edges = graph.edges
        ward_scores = []
        kmeans_scores = []
        kernel = kernel_class(edges)
        for K in kernel.get_Ks():
            # NOTE(review): K is passed both as the data and as the
            # connectivity constraint - confirm this is intended.
            prediction = AgglomerativeClustering(
                n_clusters=n_clusters, connectivity=K, linkage='ward'
            ).fit_predict(K)
            ward_scores.append(adjusted_rand_score(nodes, prediction))

            # NOTE(review): `precompute_distances` is only accepted by older
            # scikit-learn releases - confirm against the pinned version.
            _, prediction, _ = k_means(K, n_clusters,
                                       precompute_distances=True, random_state=0)
            kmeans_scores.append(adjusted_rand_score(nodes, prediction))

        ward_all_scores.append(ward_scores)
        kmeans_all_scores.append(kmeans_scores)

    # Transpose to per-matrix columns and keep |mean| of each column.
    ward_resulting_score = [abs(np.mean(column))
                            for column in zip(*ward_all_scores)]
    kmeans_resulting_score = [abs(np.mean(column))
                              for column in zip(*kmeans_all_scores)]
    ward_results = {kernel_class.name: ward_resulting_score}
    kmeans_results = {kernel_class.name: kmeans_resulting_score}

    with open(results_dir + 'Ward_' + kernel_class.name + '.txt', 'w') as file:
        file.write(json.dumps(ward_results))
    with open(results_dir + 'KMeans_' + kernel_class.name + '.txt', 'w') as file:
        file.write(json.dumps(kmeans_results))

    plt.figure(figsize=(10, 10))
    print('Ward: ', kernel_class.name, 'best result: ',
          np.max(ward_resulting_score))
    plt.plot(kernel_class.default_params, ward_resulting_score, label='Ward')
    print('KMeans: ', kernel_class.name, 'best result: ',
          np.max(kmeans_resulting_score))
    plt.plot(kernel_class.default_params, kmeans_resulting_score,
             label='Kernel K-means')
    plt.title(kernel_class.name + ', n_clusters=' + str(n_clusters))
    plt.legend()
    plt.savefig(results_dir + 'img/' + kernel_class.name + '.png')
    # plt.show()
# Previously printed '100_3_03_005 has finished!!!', which is not the
# experiment that was run here.
print(experiment_name + ' has finished!!!')



# print('100_2_025_005 is starting...')
# folder_name = '100_2_025_005/'
# n_clusters = 2
# n_nodes = 100
# pin = 0.25
# pout = 0.05
# generator = GraphGenerator(n_nodes, n_clusters, pin, pout)
# graphs_num = 50
# 
# for kernel_class in get_all_kernels():
#     ward_results = {}
#     ward_all_scores = []
#     kmeans_results = {}
#     kmeans_all_scores = []
#     for i in range(0, graphs_num):
#         graph = generator.generate_graph()
#         nodes = graph.nodes
#         edges = graph.edges
#         ward_scores = []
#         kmeans_scores = []
#         kernel = kernel_class(edges)
#         for K in kernel.get_Ks():
#             prediction = AgglomerativeClustering(n_clusters=n_clusters, 
#                                                  connectivity=K, linkage='ward')\
#                 .fit_predict(K)
#             ward_score = adjusted_rand_score(nodes, prediction)
#             ward_scores.append(ward_score)
#             
#             _, prediction, _ = k_means(K, n_clusters,
#                                  precompute_distances=True, random_state=0)
#             kmeans_score = adjusted_rand_score(nodes, prediction)
#             kmeans_scores.append(kmeans_score)
#             
#         ward_all_scores.append(ward_scores)
#         kmeans_all_scores.append(kmeans_scores)
#         
#     ward_all_scores = [list(i) for i in zip(*ward_all_scores)]
#     ward_resulting_score = [abs(np.mean(x)) for x in ward_all_scores]
#     ward_results[kernel_class.name] = ward_resulting_score
#     
#     kmeans_all_scores = [list(i) for i in zip(*kmeans_all_scores)]
#     kmeans_resulting_score = [abs(np.mean(x)) for x in kmeans_all_scores]
#     kmeans_results[kernel_class.name] = kmeans_resulting_score
#     
#     with open('src/results/my/generated_graphs/sklearn/' + folder_name
#               + 'Ward_' + kernel_class.name + '.txt', 'w') as file:
#         file.write(json.dumps(ward_results))
#     with open('src/results/my/generated_graphs/sklearn/' + folder_name 
#               + 'KMeans_' + kernel_class.name + '.txt', 'w') as file:
#         file.write(json.dumps(kmeans_results))
#        
#     plt.figure(figsize=(10, 10))
#     for kernel_name, resulting_score in ward_results.items():
#         print('Ward: ', kernel_name, 'best result: ', np.max(resulting_score))
#         plt.plot(kernel_class.default_params, resulting_score, label='Ward')
#     for kernel_name, resulting_score in kmeans_results.items():
#         print('KMeans: ', kernel_name, 'best result: ', np.max(resulting_score))
#         plt.plot(kernel_class.default_params, resulting_score, label='Kernel K-means')
#     plt.title(kernel_class.name + ', n_clusters=' + str(n_clusters))
#     plt.legend()
#     plt.savefig('src/results/my/generated_graphs/sklearn/' + folder_name + 'img/' 
#                 + kernel_class.name + '.png')
#     # plt.show()
# print('100_2_025_005 has finished!!!')
# 
# 
# print('200_2_03_005 is starting...')
# folder_name = '200_2_03_005/'
# n_clusters = 2
# n_nodes = 200
# pin = 0.3
# pout = 0.05
# generator = GraphGenerator(n_nodes, n_clusters, pin, pout)
# graphs_num = 50
# 
# for kernel_class in get_all_kernels():
#     ward_results = {}
#     ward_all_scores = []
#     kmeans_results = {}
#     kmeans_all_scores = []
#     for i in range(0, graphs_num):
#         graph = generator.generate_graph()
#         nodes = graph.nodes
#         edges = graph.edges
#         ward_scores = []
#         kmeans_scores = []
#         kernel = kernel_class(edges)
#         for K in kernel.get_Ks():
#             prediction = AgglomerativeClustering(n_clusters=n_clusters, 
#                                                  connectivity=K, linkage='ward')\
#                 .fit_predict(K)
#             ward_score = adjusted_rand_score(nodes, prediction)
#             ward_scores.append(ward_score)
#             
#             _, prediction, _ = k_means(K, n_clusters,
#                                  precompute_distances=True, random_state=0)
#             kmeans_score = adjusted_rand_score(nodes, prediction)
#             kmeans_scores.append(kmeans_score)
#             
#         ward_all_scores.append(ward_scores)
#         kmeans_all_scores.append(kmeans_scores)
#         
#     ward_all_scores = [list(i) for i in zip(*ward_all_scores)]
#     ward_resulting_score = [abs(np.mean(x)) for x in ward_all_scores]
#     ward_results[kernel_class.name] = ward_resulting_score
#     
#     kmeans_all_scores = [list(i) for i in zip(*kmeans_all_scores)]
#     kmeans_resulting_score = [abs(np.mean(x)) for x in kmeans_all_scores]
#     kmeans_results[kernel_class.name] = kmeans_resulting_score
#     
#     with open('src/results/my/generated_graphs/sklearn/' + folder_name
#               + 'Ward_' + kernel_class.name + '.txt', 'w') as file:
#         file.write(json.dumps(ward_results))
#     with open('src/results/my/generated_graphs/sklearn/' + folder_name 
#               + 'KMeans_' + kernel_class.name + '.txt', 'w') as file:
#         file.write(json.dumps(kmeans_results))
#        
#     plt.figure(figsize=(10, 10))
#     for kernel_name, resulting_score in ward_results.items():
#         print('Ward: ', kernel_name, 'best result: ', np.max(resulting_score))
#         plt.plot(kernel_class.default_params, resulting_score, label='Ward')
#     for kernel_name, resulting_score in kmeans_results.items():
#         print('KMeans: ', kernel_name, 'best result: ', np.max(resulting_score))
#         plt.plot(kernel_class.default_params, resulting_score, label='Kernel K-means')
#     plt.title(kernel_class.name + ', n_clusters=' + str(n_clusters))
#     plt.legend()
#     plt.savefig('src/results/my/generated_graphs/sklearn/' + folder_name + 'img/' 
#                 + kernel_class.name + '.png')
#     # plt.show()
# print('100_2_025_005 has finished!!!')

100_3_03_01 is starting...


Ward:  PlainWalk best result:  0.804918482287
KMeans:  PlainWalk best result:  0.512330186446


Ward:  LogPlainWalk best result:  0.826006516521
KMeans:  LogPlainWalk best result:  0.93836153923


Ward:  Communicability best result:  0.840095360863
KMeans:  Communicability best result:  0.955345158824


Ward:  LogCommunicability best result:  0.810328198967
KMeans:  LogCommunicability best result:  0.932122085488


Ward:  Forest best result:  0.660073819034
KMeans:  Forest best result:  0.321978152694


Ward:  LogForest best result:  0.943654301009
KMeans:  LogForest best result:  0.990376979375


Ward:  Heat best result:  0.864354995097
KMeans:  Heat best result:  0.886308535891


Ward:  LogHeat best result:  0.96755222347
KMeans:  LogHeat best result:  0.987404005484


Ward:  SigmoidCommuteTime best result:  0.731085729559
KMeans:  SigmoidCommuteTime best result:  0.400778976004


Ward:  LogSigmoidCommuteTime best result:  0.721731797105
KMeans:  LogSigmoidCommuteTime best result:  0.371217435996


100_3_03_005 has finished!!!


In [1]:
import matplotlib.pyplot as plt
import numpy as np
import json
from sklearn.metrics import adjusted_rand_score

from sklearn.cluster.k_means_ import k_means
from sklearn.cluster import AgglomerativeClustering, SpectralClustering
from src.graph.graph_generator import GraphGenerator
from src.kernel.kernel import get_all_kernels, SigmoidCommuteTime, LogSigmoidCommuteTime

# Unbalanced two-cluster experiment: the sizes passed to
# generate_unbalanced_graph are [num, n_nodes - num] for each num in
# nodes_in_first_num. For every graph the best ARI over all matrices from
# kernel.get_Ks() is taken, then averaged over the graphs.
n_clusters = 2
n_nodes = 100
pin = 0.3
pout = 0.07
generator = GraphGenerator(n_nodes, n_clusters, pin, pout)

graphs_num = 30
nodes_in_first_num = range(1, 48, 3)

print('unbalanced is starting...')
for kernel_class in [SigmoidCommuteTime, LogSigmoidCommuteTime]:
    # One averaged score per entry of nodes_in_first_num.
    ward_curve = []
    kmeans_curve = []
    for first_size in nodes_in_first_num:
        ward_best_per_graph = []
        kmeans_best_per_graph = []
        for graph_idx in range(0, graphs_num):
            graph = generator.generate_unbalanced_graph(
                [first_size, n_nodes - first_size])
            nodes = graph.nodes
            edges = graph.edges
            kernel = kernel_class(edges)
            ward_scores = []
            kmeans_scores = []
            for K in kernel.get_Ks():
                ward_model = AgglomerativeClustering(
                    n_clusters=n_clusters, connectivity=K, linkage='ward')
                prediction = ward_model.fit_predict(K)
                ward_scores.append(adjusted_rand_score(nodes, prediction))

                _, prediction, _ = k_means(K, n_clusters,
                                           precompute_distances=True,
                                           random_state=0)
                kmeans_scores.append(adjusted_rand_score(nodes, prediction))

            # Only the best score over the kernel matrices is kept per graph.
            ward_best_per_graph.append(np.max(ward_scores))
            kmeans_best_per_graph.append(np.max(kmeans_scores))

        ward_curve.append(np.mean(ward_best_per_graph))
        kmeans_curve.append(np.mean(kmeans_best_per_graph))

    ward_results = {kernel_class.name: ward_curve}
    kmeans_results = {kernel_class.name: kmeans_curve}

    with open('src/results/my/generated_graphs/sklearn/unbalanced/Ward_'
              + kernel_class.name + '.txt', 'w') as file:
        file.write(json.dumps(ward_results))
    with open('src/results/my/generated_graphs/sklearn/unbalanced/KMeans_'
              + kernel_class.name + '.txt', 'w') as file:
        file.write(json.dumps(kmeans_results))

    plt.figure(figsize=(10, 10))
    print('Ward: ', kernel_class.name, 'best result: ', np.max(ward_curve))
    plt.plot(nodes_in_first_num, ward_curve, label='Ward')
    print('KMeans: ', kernel_class.name, 'best result: ', np.max(kmeans_curve))
    plt.plot(nodes_in_first_num, kmeans_curve, label='Kernel K-means')
    plt.title(kernel_class.name + ', n_clusters=' + str(n_clusters))
    plt.legend()
    plt.savefig('src/results/my/generated_graphs/sklearn/unbalanced/img/'
                + kernel_class.name + '.png')
    # plt.show()
print('unbalanced has finished!!!')

unbalanced is starting...


Ward:  PlainWalk best result:  1.0
KMeans:  PlainWalk best result:  1.0


Ward:  LogPlainWalk best result:  1.0
KMeans:  LogPlainWalk best result:  1.0


  x = um.multiply(x, x, out=x)
  distances += XX
  max_iter=max_iter, verbose=verbose)
  max_iter=max_iter, verbose=verbose)
  inertia = np.sum((X - centers[labels]) ** 2, dtype=np.float64)


  distances *= -2


  distances += XX
  distances += YY


  distances += YY


Ward:  Communicability best result:  1.0
KMeans:  Communicability best result:  1.0


Ward:  LogCommunicability best result:  1.0
KMeans:  LogCommunicability best result:  1.0


Ward:  Forest best result:  1.0
KMeans:  Forest best result:  1.0


Ward:  LogForest best result:  1.0
KMeans:  LogForest best result:  1.0


Ward:  Heat best result:  1.0
KMeans:  Heat best result:  1.0


Ward:  LogHeat best result:  1.0
KMeans:  LogHeat best result:  1.0


Ward:  SigmoidCommuteTime best result:  1.0
KMeans:  SigmoidCommuteTime best result:  1.0


Ward:  LogSigmoidCommuteTime best result:  1.0
KMeans:  LogSigmoidCommuteTime best result:  1.0


unbalanced has finished!!!
