In [10]:
import matplotlib.pyplot as plt
import numpy as np
import json
from sklearn.metrics import adjusted_rand_score, normalized_mutual_info_score

from src.clustering.spectral_clustering import SpectralClustering
from src.clustering.ward import Ward
from src.graph.graph_generator import GraphGenerator
from src.kernel.kernel import get_all_kernels

n_clusters = 4
generator = GraphGenerator(100, n_clusters, 0.2, 0.05)

for kernel_class in get_all_kernels():
    ward_results = {}
    spectral_results = {}
    all_ward_scores = []
    all_spectral_scores = []
    for i in range(0, 10):
        graph = generator.generate_graph()
        nodes = graph.nodes
        edges = graph.edges
        ward_scores = []
        spectral_scores = []
        kernel = kernel_class(edges)
        for K in kernel.get_Ks():
            ward_prediction = Ward(n_clusters).fit_predict(K)
            ward_score = adjusted_rand_score(nodes, ward_prediction)
            ward_scores.append(ward_score)
            spectral_prediction = SpectralClustering(n_clusters).fit_predict(K)
            spectral_score = adjusted_rand_score(nodes, spectral_prediction)
            spectral_scores.append(spectral_score)
        all_ward_scores.append(ward_scores)
        all_spectral_scores.append(spectral_scores)
    all_ward_scores = [list(i) for i in zip(*all_ward_scores)]
    all_spectral_scores = [list(i) for i in zip(*all_spectral_scores)]
    resulting_ward_score = [np.mean(x) for x in all_ward_scores]
    resulting_spectral_score = [np.mean(x) for x in all_spectral_scores]
    ward_results[kernel_class.name] = resulting_ward_score
    spectral_results[kernel_class.name] = resulting_spectral_score
    
    with open('src/results/my/my_graphs/ward' + kernel_class.name + '.txt', 'w') as file:
        file.write(json.dumps(ward_results))
    with open('src/results/my/my_graphs/k-means' + kernel_class.name + '.txt', 'w') as file:
        file.write(json.dumps(spectral_results))
        
    plt.figure(figsize=(10,10))
    print('Ward ', kernel_class.name, 'best result: ', np.max(resulting_ward_score))
    print('K-means ', kernel_class.name, 'best result: ', np.max(resulting_spectral_score))
    for kernel_name, resulting_score in ward_results.items():
        print(kernel_name, 'best result: ', np.max(resulting_score))
        plt.plot(kernel_class.default_params, resulting_score, label='Ward')
    for kernel_name, resulting_score in spectral_results.items():
        print(kernel_name, 'best result: ', np.max(resulting_score))
        plt.plot(kernel_class.default_params, resulting_score, label='K-means')
    # plt.plot(kernel_class.default_params, resulting_spectral_score, label='K-means')
    plt.title(kernel_class.name + ', n_clusters=' + str(n_clusters))
    plt.legend()
    plt.show()

  array = np.array(array, dtype=dtype, order=order, copy=copy)


Ward  PlainWalk best result:  0.612507880271
K-means  PlainWalk best result:  0.267730324034
PlainWalk best result:  0.612507880271
PlainWalk best result:  0.267730324034


ValueError: max() arg is an empty sequence

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import json
from sklearn.metrics import adjusted_rand_score, normalized_mutual_info_score

from src.clustering.spectral_clustering import SpectralClustering
from src.graph.graph_generator import GraphGenerator
from src.kernel.kernel import get_all_kernels


generator = GraphGenerator(200, 4, 0.2, 0.05)
n_clusters = 4

for kernel_class in get_all_kernels():
    results = {}
    all_scores = []
    for i in range(0, 50):
        graph = generator.generate_graph()
        nodes = graph.nodes
        edges = graph.edges
        scores = []
        kernel = kernel_class(edges)
        for K in kernel.get_Ks():
            prediction = SpectralClustering(n_clusters).fit_predict(K)
            score = adjusted_rand_score(nodes, prediction)
            scores.append(score)
        all_scores.append(scores)
    all_scores = [list(i) for i in zip(*all_scores)]
    resulting_score = [np.mean(x) for x in all_scores]
    results[kernel_class.name] = resulting_score
    
    with open('src/results/my/' + kernel_class.name + '.txt', 'w') as file:
        file.write(json.dumps(results))
        
    plt.figure(figsize=(10,10))
    for kernel_name, resulting_score in results.items():
        print(kernel_name, 'best result: ', np.max(resulting_score))
        plt.plot(kernel_class.default_params, resulting_score, label=kernel_name)
    plt.title('K-means ' + kernel_class.name + ', n_clusters=' + str(n_clusters))
    plt.legend()
    plt.show()

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import json
from sklearn.metrics import adjusted_rand_score, normalized_mutual_info_score

from src.clustering.ward import Ward
from src.graph.graph_generator import GraphGenerator
from src.kernel.kernel import get_all_kernels
from src.dataset.dataset import football, polbooks, polblogs, zachary, news

all_data = news

for data in all_data:
    graph, info = data
    results = {}
    for kernel_class in get_all_kernels():
        results[kernel_class.name] = {}
        nodes = graph.nodes
        edges = graph.edges
        scores_1 = []
        scores_2 = []
        kernel = kernel_class(edges)
        for K in kernel.get_Ks():
            prediction = Ward(info['k']).fit_predict(K)
            score_1 = adjusted_rand_score(nodes, prediction)
            scores_1.append(score_1)
            score_2 = normalized_mutual_info_score(nodes, prediction)
            scores_2.append(score_2)
        results[kernel_class.name] = str(max(scores_1)) + ' ' + str(max(scores_2))
        print(info['name'] + ' ' + kernel_class.name + ' ' + str(max(scores_1)) + ' ' + str(max(scores_2)))
    print('data ' + info['name'] + ' finished!')
    with open('src/results/my/news/' + info['name'] + '.txt', 'w') as file:
        file.write(json.dumps(results))

news_2cl_1 PlainWalk 0.046538240459 0.131073947536


news_2cl_1 LogPlainWalk 0.765036841281 0.666211167588


news_2cl_1 Communicability 0.0398139851986 0.137056806879


news_2cl_1 LogCommunicability 0.0398139851986 0.137056806879


news_2cl_1 Forest 0.0 0.0157732781716


news_2cl_1 LogForest 0.0 0.0157732781716


news_2cl_1 Heat 0.0 0.0157732781716


news_2cl_1 LogHeat 0.0 0.0157732781716
data news_2cl_1 finished!


news_2cl_2 PlainWalk 0.0301532840005 0.141592004211


news_2cl_2 LogPlainWalk 0.678368603577 0.584491891861


news_2cl_2 Communicability 0.0897374128245 0.216205884855


news_2cl_2 LogCommunicability 0.0897374128245 0.216205884855


news_2cl_2 Forest 5.0760113667e-05 0.0159338802362


news_2cl_2 LogForest 5.0760113667e-05 0.0159338802362


news_2cl_2 Heat 5.0760113667e-05 0.0159338802362


news_2cl_2 LogHeat 5.0760113667e-05 0.0159338802362
data news_2cl_2 finished!


news_2cl_3 PlainWalk 0.606586521764 0.558476362302


news_2cl_3 LogPlainWalk 0.836424412989 0.748423673259


news_2cl_3 Communicability 0.420069705463 0.446307464619


news_2cl_3 LogCommunicability 0.420069705463 0.446307464619


news_2cl_3 Forest 2.51892309488e-05 0.0158531978678


news_2cl_3 LogForest 2.51892309488e-05 0.0158531978678


news_2cl_3 Heat 2.51892309488e-05 0.0158531978678


news_2cl_3 LogHeat 2.51892309488e-05 0.0158531978678
data news_2cl_3 finished!


news_3cl_1 PlainWalk 0.294076407196 0.435537100686


news_3cl_1 LogPlainWalk 0.698476902125 0.626289453208


news_3cl_1 Communicability 0.396134440162 0.527740021915


news_3cl_1 LogCommunicability 0.396134440162 0.527740021915


news_3cl_1 Forest 1.1167038899e-05 0.0223206287314


news_3cl_1 LogForest 1.1167038899e-05 0.0223206287314


In [1]:
import matplotlib.pyplot as plt
import numpy as np
import json
from sklearn.metrics import adjusted_rand_score, normalized_mutual_info_score

from src.clustering.ward import Ward
from src.graph.graph_generator import GraphGenerator
from src.kernel.kernel import get_all_kernels
from src.dataset.dataset import football, polbooks, polblogs, zachary, news

all_data = [football, polbooks, zachary, polblogs]

for data in all_data:
    graph, info = data
    results = {}
    for kernel_class in get_all_kernels():
        results[kernel_class.name] = {}
        nodes = graph.nodes
        edges = graph.edges
        scores_1 = []
        scores_2 = []
        kernel = kernel_class(edges)
        for K in kernel.get_Ks():
            prediction = Ward(info['k']).fit_predict(K)
            score_1 = adjusted_rand_score(nodes, prediction)
            scores_1.append(score_1)
            score_2 = normalized_mutual_info_score(nodes, prediction)
            scores_2.append(score_2)
        results[kernel_class.name] = str(max(scores_1)) + ' ' + str(max(scores_2))
        print(info['name'] + ' ' + kernel_class.name + ' ' + str(max(scores_1)) + ' ' + str(max(scores_2)))
    print('data ' + info['name'] + ' finished!')
    with open('src/results/my/others/' + info['name'] + '.txt', 'w') as file:
        file.write(json.dumps(results))

football PlainWalk 0.906334157557 0.930810884185


football LogPlainWalk 0.896650009791 0.92419578687


football Communicability 0.909377088152 0.935845790439


football LogCommunicability 0.909377088152 0.935845790439


football Forest 0.896650009791 0.92419578687


football LogForest 0.896650009791 0.92419578687


football Heat 0.896650009791 0.926884056453


football LogHeat 0.896650009791 0.926884056453
data football finished!


polbooks PlainWalk 0.689859519674 0.58624585164


polbooks LogPlainWalk 0.71834696317 0.62718880803


polbooks Communicability 0.680461669793 0.576107450818


polbooks LogCommunicability 0.680461669793 0.576107450818


polbooks Forest 0.665203435003 0.597513219481


polbooks LogForest 0.665203435003 0.597513219481


polbooks Heat 0.708482177779 0.625320906524


polbooks LogHeat 0.708482177779 0.625320906524
data polbooks finished!


zachary PlainWalk 1.0 1.0


zachary LogPlainWalk 1.0 1.0


zachary Communicability 1.0 1.0


zachary LogCommunicability 1.0 1.0


zachary Forest 0.153463105478 0.272495191494


zachary LogForest 0.153463105478 0.272495191494


zachary Heat 1.0 1.0


zachary LogHeat 1.0 1.0
data zachary finished!


KeyboardInterrupt: 

In [11]:
import matplotlib.pyplot as plt
import numpy as np
import json
from sklearn.metrics import adjusted_rand_score, normalized_mutual_info_score

from src.clustering.kernel_kmeans import KernelKMeans
from src.clustering.ward import Ward
from src.graph.graph_generator import GraphGenerator
from src.kernel.kernel import get_all_kernels
from src.dataset.dataset import football, polbooks, polblogs, zachary, news

all_data = [football, polbooks, zachary]

for data in all_data:
    graph, info = data
    print('data ' + info['name'] + 'for Ward' + ' started!')
    results = {}
    for kernel_class in get_all_kernels():
        results[kernel_class.name] = {}
        nodes = graph.nodes
        edges = graph.edges
        scores_1 = []
        scores_2 = []
        kernel = kernel_class(edges)
        for K in kernel.get_Ks():
            prediction = Ward(info['k']).fit_predict(K)
            score_1 = adjusted_rand_score(nodes, prediction)
            scores_1.append(score_1)
            score_2 = normalized_mutual_info_score(nodes, prediction)
            scores_2.append(score_2)
        results[kernel_class.name] = 'ARI=' + str(max(scores_1)) + ' NMI=' + str(max(scores_2))
        print(info['name'] + ' ' + kernel_class.name + ' ' + results[kernel_class.name])
    print('data ' + info['name'] + 'for Ward' + ' finished!')
    with open('src/results/my/datasets/' + 'Ward_' + info['name'] + '.txt', 'w') as file:
        file.write(json.dumps(results))

data footballfor Ward started!


football PlainWalk ARI=0.906334157557 NMI=0.930810884185


football LogPlainWalk ARI=0.896650009791 NMI=0.92419578687


football Communicability ARI=0.909377088152 NMI=0.935845790439


football LogCommunicability ARI=0.909377088152 NMI=0.935845790439


football Forest ARI=0.896650009791 NMI=0.92419578687


football LogForest ARI=0.896650009791 NMI=0.92419578687


football Heat ARI=0.896650009791 NMI=0.926884056453


football LogHeat ARI=0.896650009791 NMI=0.926884056453
data footballfor Ward finished!
data polbooksfor Ward started!


polbooks PlainWalk ARI=0.689859519674 NMI=0.58624585164


polbooks LogPlainWalk ARI=0.71834696317 NMI=0.62718880803


polbooks Communicability ARI=0.680461669793 NMI=0.576107450818


polbooks LogCommunicability ARI=0.680461669793 NMI=0.576107450818


polbooks Forest ARI=0.665203435003 NMI=0.597513219481


polbooks LogForest ARI=0.665203435003 NMI=0.597513219481


polbooks Heat ARI=0.708482177779 NMI=0.625320906524


polbooks LogHeat ARI=0.708482177779 NMI=0.625320906524
data polbooksfor Ward finished!
data zacharyfor Ward started!


zachary PlainWalk ARI=1.0 NMI=1.0


zachary LogPlainWalk ARI=1.0 NMI=1.0


zachary Communicability ARI=1.0 NMI=1.0


zachary LogCommunicability ARI=1.0 NMI=1.0


zachary Forest ARI=0.153463105478 NMI=0.272495191494


zachary LogForest ARI=0.153463105478 NMI=0.272495191494


zachary Heat ARI=1.0 NMI=1.0


zachary LogHeat ARI=1.0 NMI=1.0
data zacharyfor Ward finished!
data footballfor K-means started!


football PlainWalk ARI=0.896650009791 NMI=0.92419578687


football LogPlainWalk ARI=0.903633402408 NMI=0.931437993801


  array = np.array(array, dtype=dtype, order=order, copy=copy)


football Communicability ARI=0.896650009791 NMI=0.92419578687


football LogCommunicability ARI=0.918537930393 NMI=0.941566907282


football Forest ARI=0.896650009791 NMI=0.92419578687


football LogForest ARI=0.896650009791 NMI=0.92419578687


football Heat ARI=0.896650009791 NMI=0.92419578687


football LogHeat ARI=0.896650009791 NMI=0.92419578687
data footballfor K-means finished!
data polbooksfor K-means started!


polbooks PlainWalk ARI=0.464821119481 NMI=0.503576380332


polbooks LogPlainWalk ARI=0.659870621792 NMI=0.556589890139


polbooks Communicability ARI=0.479292423774 NMI=0.520377033233


polbooks LogCommunicability ARI=0.472807846629 NMI=0.503576380332


polbooks Forest ARI=0.757927891189 NMI=0.686669885114


polbooks LogForest ARI=0.757927891189 NMI=0.686669885114


polbooks Heat ARI=0.757927891189 NMI=0.686669885114


polbooks LogHeat ARI=0.757927891189 NMI=0.686669885114
data polbooksfor K-means finished!
data zacharyfor K-means started!


zachary PlainWalk ARI=1.0 NMI=1.0


zachary LogPlainWalk ARI=1.0 NMI=1.0


zachary Communicability ARI=1.0 NMI=1.0


zachary LogCommunicability ARI=1.0 NMI=1.0


zachary Forest ARI=0.403372835005 NMI=0.455001600015


zachary LogForest ARI=0.403372835005 NMI=0.455001600015


zachary Heat ARI=0.403372835005 NMI=0.455001600015


zachary LogHeat ARI=0.403372835005 NMI=0.455001600015
data zacharyfor K-means finished!


In [4]:
import matplotlib.pyplot as plt
import numpy as np
import json
from sklearn.metrics import adjusted_rand_score, normalized_mutual_info_score

from src.clustering.kernel_kmeans import KernelKMeans
from src.clustering.ward import Ward
from src.graph.graph_generator import GraphGenerator
from src.kernel.kernel import get_all_kernels
from src.dataset.dataset import football, polbooks, polblogs, zachary, news

all_data = [football, polbooks, zachary]

for data in all_data:
    graph, info = data
    print('data ' + info['name'] + 'for K-means' + ' started!')
    print('n_clusters = ' + str(info['k']))
    results = {}
    for kernel_class in get_all_kernels():
        results[kernel_class.name] = {}
        nodes = graph.nodes
        edges = graph.edges
        scores_1 = []
        scores_2 = []
        kernel = kernel_class(edges)
        for K in kernel.get_Ks():
            prediction = KernelKMeans(n_clusters=info['k']).fit_predict(K)
            score_1 = adjusted_rand_score(nodes, prediction)
            scores_1.append(score_1)
            score_2 = normalized_mutual_info_score(nodes, prediction)
            scores_2.append(score_2)
        results[kernel_class.name] = 'ARI=' + str(max(scores_1)) + ' NMI=' + str(max(scores_2))
        print(info['name'] + ' ' + kernel_class.name + ' ' + results[kernel_class.name])
    print('data ' + info['name'] + 'for K-means' + ' finished!')
    with open('src/results/my/datasets/' + 'K_means_' + info['name'] + '.txt', 'w') as file:
        file.write(json.dumps(results))

data footballfor K-means started!
n_clusters = 12


football PlainWalk ARI=0.272706412144 NMI=0.49931821818


  dist[:, j] -= 2 * np.sum(sw[mask] * K[:, mask], axis=1) / denom


football LogPlainWalk ARI=0.896650009791 NMI=0.926884056453


football Communicability ARI=0.894583009987 NMI=0.926699613767


football LogCommunicability ARI=0.906334157557 NMI=0.937330772002


football Forest ARI=0.109535427741 NMI=0.365833755789


football LogForest ARI=0.1111167055 NMI=0.355340140575


football Heat ARI=0.817137827277 NMI=0.895149110781


football LogHeat ARI=0.896650009791 NMI=0.926884056453
data footballfor K-means finished!
data polbooksfor K-means started!
n_clusters = 3


polbooks PlainWalk ARI=0.294661370959 NMI=0.387835998191


polbooks LogPlainWalk ARI=0.676084837938 NMI=0.586701745183


polbooks Communicability ARI=0.415932406825 NMI=0.45732035514


polbooks LogCommunicability ARI=0.414806413942 NMI=0.455811255154


polbooks Forest ARI=0.707819422326 NMI=0.625493215541


polbooks LogForest ARI=0.677609550339 NMI=0.609400447299


polbooks Heat ARI=0.707819422326 NMI=0.608778043266


polbooks LogHeat ARI=0.707819422326 NMI=0.608778043266
data polbooksfor K-means finished!
data zacharyfor K-means started!
n_clusters = 2


  within_distances[j] = dist_j


TypeError: Cannot cast ufunc add output from dtype('complex128') to dtype('float64') with casting rule 'same_kind'