In [1]:
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import scale

In [21]:
# Compute one feature vector per network, grouped by domain.
# NOTE(review): the graph objects used here (FW_*, Soc_*, Cit_*, QA_*, PPI_*)
# are created by the dataset-loading cell that appears further down in this
# notebook; that cell has to be run first.
# NOTE(review): `average_eight_patterns` is not part of stock networkx —
# presumably a patched build that returns eight values per graph; confirm.
fw_1, fw_2, fw_3, fw_4 = map(
    nx.average_eight_patterns,
    (FW_Chesa, FW_Baywet, FW_Baydry, FW_LittleRock),
)

trust_1, trust_2 = map(nx.average_eight_patterns, (Soc_Alpha, Soc_OTC))

cit_1, cit_2, cit_3, cit_4 = map(
    nx.average_eight_patterns,
    (Cit_DBLP, Cit_Cora, Cit_HepPh, Cit_HepTh),
)

qa_1, qa_2 = map(nx.average_eight_patterns, (QA_Math, QA_Ubuntu))

ppi_1, ppi_2 = map(nx.average_eight_patterns, (PPI_Figeys, PPI_Stelzl))

In [152]:
# Feature matrix: one row per network. The row order must stay aligned with
# the ground-truth `labels` list (trust, fw, cit, qa, ppi).
X = [
    trust_1, trust_2,
    fw_1, fw_2, fw_3, fw_4,
    cit_1, cit_2, cit_3, cit_4,
    qa_1, qa_2,
    ppi_1, ppi_2,
]

In [23]:
# Ground-truth class id for every row of X, in the same order:
# 1 = trust_* (2 rows), 2 = fw_* (4), 3 = cit_* (4), 4 = qa_* (2), 5 = ppi_* (2).
labels = [1] * 2 + [2] * 4 + [3] * 4 + [4] * 2 + [5] * 2

In [154]:
# Standardise every feature column so that no single feature dominates the
# Euclidean distances used by k-means.
data = scale(X)
n_samples, n_features = np.shape(data)
# Number of distinct ground-truth classes.
n_cluster = np.unique(labels).size

In [155]:
def classify_networks(data, n_cluster, repeat=500, true_labels=None, random_state=None):
    """Cluster `data` with repeated randomly-initialised k-means and score each run.

    Every run fits a fresh single-init ``KMeans`` and compares its cluster
    assignment with the ground truth using homogeneity, completeness and
    V-measure.

    Parameters
    ----------
    data : array-like of shape (n_samples, n_features)
        Feature matrix to cluster.
    n_cluster : int
        Number of clusters passed to ``KMeans``.
    repeat : int, default 500
        Number of independent k-means runs. Must be at least 1.
    true_labels : sequence, optional
        Ground-truth class of every row of `data`. Defaults to the
        notebook-level ``labels`` list, which is what this function always
        used before the parameter existed.
    random_state : int or numpy.random.RandomState, optional
        Seed (or generator) shared by all runs so the whole sweep is
        reproducible. ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    model : KMeans
        The fitted estimator of the run with the highest V-measure.
    homo, compl, v_measure : float
        The best homogeneity, completeness and V-measure seen across all
        runs. `homo` and `compl` are tracked independently, so they are not
        necessarily the scores of the returned `model`.

    Raises
    ------
    ValueError
        If `repeat` is smaller than 1.
    """
    if repeat < 1:
        raise ValueError("repeat must be at least 1")
    if true_labels is None:
        # Backward-compatible default: fall back to the notebook global.
        true_labels = labels

    # One shared generator: each run draws a different initialisation, yet
    # the sequence of runs is reproducible for a given seed.
    if isinstance(random_state, (int, np.integer)):
        rng = np.random.RandomState(random_state)
    else:
        rng = random_state

    model = None
    homo = 0
    compl = 0
    v_measure = 0
    for _ in range(repeat):
        estimator = KMeans(init='random', n_clusters=n_cluster, n_init=1,
                           random_state=rng)
        estimator.fit(data)
        h = metrics.homogeneity_score(true_labels, estimator.labels_)
        c = metrics.completeness_score(true_labels, estimator.labels_)
        v = metrics.v_measure_score(true_labels, estimator.labels_)
        homo = max(homo, h)
        compl = max(compl, c)
        # `model is None` guarantees the first run is always recorded, so a
        # model is returned even when every run scores a V-measure of 0.
        if model is None or v > v_measure:
            v_measure = v
            model = estimator
    return model, homo, compl, v_measure

# Closure patterns are better features in network classification

In [156]:
# Cluster using only the first four standardised feature columns.
# NOTE(review): presumably these are the closure-pattern features named in the
# heading above — confirm the column order of the feature vectors.
classify_networks(data[:, :4], n_cluster=5, repeat=1000)

(KMeans(init='random', n_clusters=5, n_init=1),
 0.8722165935233487,
 0.8937703335722145,
 0.8828619326448721)

In [157]:
# Same experiment on the remaining feature columns (index 4 onward), for
# comparison with the first-four-columns run.
classify_networks(data[:, 4:], n_cluster=5, repeat=1000)

(KMeans(init='random', n_clusters=5, n_init=1),
 0.7569035545284655,
 0.8049897350527181,
 0.7706836403528206)

In [169]:
# Cluster on column 7 alone; indexing with the list [7] keeps the input
# two-dimensional (n_samples, 1) as KMeans requires.
classify_networks(data[:, [7]], n_cluster=5, repeat=1000)

(KMeans(init='random', n_clusters=5, n_init=1),
 0.5045270290651706,
 0.6016667847080637,
 0.5234623836850996)

In [None]:
# use k, C, E, I, O
# NOTE(review): `processed_data` is not defined in any visible cell of this
# notebook, so this cell fails on a fresh kernel — confirm where it comes from.
# Project the features onto their first two principal components, then keep
# the k-means model returned by classify_networks (best V-measure run).
reduced_data = PCA(n_components=2).fit_transform(processed_data)
kmeans, _, _, _ = classify_networks(reduced_data, n_cluster=6, repeat=1000)
# Homogeneity, completeness and V-measure of the selected model, one per line.
print(
    metrics.homogeneity_score(labels, kmeans.labels_),
    metrics.completeness_score(labels, kmeans.labels_),
    metrics.v_measure_score(labels, kmeans.labels_),
    sep="\n",
)

In [None]:
# pca = PCA(n_components=5)
# pca.fit(processed_data)
# print(pca.explained_variance_ratio_)
# print(pca.singular_values_)

In [None]:
# use k, C, E
# NOTE(review): `processed_data` is not defined in any visible cell of this
# notebook, so this cell fails on a fresh kernel — confirm where it comes from.
# Same pipeline restricted to the first three feature columns.
reduced_data_2 = PCA(n_components=2).fit_transform(processed_data[:, :3])
kmeans_2, _, _, _ = classify_networks(reduced_data_2, n_cluster=6, repeat=1000)
# Homogeneity, completeness and V-measure of the selected model, one per line.
print(
    metrics.homogeneity_score(labels, kmeans_2.labels_),
    metrics.completeness_score(labels, kmeans_2.labels_),
    metrics.v_measure_score(labels, kmeans_2.labels_),
    sep="\n",
)

In [None]:
# Two-panel figure of the k-means decision regions in PCA space, saved to
# 'network_classification.pdf'.
# NOTE(review): the tag_* / tag_*_2 arrays plotted below are assigned in cells
# that appear further down in this notebook; those cells must be run first.

# Mesh step size: smaller values give smoother region borders but more points.
h = .01
# Plot the decision regions: every mesh point is coloured by the cluster the
# model assigns to it. The mesh extends 0.8 beyond the data on every side.
x_min, x_max = reduced_data[:, 0].min() - 0.8, reduced_data[:, 0].max() + 0.8
y_min, y_max = reduced_data[:, 1].min() - 0.8, reduced_data[:, 1].max() + 0.8
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
# Predict a cluster for each mesh point with the model returned by
# classify_networks.
Z = kmeans.predict(np.c_[xx.ravel(), yy.ravel()])
# Reshape the flat predictions back onto the mesh so they can be drawn as an image
Z = Z.reshape(xx.shape)

# right panel (axes[1]): same mesh construction for the second model
x_min_2, x_max_2 = reduced_data_2[:, 0].min() - 0.8, reduced_data_2[:, 0].max() + 0.8
y_min_2, y_max_2 = reduced_data_2[:, 1].min() - 0.8, reduced_data_2[:, 1].max() + 0.8
xx_2, yy_2 = np.meshgrid(np.arange(x_min_2, x_max_2, h), np.arange(y_min_2, y_max_2, h))
# Predict a cluster for each mesh point with the second model (kmeans_2).
Z_2 = kmeans_2.predict(np.c_[xx_2.ravel(), yy_2.ravel()])
# Reshape the flat predictions back onto the mesh so they can be drawn as an image
Z_2 = Z_2.reshape(xx_2.shape)


# One row, two columns: axes[0] is the left panel, axes[1] the right panel.
onepic, axes = plt.subplots(1,2,figsize=(21,7.7))
plt.subplots_adjust(hspace=0.12, wspace=.12) 
# left panel: decision regions as a background image
axes[0].imshow(Z, interpolation='nearest',
           extent=(xx.min(), xx.max(), yy.min(), yy.max()),
           cmap='summer',
           aspect='auto', origin='lower')
# One marker series per network group. Only two colours are used, so groups
# that share a colour are told apart by marker shape alone.
axes[0].plot(tag_fw[:, 0], tag_fw[:, 1], 's', markersize=9, color="blue", label="FW")
axes[0].plot(tag_soc[:, 0], tag_soc[:, 1], '>', markersize=10, color="red", label="Soc")
axes[0].plot(tag_ppi[:, 0], tag_ppi[:, 1], 'D', markersize=9, color="blue", label="PPI")
axes[0].plot(tag_cit[:, 0], tag_cit[:, 1], '<', markersize=10, color="red", label="Cit")
axes[0].plot(tag_rd[:, 0], tag_rd[:, 1], 'd', markersize=11, color="blue", label="Rd")
axes[0].plot(tag_qa[:, 0], tag_qa[:, 1], '^', markersize=10, color="red", label="QA")

axes[0].legend(prop={'size': 14}, ncol=2)
centroids = kmeans.cluster_centers_
# Hand-placed region numbers at hard-coded data coordinates.
# NOTE(review): k-means is randomly initialised, so these positions (and which
# number belongs to which region) may not match a re-run — verify after refitting.
axes[0].text(-2, 2.5, "6", size=22, bbox=dict(boxstyle="circle",ec='k',fc='w',))
axes[0].text(1.3, 0.9, "4", size=22, bbox=dict(boxstyle="circle",ec='k',fc='w',))
axes[0].text(-0.73, 0.50, "5", size=22, bbox=dict(boxstyle="circle",ec='k',fc='w',))
axes[0].text(3.28, 1.18, "1", size=22, bbox=dict(boxstyle="circle",ec='k',fc='w',))
axes[0].text(-2, -1.4, "3", size=22, bbox=dict(boxstyle="circle",ec='k',fc='w',))
axes[0].text(0.55, -1.8, "2", size=22, bbox=dict(boxstyle="circle",ec='k',fc='w',))
# Cluster centres drawn as black crosses.
axes[0].scatter(centroids[:, 0], centroids[:, 1], marker='x', s=200, color='k')
axes[0].set_title('(a). with quadrangle coefficients', fontsize=19)
# PCA axes carry no direct meaning here, so tick marks are hidden.
axes[0].set_xticks(())
axes[0].set_yticks(())

# right panel: same drawing steps for the second model
axes[1].imshow(Z_2, interpolation='nearest',
           extent=(xx_2.min(), xx_2.max(), yy_2.min(), yy_2.max()),
           cmap='summer',
           aspect='auto', origin='lower')
axes[1].plot(tag_fw_2[:, 0], tag_fw_2[:, 1], 's', markersize=9, color="blue", label="FW")
axes[1].plot(tag_soc_2[:, 0], tag_soc_2[:, 1], '>', markersize=10, color="red", label="Soc")
axes[1].plot(tag_ppi_2[:, 0], tag_ppi_2[:, 1], 'D', markersize=9, color="blue", label="PPI")
axes[1].plot(tag_cit_2[:, 0], tag_cit_2[:, 1], '<', markersize=10, color="red", label="Cit")
axes[1].plot(tag_rd_2[:, 0], tag_rd_2[:, 1], 'd', markersize=11, color="blue", label="Rd")
axes[1].plot(tag_qa_2[:, 0], tag_qa_2[:, 1], '^', markersize=10, color="red", label="QA")

axes[1].legend(prop={'size': 14}, ncol=2)
centroids_2 = kmeans_2.cluster_centers_
# Hand-placed region numbers for the right panel (same caveat as the left panel).
axes[1].text(3.45, -0.22, "2", size=22, bbox=dict(boxstyle="circle",ec='k',fc='w',))
axes[1].text(-1.6, 0.4, "5", size=22, bbox=dict(boxstyle="circle",ec='k',fc='w',))
axes[1].text(-0.25, -0.37, "3", size=22, bbox=dict(boxstyle="circle",ec='k',fc='w',))
axes[1].text(-0.8, -0.9, "1", size=22, bbox=dict(boxstyle="circle",ec='k',fc='w',))
axes[1].text(-0, 0.94, "6", size=22, bbox=dict(boxstyle="circle",ec='k',fc='w',))
axes[1].text(1.58, 0, "4", size=22, bbox=dict(boxstyle="circle",ec='k',fc='w',))
axes[1].scatter(centroids_2[:, 0], centroids_2[:, 1], marker='x', s=200, color='k')
axes[1].set_title('(b). without quadrangle coefficients', fontsize=19)
axes[1].set_xticks(())
axes[1].set_yticks(())

# Written relative to the current working directory.
plt.savefig('network_classification.pdf')

In [None]:
# Show the cluster assigned to every sample, shifted from 0-based to 1-based.
# NOTE(review): presumably used to choose the region numbers drawn in the
# figure — confirm.
kmeans.labels_ + 1

In [None]:
# Show the cluster assigned to every sample by the second model, shifted from
# 0-based to 1-based.
kmeans_2.labels_ + 1

In [None]:
# Split the PCA-projected samples into per-group row ranges for plotting:
# rows 0-1, 2-5, 6-9, 10-11, 12-13 and 14 onward.
# NOTE(review): these six groups and row ranges do not match the 14-row,
# five-group X / labels defined earlier in this notebook (ordered trust, fw,
# cit, qa, ppi) — verify against the row order of `processed_data`.
tag_fw, tag_soc, tag_ppi, tag_cit, tag_rd, tag_qa = np.split(
    reduced_data, [2, 6, 10, 12, 14]
)

In [None]:
# Split the second PCA projection into per-group row ranges for plotting:
# rows 0-1, 2-5, 6-9, 10-11, 12-13 and 14 onward.
# NOTE(review): these six groups and row ranges do not match the 14-row,
# five-group X / labels defined earlier in this notebook (ordered trust, fw,
# cit, qa, ppi) — verify against the row order of `processed_data`.
tag_fw_2, tag_soc_2, tag_ppi_2, tag_cit_2, tag_rd_2, tag_qa_2 = np.split(
    reduced_data_2, [2, 6, 10, 12, 14]
)

In [18]:
# Load every network as a directed graph from the local "datasets/" folder
# (paths are relative to the working directory).
# NOTE(review): the feature-extraction cell near the top of this notebook uses
# these graph objects, so this cell has to be executed before it.

# FW_* networks; the two Bay files carry a float edge weight.
FW_Chesa = nx.read_edgelist("datasets/FW-Chesa", create_using=nx.DiGraph)
FW_Baywet = nx.read_edgelist(
    "datasets/FW-Baywet",
    create_using=nx.DiGraph,
    data=(("weight", float),),
)
FW_Baydry = nx.read_edgelist(
    "datasets/FW-Baydry",
    create_using=nx.DiGraph,
    data=(("weight", float),),
)
FW_LittleRock = nx.read_edgelist("datasets/FW-LittleRock", create_using=nx.DiGraph)

# Soc_* networks: comma-separated, with an integer weight and a 'sec' column
# (parsed as int for BTC-Alpha and as float for BTC-OTC).
# Not loaded (kept for reference):
#Soc_EmailEu = nx.read_edgelist("datasets/Soc-EmailEu", create_using=nx.Graph, data=(('sec', int),))
#Soc_Msg = nx.read_edgelist("datasets/CollegeMsg", create_using=nx.Graph, data=(('sec', int),))
Soc_Alpha = nx.read_edgelist(
    "datasets/BTC-Alpha",
    delimiter=",",
    create_using=nx.DiGraph,
    data=(("weight", int), ("sec", int)),
)
Soc_OTC = nx.read_edgelist(
    "datasets/BTC-OTC",
    delimiter=",",
    create_using=nx.DiGraph,
    data=(("weight", int), ("sec", float)),
)

# Not loaded (kept for reference):
#Soc_FB = nx.read_edgelist("datasets/Soc-FB", delimiter = ',', create_using=nx.Graph)
#Soc_Twitch = nx.read_edgelist("datasets/Soc-Twitch-Fr", delimiter = ',', create_using=nx.Graph)

# PPI_* networks (no edge attributes).
PPI_Stelzl = nx.read_edgelist("datasets/HP-Stelzl", create_using=nx.DiGraph)
PPI_Figeys = nx.read_edgelist("datasets/HP-Figeys", create_using=nx.DiGraph)

# Cit_* networks; only the DBLP file carries 'weight' and 'sec' columns.
Cit_DBLP = nx.read_edgelist(
    "datasets/Cit-DBLP",
    create_using=nx.DiGraph,
    data=(("weight", int), ("sec", int)),
)
Cit_Cora = nx.read_edgelist("datasets/Cit-Cora", create_using=nx.DiGraph)
Cit_HepPh = nx.read_edgelist("datasets/Cit-HepPh", create_using=nx.DiGraph)
Cit_HepTh = nx.read_edgelist("datasets/Cit-HepTh", create_using=nx.DiGraph)

# QA_* networks; each edge carries an integer 'sec' column.
QA_Math = nx.read_edgelist(
    "datasets/QA-MathOvfl-a2q",
    create_using=nx.DiGraph,
    data=(("sec", int),),
)
QA_Ubuntu = nx.read_edgelist(
    "datasets/QA-AskUbuntu-a2q",
    create_using=nx.DiGraph,
    data=(("sec", int),),
)

(0.12249791150450536,
 0.08562257743701326,
 0.08977220795195943,
 0.006579827310761368,
 0.555383670848817,
 0.0851832536618068,
 0.38867819878455623,
 0.013631433570303537)