# Read data

In [None]:
import numpy as np
import scipy.sparse as sp
from scipy.optimize import linear_sum_assignment
from sklearn import metrics
from sklearn.cluster import DBSCAN

# Raise NumPy's summarization threshold so arrays with up to 4200 elements are printed in full.
np.set_printoptions(threshold=4200)  

# DBSCAN on node embeddings

In [None]:
def preprocess_features_v1(features):
    """Row-normalize a feature matrix so that each row is divided by its sum.

    Parameters
    ----------
    features : 2-D dense array, ``np.matrix`` or scipy sparse matrix
        Matrix whose rows are to be normalized. Integer input is accepted.

    Returns
    -------
    numpy.matrix
        Dense matrix of the same shape as ``features``. Rows whose sum is
        zero are scaled by 0 (they come out all-zero) instead of producing
        ``inf``/``nan``.
    """
    # Work in float: np.power on an integer array with a negative exponent
    # raises "Integers to negative integer powers are not allowed".
    rowsum = np.asarray(features.sum(1), dtype=float).flatten()
    # Zero row sums yield inf here; silence the warning and zero them below.
    with np.errstate(divide="ignore", over="ignore"):
        r_inv = np.power(rowsum, -1.0)
    r_inv[np.isinf(r_inv)] = 0.
    r_mat_inv = sp.diags(r_inv)
    features = r_mat_inv.dot(features)
    # A sparse product must be densified explicitly: np.matrix(sparse) would
    # wrap the sparse object in a 1x1 object matrix.
    if sp.issparse(features):
        features = features.toarray()
    return np.matrix(features)

In [None]:
# Load the feature array from 'featts.npy' (path is relative to the working directory).
features = np.load('featts.npy')

In [None]:
# Drop any length-1 axes from the loaded array.
features = np.squeeze(features)

In [None]:
# Inspect the shape after squeezing.
features.shape

In [None]:
# Row-normalize the features. preprocess_features_v1 returns an np.matrix,
# which scikit-learn >= 1.2 estimators reject with a TypeError, so convert
# the result to a plain ndarray before it is passed to DBSCAN / KMeans.
features = np.asarray(preprocess_features_v1(features))

In [None]:
# Display the row-normalized features.
features

In [None]:
# Load the label array from 'one_hot_labels.npy' and cast it to int.
# presumably one one-hot row per sample (argmax over axis 1 is taken next) — TODO confirm
lable_onehot = np.load('one_hot_labels.npy').astype(int)

In [None]:
# Column index of the largest entry in each row, kept as a column vector.
label = np.expand_dims(lable_onehot.argmax(axis=1), axis=1)

In [None]:
# Inspect the shape (the previous cell leaves a trailing length-1 axis).
label.shape

In [None]:
# Drop the length-1 axis again.
label = np.squeeze(label)

In [None]:
# Inspect the shape after squeezing.
label.shape

# DBSCAN

In [None]:
# Fit DBSCAN on the normalized features and display the fitted estimator.
# NOTE(review): eps=0.0049 and min_samples=17 look hand-tuned for this data — confirm provenance.
clustering = DBSCAN(eps=0.0049, min_samples=17).fit(features)
clustering

In [None]:
# Per-sample cluster ids assigned by DBSCAN; samples labelled -1 are dropped further below.
prelabel = clustering.labels_

In [None]:
# Largest cluster id that DBSCAN assigned.
prelabel.max()

In [None]:
# Collect the positions (ind) and cluster ids (pre) of every sample that
# DBSCAN did not label -1.
ind, pre = [], []
for position, cluster_id in enumerate(prelabel):
    if cluster_id == -1:
        continue
    ind.append(position)
    pre.append(cluster_id)

In [None]:
# Ground-truth labels of the samples kept in ind.
labels_true = label[ind]

In [None]:
# Number of samples kept (cluster id != -1).
len(ind)

**FMI**

In [None]:
# Fowlkes-Mallows score of the DBSCAN clusters, computed on the kept samples only.
metrics.fowlkes_mallows_score(labels_true, pre)

**ARI**

In [None]:
# Adjusted Rand index of the DBSCAN clusters, computed on the kept samples only.
metrics.adjusted_rand_score(labels_true, pre)

**NMI**

In [None]:
# Normalized mutual information of the DBSCAN clusters, computed on the kept samples only.
metrics.normalized_mutual_info_score(labels_true, pre)

# K-means

In [None]:
from sklearn.cluster import KMeans

In [None]:
# Run 4-cluster k-means on `features` with random_state 0..99 and record, for
# each metric (ARI, NMI, FMI), the best score and the seed that produced it.
# NOTE(review): the seed is picked using the ground-truth labels, so the
# printed values are best-case numbers, not an unbiased estimate.
bestari = -1
ariepoch = -1

bestnmi = -1
nmiepoch = -1

# bestvm = -1
# vmepoch = -1

bestfmi = -1
fmiepoch = -1

for i in range(0, 100):
    print(i)
    # n_init is pinned to 10 (the historical default): scikit-learn 1.4
    # changed the default to 'auto', which would silently alter the results.
    kmeans = KMeans(n_clusters=4, random_state=i, n_init=10).fit(features)
    pppre = kmeans.labels_

    ari = metrics.adjusted_rand_score(label, pppre)
    if ari > bestari:
        bestari = ari
        ariepoch = i

    fmi = metrics.fowlkes_mallows_score(label, pppre)
    if fmi > bestfmi:
        bestfmi = fmi
        fmiepoch = i

    nmi = metrics.normalized_mutual_info_score(label, pppre)
    if nmi > bestnmi:
        bestnmi = nmi
        nmiepoch = i

#     vm = metrics.v_measure_score(label,pppre)
#     if vm >bestvm:
#         bestvm = vm
#         vmepoch = i

print('the bestARI：',bestari,'random_state:',ariepoch)
print('the bestNMI：',bestnmi,'random_state:',nmiepoch)
# print('the bestVM：',bestvm,'random_state:',vmepoch)
print('the bestFMI：',bestfmi,'random_state:',fmiepoch)

# ==============================================================

# DBSCAN on the weighted adjacency matrix

In [None]:
import tensorflow as tf
import numpy as np

In [None]:
# with tf.Session() as sess:
#     print('1')
#     new_saver = tf.train.import_meta_graph("G:\\desktop\\准备的期刊论文\\20190928任务\\DBLP实验\\HIN+GCN\\premodel\\.meta")
#     print('2')
#     new_saver.restore(sess, tf.train.latest_checkpoint('G:\\desktop\\准备的期刊论文\\20190928任务\\DBLP实验\\HIN+GCN\\premodel\\'))
#     print('3')
#     print(new_saver)
    
    
#     variable_names = [v.name for v in tf.trainable_variables()]
#     values = sess.run(variable_names)
#     for k, v in zip(variable_names, values):
#         print("Variable: ", k)
#         print("Shape: ", v.shape)
#         print(v)
#         W = v
#         break
    
#     print(sess.run('W:0')) 
    
#     W = sess.run('weights_n')

In [None]:
# Per-slice weights applied to the adjacency data below.
# presumably copied from the trained model that the commented-out cell above
# was meant to restore — TODO confirm
W = [
    0.6489845,
    1.0374146,
    1.1753616,
]

In [None]:
# Inspect the first weight.
W[0]

In [None]:
# Load the adjacency data from 'small_adj_data.npy'.
# presumably a stack of at least three adjacency matrices (indices 0..2 are used next) — TODO confirm
adjdata = np.load('small_adj_data.npy')

In [None]:
# Weighted sum of the first three adjacency slices, slice k scaled by W[k].
weightedadj = adjdata[0]*W[0] + adjdata[1]*W[1] +adjdata[2]*W[2]

In [None]:
# Row-normalize the weighted adjacency. preprocess_features_v1 returns an
# np.matrix, which scikit-learn >= 1.2 estimators reject with a TypeError, so
# convert the result to a plain ndarray before it is passed to DBSCAN / KMeans.
weightedadj = np.asarray(preprocess_features_v1(weightedadj))

In [None]:
# Display the row-normalized weighted adjacency.
weightedadj

In [None]:
# Inspect its shape.
weightedadj.shape

# DBSCAN

In [None]:
# Fit DBSCAN on the normalized weighted adjacency and display the fitted estimator.
# NOTE(review): eps=0.01 and min_samples=5 differ from the first DBSCAN run above — confirm provenance.
clustering2 = DBSCAN(eps=0.01,min_samples=5).fit(weightedadj)
clustering2

In [None]:
# Per-sample cluster ids assigned by DBSCAN; samples labelled -1 are dropped further below.
prelabel2 = clustering2.labels_

In [None]:
# Largest cluster id that DBSCAN assigned.
prelabel2.max()

In [None]:
# Collect the positions (ind2) and cluster ids (pre2) of every sample that
# DBSCAN did not label -1.
ind2, pre2 = [], []
for position, cluster_id in enumerate(prelabel2):
    if cluster_id == -1:
        continue
    ind2.append(position)
    pre2.append(cluster_id)

In [None]:
# Ground-truth labels of the samples kept in ind2.
labels_true2 = label[ind2]

In [None]:
# Number of samples kept (cluster id != -1).
len(labels_true2)

**FMI**

In [None]:
# Fowlkes-Mallows score of the DBSCAN clusters, computed on the kept samples only.
metrics.fowlkes_mallows_score(labels_true2, pre2)

**ARI**

In [None]:
# Adjusted Rand index of the DBSCAN clusters, computed on the kept samples only.
metrics.adjusted_rand_score(labels_true2, pre2)

**NMI**

In [None]:
# Normalized mutual information of the DBSCAN clusters, computed on the kept samples only.
metrics.normalized_mutual_info_score(labels_true2, pre2)

# K-means

In [None]:
from sklearn.cluster import KMeans

In [None]:
# Run 4-cluster k-means on `weightedadj` with random_state 0..99 and record,
# for each metric (ARI, NMI, FMI), the best score and the seed that produced it.
# NOTE(review): the seed is picked using the ground-truth labels, so the
# printed values are best-case numbers, not an unbiased estimate.
bestari = -1
ariepoch = -1

bestnmi = -1
nmiepoch = -1

# bestvm = -1
# vmepoch = -1

bestfmi = -1
fmiepoch = -1

for i in range(0, 100):
    print(i)
    # n_init is pinned to 10 (the historical default): scikit-learn 1.4
    # changed the default to 'auto', which would silently alter the results.
    kmeans = KMeans(n_clusters=4, random_state=i, n_init=10).fit(weightedadj)
    pppre = kmeans.labels_

    ari = metrics.adjusted_rand_score(label, pppre)
    if ari > bestari:
        bestari = ari
        ariepoch = i

    fmi = metrics.fowlkes_mallows_score(label, pppre)
    if fmi > bestfmi:
        bestfmi = fmi
        fmiepoch = i

    nmi = metrics.normalized_mutual_info_score(label, pppre)
    if nmi > bestnmi:
        bestnmi = nmi
        nmiepoch = i

#     vm = metrics.v_measure_score(label,pppre)
#     if vm >bestvm:
#         bestvm = vm
#         vmepoch = i

print('the best ARI：',bestari,'random_state:',ariepoch)
print('the best NMI：',bestnmi,'random_state:',nmiepoch)
# print('the bestVM：',bestvm,'random_state:',vmepoch)
print('the best FMI：',bestfmi,'random_state:',fmiepoch)

In [None]:
# Single k-means fit on the weighted adjacency with a fixed seed.
# n_init is pinned to 10 (the historical default): scikit-learn 1.4 changed
# the default to 'auto', which would silently alter the result.
# NOTE(review): this uses 6 clusters whereas the seed sweeps use 4 — confirm intended.
kmeans2 = KMeans(n_clusters=6, random_state=60, n_init=10).fit(weightedadj)

In [None]:
# Per-sample cluster ids from the k-means fit above.
pppre2 = kmeans2.labels_

In [None]:
# Clustering accuracy. K-means cluster ids are arbitrary, so comparing them
# element-wise with the class ids is meaningless. Instead, find the one-to-one
# cluster -> class assignment that maximizes agreement (Hungarian algorithm on
# the contingency table) and count the samples that fall on matched pairs.
# Clusters left without a matching class count as errors.
contingency = metrics.cluster.contingency_matrix(label, pppre2)
# linear_sum_assignment minimizes cost, hence the negation.
class_idx, cluster_idx = linear_sum_assignment(-contingency)
contingency[class_idx, cluster_idx].sum() / len(pppre2)

In [None]:
# Completeness score of the k-means clusters against the true labels.
metrics.completeness_score(label,pppre2)

In [None]:
# maxacc = 0.227619
# epoch = 12
# for i in range(17,100):
#     print(i)
#     kmeans2 = KMeans(n_clusters=6, random_state=i).fit(weightedadj)
#     pppre2 = kmeans2.labels_
#     acc = np.sum((pppre2 == label)+0)/len(pppre2)
#     if acc > maxacc:
#         maxacc = acc
#         epoch = i
#         print('the best acc:',maxacc,'random_state is：', epoch)