In [122]:
import numpy as np
from tensorflow import keras
import tensorflow as tf
from tensorflow.keras import regularizers


class MNN(keras.Model):
    def __init__(self, node_size, nhid0, nhid1, droput, alpha):
        super(MNN, self).__init__()
        self.encode0 = tf.keras.layers.Dense(nhid0,kernel_regularizer=regularizers.L1L2(nu1, nu2))
        self.encode1 = tf.keras.layers.Dense(nhid1,kernel_regularizer=regularizers.L1L2(nu1, nu2))
        self.decode0 = tf.keras.layers.Dense(nhid0,kernel_regularizer=regularizers.L1L2(nu1, nu2))
        self.decode1 = tf.keras.layers.Dense(node_size,kernel_regularizer=regularizers.L1L2(nu1, nu2))
        self.activation = tf.keras.layers.LeakyReLU()
        self.droput = droput
        self.alpha = alpha

    def call(self, adj_batch, adj_mat, b_mat):
        
        t0 = self.activation(self.encode0(adj_batch))
        t0 = self.activation(self.encode1(t0))
        embedding = t0
        t0 = self.activation(self.decode0(t0))
        t0 = self.activation(self.decode1(t0))
        embedding_norm = tf.reduce_sum(embedding * embedding, axis=1, keepdims=True)
        L_1st = tf.reduce_sum(tf.cast(adj_mat,dtype=tf.float32) * (embedding_norm -
                                     2 * tf.matmul(embedding, tf.transpose(embedding))
                                     + tf.transpose(embedding_norm)))
        
        
        L_2nd = tf.reduce_sum((
            (tf.cast(adj_batch,dtype=tf.float32) - t0) * tf.cast(b_mat,dtype=tf.float32)) * ((tf.cast(adj_batch,dtype=tf.float32) - t0) * tf.cast(b_mat,dtype=tf.float32)))
        return L_1st, self.alpha * L_2nd, L_1st + self.alpha * L_2nd

    def savector(self, adj):
        t0 = self.encode0(adj)
        t0 = self.encode1(t0)
        return t0


In [96]:
import networkx as nx
import numpy as np

In [31]:

def Read_graph(file_name):
    # 文本文件中的每一行必须含有相同的数据; delimiter分隔符默认是空格; 类型是numpy array
    edge = np.loadtxt(file_name).astype(np.int32)
    # 得到图中点的最小和最大编号; .min()返回数组中所有元素最小的
    min_node, max_node = edge.min(), edge.max()
    # Node表示图上一共有多少个顶点，如果标号是从0开始，那么顶点数 = max_node + 1
    if min_node == 0:
        Node = max_node + 1
    else:
        Node = max_node
    # 这里面使用networkx将图的信息存入
    G = nx.Graph()
    # Adj就是图的邻接表矩阵，是一个n*n大小的numpy矩阵，这里n是顶点的个数
    Adj = np.zeros([Node, Node], dtype=np.int32)
    # 遍历边的文件，将每条边存入networkx的图，以及邻接矩阵Adj所对应的位置(i, j)
    for i in range(edge.shape[0]):
        G.add_edge(edge[i][0], edge[i][1])
        if min_node == 0:
            Adj[edge[i][0], edge[i][1]] = 1
            Adj[edge[i][1], edge[i][0]] = 1
        else:
            Adj[edge[i][0] - 1, edge[i][1] - 1] = 1
            Adj[edge[i][1] - 1, edge[i][0] - 1] = 1
    # 转化成tensor
    Adj = np.array(Adj)

    return G, Adj, Node


In [55]:
G, Adj, Node = Read_graph('../../Graph/karate.edgelist')
Data = tf.data.Dataset.from_tensor_slices([i for i in range(Node)])
Test = Data.batch(25)
for index in Test:
    print(index.numpy())
    index= index.numpy()
    print(Adj)
    adj_batch = Adj[index]
    adj_mat = adj_batch[:, index]
    b_mat = np.ones_like(adj_batch)
    print(adj_batch != 0)
    b_mat[adj_batch != 0] = 5
    print(b_mat)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24]
[[0 1 1 ... 1 0 0]
 [1 0 1 ... 0 0 0]
 [1 1 0 ... 0 1 0]
 ...
 [1 0 0 ... 0 1 1]
 [0 0 1 ... 1 0 1]
 [0 0 0 ... 1 1 0]]
[[False  True  True  True  True  True  True  True  True False  True  True
   True  True False False False  True False  True False  True False False
  False False False False False False False  True False False]
 [ True False  True  True False False False  True False False False False
  False  True False False False  True False  True False  True False False
  False False False False False False  True False False False]
 [ True  True False  True False False False  True  True  True False False
  False  True False False False False False False False False False False
  False False False  True  True False False False  True False]
 [ True  True  True False False False False  True False False False False
   True  True False False False False False False False False False False
  False False False F

In [48]:
Test.take(1)

<TakeDataset shapes: (None,), types: tf.int32>

In [108]:
nhid0 = 1000
nhid1 = 128 
dropout = 0.5
alpha = 1e-2
lr = 0.001 
step_size = 10 
decay_rate = 0.9 
beta = 5
epochs = 100
nu1 = 1e-5
nu2 = 1e-5


In [123]:

# ./models/model.py中的MNN类，基于pytorch实现的论文中的自编码器
model = MNN(Node, nhid0, nhid1, dropout, alpha)
# Adam算法优化模型参数
opt = keras.optimizers.Adam(0.01)
# 设置模型的学习率的超参数

lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(lr, decay_steps=step_size, decay_rate=decay_rate, staircase=True)




In [125]:

# 选用gpu或cpu训练
# 共训练epoch次数
Data = tf.data.Dataset.from_tensor_slices([i for i in range(Node)]).batch(25)
for epoch in range(1, epochs + 1):
    loss_sum, loss_L1, loss_L2, loss_reg = 0, 0, 0, 0
    # 每次训练组数：batchsize
    for index in Data:
        # Adj = N * N
        # batch_size = n
        # adj_batch = n * N
        index= index.numpy()

        adj_batch = Adj[index]
        print(index)
        # adj_mat = n * n
        adj_mat = adj_batch[:, index]
        
        # 将邻接矩阵中的为0项设为1，为1项设为beta
        b_mat = np.ones_like(adj_batch)
        b_mat[adj_batch != 0] = beta
        adj_batch = tf.convert_to_tensor(adj_batch)
        b_mat = tf.convert_to_tensor(b_mat)
        adj_mat = tf.convert_to_tensor(adj_mat)
        
        # 在做BP之前将gradients置0因为是累加的
        L_1st, L_2nd, L_all = model(adj_batch, adj_mat, b_mat)
        L_reg = 0
        

        with tf.GradientTape() as tape:
            L_1st, L_2nd, L_all = model(adj_batch, adj_mat, b_mat)
            loss = L_all
        gradients = tape.gradient(loss, model.trainable_variables)
        opt.apply_gradients(zip(gradients, model.trainable_variables))

        # 将损失值和正则化项加在一起构成最终的损失函数
        # 计算梯度
      
        # 记录相应部分loss值
        loss_sum += loss
        # 一阶相似度的loss值
        loss_L1 += L_1st
        # 二阶相似度的loss值
        loss_L2 += L_2nd
        # 正则化项loss值
    # 每次epoch输出训练情况，loss值等
    print("loss for epoch %d is:" %epoch)
    print("loss_sum is %f" %loss_sum)
    print("loss_L1 is %f" %loss_L1)
    print("loss_L2 is %f" %loss_L2)
    print("loss_reg is %f" %loss_reg)
embedding = model.savector(Adj)


[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24]
[25 26 27 28 29 30 31 32 33]
loss for epoch 1 is:
loss_sum is 676.611694
loss_L1 is 633.834412
loss_L2 is 42.777283
loss_reg is 0.000000
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24]
[25 26 27 28 29 30 31 32 33]
loss for epoch 2 is:
loss_sum is 727.901611
loss_L1 is 684.691284
loss_L2 is 43.210342
loss_reg is 0.000000
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24]
[25 26 27 28 29 30 31 32 33]
loss for epoch 3 is:
loss_sum is 441.288330
loss_L1 is 408.769287
loss_L2 is 32.519043
loss_reg is 0.000000
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24]
[25 26 27 28 29 30 31 32 33]
loss for epoch 4 is:
loss_sum is 354.041077
loss_L1 is 328.453278
loss_L2 is 25.587814
loss_reg is 0.000000
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24]
[25 26 27 28 29 30 31 32 33]
loss for epoch 5 is:
loss_sum is 

[25 26 27 28 29 30 31 32 33]
loss for epoch 47 is:
loss_sum is 9.381681
loss_L1 is 2.052987
loss_L2 is 7.328694
loss_reg is 0.000000
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24]
[25 26 27 28 29 30 31 32 33]
loss for epoch 48 is:
loss_sum is 16.070965
loss_L1 is 2.151696
loss_L2 is 13.919270
loss_reg is 0.000000
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24]
[25 26 27 28 29 30 31 32 33]
loss for epoch 49 is:
loss_sum is 7.393719
loss_L1 is 1.507031
loss_L2 is 5.886688
loss_reg is 0.000000
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24]
[25 26 27 28 29 30 31 32 33]
loss for epoch 50 is:
loss_sum is 17.791988
loss_L1 is 5.059321
loss_L2 is 12.732667
loss_reg is 0.000000
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24]
[25 26 27 28 29 30 31 32 33]
loss for epoch 51 is:
loss_sum is 25.729742
loss_L1 is 11.946552
loss_L2 is 13.783190
loss_reg is 0.000000
[ 0  1  2  3  4 

[25 26 27 28 29 30 31 32 33]
loss for epoch 94 is:
loss_sum is 4.314034
loss_L1 is 0.560849
loss_L2 is 3.753184
loss_reg is 0.000000
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24]
[25 26 27 28 29 30 31 32 33]
loss for epoch 95 is:
loss_sum is 4.255511
loss_L1 is 0.554205
loss_L2 is 3.701307
loss_reg is 0.000000
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24]
[25 26 27 28 29 30 31 32 33]
loss for epoch 96 is:
loss_sum is 4.155381
loss_L1 is 0.599101
loss_L2 is 3.556280
loss_reg is 0.000000
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24]
[25 26 27 28 29 30 31 32 33]
loss for epoch 97 is:
loss_sum is 4.163047
loss_L1 is 0.597818
loss_L2 is 3.565229
loss_reg is 0.000000
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24]
[25 26 27 28 29 30 31 32 33]
loss for epoch 98 is:
loss_sum is 4.061459
loss_L1 is 0.583327
loss_L2 is 3.478132
loss_reg is 0.000000
[ 0  1  2  3  4  5  6  

array([[ 0.13989162, -1.5701236 , -2.1391547 , ..., -0.5981258 ,
        -0.6402912 , -0.5369024 ],
       [-0.02202164, -1.5351268 , -2.3133867 , ..., -0.64747363,
        -0.5031823 , -0.45129693],
       [-0.02860043, -1.3366505 , -2.3474708 , ..., -0.44417658,
        -0.7338315 , -0.35262   ],
       ...,
       [ 0.04131178, -0.8395436 , -1.9084347 , ..., -0.14251831,
        -0.18977213, -0.4544294 ],
       [ 0.05114252, -0.9519491 , -1.8633006 , ..., -0.08990844,
        -0.04226676, -0.4284957 ],
       [ 0.12561488, -0.939856  , -1.9134054 , ..., -0.18669155,
        -0.10901473, -0.5097248 ]], dtype=float32)

In [131]:

# k-means聚类
from sklearn import  cluster
from sklearn.metrics import adjusted_rand_score
from sklearn.model_selection import train_test_split
import pandas as pd
embedding_node=[]
for i in range(Node):
    t = embedding[i]
    embedding_node.append(t)
embedding_node=np.matrix(embedding_node).reshape((Node,-1))
y_pred = cluster.KMeans(n_clusters=3, random_state=9).fit_predict(embedding_node) # 调用 test_RandomForestClassifier
y_pred


array([1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1,
       0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)