In [1]:
import tensorflow as tf
import tensorflow.keras as keras
import numpy as np
import matplotlib.pyplot as plt
import sys
import os
import networkx as nx

# gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
# tf.config.experimental.set_visible_devices(devices=gpus[0], device_type='GPU')

# Read Data

In [2]:
def readCora():
    node_id_map = {}
    adj_dict_list = {}
    node_label = []
    label_id = {}


    with open("cora/cora.content",'r') as f:
        lines  = f.readlines()
        features = []
        for idx, line in enumerate(lines):
            l = line.strip().split('\t')
            node_feature = list(np.array(l[:-1], dtype = np.float))
            node = node_feature[0]
            node_id_map[node] = idx
            feature = node_feature[1:]
            features.append(feature)
            label = l[-1]
            if label not in label_id:
                label_id[label] = len(label_id)
            node_label.append(label_id[label])

    adj_dict_list = {}
    with open("cora/cora.cites", 'r') as f:
        for line in f.readlines():
            edge = line.strip().split('\t')
            head = node_id_map[float(edge[0])]
            tail = node_id_map[float(edge[1])]
            if head not in adj_dict_list:
                adj_dict_list[head] = set()
            if tail not in adj_dict_list:
                adj_dict_list[tail] = set()
            adj_dict_list[head].add(tail)
            adj_dict_list[tail].add(head)
    
    return features, node_label, label_id, adj_dict_list
        

In [3]:
features, node_label, label_id, adj_dict_list = readCora()

In [4]:
print(label_id)

{'Neural_Networks': 0, 'Rule_Learning': 1, 'Reinforcement_Learning': 2, 'Probabilistic_Methods': 3, 'Theory': 4, 'Genetic_Algorithms': 5, 'Case_Based': 6}


In [5]:
print(adj_dict_list[0])  # id 0 --> 31336  : {10531,1129442,31349,686532,31353} --> {258, 544,8,435, 14}

{544, 258, 8, 14, 435}


In [6]:
features = np.array(features)

In [7]:
aggregator = 'mean' # {'gcn', 'lstm', 'pooling'}
use_gcn = False
supervise = True
num_nodes = features.shape[0]
feature_dim = features.shape[1]
num_sample = 10
num_class = 7
depth = 2
seed = 7
concat_self = True  # False: inductive 每个节点的特征为其邻居的聚合（不包括自身） True:包括自身
embed_dim = 128
lr = 0.01

In [8]:
class Aggregator(keras.layers.Layer):
    def __init__(self, 
                 output_dims, 
                 num_samples,
                 method = 'mean', 
                 use_gcn = False, 
                 dropout = 0., 
                 activation = keras.activations.relu, 
                 concat_self = True,
                 bilstm = False,
                 **kwargs):
        super(Aggregator,self).__init__(**kwargs)
        self.method = method
        self.output_dims = output_dims
        self.method = method
        self.use_gcn = use_gcn
        self.dropout = dropout
        self.activation = activation
        self.concat_self = concat_self
        self.bilstm = bilstm
        self.num_samples = num_samples
        self.concat = keras.layers.Concatenate()
        self.dense = keras.layers.Dense(units=self.output_dims)
        self.bilstm = keras.layers.Bidirectional(
                    keras.layers.LSTM(units=self.output_dims // 2,
                                      stateful = False,
                                    #   recurrent_initializer=keras.initializers.GlorotUniform,
                                      return_sequences=False)
                )
        self.lstm = keras.layers.LSTM(units=self.output_dims, 
                                           stateful=False, 
                                        #    recurrent_initializer=keras.initializers.GlorotUniform,
                                           return_sequences=False)

    def build(self, input_shape):
        super(Aggregator, self).build(input_shape)
        # if self.method == 'mean':
        #     if self.use_gcn:
        #         self.W = self.add_weight(name = "weight_gcn",
        #                                  shape = (input_shape[0][1], self.output_dims),
        #                                  initializer=keras.initializers.GlorotUniform,
        #                                  trainable=True)
        #     else:
        #         if self.concat_self:
        #             self.W = self.add_weight(name = "weight_mean_concat",
        #                                      shape = (input_shape[0][1] * 2, self.output_dims),
        #                                      initializer= keras.initializers.GlorotUniform,
        #                                      trainable = True)
        #         else:
        #             self.W = self.add_weight(name = "weight_mean",
        #                                      shape = (input_shape[0][1], self.output_dims),
        #                                      initializer = keras.initializers.GlorotUniform,
        #                                      trainable = True)
        # elif self.method == 'lstm':
        #     if self.concat_self:
        #         self.W = self.add_weight(name = "weight_lstm_concat",
        #                                        shape = (input_shape[0][1] * 2, self.output_dims),
        #                                        initializer= keras.initializers.GlorotUniform,
        #                                        trainable = True)
        #     else:
        #         self.W = self.add_weight(name = "weight_lstm",
        #                                        shape = (input_shape[0][1], self.output_dims),
        #                                        initializer = keras.initializers.GlorotUniform,
        #                                        trainable = True)            
        # else:  # max pooling
        #     if self.concat_self:
        #         self.W = self.add_weight(name = "weight_pool_concat",
        #                                        shape = (input_shape[0][1] * 2, self.output_dims),
        #                                        initializer= keras.initializers.GlorotUniform,
        #                                        trainable = True)
        #     else:
        #         self.W = self.add_weight(name = "weight_pool",
        #                                        shape = (input_shape[0][1], self.output_dims),
        #                                        initializer = keras.initializers.GlorotUniform,
        #                                        trainable = True)                
    
    def call(self, inputs):
        # self_features: (None, 1433)
        # neigh_features: [tensor, tensor,......]
        self_features, neigh_features = inputs
        if self.method == "mean":
            # self_features = keras.layers.Dropout(self.dropout)(self_features, training = training)  # [[],[],[]]
            
            if self.use_gcn:
                # neigh_features include self
                means = tf.stack([tf.reduce_mean(neigh_fea,axis = 0) for neigh_fea in neigh_features])
                output = self.dense(means)
                # output = tf.matmul(means, self.W)
                return self.activation(output)
            else:
                if self.concat_self:
                    ## self_features: [[1,2,3],[4,5,6],[7,8,9]]  3 self node
                    ## neigh_features: [[[11,22,33],[44,55,66],[77,66,99]] ,   [[],[]],   [[]]]
                    # neigh_means = tf.reduce_mean(neigh_features, axis=1).to_tensor()
                    
                    neigh_means = tf.stack([tf.reduce_mean(neigh_fea,axis = 0) for neigh_fea in neigh_features])

                    # lint 5 in Algorithm 1
                    concat = self.concat([self_features, neigh_means])
                    # output = tf.matmul(concat, self.W)
                    output = self.dense(concat)
                else:
                    neigh_means = tf.stack([tf.reduce_mean(neigh_fea,axis = 0) for neigh_fea in neigh_features])
                    output = self.dense(neigh_means)
                    # output = tf.matmul(neigh_means, self.W)
                return self.activation(output)
        elif self.method == "lstm":
            if self.concat_self:
                to_features = self.concat([self_features, neigh_features])
            else:
                to_features = neigh_features
            
            if self.bilstm:
                output = self.bilstm(to_features)
            else:
                output = self.lstm(to_features)
            

            output = self.dense(output)
            # output = tf.matmul(output,self.W)
            return self.activation(output)
            
        else:  # max pooling
            if self.concat_self:
                to_features = self.concat([self_features, neigh_features])   
            else:
                to_features = neigh_features
            
            output = tf.reduce_max(to_features,axis = 1)
            output = self.dense(output)
            # output = tf.matmul(output,self.W)
            return self.activation(output)

In [9]:
class GraphSAGELayer(keras.layers.Layer):
    def __init__(self, 
                 adj_list,
                 num_nodes, 
                 embed_dims,
                 aggregator = 'mean', 
                 use_gcn = False,
                 num_sample = 5, 
                 concat = True,
                 dropout = 0.,
                 seed = 7,
                 **kwargs):
        super(GraphSAGELayer, self).__init__(**kwargs)
        self.adj_list = adj_list
        self.aggregator = aggregator
        self.num_nodes = num_nodes
        self.num_sample = num_sample
        self.concat = concat
        self.use_gcn = use_gcn
        self.embed_dims = embed_dims
        self.dropout = dropout
        self.seed = seed
        if self.aggregator == 'mean':
            self.agglayer = Aggregator(output_dims = self.embed_dims, 
                   num_samples = self.num_sample,
                   method = self.aggregator, 
                   use_gcn = self.use_gcn, 
                   dropout = self.dropout,
                   activation=keras.activations.relu, 
                   concat_self = self.concat)

    def _sample_neighbors(self, nodes):
        """sample neighbors for nodes"""
        neighs = [list(self.adj_list[node]) for node in nodes]
        np.random.seed(self.seed)
        sampled_neighbor = [list(np.random.choice(neigh, self.num_sample, replace = False)) if len(neigh) >= self.num_sample 
                                else neigh for neigh in neighs]
        if self.aggregator == 'mean' and self.use_gcn:
            for i, node in enumerate(nodes):
                sampled_neighbor[i].append(node)  # add self to neighborhood

        if self.aggregator == 'lstm':
            # 打乱节点顺序
            np.random.seed(0)
            sampled_neighbor = [list(np.random.permutation(neigh)) for neigh in sampled_neighbor]
            
        return sampled_neighbor

    def build(self, input_shape):
        super(GraphSAGELayer,self).build(input_shape)

        
    def call(self, inputs):
        # 先为每个节点采样邻居 输入为inputs = features
        features = inputs  # ndarray  (None,1433)
        sampled_neighbors = self._sample_neighbors(np.arange(self.num_nodes))
        # sampled_neighbors_mask:
        # mask = np.zeros(shape=(self.num_nodes,self.num_nodes),dtype=np.bool)
        # for idx, neighs in enumerate(sampled_neighbors):
        #     mask[idx][neighs] = True
        
        # 获取邻居的feature  [tensors , .... ]
        # neigh_features = [features[mask[i]] for i in np.arange(self.num_nodes)]

        neigh_features = [tf.gather(features, neighs) for neighs in sampled_neighbors]
        agg_input = [features, neigh_features]
        output = self.agglayer(agg_input)
        return output

In [10]:
# shuffle node and train_test_val split
np.random.seed(seed)
rand_indices = np.random.permutation(num_nodes)
test = rand_indices[:1000]
val = rand_indices[1000:1500]
train = rand_indices[1500:]

In [11]:
# 2 次迭代
input_features = keras.Input(shape = (feature_dim, ))
agg1 = GraphSAGELayer(adj_list = adj_dict_list,
                      num_nodes = num_nodes, 
                      embed_dims = embed_dim,
                      aggregator = 'mean', 
                      use_gcn = False)(input_features)                    
agg2 = GraphSAGELayer(adj_list = adj_dict_list,
                      num_nodes = num_nodes, 
                      embed_dims = num_class,
                      aggregator = 'mean', 
                      use_gcn = False)(agg1)

model = keras.models.Model(inputs = input_features, outputs = agg2)

model.compile(loss = keras.losses.sparse_categorical_crossentropy, optimizer = keras.optimizers.Adam(learning_rate=lr),
              weighted_metrics=['sparse_categorical_crossentropy', 'acc'])

model.summary()

Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 1433)]            0         
_________________________________________________________________
graph_sage_layer (GraphSAGEL (2708, 128)               366976    
_________________________________________________________________
graph_sage_layer_1 (GraphSAG (2708, 7)                 1799      
Total params: 368,775
Trainable params: 368,775
Non-trainable params: 0
_________________________________________________________________


In [12]:
import scipy.sparse as sp

callback = [keras.callbacks.ModelCheckpoint('./best_model.h5',
                                            monitor='val_weighted_sparse_categorical_crossentropy',
                                            save_best_only=True,
                                            save_weights_only=True), 
              keras.callbacks.EarlyStopping(monitor="val_acc",
              min_delta=1e-4, patience=10)]


print("start training")
node_labels = np.array(node_label)
def sample_mask(idx, l):
    mask = np.zeros(l)
    mask[idx] = 1
    return np.array(mask, dtype=np.bool)

y_train = np.zeros(node_labels.shape)
train_mask = sample_mask(train, node_labels.shape[0])
y_train[train_mask] = node_labels[train_mask]

y_val = np.zeros(node_labels.shape)
val_mask = sample_mask(val, node_labels.shape[0])
y_val[val_mask] = node_labels[val_mask]

y_test = np.zeros(node_labels.shape)
test_mask = sample_mask(test, node_labels.shape[0])
y_test[test_mask] = node_labels[test_mask]

def preprocess_features(features):
    row_sum = np.array(features.sum(1))
    reverse_row_sum = np.power(row_sum,-1).flatten()
    reverse_row_sum[np.isinf(reverse_row_sum)] = 0.
    new_features = sp.diags(reverse_row_sum).dot(features)
    return new_features
features_ = preprocess_features(features)

history = model.fit(x=features_, y = y_train, sample_weight= train_mask, validation_data=(features_, y_val, val_mask),
                    batch_size = num_nodes, epochs = 150, shuffle = False, workers=10, use_multiprocessing=True,callbacks=callback)

_crossentropy: 1.4057 - acc: 0.2997 - val_loss: 0.2768 - val_sparse_categorical_crossentropy: 1.4989 - val_acc: 0.3080
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150


In [14]:
eval_results = model.evaluate(features_, y_test, sample_weight=test_mask, batch_size=num_nodes)

