In [1]:
import tensorflow as tf
import tensorflow.keras as keras
import numpy as np
import pandas as pd
import sys
import os
import scipy.sparse as sp
import  pickle as pkl
import networkx as nx


# os.environ['CUDA_VISIBLE_DEVICES']='1, 2'
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
tf.config.experimental.set_visible_devices(devices=gpus[1:3], device_type='GPU')

# DataLoad

without sparse operation in this project

In [2]:
dataset_name = "cora"

def preprocess_features(features):
    row_sum = np.array(features.sum(1))
    reverse_row_sum = np.power(row_sum,-1).flatten()
    reverse_row_sum[np.isinf(reverse_row_sum)] = 0.
    new_features = sp.diags(reverse_row_sum).dot(features)
    return new_features

def load_data(datasetname):
    names = ['x','tx','allx','y','ty','ally','graph']
    objects = {}
    for name in names:
        with open("data/ind.{}.{}".format(datasetname, name),'rb') as f:
            objects[name] = pkl.load(f, encoding='latin1')
    
    with open("data/ind.{}.test.index".format(datasetname), 'r') as f:
        test_index = []
        for line in f.readlines():
            test_index.append(int(line.strip()))

    test_index_reorder = np.sort(test_index)
    
    whole_features = sp.vstack((objects['allx'], objects['tx'])).tolil()

    whole_features[test_index] = whole_features[test_index_reorder]

    num_nodes = whole_features.shape[0]

    adj = nx.adjacency_matrix(nx.from_dict_of_lists(objects['graph']))

    whole_labels = np.r_[objects['ally'], objects['ty']]

    whole_labels[test_index] = whole_labels[test_index_reorder]

    train_idx = np.arange(len(objects['y']))
    val_idx = np.arange(len(objects['y']), len(objects['y'])+ 500)
    test_idx = test_index_reorder



    return adj, whole_features, whole_labels, train_idx, val_idx, test_idx
    

In [3]:
adj, whole_features, whole_labels, train_idx, val_idx, test_idx = load_data(dataset_name)

In [4]:
# normalize features
features = preprocess_features(whole_features).todense()

# 创建GAT Layer

$L2_{reg} = \frac{\lambda}{2m}||W||^2_2$

$a^T[h_iW||h_jW] = <a^T_1,h_iW> + <a^T_2, h_jW>$  其中， $a = concat(a_1, a_2)$

In [5]:
num_nodes = features.shape[0]
feature_dim = features.shape[1]
epochs = 500
n_classes = whole_labels.shape[1]
n_att_heads = 8
feature_dim_each_head = 8
dropout_rate = 0.6  # for input and normalized attention coefficients \alpha in paper
learning_rate = 5e-3
L2_reg = 5e-4

class GATlayer(keras.layers.Layer):
    """
    GATLayer为GAT的层， 每层的输入为:
    output_dims: dim of the output of the current layer  (F' in the paper)
    dropout: both to input and normalized attention coefficient (训练阶段，节点会随机采样邻居)
    n_heads: number of attention head
    aggregation: {avg, concat}
    activation: elu in the paper
    """
    def __init__(self, output_dims, 
                       num_nodes, 
                       dropout = 0.6, 
                       n_heads = 8, 
                       activation = None,  
                       aggregation = "avg",  
                       use_bias = False,
                       L2_reg = 5e-3,
                       **kwargs):

        self.output_dims = output_dims
        self.num_nodes = num_nodes
        self.n_heads = n_heads
        self.dropout = dropout
        self.activation = activation
        self.aggregation = aggregation
        self.L2_reg = L2_reg
        self.use_bias = use_bias
        super(GATlayer,self).__init__(**kwargs)

    def build(self, input_shape):
        """
        input: all features and adj
        input_shape: [(X.shape), (A.shape)]
        X.shape = (None,1433)
        A.shape = (None,2708)
        """
        print(input_shape)
        self.all_feature_transform_weights = []
        self.all_feature_transform_bias = []
        self.all_attention = []
        for head in range(self.n_heads):
            weights_feature_transform = self.add_weight(name = "Weight_{}".format(head),
                                                             shape = (input_shape[0][1], self.output_dims),  
                                                             initializer = keras.initializers.glorot_uniform,
                                                             regularizer = keras.regularizers.L2(self.L2_reg),
                                                             trainable=True)
            
            if self.use_bias:
                bias_feature_transform = self.add_weight(name = "bias_{}".format(head),
                                                              shape = (self.output_dims,1),
                                                              initializer = keras.initializers.zeros,
                                                              trainable=True)
                self.all_feature_transform_bias.append(bias_feature_transform)
            
            self.all_feature_transform_weights.append(weights_feature_transform)
            

        
            attention_weight_self = self.add_weight(name = "att_{}_self".format(head),
                                                    shape = (self.output_dims,1),
                                                    initializer = keras.initializers.glorot_uniform,
                                                    regularizer = keras.regularizers.L2(self.L2_reg),
                                                    trainable=True)
            attention_weight_neigh = self.add_weight(name = "att_{}_neigh".format(head),
                                                          shape=(self.output_dims,1),
                                                          initializer= keras.initializers.glorot_uniform,
                                                          regularizer=keras.regularizers.L2(self.L2_reg),
                                                          trainable=True)

            self.all_attention.append([attention_weight_self, attention_weight_neigh])
        super(GATlayer,self).build(input_shape)

    
    def _toSparseTensor(self,X):
        """
        X is a dense np matrix
        """
        idx = tf.where(tf.not_equal(X, 0))
        sparse = tf.SparseTensor(indices=idx, values = tf.gather_nd(X,idx), dense_shape=X.shape)
        return sparse

    def call(self, inputs):
        """
        inputs: [features, adj]
        """
        assert self.aggregation == "avg" or self.aggregation == "concat", "Aggregation must be 'concat' or 'avg'"
        features = inputs[0]
        # sparse_features = self._toSparseTensor(features)
        adj = inputs[1]
        # sparse_adj = self._toSparseTensor(adj)
        output = []
        for head in range(self.n_heads):

            feature_transform = tf.matmul(features, self.all_feature_transform_weights[head])  # 为每个节点做特征变换 hW  (N,F')

            # a = [a_1,a_2]  a.shape (2*F', 1)
            attention_to_center = tf.matmul(feature_transform, self.all_attention[head][0])   # 所有 h_iWa_1  (None, 1)
            
            attention_to_neighbor = tf.matmul(feature_transform, self.all_attention[head][1])   # (None, 1)

            # compute eq.1 in the paper, to obtain a matrix
            # 计算每个节点和和其他节点之间的attention系数， 1. 特征变换， 2. 拼接， 3. 单层前馈 
            attention_to_center = tf.tile(attention_to_center, multiples=[1, self.num_nodes])  

            att_coef_matrix = tf.add(attention_to_center, tf.transpose(attention_to_neighbor))  # (N,N)  xx

            att_coef_mask = keras.layers.LeakyReLU(alpha=0.2)(att_coef_matrix)

            # neighbor mask
            # att_coef_mask = tf.multiply(att_coef_matrix, adj)  将非邻居置为0（错误） 应置为-inf  应为exp(-inf) = 0
            mask = -10e9 * (1.0 - adj)
            att_coef_mask = att_coef_matrix + mask

            # eq3:
            alpha_matrix = keras.activations.softmax(att_coef_mask)
            
            # dropout for input feature and attention matrix
            # dropout_feature = keras.layers.Dropout(self.dropout)(feature_transform)  # 一部分置0  (N,F')
            # dropout_att = keras.layers.Dropout(self.dropout)(alpha_matrix)  # 一部分置0 文中解释：聚合时随机采样邻居 (N,N)

            # eq4:
            new_node_features = tf.matmul(alpha_matrix, feature_transform)    # (N,F')

            if self.use_bias:
                bias = self.all_feature_transform_bias[head]   # (F',1)
                new_node_features = tf.nn.bias_add(new_node_features, tf.squeeze(tf.reshape(bias,shape=(1,-1)))  ) # (N,F')
            

            # 将一个head的node features保存
            output.append(new_node_features) 
        
        if self.aggregation == "avg":
            out = tf.reduce_mean(output,axis = 0)
        else:
            out = tf.concat(output, axis = 1)

        if self.activation is not None:
            out = self.activation(out)

        return out

In [6]:
input_features = keras.Input(shape = (features.shape[1],))  
input_adj = keras.Input(shape = (adj.shape[1],))
dropout1 = keras.layers.Dropout(dropout_rate)(input_features)
GAT1 = GATlayer(output_dims = feature_dim_each_head,  # 8
                num_nodes = num_nodes,
                dropout=dropout_rate, 
                n_heads = n_att_heads, 
                activation = keras.activations.elu,
                aggregation="concat",
                use_bias=False, 
                L2_reg=L2_reg)([dropout1, input_adj])

dropout2 = keras.layers.Dropout(dropout_rate)(GAT1)

GAT2 = GATlayer(output_dims=n_classes,
                num_nodes = num_nodes,
                dropout = dropout_rate, 
                n_heads = 1, 
                activation = keras.activations.softmax,
                aggregation='avg',
                use_bias = False,
                L2_reg=L2_reg)([dropout2,input_adj])

model = keras.models.Model(inputs = [input_features, input_adj], outputs = GAT2)



model.compile(loss = keras.losses.categorical_crossentropy, 
              optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
              metrics = ['accuracy'])

model.summary()

[TensorShape([None, 1433]), TensorShape([None, 2708])]
[TensorShape([None, 64]), TensorShape([None, 2708])]
Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 1433)]       0                                            
__________________________________________________________________________________________________
dropout (Dropout)               (None, 1433)         0           input_1[0][0]                    
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 2708)]       0                                            
__________________________________________________________________________________________________
ga_tlayer (GATlayer)            (None, 64)           91840       dropout[0][0]

In [7]:
# A = adj + np.eye(num_nodes)
A = adj.todense()

In [8]:
# model([features,A])
def mask(X, ids):
    mask_X = np.zeros(shape = X.shape)
    mask = np.zeros(shape = X.shape[0]).astype(np.bool)
    mask[ids] = True
    mask_X[ids] = X[mask]
    return mask_X

history = model.fit([mask(features, train_idx),A], 
                    mask(whole_labels,train_idx),
                    epochs=epochs, 
                    # sample_weight = mask(np.ones(num_nodes), train_idx),
                    batch_size = num_nodes,
                    shuffle=False, 
                    validation_data=([mask(features,val_idx), A], mask(whole_labels, val_idx)),
                    workers=10, use_multiprocessing=True)
                                                                                

 109ms/step - loss: 0.1006 - accuracy: 0.9140 - val_loss: 0.3593 - val_accuracy: 0.8220
Epoch 361/500
Epoch 362/500
Epoch 363/500
Epoch 364/500
Epoch 365/500
Epoch 366/500
Epoch 367/500
Epoch 368/500
Epoch 369/500
Epoch 370/500
Epoch 371/500
Epoch 372/500
Epoch 373/500
Epoch 374/500
Epoch 375/500
Epoch 376/500
Epoch 377/500
Epoch 378/500
Epoch 379/500
Epoch 380/500
Epoch 381/500
Epoch 382/500
Epoch 383/500
Epoch 384/500
Epoch 385/500
Epoch 386/500
Epoch 387/500
Epoch 388/500
Epoch 389/500
Epoch 390/500
Epoch 391/500
Epoch 392/500
Epoch 393/500
Epoch 394/500
Epoch 395/500
Epoch 396/500
Epoch 397/500
Epoch 398/500
Epoch 399/500
Epoch 400/500
Epoch 401/500
Epoch 402/500
Epoch 403/500
Epoch 404/500
Epoch 405/500
Epoch 406/500
Epoch 407/500
Epoch 408/500
Epoch 409/500
Epoch 410/500
Epoch 411/500
Epoch 412/500
Epoch 413/500
Epoch 414/500
Epoch 415/500
Epoch 416/500
Epoch 417/500
Epoch 418/500
Epoch 419/500
Epoch 420/500
Epoch 421/500
Epoch 422/500
Epoch 423/500
Epoch 424/500
Epoch 425/500
Ep

# Test

In [16]:
eval_test = model.evaluate([mask(features,test_idx), A], mask(whole_labels,test_idx), batch_size=num_nodes)

