In [None]:
# Mount Google Drive at /content/gdrive so the project data stored on Drive
# can be reached from this Colab session (force_remount=True is passed so the
# mount is redone on every run of this cell).
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)


In [None]:
%cd  /content/gdrive/My Drive/GraphAttnProject/CoraCiteseer/CitationExperiment/

In [None]:
import numpy as np

# this project is based on tensorflow 2
import tensorflow as tf 
print(tf.__version__)

from process import *
#from models import *
from gat import *

import matplotlib.pyplot as plt

In [None]:

# parameter settings for the PubMed dataset
class args_config_pubmed():
    """Hyper-parameters and experiment settings for the PubMed run.

    The object is a plain attribute bag: ``__init__`` fills in every setting
    and ``print_args`` echoes the most relevant ones. It is passed around as
    ``args`` by the rest of the notebook.
    """

    def __init__(self):
        """Populate all settings with their PubMed defaults."""

        # --- training schedule ---
        self.nb_epochs = 100000
        self.patience = 100  # epochs without validation improvement before early stopping
        self.lr = 0.005  # learning rate
        self.l2_coef = 0.0005  # weight decay

        # --- network architecture ---
        self.hid_units = [8]  # numbers of hidden units per each attention head in each layer
        self.n_heads = [8, 1]  # additional entry for the output layer
        self.residual = False
        self.nonlinearity = tf.nn.elu
        self.batch_size = 1

        # --- dataset dimensions (PubMed) ---
        self.nb_nodes = 19717  # number of nodes in the PubMed graph
        self.nb_features = 500  # number of features
        self.out_sz = 3  # number of classes

        # --- attention flavour ---
        # uses the graph attention method in paper Graph Attention Networks: https://arxiv.org/pdf/1710.10903.pdf
        self.concat_similarity = False
        # uses the graph attention method in paper Attention Is All you Need: https://arxiv.org/pdf/1706.03762.pdf
        self.dot_product_similarity = True

        # False for Cora and Citeseer; probably needs to be True for PubMed because of memory pressure
        self.sparse = False

        self.num_repeats = 15  # number of repeats (must not exceed len(self.seeds))

        # --- deep walk algorithm ---
        self.walk_length = 4  # random walk length for GKAT
        self.number_walks = 100  # number of random walks from each node

        # --- dropout ---
        self.ffd_drop = 0.6  # in_drop
        self.attn_drop = 0.6  # coef_drop

        # `learning_rate` is the supported keyword; the old `lr` alias is
        # deprecated and is rejected by newer Keras releases.
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=self.lr)

        # One (tensorflow, numpy) seed pair per repeat; repeat `i` uses seeds[i].
        self.seeds = [{"tf_seed": 649737, "np_seed": 29820},
                      {"tf_seed": 395408, "np_seed": 185228},
                      {"tf_seed": 252356, "np_seed": 703889},
                      {"tf_seed": 343053, "np_seed": 999360},
                      {"tf_seed": 743746, "np_seed": 67440},
                      {"tf_seed": 175343, "np_seed": 378945},
                      {"tf_seed": 856516, "np_seed": 597688},
                      {"tf_seed": 474313, "np_seed": 349903},
                      {"tf_seed": 838382, "np_seed": 897904},
                      {"tf_seed": 202003, "np_seed": 656146},

                      {"tf_seed": 773885, "np_seed": 189288},
                      {"tf_seed": 849634, "np_seed": 419482},
                      {"tf_seed": 175379, "np_seed": 760273},
                      {"tf_seed": 65097, "np_seed": 662295},
                      {"tf_seed": 636040, "np_seed": 440640},
                      {"tf_seed": 792273, "np_seed": 707957},
                      {"tf_seed": 802029, "np_seed": 591393},
                      {"tf_seed": 283051, "np_seed": 599978},
                      {"tf_seed": 327575, "np_seed": 266086},
                      {"tf_seed": 422558, "np_seed": 735819}]

    def print_args(self):
        """Print the main settings to stdout, one group per line."""
        print(f"patience: {self.patience} | lr: {self.lr} | l2_coef: {self.l2_coef}")
        print(f"hid_units: {self.hid_units} | n_heads: {self.n_heads}")
        print(f"residual: {self.residual} | nonlinearity: {self.nonlinearity}")
        print(f"dotproduct similarity: {self.dot_product_similarity} | concat similarity: {self.concat_similarity}")
        print(f"ffd_drop: {self.ffd_drop} | attn_drop: {self.attn_drop}")
        print(f"num repeats: {self.num_repeats}")
        print(f"repeats seeds: {self.seeds}")
        print(f"walk length: {self.walk_length}")
        
        
        


# **Main**

In [None]:
dataset_str = 'pubmed'
args = args_config_pubmed()

# Attention-bias switch:
#   True  -> GKAT  (normalised random-walk kernel `dot_kernel`)
#   False -> pure GAT (adjacency matrix with self-loops `adj`)
# The default keeps the behaviour this notebook had before the switch existed.
USE_GKAT = False

# load_data / preprocess_features come from the project's `process` module.
adj, walker, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data(dataset_str, args)

features, spars = preprocess_features(features)

nb_nodes = features.shape[0]
ft_size = features.shape[1]
nb_classes = y_train.shape[1]

# Prepend a leading axis of size 1 to every array fed to the model.
features = features[np.newaxis]
y_train = y_train[np.newaxis]
y_val = y_val[np.newaxis]
y_test = y_test[np.newaxis]
train_mask = train_mask[np.newaxis]
val_mask = val_mask[np.newaxis]
test_mask = test_mask[np.newaxis]

# Dense adjacency with ones on the diagonal (self-loops), stored as float32.
adj = adj.todense()
adj[range(nb_nodes), range(nb_nodes)] = 1
adj = adj.astype('float32')

# walker.walks_dict:
#   keys: starting node
#   values: list with length = # random walk repeats from each node.
#           Each position stores a single random walk from the starting node.
#
# Convert walks_dict into a visit-frequency matrix: entry (u, v) counts how
# often v appears within the first `walk_length` positions of walks started
# at u, divided by the number of walks per node. This dense
# (nb_nodes x nb_nodes) matrix is the basis of the GKAT kernel.
freq_mat = np.zeros([args.nb_nodes, args.nb_nodes])

for start_node, walks in walker.walks_dict.items():
    row = int(start_node)
    for walk in walks:
        # Slice rather than index by position so that a walk shorter than
        # walk_length is counted as far as it goes instead of raising
        # IndexError.
        for visited in walk[:args.walk_length]:
            freq_mat[row, int(visited)] += 1
freq_mat /= args.number_walks

dot_prod = np.matmul(freq_mat, np.transpose(freq_mat))

# Normalise the kernel: divide entry (i, j) by sqrt(k_ii * k_jj).
self_sim = np.diagonal(dot_prod)
deno = np.outer(self_sim, self_sim)
dot_kernel = dot_prod / np.sqrt(deno)

# Pick the attention bias according to the switch defined at the top of the cell.
biases = dot_kernel if USE_GKAT else adj



In [None]:
# The cell below is a bare string literal, so none of the code inside it runs.
# It records the optional steps for the args.sparse == True setting:
# thresholding small entries of `biases` to zero and converting the result to
# a scipy CSR matrix. Copy the code out of the string to use it.
'''
if we set args.sparse == True, then we can run the following code

import scipy

biases[biases<0.05] =0 # we can replace values less than a threshold (0.05 here) by zero to make the dot_product kernel sparser to save computation time

# change the dense biases matrix into sparse matrix, otherwise it will run out of memory
if args.sparse:
  biases = scipy.sparse.csr_matrix(biases, dtype = np.float32)
'''


In [None]:


args.print_args()

# Histories collected over all repeats: one inner list per repeat.
train_loss_list_all = []
train_acc_list_all = []
val_loss_list_all = []
val_acc_list_all = []
# Final test metrics, one scalar per repeat.
ts_loss_list = []
ts_acc_list = []


for nr in range(args.num_repeats):

    # Each repeat uses its own (tensorflow, numpy) seed pair from the config.
    tf.random.set_seed(args.seeds[nr]['tf_seed'])
    np.random.seed(args.seeds[nr]['np_seed'])

    # NOTE(review): args.optimizer is created once inside the config object.
    # If train() uses it, its internal state carries over from one repeat to
    # the next -- confirm whether a fresh optimizer per repeat is wanted.

    # Activation and residual flag are taken from the config instead of being
    # hard-coded, so editing args_config_pubmed actually affects the model.
    model = GAT(args.hid_units, args.n_heads, args.out_sz, args.nb_nodes, False, ffd_drop = args.ffd_drop, attn_drop = args.attn_drop, activation = args.nonlinearity, residual=args.residual)

    train_loss_list = []
    train_acc_list = []
    val_loss_list = []
    val_acc_list = []

    # Early-stopping bookkeeping.
    vlss_mn = np.inf          # lowest validation loss seen so far
    vacc_mx = 0.0             # highest validation accuracy seen so far
    curr_step = 0             # consecutive epochs without any improvement
    working_weights = None    # weights of the best checkpoint (None until one exists)
    vacc_early_model = None   # validation accuracy of that checkpoint
    vlss_early_model = None   # validation loss of that checkpoint

    for epoch in range(args.nb_epochs):

        # Running sums for this epoch; divided by the step count when reported.
        train_loss_avg = 0
        train_acc_avg = 0
        val_loss_avg = 0
        val_acc_avg = 0

        ###Training Segment###
        tr_step = 0
        tr_size = features.shape[0]
        while tr_step * args.batch_size < tr_size:
            batch = slice(tr_step*args.batch_size, (tr_step+1)*args.batch_size)
            bbias = biases  # the full bias matrix is used for every batch
            logits_tr, acc_tr, loss_value_tr = train(model, inputs=features[batch], bias_mat=bbias, lbl_in=y_train[batch], msk_in=train_mask[batch], training=True, args=args)
            train_loss_avg += loss_value_tr
            train_acc_avg += acc_tr
            tr_step += 1
            train_loss_list.append(train_loss_avg.numpy())
            train_acc_list.append(train_acc_avg.numpy())

        ###Validation Segment###
        vl_step = 0
        vl_size = features.shape[0]
        while vl_step * args.batch_size < vl_size:
            batch = slice(vl_step*args.batch_size, (vl_step+1)*args.batch_size)
            bbias = biases
            _, acc_vl, loss_value_vl = evaluate(model, inputs=features[batch], bias_mat=bbias, lbl_in=y_val[batch], msk_in=val_mask[batch], training=False, args=args)
            val_loss_avg += loss_value_vl
            val_acc_avg += acc_vl
            vl_step += 1
            val_loss_list.append(val_loss_avg.numpy())
            val_acc_list.append(val_acc_avg.numpy())

        val_acc = val_acc_avg/vl_step
        val_loss = val_loss_avg/vl_step

        print('Seed: %d | Epoch: %d |Training: loss = %.5f, acc = %.5f | Val: loss = %.5f, acc = %.5f' % (nr, epoch, train_loss_avg/tr_step, train_acc_avg/tr_step, val_loss, val_acc))

        ###Early Stopping Segment###
        if val_acc >= vacc_mx or val_loss <= vlss_mn:
            # A checkpoint is taken only when BOTH metrics are at least as
            # good as their best values so far.
            if val_acc >= vacc_mx and val_loss <= vlss_mn:
                vacc_early_model = val_acc
                vlss_early_model = val_loss
                working_weights = model.get_weights()
            vacc_mx = np.max((val_acc, vacc_mx))
            vlss_mn = np.min((val_loss, vlss_mn))
            curr_step = 0
        else:
            curr_step += 1
            if curr_step == args.patience:
                print('Early stop! Min loss: ', vlss_mn, ', Max accuracy: ', vacc_mx)
                print('Early stop model validation loss: ', vlss_early_model, ', accuracy: ', vacc_early_model)
                break

    # Evaluate the best checkpoint, whether training stopped early or simply
    # ran out of epochs (previously the weights were restored only on the
    # early-stop path, leaving last-epoch weights in place otherwise).
    if working_weights is not None:
        model.set_weights(working_weights)

    train_loss_list_all.append(train_loss_list)
    train_acc_list_all.append(train_acc_list)
    val_loss_list_all.append(val_loss_list)
    val_acc_list_all.append(val_acc_list)


    ###Testing Segment### Outside of the epochs

    ts_step = 0
    ts_size = features.shape[0]
    ts_loss = 0.0
    ts_acc = 0.0
    while ts_step * args.batch_size < ts_size:
        batch = slice(ts_step*args.batch_size, (ts_step+1)*args.batch_size)
        bbias = biases
        _, acc_ts, loss_value_ts = evaluate(model, inputs=features[batch], bias_mat=bbias, lbl_in=y_test[batch], msk_in=test_mask[batch], training=False, args=args)
        ts_loss += loss_value_ts
        ts_acc += acc_ts
        ts_step += 1

    print('Test loss:', ts_loss/ts_step, '; Test accuracy:', ts_acc/ts_step)
    ts_loss_list.append((ts_loss/ts_step).numpy())
    ts_acc_list.append((ts_acc/ts_step).numpy())


    # Loss curves for this repeat.
    plt.plot(train_loss_list, label='train loss')
    plt.plot(val_loss_list, label='val loss')
    plt.legend()
    plt.xlabel('epoch')
    plt.ylabel("loss")
    plt.title("Train Validation Loss")
    plt.show()


    # Accuracy curves for this repeat.
    plt.plot(train_acc_list, label='train acc')
    plt.plot(val_acc_list, label='val acc')
    plt.legend()
    plt.xlabel('epoch')
    plt.ylabel("acc")
    plt.title("Train Validation Accuracy")
    plt.show()


# the final test accuracies over all repeats are printed here
print(ts_acc_list)
