In [1]:
## This cell performs data preprocessing; it is copied verbatim from Kipf's original GitHub code.

import numpy as np
import scipy.sparse as sp
import torch,torch.nn,torch.sparse,torch.nn.functional,torch.distributions
from sklearn.metrics import roc_auc_score,average_precision_score
from input_data import load_data
from preprocessing import preprocess_graph, sparse_to_tuple, mask_test_edges
np.set_printoptions(threshold=np.inf)

# Fix the RNG seeds so the edge split and the sampling noise used during
# training are identical on every "Restart Kernel -> Run All".
# NOTE(review): mask_test_edges is presumably driven by NumPy's global RNG
# (as in the reference GAE code) -- confirm against preprocessing.py.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)


adj, features = load_data("cora")


# Store original adjacency matrix (without diagonal entries) for later
adj_orig = adj
adj_orig = adj_orig - sp.dia_matrix((adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
adj_orig.eliminate_zeros()

# Split the edges; from here on `adj` is the training adjacency returned by
# mask_test_edges, and the val/test edge lists are kept for evaluation.
adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(adj)
adj = adj_train


###################################################################### feature less training
#features = sp.identity(features.shape[0])  # featureless
######################################################################


# Some preprocessing
adj_norm = preprocess_graph(adj)


num_nodes = adj.shape[0]

# After sparse_to_tuple, `features` is indexed as [0]=coordinates, [1]=values,
# [2]=shape (this is how the next cell rebuilds tensors from it).
features = sparse_to_tuple(features.tocoo())
num_features = features[2][1]
features_nonzero = features[1].shape[0]



# pos_weight: (N*N - adj.sum()) / adj.sum(); multiplies the positive term of the
# reconstruction loss in Model.
# norm: N*N / (2 * (N*N - adj.sum())); global scale of the reconstruction loss.
pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
norm = adj.shape[0] * adj.shape[0] / float((adj.shape[0] * adj.shape[0] - adj.sum()) * 2)



# Reconstruction target: training adjacency plus the identity (self-loops).
adj_label = adj_train + sp.eye(adj_train.shape[0])
adj_label = sparse_to_tuple(adj_label)




In [2]:
#Model definition Cell

def _sparse_tuple_to_dense(sparse_tuple):
    """Turn a (coordinates, values, shape) triple into a dense float tensor.

    Args:
        sparse_tuple: indexable triple where [0] is an (nnz, ndim) integer
            coordinate array, [1] the matching values and [2] the dense shape.

    Returns:
        A dense torch float tensor of the given shape.
    """
    # torch.sparse_coo_tensor replaces the deprecated
    # torch.sparse.FloatTensor(indices, values, size) constructor; it expects
    # indices laid out as (ndim, nnz), hence the transpose.
    indices = torch.LongTensor(sparse_tuple[0].transpose())
    values = torch.FloatTensor(sparse_tuple[1])
    return torch.sparse_coo_tensor(indices, values, torch.Size(sparse_tuple[2])).to_dense()


adj_orig_tensor = torch.Tensor(adj_orig.toarray())

test_edges_false = np.array(test_edges_false)
val_edges_false = np.array(val_edges_false)
adj_label_tensor = _sparse_tuple_to_dense(adj_label)
adj_norm_tensor  = _sparse_tuple_to_dense(adj_norm)
features_tensor  = _sparse_tuple_to_dense(features)

# Names this cell relies on from the preprocessing cell:
#   adj_orig, adj_label, adj_norm, features, val_edges_false, test_edges_false
# Names from the preprocessing cell that are used further down:
#   val_edges, test_edges, num_features, pos_weight, norm




class Model(torch.nn.Module):
    """Variational graph auto-encoder with an inner-product decoder.

    The encoder applies one shared linear layer followed by two linear heads
    (mean and log-std), each pre-multiplied by ``A_tilda``. The decoder is
    ``z @ z.T``. The module owns its Adam optimizer, so one call to
    ``train(x, A)`` (or ``train_step(x, A)``) performs a full update step.

    Reads the notebook-level globals ``num_features``, ``pos_weight`` and
    ``norm``.
    """

    def __init__(self,first_layer_dim=30,embedding_dim=15,A_tilda=None ,**kwargs):
        """Create the layers, the noise distribution and the optimizer.

        Args:
            first_layer_dim: width of the hidden layer.
            embedding_dim: dimensionality of the latent embedding z.
            A_tilda: dense matrix multiplied in front of every layer output
                (the notebook passes the normalised adjacency tensor).
            **kwargs: forwarded to ``torch.nn.Module.__init__``.
        """
        super(Model, self).__init__(**kwargs)

        self.W0 = torch.nn.Linear(num_features, first_layer_dim)
        self.W1_mean = torch.nn.Linear(first_layer_dim, embedding_dim)
        self.W1_log_std = torch.nn.Linear(first_layer_dim, embedding_dim)

        self.A_tilda = A_tilda

        # Standard normal noise source for the reparameterisation in train_step.
        self.normal_dist = torch.distributions.MultivariateNormal(
            loc=torch.zeros(embedding_dim),
            scale_tril=torch.eye(embedding_dim),
        )

        # The three Linear layers are the only registered parameters.
        self.optimizer = torch.optim.Adam(params=self.parameters(), lr=0.01)

        # Logits of the most recent train_step; None until the first step.
        self.recon = None

    def train(self, x=True, A=None):
        """Run one optimisation step, or switch training/eval mode.

        The original signature ``train(x, A)`` shadowed
        ``torch.nn.Module.train(mode)``, which made ``model.eval()`` and
        ``model.train()`` raise ``TypeError``. Both uses are supported now:

        * ``train(x, A)``: one optimisation step, returns the loss as a float.
        * ``train()`` / ``train(mode)``: delegates to ``torch.nn.Module.train``
          and returns ``self`` (this is what ``eval()`` calls internally).
        """
        if A is None:
            return super(Model, self).train(x)
        return self.train_step(x, A)

    def train_step(self, x, A):
        """Do one forward/backward pass and one Adam step.

        Args:
            x: dense node-feature matrix (one row per node).
            A: dense 0/1 reconstruction target, same shape as ``z @ z.T``.

        Returns:
            The scalar loss (reconstruction + KL) as a Python float.

        Side effects:
            Updates the parameters and stores the reconstruction logits in
            ``self.recon`` (computed from the sampled ``z`` before the update).
        """
        hidden = torch.nn.functional.relu(torch.matmul(self.A_tilda, self.W0(x)))
        z_mean = torch.matmul(self.A_tilda, self.W1_mean(hidden))
        z_log_std = torch.matmul(self.A_tilda, self.W1_log_std(hidden))

        # Reparameterisation: one noise vector per row of x.
        noise = self.normal_dist.sample((x.shape[0],))
        z = z_mean + noise * torch.exp(z_log_std)

        recon = torch.matmul(z, z.transpose(dim0=0, dim1=1))
        self.recon = recon

        # Weighted binary cross-entropy on logits; pos_weight scales the
        # positive term and norm scales the whole loss.
        positive_term = (A * torch.nn.functional.logsigmoid(recon)).mean() * pos_weight
        negative_term = ((1 - A) * torch.nn.functional.logsigmoid(-recon)).mean()
        recon_loss = -norm * (positive_term + negative_term)

        # KL divergence term, scaled by 0.5 / number of rows of x.
        kl_loss = torch.mean(torch.sum(1 + 2 * z_log_std - z_mean**2 - torch.exp(z_log_std)**2, 1))
        kl_loss = -(0.5 / x.shape[0]) * kl_loss

        loss = recon_loss + kl_loss

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        return loss.item()

    def evaluate(self,pos_edge,neg_edge):
        """Score edges with the reconstruction of the last training step.

        NOTE(review): the scores come from ``self.recon``, i.e. from the
        sampled embedding of the most recent ``train_step`` (before that
        step's parameter update), not from a fresh deterministic pass.

        Args:
            pos_edge: integer array with one (row, col) pair per true edge.
            neg_edge: integer array with one (row, col) pair per non-edge.

        Returns:
            ``(roc_auc, average_precision, table)`` where ``table`` has the
            0/1 label in column 0 and the sigmoid score in column 1.

        Raises:
            RuntimeError: if no training step has been run yet.
        """
        if self.recon is None:
            raise RuntimeError("evaluate() needs at least one train(x, A) step to populate self.recon")

        pred_pos_edge = self.recon[pos_edge[:, 0].reshape(-1), pos_edge[:, 1].reshape(-1)].sigmoid().detach().numpy()
        pred_neg_edge = self.recon[neg_edge[:, 0].reshape(-1), neg_edge[:, 1].reshape(-1)].sigmoid().detach().numpy()

        labels = np.r_[np.ones_like(pred_pos_edge), np.zeros_like(pred_neg_edge)]
        preds = np.r_[pred_pos_edge, pred_neg_edge]

        roc_auc = roc_auc_score(labels, preds)
        precision = average_precision_score(labels, preds)

        return roc_auc, precision, np.c_[labels, preds]




In [3]:
#Training Cell 

my_model = Model(first_layer_dim=30,embedding_dim=15,A_tilda=adj_norm_tensor)

NUM_EPOCHS = 200

for epoch_counter in range(NUM_EPOCHS):

    # One optimisation step on the full graph, then score the validation edges
    # with the reconstruction that step produced.
    epoch_loss = my_model.train(features_tensor,adj_label_tensor)
    epoch_roc_auc,epoch_precision,_ = my_model.evaluate(val_edges,val_edges_false)

    print("Epoch:", '%04d' % (epoch_counter + 1), "train_loss=", "{:.5f}".format(epoch_loss), "val_roc_auc=", "{:.5f}".format(epoch_roc_auc),
          "val_precision=", "{:.5f}".format(epoch_precision))


print('\n',"#########################################################",'\n')

test_roc_auc,test_precision,label_pred = my_model.evaluate(test_edges,test_edges_false)
# Report the test precision; the original printed the last validation
# precision (epoch_precision) under the "test_precision=" label.
print("test_roc_auc=","{:.5f}".format(test_roc_auc),"test_precision=","{:.5f}".format(test_precision))





Epoch: 0001 train_loss= 1.77748 val_roc_auc= 0.51048 val_precision= 0.49425
Epoch: 0002 train_loss= 1.44978 val_roc_auc= 0.51825 val_precision= 0.52862
Epoch: 0003 train_loss= 1.18505 val_roc_auc= 0.51115 val_precision= 0.48601
Epoch: 0004 train_loss= 0.99368 val_roc_auc= 0.54994 val_precision= 0.52709
Epoch: 0005 train_loss= 0.86613 val_roc_auc= 0.53424 val_precision= 0.52547
Epoch: 0006 train_loss= 0.76945 val_roc_auc= 0.56537 val_precision= 0.55766
Epoch: 0007 train_loss= 0.73547 val_roc_auc= 0.56414 val_precision= 0.57273
Epoch: 0008 train_loss= 0.73151 val_roc_auc= 0.59080 val_precision= 0.60212
Epoch: 0009 train_loss= 0.74040 val_roc_auc= 0.62203 val_precision= 0.64381
Epoch: 0010 train_loss= 0.74793 val_roc_auc= 0.63273 val_precision= 0.65965
Epoch: 0011 train_loss= 0.74620 val_roc_auc= 0.63069 val_precision= 0.65988
Epoch: 0012 train_loss= 0.74412 val_roc_auc= 0.64859 val_precision= 0.68360
Epoch: 0013 train_loss= 0.74468 val_roc_auc= 0.64603 val_precision= 0.67634
Epoch: 0014 

Epoch: 0109 train_loss= 0.45485 val_roc_auc= 0.90457 val_precision= 0.92036
Epoch: 0110 train_loss= 0.45489 val_roc_auc= 0.91826 val_precision= 0.93094
Epoch: 0111 train_loss= 0.45449 val_roc_auc= 0.90886 val_precision= 0.91935
Epoch: 0112 train_loss= 0.45416 val_roc_auc= 0.90786 val_precision= 0.92223
Epoch: 0113 train_loss= 0.45397 val_roc_auc= 0.91149 val_precision= 0.92413
Epoch: 0114 train_loss= 0.45375 val_roc_auc= 0.91428 val_precision= 0.92718
Epoch: 0115 train_loss= 0.45321 val_roc_auc= 0.90460 val_precision= 0.92323
Epoch: 0116 train_loss= 0.45285 val_roc_auc= 0.91470 val_precision= 0.92907
Epoch: 0117 train_loss= 0.45303 val_roc_auc= 0.91509 val_precision= 0.92713
Epoch: 0118 train_loss= 0.45236 val_roc_auc= 0.91482 val_precision= 0.92800
Epoch: 0119 train_loss= 0.45213 val_roc_auc= 0.91488 val_precision= 0.93028
Epoch: 0120 train_loss= 0.45197 val_roc_auc= 0.91506 val_precision= 0.92959
Epoch: 0121 train_loss= 0.45162 val_roc_auc= 0.91673 val_precision= 0.92905
Epoch: 0122 

In [35]:
# Accuracy on the test edges: column 0 of label_pred is the 0/1 label and
# column 1 the predicted score, which is rounded to 0/1 before comparing.
true_labels = label_pred[:, 0]
hard_preds = np.round(label_pred[:, 1])

# 1 where the rounded prediction disagrees with the label, 0 where it agrees.
tmp = np.abs(true_labels - hard_preds)
num_pairs = tmp.shape[0]

print(1 - tmp.sum() / num_pairs)




0.7241758241758242


In [41]:
# Sanity check: size of the dense feature matrix.
# Disabled checks kept for reference (label / original adjacency at the validation edges):
#   print(adj_label_tensor[val_edges[:,0],val_edges[:,1]])
#   print(adj_orig_tensor [val_edges[:,0],val_edges[:,1]])
print(features_tensor.size())

torch.Size([2708, 1433])


In [6]:
# Spot-check one diagonal entry of the dense normalised adjacency.
print(adj_norm_tensor[4][4])

tensor(0.2500)
