In [15]:
#%matplotlib inline

import matplotlib.pyplot as plt

import numpy as np
import torch
from torch.nn.parameter import Parameter
import torch.optim as optim
import torch.nn.functional as F
import torch.nn as nn
import networkx as nx
import time
import os.path as osp
from collections import defaultdict
import pandas as pd
import xlsxwriter
from torch_geometric.nn import GCNConv,GATConv, SGConv, BatchNorm,SAGEConv
torch.manual_seed(29)
# cit-DBLP：13.24.27.29.40.44.46
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
torch.set_printoptions(precision=10)
gra_train = "scale"
path_train = "data/"+gra_train
path_test = "data/"

dataset = "cit-DBLP"

In [16]:
def data_process(g, mode="test"):
  
    adj = list(g.edges())
    m = g.number_of_nodes()

    features = torch.zeros(size=(m,1)).float()
    
    print("No.of nodes = ",m)
    print("No.of edges = ",g.number_of_edges())

    for u,_ in adj:
        features[u,0]+=1

    features = features/max(features)
    adj = torch.LongTensor(adj).t().contiguous()
 
    if mode == "train":
        
        adj_mat = torch.from_numpy(nx.to_numpy_array(g))
        adj_mat = F.normalize(adj_mat, p=1,dim=1).float()
        
        return adj.to(device), features.to(device), adj_mat.to(device)
    
    else:
        return adj.to(device), features.to(device)

In [17]:
def eigen_vector_calc(adj_mat, y):
    
    z = torch.spmm(adj_mat,y)
    y = y / (torch.linalg.norm(y))
    z = z/ (torch.linalg.norm(z))
    
    return y,z

In [18]:
class Model(nn.Module):
    def __init__(self, nin, nhid1, nout, hid_l, out_l=1):
        super(Model, self).__init__()
        
        self.gc1 = SAGEConv(in_channels= nin, out_channels= nhid1)
        self.bc1 = BatchNorm(nhid1)
        self.gc2 = SAGEConv(in_channels= nhid1, out_channels= nout)
        self.bc2 = BatchNorm(nhid1)
        self.lay1 = nn.Linear(nout ,hid_l)
        self.l0 = nn.Linear(hid_l,hid_l)
        self.lb0 = nn.BatchNorm1d(hid_l)
        self.l1 = nn.Linear(hid_l,hid_l)
        self.lb1 = nn.BatchNorm1d(hid_l)
        self.lay2 = nn.Linear(hid_l ,out_l)
        self.active1 = nn.LeakyReLU(0.1)
        
        with torch.no_grad():
            self.gc1.weight = Parameter(nn.init.uniform_(torch.empty(nin,nhid1),a=0.0,b=1.0))
            self.gc1.bias = Parameter(nn.init.uniform_(torch.empty(nhid1),a=0.0,b=1.0))
            self.gc2.weight = Parameter(nn.init.uniform_(torch.empty(nhid1,nout),a=0.0,b=1.0))
            self.gc2.bias = Parameter(nn.init.uniform_(torch.empty(nout),a=0.0,b=1.0))
            self.lay1.weight = Parameter(nn.init.uniform_(torch.empty(hid_l, nout ),a=0.0,b=1.0))
            self.l0.weight = Parameter(nn.init.uniform_(torch.empty(hid_l, hid_l),a=0.0,b=1.0))
            self.l1.weight = Parameter(nn.init.uniform_(torch.empty(hid_l, hid_l),a=0.0,b=1.0))
            self.lay2.weight = Parameter(nn.init.uniform_(torch.empty(out_l,hid_l),a=0.0,b=1.0))


    def forward(self, x, adj):
        x = self.gc1(x, adj)
        x = self.bc1(x)
        x = self.gc2(x, adj)
        x = self.bc2(x)
        x = self.lay1(x)
        x = self.l0(x)
        x = self.lb0(x)
        x = self.l1(x)
        x = self.lb1(x)
        x = self.lay2(x)
        
        return self.active1(x)

In [19]:
def train_model(epoch):
    model.train()
    optimizer.zero_grad()
    t = time.time()
    y = model(features,adj)
    
    y,z = eigen_vector_calc(adj_mat, y)
    
    loss1 = nn.MSELoss()(y,z)
    loss2 = -y.abs().mean()
    loss_train = loss1 + loss2
    loss_temp.append(loss_train.cpu().detach())
    
    loss_train.backward(retain_graph=True)
    optimizer.step()
    
    if epoch%50==0:
        print("MSE loss = ",loss1,"\t","Mean Loss = ",loss2)
        print('Epoch: {:04d}'.format(epoch+1),
                  'loss_train: {:.10f}'.format(loss_train.item()),
                  'time: {:.4f}s'.format(time.time() - t))
        

    return y

In [20]:
if __name__=="__main__":
    
    #Training

    loss_plot = []
    
    model= Model(nin = 1, nhid1=128, nout=128, hid_l=64, out_l=1).to(device)
    
    optimizer = optim.Adam(model.parameters(), lr = 0.001)
    #flag = False
    g = nx.read_weighted_edgelist(osp.join(path_test, dataset+".edgelist"),nodetype=int)
    no_of_nodes = g.number_of_nodes()
    
    for i in range(40):
        print("\n\n Graph number",i,"\n","-"*120)
        #if flag: break
        
        samples = np.random.randint(low=0, high=no_of_nodes)

        g_subgraph = nx.bfs_tree(g, source=samples, depth_limit=2)
        map = dict( list(zip(list(g_subgraph.nodes()), list(range(g_subgraph.number_of_nodes())))))
        g_subgraph = g.subgraph(g_subgraph)
        g_subgraph = nx.relabel_nodes(g_subgraph,map)
        #g = nx.read_edgelist(osp.join(path_train,gra_train+"_1k/"+gra_train+"1k_"+str(i)),nodetype=int)
        adj,features,adj_mat = data_process(g_subgraph ,mode="train")
        print("adj_list = ",adj.shape,"adj_mat = ",adj_mat.shape)

        loss_temp=[]
        net_time = time.time()
        
        for ep in range(150):
            y = train_model(ep)
            if ep%50==0:
                print("Time Net = ",time.time()-net_time,"\n\n")
                '''
                print("{:^10}".format("-------Validation-------"))
                l5, l10, l15, l20 = [],[],[],[]
                test(g)
                if l5[0]>0.5 and l10>0.55:
                  flag = True
                  break
                '''
        loss_plot.append((np.mean(loss_temp), i))



 Graph number 0 
 ------------------------------------------------------------------------------------------------------------------------
No.of nodes =  47
No.of edges =  72
adj_list =  torch.Size([2, 72]) adj_mat =  torch.Size([47, 47])
MSE loss =  tensor(0.0318271443, device='cuda:0', grad_fn=<MseLossBackward0>) 	 Mean Loss =  tensor(-0.0892385095, device='cuda:0', grad_fn=<NegBackward0>)
Epoch: 0001 loss_train: -0.0574113652 time: 0.0130s
Time Net =  0.01399993896484375 


MSE loss =  tensor(0.0354900695, device='cuda:0', grad_fn=<MseLossBackward0>) 	 Mean Loss =  tensor(-0.0968431905, device='cuda:0', grad_fn=<NegBackward0>)
Epoch: 0051 loss_train: -0.0613531210 time: 0.0060s
Time Net =  0.3489987850189209 


MSE loss =  tensor(0.0310775414, device='cuda:0', grad_fn=<MseLossBackward0>) 	 Mean Loss =  tensor(-0.0944052711, device='cuda:0', grad_fn=<NegBackward0>)
Epoch: 0101 loss_train: -0.0633277297 time: 0.0100s
Time Net =  0.6085772514343262 




 Graph number 1 
 ------------

In [21]:
print(loss_plot)
np.save("result_eigen/loss/l2",loss_plot)
torch.save(model.state_dict(), "result_eigen/"+dataset+".pt")
writer = pd.ExcelWriter("result_eigen/rt-retweet-crawl_gcn.xlsx", engine = 'xlsxwriter')
print(model.forward)
# df = pd.DataFrame(list(zip(l5,l10,l15,l20)),columns=["Top-5%","Top-10%","Top-15%","Top-20%"])
# df.to_excel(writer)
# writer.save()
# writer.close()

[(-0.06223817, 0), (-0.12991361, 1), (-0.26361963, 2), (-0.11330232, 3), (-0.0622764, 4), (-0.027166385, 5), (-0.13787411, 6), (-0.0880448, 7), (-0.28867513, 8), (-0.049206786, 9), (-0.040756956, 10), (-0.028138332, 11), (-0.24253565, 12), (-0.18257417, 13), (-0.13483997, 14), (-0.07856742, 15), (-0.12598816, 16), (-0.08873565, 17), (-0.039133027, 18), (-0.079305165, 19), (-0.18257417, 20), (-0.11470787, 21), (-0.07905693, 22), (-0.25, 23), (-0.26726124, 24), (-0.14285715, 25), (-0.19611615, 26), (-0.048056927, 27), (-0.09325047, 28), (-0.09449112, 29), (-0.25, 30), (-0.030875377, 31), (-0.03302291, 32), (-0.22941574, 33), (-0.22941574, 34), (-0.24253565, 35), (-0.14285715, 36), (-0.073720984, 37), (-0.1767767, 38), (-0.045883145, 39)]
<bound method Model.forward of Model(
  (gc1): SAGEConv(1, 128, aggr=mean)
  (bc1): BatchNorm(128)
  (gc2): SAGEConv(128, 128, aggr=mean)
  (bc2): BatchNorm(128)
  (lay1): Linear(in_features=128, out_features=64, bias=True)
  (l0): Linear(in_features=64,