In [88]:
%matplotlib inline

import os
import os.path as osp
import time
from collections import defaultdict

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import xlsxwriter
from torch.nn.parameter import Parameter
from torch_geometric.nn import GCNConv,GATConv, SGConv, BatchNorm,SAGEConv
# Seed every RNG the notebook draws from: torch initialises the model weights,
# while NumPy's global RNG picks the centre node of each training subgraph.
SEED = 6
torch.manual_seed(SEED)
np.random.seed(SEED)
# cit-DBLP: 13.24.27.29.30.31.40.44.46
# NOTE(review): the numbers above look like seeds tried on cit-DBLP — confirm.

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
torch.set_printoptions(precision=10)  # print tensors with 10 decimal places

# Training-data location; in the visible cells it is only referenced by
# commented-out code.
gra_train = "scale"
path_train = "data/"+gra_train
# Directory that holds the "<dataset>.edgelist" files.
path_test = "data/"

dataset = "USairport500"

In [89]:
# Read the raw edge list (space-separated, no header row).
data = pd.read_csv(path_test + dataset + ".edgelist", sep=' ', header=None)
print(data)

# Name the three columns, then index the frame by the (start, end) pair.
data = data.set_axis(['start', 'end', 'weight'], axis=1)
data.head()  # not the last expression of the cell, so nothing is rendered
data = data.set_index(['start', 'end'])

        0    1       2
0       0    0       0
1       1  111   68150
2       1   49  110902
3       1  283   72020
4       1  177  220192
...   ...  ...     ...
5956  488  487    2992
5957  489  487    6785
5958  489  488    7077
5959  492  491    8053
5960  498  497    7598

[5961 rows x 3 columns]


In [90]:
def data_process(g, mode="test"):
    """Convert a weighted networkx graph into the tensors used by the model.

    Args:
        g: networkx graph. Node ids are used directly as row indices of the
            feature matrix, so they must be the integers ``0..n-1``. Every
            edge must carry at least one attribute; the first attribute value
            is taken as the edge weight.
        mode: ``"train"`` additionally returns the row-normalised adjacency
            matrix; any other value returns only the edge index and features.

    Returns:
        ``(edge_index, features)`` or, when ``mode == "train"``,
        ``(edge_index, features, adj_mat)``, all moved to ``device``:
        ``edge_index`` is a ``(2, E)`` long tensor, ``features`` is an
        ``(n, 1)`` float tensor and ``adj_mat`` is the ``(n, n)`` float
        adjacency matrix with every row scaled to unit L1 norm.
    """
    edges = list(g.edges())
    m = g.number_of_nodes()

    features = torch.zeros(size=(m,1)).float()

    print("No.of nodes = ",m)
    print("No.of edges = ",g.number_of_edges())

    # Accumulate, per node, the weight of the edges in which it appears as the
    # first endpoint of the pair returned by g.edges().
    # NOTE(review): the second endpoint never receives the weight, so on an
    # undirected graph this is presumably not the full weighted degree — confirm
    # that this is intended.
    for src, dst in edges:
        features[src,0] += torch.Tensor(list(g.get_edge_data(src,dst).values()))[0]

    # Scale by the largest accumulated weight. Skip the division when there is
    # nothing to scale (no nodes) or when the maximum is 0 (e.g. only
    # zero-weight edges), which would otherwise fill the features with NaN.
    if features.numel() > 0:
        peak = features.max()
        if peak != 0:
            features = features / peak

    # (E, 2) list of pairs -> (2, E) edge index; the explicit reshape keeps the
    # result two-dimensional even when the graph has no edges.
    edge_index = torch.LongTensor(edges).reshape(len(edges), 2).t().contiguous()

    if mode == "train":
        # Dense adjacency matrix with each row normalised to sum (in absolute
        # value) to 1, converted to float32.
        adj_mat = torch.from_numpy(nx.to_numpy_array(g))
        adj_mat = F.normalize(adj_mat, p=1,dim=1).float()

        return edge_index.to(device), features.to(device), adj_mat.to(device)

    return edge_index.to(device), features.to(device)

In [91]:
def eigen_vector_calc(adj_mat, y):
    """Return ``y`` and ``adj_mat @ y``, each divided by its own norm.

    Args:
        adj_mat: matrix multiplied with ``y`` via ``torch.spmm``.
        y: tensor to propagate through ``adj_mat``.

    Returns:
        Tuple ``(y_unit, z_unit)`` where ``y_unit = y / ||y||`` and
        ``z_unit = (adj_mat @ y) / ||adj_mat @ y||``; the norm is the default
        ``torch.linalg.norm`` of the whole tensor.
    """
    # The product is taken with the un-normalised input; the result is
    # rescaled to unit norm below.
    propagated = torch.spmm(adj_mat, y)

    y_unit = y / torch.linalg.norm(y)
    z_unit = propagated / torch.linalg.norm(propagated)
    return y_unit, z_unit

In [92]:
class Model(nn.Module):
    """Two GCN layers followed by a stack of linear layers.

    ``forward`` applies, in order: ``gc1 -> bc1 -> gc2 -> bc2 -> lay1 -> l0 ->
    lb0 -> l1 -> lb1 -> lay2`` and finally a LeakyReLU with slope 0.1. No
    activation is applied between the intermediate layers.

    Args:
        nin: number of input features per node.
        nhid1: output channels of the first GCN layer.
        nout: output channels of the second GCN layer.
        hid_l: width of the hidden linear layers.
        out_l: number of values produced per input row (default 1).
    """

    def __init__(self, nin, nhid1, nout, hid_l, out_l=1):
        super().__init__()

        self.gc1 = GCNConv(in_channels= nin, out_channels= nhid1)
        self.bc1 = BatchNorm(nhid1)
        self.gc2 = GCNConv(in_channels= nhid1, out_channels= nout)
        # bc2 normalises the output of gc2, which has `nout` channels (it was
        # sized with nhid1, which only works when nhid1 == nout).
        self.bc2 = BatchNorm(nout)
        self.lay1 = nn.Linear(nout ,hid_l)
        self.l0 = nn.Linear(hid_l,hid_l)
        self.lb0 = nn.BatchNorm1d(hid_l)
        self.l1 = nn.Linear(hid_l,hid_l)
        self.lb1 = nn.BatchNorm1d(hid_l)
        self.lay2 = nn.Linear(hid_l ,out_l)
        self.active1 = nn.LeakyReLU(0.1)

        # Replace the default initialisation with values drawn from U(0, 1).
        # The biases of the linear layers keep their default initialisation.
        # NOTE(review): depending on the torch_geometric version, GCNConv may
        # keep its weight inside a nested linear layer; in that case the
        # `gc1.weight` / `gc2.weight` assignments below would register an extra
        # parameter rather than re-initialise the one used in forward — confirm
        # against the installed version.
        with torch.no_grad():
            self.gc1.weight = Parameter(nn.init.uniform_(torch.empty(nin,nhid1),a=0.0,b=1.0))
            self.gc1.bias = Parameter(nn.init.uniform_(torch.empty(nhid1),a=0.0,b=1.0))
            self.gc2.weight = Parameter(nn.init.uniform_(torch.empty(nhid1,nout),a=0.0,b=1.0))
            self.gc2.bias = Parameter(nn.init.uniform_(torch.empty(nout),a=0.0,b=1.0))
            self.lay1.weight = Parameter(nn.init.uniform_(torch.empty(hid_l, nout ),a=0.0,b=1.0))
            self.l0.weight = Parameter(nn.init.uniform_(torch.empty(hid_l, hid_l),a=0.0,b=1.0))
            self.l1.weight = Parameter(nn.init.uniform_(torch.empty(hid_l, hid_l),a=0.0,b=1.0))
            self.lay2.weight = Parameter(nn.init.uniform_(torch.empty(out_l,hid_l),a=0.0,b=1.0))


    def forward(self, x, adj):
        """Compute one output row per row of ``x``.

        Args:
            x: node feature matrix passed to the first GCN layer.
            adj: edge index passed to both GCN layers.

        Returns:
            Output of the last linear layer after the LeakyReLU activation.
        """
        # Graph convolutions, each followed by batch normalisation.
        x = self.gc1(x, adj)
        x = self.bc1(x)
        x = self.gc2(x, adj)
        x = self.bc2(x)
        # Linear head.
        x = self.lay1(x)
        x = self.l0(x)
        x = self.lb0(x)
        x = self.l1(x)
        x = self.lb1(x)
        x = self.lay2(x)

        return self.active1(x)

In [93]:
def train_model(epoch):
    """Run one optimisation step on the current training subgraph.

    Reads the notebook-level globals ``model``, ``optimizer``, ``features``,
    ``adj`` and ``adj_mat`` and appends the detached loss of this step to the
    global list ``loss_temp``.

    Args:
        epoch: zero-based epoch index; only used for logging, which happens
            on every 50th epoch.

    Returns:
        The model output scaled to unit norm by ``eigen_vector_calc``.
    """
    model.train()
    optimizer.zero_grad()
    t = time.time()
    y = model(features,adj)

    # y: normalised prediction, z: normalised adj_mat @ prediction.
    y,z = eigen_vector_calc(adj_mat, y)

    # loss1 pulls the prediction towards its own propagated version.
    loss1 = nn.MSELoss()(y,z)
    # loss2 rewards a large mean absolute entry of the normalised prediction.
    loss2 = -y.abs().mean()
    loss_train = loss1 + 2*loss2
    loss_temp.append(loss_train.cpu().detach())

    # The graph is rebuilt by the forward pass of every call and backward is
    # invoked once per graph, so it does not need to be retained.
    loss_train.backward()
    optimizer.step()

    if epoch%50==0:
        print("MSE loss = ",loss1,"\t","Mean Loss = ",loss2)
        print('Epoch: {:04d}'.format(epoch+1),
                  'loss_train: {:.10f}'.format(loss_train.item()),
                  'time: {:.4f}s'.format(time.time() - t))


    return y

In [94]:
if __name__=="__main__":

    # Training: fit one shared model on 50 randomly sampled one-hop subgraphs
    # of the dataset graph, 500 optimisation steps per subgraph.

    # One (mean training loss, subgraph index) tuple per trained subgraph.
    loss_plot = []

    model= Model(nin = 1, nhid1=128, nout=128, hid_l=64, out_l=1).to(device)

    optimizer = optim.Adam(model.parameters(), lr = 0.001)
    #flag = False
    g = nx.read_weighted_edgelist(osp.join(path_test, dataset+".edgelist"),nodetype=int)#int#
    no_of_nodes = g.number_of_nodes()

    for i in range(50):
        print("\n\n Graph number",i,"\n","-"*120)
        #if flag: break

        # Pick a random centre node from NumPy's global RNG.
        samples = np.random.choice(list(g.nodes()))#np.random.randint(low=0, high=no_of_nodes)

        # BFS limited to depth 1: the centre node plus the nodes one hop away.
        ego_tree = nx.bfs_tree(g, source=samples, depth_limit=1)
        # data_process uses node ids as row indices, so relabel to 0..k-1.
        # (Named `node_relabel` rather than `map` so the builtin stays usable.)
        node_relabel = {node: idx for idx, node in enumerate(ego_tree.nodes())}
        # Induced subgraph of the original graph on the BFS nodes.
        g_subgraph = nx.relabel_nodes(g.subgraph(ego_tree), node_relabel)
        print(g_subgraph)

        # Batch normalisation in training mode cannot compute statistics from
        # a single row, so a subgraph with fewer than two nodes is skipped.
        if g_subgraph.number_of_nodes() < 2:
            print("Skipping subgraph with fewer than 2 nodes")
            continue

        #g = nx.read_edgelist(osp.join(path_train,gra_train+"_1k/"+gra_train+"1k_"+str(i)),nodetype=int)
        # These three names and loss_temp are read as globals by train_model.
        adj,features,adj_mat = data_process(g_subgraph ,mode="train")
        print("adj_list = ",adj.shape,"adj_mat = ",adj_mat.shape)

        loss_temp=[]
        net_time = time.time()

        for ep in range(500):
            y = train_model(ep)
            if ep%50==0:
                print("Time Net = ",time.time()-net_time,"\n\n")
                # Disabled validation / early-stopping hook:
                # print("{:^10}".format("-------Validation-------"))
                # l5, l10, l15, l20 = [],[],[],[]
                # test(g)
                # if l5[0]>0.5 and l10>0.55:
                #   flag = True
                #   break
        loss_plot.append((np.mean(loss_temp), i))



 Graph number 0 
 ------------------------------------------------------------------------------------------------------------------------
Graph with 3 nodes and 2 edges
No.of nodes =  3
No.of edges =  2
adj_list =  torch.Size([2, 2]) adj_mat =  torch.Size([3, 3])
MSE loss =  tensor(0.7368852496, device='cuda:0', grad_fn=<MseLossBackward0>) 	 Mean Loss =  tensor(-0.3649740815, device='cuda:0', grad_fn=<NegBackward0>)
Epoch: 0001 loss_train: 0.0069370866 time: 0.0140s
Time Net =  0.014999866485595703 


MSE loss =  tensor(0.6420027018, device='cuda:0', grad_fn=<MseLossBackward0>) 	 Mean Loss =  tensor(-0.4055557251, device='cuda:0', grad_fn=<NegBackward0>)
Epoch: 0051 loss_train: -0.1691087484 time: 0.0080s
Time Net =  0.46699976921081543 


MSE loss =  tensor(0.2070699930, device='cuda:0', grad_fn=<MseLossBackward0>) 	 Mean Loss =  tensor(-0.4964672923, device='cuda:0', grad_fn=<NegBackward0>)
Epoch: 0101 loss_train: -0.7858645916 time: 0.0090s
Time Net =  0.846583366394043 


MSE lo

In [95]:
# Persist the per-subgraph losses and the trained weights.
print(loss_plot)
# np.save / torch.save do not create missing parent directories; create them
# first so a finished training run is not lost to a FileNotFoundError.
os.makedirs("result_eigen/loss", exist_ok=True)
np.save("result_eigen/loss/l2",loss_plot)
torch.save(model.state_dict(), "result_eigen/"+dataset+".pt")
# NOTE(review): this writer is never saved or closed in the visible cells, and
# its file name refers to a different dataset than `dataset` — confirm whether
# it is still needed.
writer = pd.ExcelWriter("result_eigen/rt-retweet-crawl_gcn.xlsx", engine = 'xlsxwriter')
print(model.forward)
# df = pd.DataFrame(list(zip(l5,l10,l15,l20)),columns=["Top-5%","Top-10%","Top-15%","Top-20%"])
# df.to_excel(writer)
# writer.save()
# writer.close()

[(-0.6961802, 0), (-0.43670458, 1), (-1.2959619, 2), (-0.51639783, 3), (-1.4142134, 4), (-1.4142135, 5), (-0.7071067, 6), (-1.4142134, 7), (-1.1547004, 8), (-1.4142134, 9), (-1.1547004, 10), (-0.6666667, 11), (-1.4142135, 12), (-0.75592893, 13), (-0.8164966, 14), (-0.7071067, 15), (-1.1547004, 16), (-1.1547004, 17), (-1.1547004, 18), (-1.0, 19), (-0.89442724, 20), (-0.4264015, 21), (-1.0, 22), (-0.37139076, 23), (-1.0, 24), (-1.1546994, 25), (-1.0, 26), (-1.1547004, 27), (-0.24253562, 28), (-0.89442724, 29), (-1.1546978, 30), (-1.1547004, 31), (-0.89442724, 32), (-1.1547004, 33), (-1.0, 34), (-1.4142107, 35), (-1.0, 36), (-0.40000007, 37), (-0.7071067, 38), (-0.53452253, 39), (-0.81649673, 40), (-0.55470026, 41), (-0.6666667, 42), (-1.41421, 43), (-1.1547004, 44), (-1.0, 45), (-0.89442724, 46), (-0.55470026, 47), (-0.89442724, 48), (-0.55470026, 49)]
<bound method Model.forward of Model(
  (gc1): GCNConv(1, 128)
  (bc1): BatchNorm(128)
  (gc2): GCNConv(128, 128)
  (bc2): BatchNorm(128)