In [1]:
import numpy as np
import pickle as pkl
import networkx as nx
import scipy.sparse as sp
import torch
from scipy.sparse import csgraph
import sys
import time
import argparse
import numpy as np
import torch
import torch.nn.functional as F
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from utils import *
from graphConvolution import *

### Load data

In [2]:
adj, features, labels, idx_train, idx_val, idx_test = load_data(dataset='cora')


[STEP 1]: Upload cora dataset.
| # of nodes : 2708
| # of edges : 5278.0
| # of features : 1433
| # of clases   : 7
| # of train set : 140
| # of val set   : 500
| # of test set  : 1000


In [3]:
from torch.autograd import Variable
features, adj, labels = Variable(features), Variable(adj), Variable(labels)
# torch.cuda.manual_seed(72)
features = features.cuda()
adj = adj.cuda()
labels = labels.cuda()
idx_train = idx_train.cuda()
idx_val = idx_val.cuda()
idx_test = idx_test.cuda()

### FM-6layers

In [4]:
class GraphConvolutionFM(Module):
    def __init__(self, in_features, out_features, embedding, bias=True):
        super(GraphConvolutionFM, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.embedding = embedding
        self.weight = Parameter(torch.FloatTensor(in_features, out_features),requires_grad=True)
        self.V = Parameter(torch.randn(out_features, in_features, embedding),requires_grad=True)
        if bias:
            self.bias = Parameter(torch.FloatTensor(out_features))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self):
        stdv = 1. / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-stdv, stdv)
        self.V.data.uniform_(-stdv, stdv)
        if self.bias is not None:
            self.bias.data.uniform_(-stdv, stdv)

    def forward(self, input, adj,nhid1,nhid2,nhid3,nhid4):
        out_lin = torch.mm(input, self.weight) + self.bias
        # all
        out_1 = torch.matmul(input,self.V).pow(2).sum(2, keepdim=True).view(self.out_features,input.shape[0]).t()
        out_2 = torch.matmul(input.pow(2), self.V.pow(2)).sum(2, keepdim=True).view(self.out_features,input.shape[0]).t() 
        out_inter1 = 0.5*(out_1 - out_2)
        # x1-xnhid1
        out_3 = torch.matmul(input[:,:nhid1],self.V[:,:nhid1,:]).pow(2).sum(2, keepdim=True).view(self.out_features,input.shape[0]).t()
        out_4 = torch.matmul(input[:,:nhid1].pow(2), self.V[:,:nhid1,:].pow(2)).sum(2, keepdim=True).view(self.out_features,input.shape[0]).t() 
        out_inter2 = 0.5*(out_3 - out_4)
        # xnhid1-xnhid2
        out_5 = torch.matmul(input[:,nhid1:nhid2],self.V[:,nhid1:nhid2,:]).pow(2).sum(2, keepdim=True).view(self.out_features,input.shape[0]).t()
        out_6 = torch.matmul(input[:,nhid1:nhid2].pow(2), self.V[:,nhid1:nhid2,:].pow(2)).sum(2, keepdim=True).view(self.out_features,input.shape[0]).t() 
        out_inter3 = 0.5*(out_5 - out_6)
        # xnhid2-xnhid3
        out_7 = torch.matmul(input[:,nhid2:nhid3],self.V[:,nhid2:nhid3,:]).pow(2).sum(2, keepdim=True).view(self.out_features,input.shape[0]).t()
        out_8 = torch.matmul(input[:,nhid2:nhid3].pow(2), self.V[:,nhid2:nhid3,:].pow(2)).sum(2, keepdim=True).view(self.out_features,input.shape[0]).t() 
        out_inter4 = 0.5*(out_7 - out_8)
        # xnhid3-xnhid4
        out_9 = torch.matmul(input[:,nhid3:nhid4],self.V[:,nhid3:nhid4,:]).pow(2).sum(2, keepdim=True).view(self.out_features,input.shape[0]).t()
        out_10 = torch.matmul(input[:,nhid3:nhid4].pow(2), self.V[:,nhid3:nhid4,:].pow(2)).sum(2, keepdim=True).view(self.out_features,input.shape[0]).t() 
        out_inter5 = 0.5*(out_9- out_10)
        
        # xnhid4-xnhid5
        out_11 = torch.matmul(input[:,nhid4:],self.V[:,nhid4:,:]).pow(2).sum(2, keepdim=True).view(self.out_features,input.shape[0]).t()
        out_12 = torch.matmul(input[:,nhid4:].pow(2), self.V[:,nhid4:,:].pow(2)).sum(2, keepdim=True).view(self.out_features,input.shape[0]).t() 
        out_inter6 = 0.5*(out_11- out_12)
        
        out_inter = out_inter1 - out_inter2 - out_inter3- out_inter4- out_inter5-out_inter6
        
        output = out_inter + out_lin
        
        output = torch.spmm(adj, output) 
        return output
    

    def __repr__(self):
        return self.__class__.__name__ + ' (' \
               + str(self.in_features) + ' -> ' \
               + str(self.out_features) + ')'

In [5]:
class GCN(nn.Module):
    def __init__(self, nfeat, nhid1,nhid2, nhid3,nhid4,nhid5,nclass, dropout):
        super(GCN, self).__init__()
        self.nhid1 = nhid1
        self.nhid2 = nhid2
        self.nhid3 = nhid3
        self.nhid4 = nhid4
        self.nhid5 = nhid5
        
        self.gc_1 = GraphConvolution(nfeat, nhid1,bias=True)
        self.gc_2 = GraphConvolution(nfeat, nhid2,bias=True)
        self.gc_3 = GraphConvolution(nfeat, nhid3,bias=True)
        self.gc_4 = GraphConvolution(nfeat, nhid4,bias=True)
        
        self.gc1 = GraphConvolution(nfeat, nhid1,bias=True)
        self.gc1_2 = GraphConvolution(nhid1, nhid2,bias=True)
        self.gc1_3 = GraphConvolution(nhid1, nhid3,bias=True)
        self.gc1_4 = GraphConvolution(nhid1, nhid4,bias=True)
        
        
        self.gc2 = GraphConvolution(nhid1, nhid2,bias=True)
        self.gc2_3 = GraphConvolution(nhid2, nhid3,bias=True)
        self.gc2_4 = GraphConvolution(nhid2, nhid4,bias=True)
        
        self.W1 = nn.Parameter(torch.ones(2708, 2),requires_grad=True)
        self.W2 = nn.Parameter(torch.ones(2708, 3),requires_grad=True)
        self.W3 = nn.Parameter(torch.ones(2708, 4),requires_grad=True)
        self.W4 = nn.Parameter(torch.ones(2708, 5),requires_grad=True)
        
        self.gc3 = GraphConvolution(nhid2, nhid3,bias=True)
        self.gc3_4 = GraphConvolution(nhid3, nhid4,bias=True)
        
        self.gc4 = GraphConvolution(nhid3, nhid4,bias=True)
        
        self.gc5 = GraphConvolution(nhid4, nhid5,bias=True)
        
        self.gcFM6 = GraphConvolutionFM(nhid5+nhid4+nhid3+nhid2+nhid1, nclass,embedding=5,bias=True)
        
        self.dropout = dropout

    def forward(self, x, adj):
        x_d = F.dropout(x, self.dropout, training=self.training)
        x_df1 = F.dropout(F.relu(self.gc_1(x_d,adj)), self.dropout, training=self.training)
        x_df2 = F.dropout(F.relu(self.gc_2(x_d,adj)), self.dropout, training=self.training)
        x_df3 = F.dropout(F.relu(self.gc_3(x_d,adj)), self.dropout, training=self.training)
        x_df4 = F.dropout(F.relu(self.gc_4(x_d,adj)), self.dropout, training=self.training)

        x1_d = F.dropout(F.relu(self.gc1(x_d, adj)), training=self.training)
        x1_df2 = F.dropout(F.relu(self.gc1_2(x1_d,adj)), self.dropout, training=self.training)
        x1_df3 = F.dropout(F.relu(self.gc1_3(x1_d,adj)), self.dropout, training=self.training)
        x1_df4 = F.dropout(F.relu(self.gc1_4(x1_d,adj)), self.dropout, training=self.training)
        
        combined1 =  torch.mul(x_df1, self.W1[:,0].view(2708,1)) + torch.mul(x1_d,self.W1[:,1].view(2708,1))
        combined1 = F.dropout(combined1, self.dropout, training=self.training)
        
        x2_d = F.dropout(F.relu(self.gc2(combined1, adj)), self.dropout, training=self.training)
        x2_df3 = F.dropout(F.relu(self.gc2_3(x2_d,adj)), self.dropout, training=self.training)
        x2_df4 = F.dropout(F.relu(self.gc2_4(x2_d,adj)), self.dropout, training=self.training)

        combined2 =  torch.mul(x_df2, self.W2[:,0].view(2708,1)) + torch.mul(x1_df2, self.W2[:,0].view(2708,1))+ torch.mul(x2_d,self.W2[:,2].view(2708,1))
        combined2 = F.dropout(combined2, self.dropout, training=self.training)

        x3_d = F.dropout(F.relu(self.gc3(combined2, adj)), self.dropout, training=self.training)
        x3_df4 = F.dropout(F.relu(self.gc3_4(x3_d,adj)), self.dropout, training=self.training)
        
        combined3 =  torch.mul(x_df3, self.W3[:,0].view(2708,1)) +torch.mul(x1_df3, self.W3[:,1].view(2708,1))+torch.mul(x2_df3, self.W3[:,2].view(2708,1))+ torch.mul(x3_d,self.W3[:,3].view(2708,1)) 
        combined3 = F.dropout(combined3, self.dropout, training=self.training)
        
        x4_d = F.dropout(F.relu(self.gc4(combined3, adj)), self.dropout, training=self.training)
        
        combined4 =  torch.mul(x_df4, self.W4[:,0].view(2708,1)) +torch.mul(x1_df4, self.W4[:,1].view(2708,1))+torch.mul(x2_df4, self.W4[:,2].view(2708,1))+torch.mul(x3_df4, self.W4[:,3].view(2708,1))+torch.mul(x4_d,self.W4[:,4].view(2708,1))  
        combined4 = F.dropout(combined4, self.dropout, training=self.training)
        
        
        x5_d = F.dropout(F.relu(self.gc5(combined4, adj)), self.dropout, training=self.training)
        
        combined5 = torch.cat([x1_d, x2_d, x3_d, x4_d, x5_d], dim=1)
        combined5 = F.dropout(combined5, self.dropout, training=self.training)
        
        x6 = self.gcFM6(combined5,adj,self.nhid1,self.nhid2+self.nhid1,self.nhid3+self.nhid2+self.nhid1,self.nhid4+self.nhid3+self.nhid2+self.nhid1) 
        return F.log_softmax(x6, dim=1)

In [6]:
def train(epoch, model,record):
    t = time.time()
    model.train()
    optimizer.zero_grad()
    output = model(features, adj)
    loss_train = F.cross_entropy(output[idx_train], labels[idx_train]) 
    acc_train = accuracy(output[idx_train], labels[idx_train])
    loss_train.backward()
    optimizer.step()
    model.eval()
    output = model(features, adj)

    loss_val = F.cross_entropy(output[idx_val], labels[idx_val])
    acc_val = accuracy(output[idx_val], labels[idx_val])
    
    loss_test = F.cross_entropy(output[idx_test], labels[idx_test])
    acc_test = accuracy(output[idx_test], labels[idx_test])
    print('Epoch: {:04d}'.format(epoch+1),
          'loss_train: {:.4f}'.format(loss_train.item()),
          'acc_train: {:.4f}'.format(acc_train.item()),
          'acc_val: {:.4f}'.format(acc_val.item()),
          'acc_test: {:.4f}'.format(acc_test.item()),
          'time: {:.4f}s'.format(time.time() - t))
    record[acc_val.item()] = acc_test.item()

In [8]:
model = GCN(nfeat=features.shape[1],
                nhid1=32,
                nhid2=32,
                nhid3=32,
                nhid4=32,
                nhid5=32,
                nclass=labels.max().item() + 1,
                dropout=0.8)
model.cuda()
optimizer = optim.Adam(model.parameters(),
                       lr=0.02, weight_decay=5e-4)
t_total = time.time()
record = {}
for epoch in range(400):  
    train(epoch,model,record)
print("Optimization Finished!")
print("Total time elapsed: {:.4f}s".format(time.time() - t_total))
bit_list = sorted(record.keys())
bit_list.reverse()
for key in bit_list[:10]:
    value = record[key]
    print(key,value)

Epoch: 0001 loss_train: 11.5620 acc_train: 0.1071 acc_val: 0.0760 acc_test: 0.0930 time: 0.0511s
Epoch: 0002 loss_train: 3.5902 acc_train: 0.1429 acc_val: 0.0760 acc_test: 0.0920 time: 0.0384s
Epoch: 0003 loss_train: 2.5083 acc_train: 0.1143 acc_val: 0.0860 acc_test: 0.1050 time: 0.0377s
Epoch: 0004 loss_train: 2.3134 acc_train: 0.1429 acc_val: 0.1860 acc_test: 0.2030 time: 0.0350s
Epoch: 0005 loss_train: 2.2924 acc_train: 0.1143 acc_val: 0.2500 acc_test: 0.2450 time: 0.0386s
Epoch: 0006 loss_train: 2.1104 acc_train: 0.1357 acc_val: 0.0880 acc_test: 0.1130 time: 0.0395s
Epoch: 0007 loss_train: 2.0289 acc_train: 0.1500 acc_val: 0.1560 acc_test: 0.1520 time: 0.0399s
Epoch: 0008 loss_train: 1.9527 acc_train: 0.1357 acc_val: 0.1620 acc_test: 0.1550 time: 0.0331s
Epoch: 0009 loss_train: 2.0664 acc_train: 0.1357 acc_val: 0.1620 acc_test: 0.1510 time: 0.0424s
Epoch: 0010 loss_train: 1.9618 acc_train: 0.1643 acc_val: 0.1620 acc_test: 0.1490 time: 0.0366s
Epoch: 0011 loss_train: 1.9525 acc_trai

Epoch: 0091 loss_train: 1.4228 acc_train: 0.4786 acc_val: 0.7560 acc_test: 0.7720 time: 0.0336s
Epoch: 0092 loss_train: 1.5333 acc_train: 0.4286 acc_val: 0.7700 acc_test: 0.7830 time: 0.0329s
Epoch: 0093 loss_train: 1.3958 acc_train: 0.5357 acc_val: 0.7720 acc_test: 0.7900 time: 0.0358s
Epoch: 0094 loss_train: 1.4330 acc_train: 0.4714 acc_val: 0.7720 acc_test: 0.7970 time: 0.0361s
Epoch: 0095 loss_train: 1.4314 acc_train: 0.4786 acc_val: 0.7720 acc_test: 0.8070 time: 0.0330s
Epoch: 0096 loss_train: 1.4784 acc_train: 0.5000 acc_val: 0.7820 acc_test: 0.8090 time: 0.0331s
Epoch: 0097 loss_train: 1.4382 acc_train: 0.5000 acc_val: 0.7920 acc_test: 0.8130 time: 0.0343s
Epoch: 0098 loss_train: 1.4187 acc_train: 0.5143 acc_val: 0.7940 acc_test: 0.8150 time: 0.0364s
Epoch: 0099 loss_train: 1.4576 acc_train: 0.4786 acc_val: 0.7880 acc_test: 0.8130 time: 0.0340s
Epoch: 0100 loss_train: 1.3723 acc_train: 0.4786 acc_val: 0.7840 acc_test: 0.8020 time: 0.0336s
Epoch: 0101 loss_train: 1.4339 acc_train

Epoch: 0181 loss_train: 1.1900 acc_train: 0.5786 acc_val: 0.7920 acc_test: 0.8340 time: 0.0349s
Epoch: 0182 loss_train: 1.1816 acc_train: 0.5643 acc_val: 0.7940 acc_test: 0.8370 time: 0.0335s
Epoch: 0183 loss_train: 1.1129 acc_train: 0.6714 acc_val: 0.7920 acc_test: 0.8400 time: 0.0332s
Epoch: 0184 loss_train: 1.0467 acc_train: 0.6571 acc_val: 0.7920 acc_test: 0.8390 time: 0.0358s
Epoch: 0185 loss_train: 1.2245 acc_train: 0.5857 acc_val: 0.7920 acc_test: 0.8440 time: 0.0368s
Epoch: 0186 loss_train: 1.1942 acc_train: 0.5214 acc_val: 0.7860 acc_test: 0.8430 time: 0.0330s
Epoch: 0187 loss_train: 1.2803 acc_train: 0.5500 acc_val: 0.7880 acc_test: 0.8370 time: 0.0327s
Epoch: 0188 loss_train: 1.1339 acc_train: 0.5929 acc_val: 0.7860 acc_test: 0.8260 time: 0.0331s
Epoch: 0189 loss_train: 1.0908 acc_train: 0.6571 acc_val: 0.7760 acc_test: 0.8240 time: 0.0358s
Epoch: 0190 loss_train: 1.0828 acc_train: 0.5857 acc_val: 0.7780 acc_test: 0.8220 time: 0.0345s
Epoch: 0191 loss_train: 1.1642 acc_train

Epoch: 0271 loss_train: 0.9149 acc_train: 0.7071 acc_val: 0.7040 acc_test: 0.6960 time: 0.0372s
Epoch: 0272 loss_train: 0.8733 acc_train: 0.7071 acc_val: 0.7040 acc_test: 0.7060 time: 0.0332s
Epoch: 0273 loss_train: 0.8339 acc_train: 0.7071 acc_val: 0.7060 acc_test: 0.7120 time: 0.0335s
Epoch: 0274 loss_train: 0.7927 acc_train: 0.7286 acc_val: 0.7080 acc_test: 0.7150 time: 0.0330s
Epoch: 0275 loss_train: 0.7928 acc_train: 0.7214 acc_val: 0.7100 acc_test: 0.7160 time: 0.0332s
Epoch: 0276 loss_train: 0.8121 acc_train: 0.7143 acc_val: 0.7140 acc_test: 0.7180 time: 0.0369s
Epoch: 0277 loss_train: 0.8281 acc_train: 0.7000 acc_val: 0.7080 acc_test: 0.7150 time: 0.0368s
Epoch: 0278 loss_train: 0.8989 acc_train: 0.6929 acc_val: 0.7040 acc_test: 0.7170 time: 0.0330s
Epoch: 0279 loss_train: 0.7677 acc_train: 0.7143 acc_val: 0.7000 acc_test: 0.7100 time: 0.0318s
Epoch: 0280 loss_train: 0.8647 acc_train: 0.7429 acc_val: 0.7060 acc_test: 0.7060 time: 0.0335s
Epoch: 0281 loss_train: 0.9647 acc_train

Epoch: 0361 loss_train: 0.6609 acc_train: 0.7857 acc_val: 0.6380 acc_test: 0.6390 time: 0.0343s
Epoch: 0362 loss_train: 0.6495 acc_train: 0.8071 acc_val: 0.6560 acc_test: 0.6500 time: 0.0337s
Epoch: 0363 loss_train: 0.6962 acc_train: 0.8071 acc_val: 0.6640 acc_test: 0.6650 time: 0.0331s
Epoch: 0364 loss_train: 1.9858 acc_train: 0.7571 acc_val: 0.6660 acc_test: 0.6720 time: 0.0350s
Epoch: 0365 loss_train: 0.8241 acc_train: 0.7214 acc_val: 0.6680 acc_test: 0.6740 time: 0.0364s
Epoch: 0366 loss_train: 0.7229 acc_train: 0.7357 acc_val: 0.6740 acc_test: 0.6760 time: 0.0318s
Epoch: 0367 loss_train: 0.6785 acc_train: 0.8000 acc_val: 0.6780 acc_test: 0.6780 time: 0.0328s
Epoch: 0368 loss_train: 0.6773 acc_train: 0.7929 acc_val: 0.6760 acc_test: 0.6820 time: 0.0337s
Epoch: 0369 loss_train: 0.7239 acc_train: 0.7500 acc_val: 0.6840 acc_test: 0.6820 time: 0.0358s
Epoch: 0370 loss_train: 0.6865 acc_train: 0.8000 acc_val: 0.6800 acc_test: 0.6840 time: 0.0366s
Epoch: 0371 loss_train: 0.6749 acc_train