# Pytorch Implementation

In [123]:
from torch_geometric.utils import from_scipy_sparse_matrix, k_hop_subgraph, to_scipy_sparse_matrix

a = [[0, 0.2, 0, 0], [0, 0, 0, 0], [0.1, 0.2, 0, 0], [0, 0, 0, 0]]
edge_index = torch.LongTensor([[0, 0], [1, 2]])
edge_weight = torch.FloatTensor([0.1, 0.2])

adj = sp.coo_matrix(
        (edge_weight, (edge_index[:, 0], edge_index[:, 1])), shape=(3, 3), dtype=np.float32
    )
print(adj.toarray())
# build symmetric adjacency matrix
adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)

adj = normalize(adj + sp.eye(adj.shape[0]))
adj = adj.tocoo().astype(np.float32)
print(adj)

[[0.1 0.  0. ]
 [0.  0.  0.2]
 [0.  0.  0. ]]
  (0, 0)	1.0
  (1, 1)	0.8333333
  (2, 1)	0.16666667
  (1, 2)	0.16666667
  (2, 2)	0.8333333


In [139]:
indices = torch.from_numpy(
        np.vstack((adj.row, adj.col)).astype(np.int64))
indices.type()

'torch.LongTensor'

In [138]:
edge_index.type()

'torch.LongTensor'

In [129]:
values = torch.from_numpy(adj.data)
values

tensor([1.0000, 0.8333, 0.1667, 0.1667, 0.8333])

In [130]:
edge_weight

tensor([0.1000, 0.2000])

In [135]:
shape = torch.Size(adj.shape)

In [136]:
torch.sparse.FloatTensor(indices, values, shape)

tensor(indices=tensor([[0, 1, 2, 1, 2],
                       [0, 1, 1, 2, 2]]),
       values=tensor([1.0000, 0.8333, 0.1667, 0.1667, 0.8333]),
       size=(3, 3), nnz=5, layout=torch.sparse_coo)

In [140]:
torch.sparse.FloatTensor(edge_index, edge_weight, shape)

tensor(indices=tensor([[0, 0],
                       [1, 2]]),
       values=tensor([0.1000, 0.2000]),
       size=(3, 3), nnz=2, layout=torch.sparse_coo)

In [None]:
indices = torch.from_numpy(
        np.vstack((adj.row, adj.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)
    shape = torch.Size(adj.shape)
    return torch.sparse.FloatTensor(indices, values, shape)

In [126]:
torch.spmm()

TypeError: new() received an invalid combination of arguments - got (Tensor, Tensor, shape=torch.Size), but expected one of:
 * (*, torch.device device)
 * (Tensor indices, Tensor values, *, torch.device device)
 * (Tensor indices, Tensor values, tuple of ints size, *, torch.device device)
 * (tuple of ints size, *, torch.device device)
      didn't match because some of the keywords were incorrect: shape


In [152]:
import sys, os
sys.path.append('/cluster/home/kamara/Explain/code')

from utils.gen_utils import from_adj_to_edge_index, from_edge_index_to_adj, init_weights

import numpy as np
import scipy.sparse as sp
import torch

In [153]:
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

import math

import torch

from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module


class GraphConvolution(Module):
    """
    Simple GCN layer, similar to https://arxiv.org/abs/1609.02907
    """

    def __init__(self, in_features, out_features, bias=True):
        super(GraphConvolution, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(torch.FloatTensor(in_features, out_features))
        if bias:
            self.bias = Parameter(torch.FloatTensor(out_features))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self):
        stdv = 1. / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-stdv, stdv)
        if self.bias is not None:
            self.bias.data.uniform_(-stdv, stdv)

    def forward(self, input, edge_index, edge_weight):
        support = torch.mm(input, self.weight)
        shape = torch.Size((len(input), len(input)))
        adj = torch.sparse.FloatTensor(edge_index, edge_weight, shape)
        output = torch.spmm(adj, support)
        if self.bias is not None:
            return output + self.bias
        else:
            return output

    def __repr__(self):
        return self.__class__.__name__ + ' (' \
               + str(self.in_features) + ' -> ' \
               + str(self.out_features) + ')'
    
    
    
class GCN(nn.Module):
    def __init__(self, nfeat, nhid, nclass, dropout):
        super(GCN, self).__init__()

        self.gc1 = GraphConvolution(nfeat, nhid)
        self.gc2 = GraphConvolution(nhid, nclass)
        self.dropout = dropout

    def forward(self, x, adj):
        x = F.relu(self.gc1(x, adj))
        x = F.dropout(x, self.dropout, training=self.training)
        x = self.gc2(x, adj)
        return F.log_softmax(x, dim=1)
    
class GCN(nn.Module):
    def __init__(self, num_node_features, hidden_dim, num_classes, dropout, num_layers=2):
        super().__init__()
        self.num_node_features, self.num_classes, self.num_layers, self.hidden_dim, self.dropout = (
            num_node_features,
            num_classes,
            num_layers,
            hidden_dim,
            dropout,
        )
        self.layers = nn.ModuleList()
        current_dim = self.num_node_features
        for l in range(self.num_layers - 1):
            self.layers.append(GraphConvolution(current_dim, hidden_dim))
            current_dim = hidden_dim
        self.layers.append(GraphConvolution(current_dim, self.num_classes))

    def forward(self, x, edge_index, edge_weight):
        for layer in self.layers[:-1]:
            x = layer(x, edge_index, edge_weight)
            x = F.relu(x)
            x = F.dropout(x, self.dropout, training=self.training)
        x = self.layers[-1](x, edge_index, edge_weight)
        return F.log_softmax(x, dim=1)

    def loss(self, pred, label):
        return F.nll_loss(pred, label)

In [154]:
def load_data(dataset_str):
    data_filename = "/cluster/home/kamara/Explain/data/{}/processed/data.pt".format(PLANETOIDS[dataset_str])
    data, _ = torch.load(data_filename)
    data = preprocess_planetoid(data)
    
    idx_train = data.train_mask
    idx_val = data.val_mask
    idx_test = data.test_mask
    
    return data, data.adj, data.x, data.y, idx_train, idx_val, idx_test
    

def preprocess_planetoid(data):
    edges = data.edge_index.T
    adj = sp.coo_matrix(
        (np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])), shape=(data.num_nodes, data.num_nodes), dtype=np.float32
    )
    # build symmetric adjacency matrix
    adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)

    adj = normalize(adj + sp.eye(adj.shape[0]))

    #data.x = torch.FloatTensor(np.array(data.x))
    labels = torch.LongTensor(data.y)

    sparse_mx = adj.tocoo().astype(np.float32)
    data.edge_index = torch.from_numpy(np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    data.edge_weight = torch.from_numpy(sparse_mx.data)
    data.adj = sparse_mx_to_torch_sparse_tensor(sparse_mx)

    return data


def normalize(mx):
    """Row-normalize sparse matrix"""
    rowsum = np.array(mx.sum(1))
    r_inv = np.power(rowsum, -1).flatten()
    r_inv[np.isinf(r_inv)] = 0.0
    r_mat_inv = sp.diags(r_inv)
    mx = r_mat_inv.dot(mx)
    return mx

def accuracy(output, labels):
    preds = output.max(1)[1].type_as(labels)
    correct = preds.eq(labels).double()
    correct = correct.sum()
    return correct / len(labels)


def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a scipy sparse matrix to a torch sparse tensor."""
    sparse_mx = sparse_mx.tocoo().astype(np.float32)
    indices = torch.from_numpy(
        np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)
    shape = torch.Size(sparse_mx.shape)
    return torch.sparse.FloatTensor(indices, values, shape)


In [163]:
from __future__ import division
from __future__ import print_function

import time
import argparse
import numpy as np

import torch
import torch.nn.functional as F
import torch.optim as optim

from sklearn.metrics import f1_score

# Training settings

args = {'fastmode':False, 'no_cuda':False, 'seed':42, 'epochs':200, 'lr': 0.01, 'weight_decay':5e-4, 
        'hidden':16, 'dropout':0.5}

class AttributeDict(dict):
    __slots__ = () 
    __getattr__ = dict.__getitem__
    __setattr__ = dict.__setitem__
    
    
args = AttributeDict(args)

args.cuda = not args.no_cuda and torch.cuda.is_available()

np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

# Load data
#adj, features, labels, idx_train, idx_val, idx_test = load_data_impl1()
data, adj, features, labels, idx_train, idx_val, idx_test = load_data('pubmed')
print('data.edge_weight', data.edge_weight)

# Model and optimizer
"""model = GCN(nfeat=features.shape[1],
            nhid=args.hidden,
            nclass=labels.max().item() + 1,
            dropout=args.dropout)"""

model = GCN(num_node_features=features.shape[1],
            hidden_dim=args.hidden,
            num_classes=labels.max().item()+1,
            dropout=args.dropout)
optimizer = optim.Adam(model.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

if args.cuda:
    model.cuda()
    features = features.cuda()
    adj = adj.cuda()
    labels = labels.cuda()
    idx_train = idx_train.cuda()
    idx_val = idx_val.cuda()
    idx_test = idx_test.cuda()


def train(epoch):
    t = time.time()
    model.train()
    optimizer.zero_grad()
    #output = model(features, adj)
    output = model(features, data.edge_index, data.edge_weight)
    
    loss_train = F.nll_loss(output[idx_train], labels[idx_train])
    #loss_train = F.cross_entropy(output[idx_train], labels[idx_train])
    
    acc_train = accuracy(output[idx_train], labels[idx_train])
    loss_train.backward()
    optimizer.step()

    if not args.fastmode:
        # Evaluate validation set performance separately,
        # deactivates dropout during validation run.
        model.eval()
        #output = model(features, adj)
        output = model(features, data.edge_index, data.edge_weight)
    

    loss_val = F.nll_loss(output[idx_val], labels[idx_val])
    #loss_val = F.cross_entropy(output[idx_val], labels[idx_val])
    acc_val = accuracy(output[idx_val], labels[idx_val])
    print('Epoch: {:04d}'.format(epoch+1),
          'loss_train: {:.4f}'.format(loss_train.item()),
          'acc_train: {:.4f}'.format(acc_train.item()),
          'loss_val: {:.4f}'.format(loss_val.item()),
          'acc_val: {:.4f}'.format(acc_val.item()),
          'time: {:.4f}s'.format(time.time() - t))


def test():
    model.eval()
    #output = model(features, adj)
    output = model(features, data.edge_index, data.edge_weight)
    loss_test = F.nll_loss(output[idx_test], labels[idx_test])
    #loss_test = F.cross_entropy(output[idx_test], labels[idx_test])
    acc_test = accuracy(output[idx_test], labels[idx_test])
    print(np.argmax(output[idx_test].detach().numpy(), axis=1))
    f1_test = f1_score(labels[idx_test].detach().numpy(), np.argmax(output[idx_test].detach().numpy(), axis=1), average='micro')
    
    print("Test set results:",
          "loss= {:.4f}".format(loss_test.item()),
          "accuracy= {:.4f}".format(acc_test.item()),
         "f1_score= {:.4f}".format(f1_test.item()))


# Train model
t_total = time.time()
for epoch in range(args.epochs):
    train(epoch)
print("Optimization Finished!")
print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

# Testing
test()

data.edge_weight tensor([0.1667, 0.0270, 0.0278,  ..., 0.5000, 0.1250, 0.5000])
Epoch: 0001 loss_train: 1.1533 acc_train: 0.3500 loss_val: 1.2180 acc_val: 0.1980 time: 0.0300s
Epoch: 0002 loss_train: 1.1442 acc_train: 0.3500 loss_val: 1.1955 acc_val: 0.3240 time: 0.0299s
Epoch: 0003 loss_train: 1.1319 acc_train: 0.4000 loss_val: 1.1746 acc_val: 0.4860 time: 0.0283s
Epoch: 0004 loss_train: 1.1218 acc_train: 0.4667 loss_val: 1.1547 acc_val: 0.5220 time: 0.0282s
Epoch: 0005 loss_train: 1.1041 acc_train: 0.5500 loss_val: 1.1350 acc_val: 0.5340 time: 0.0287s
Epoch: 0006 loss_train: 1.0722 acc_train: 0.5833 loss_val: 1.1149 acc_val: 0.5220 time: 0.0281s
Epoch: 0007 loss_train: 1.0488 acc_train: 0.5833 loss_val: 1.0951 acc_val: 0.5280 time: 0.0280s
Epoch: 0008 loss_train: 1.0358 acc_train: 0.6000 loss_val: 1.0763 acc_val: 0.5260 time: 0.0306s
Epoch: 0009 loss_train: 1.0275 acc_train: 0.5833 loss_val: 1.0587 acc_val: 0.5240 time: 0.0280s
Epoch: 0010 loss_train: 1.0057 acc_train: 0.5500 loss_va

Epoch: 0088 loss_train: 0.1668 acc_train: 0.9667 loss_val: 0.5496 acc_val: 0.7840 time: 0.0297s
Epoch: 0089 loss_train: 0.1401 acc_train: 1.0000 loss_val: 0.5491 acc_val: 0.7840 time: 0.0281s
Epoch: 0090 loss_train: 0.1463 acc_train: 1.0000 loss_val: 0.5502 acc_val: 0.7840 time: 0.0285s
Epoch: 0091 loss_train: 0.1444 acc_train: 0.9667 loss_val: 0.5511 acc_val: 0.7860 time: 0.0308s
Epoch: 0092 loss_train: 0.1257 acc_train: 1.0000 loss_val: 0.5516 acc_val: 0.7860 time: 0.0280s
Epoch: 0093 loss_train: 0.1691 acc_train: 0.9667 loss_val: 0.5531 acc_val: 0.7880 time: 0.0281s
Epoch: 0094 loss_train: 0.1244 acc_train: 0.9833 loss_val: 0.5551 acc_val: 0.7880 time: 0.0296s
Epoch: 0095 loss_train: 0.1512 acc_train: 0.9667 loss_val: 0.5564 acc_val: 0.7880 time: 0.0286s
Epoch: 0096 loss_train: 0.1258 acc_train: 1.0000 loss_val: 0.5573 acc_val: 0.7880 time: 0.0294s
Epoch: 0097 loss_train: 0.1466 acc_train: 1.0000 loss_val: 0.5579 acc_val: 0.7900 time: 0.0281s
Epoch: 0098 loss_train: 0.1670 acc_train

Epoch: 0181 loss_train: 0.0733 acc_train: 1.0000 loss_val: 0.5682 acc_val: 0.7740 time: 0.0292s
Epoch: 0182 loss_train: 0.0660 acc_train: 1.0000 loss_val: 0.5683 acc_val: 0.7740 time: 0.0277s
Epoch: 0183 loss_train: 0.0814 acc_train: 1.0000 loss_val: 0.5680 acc_val: 0.7760 time: 0.0277s
Epoch: 0184 loss_train: 0.0787 acc_train: 0.9833 loss_val: 0.5676 acc_val: 0.7800 time: 0.0277s
Epoch: 0185 loss_train: 0.0915 acc_train: 1.0000 loss_val: 0.5682 acc_val: 0.7780 time: 0.0278s
Epoch: 0186 loss_train: 0.0786 acc_train: 1.0000 loss_val: 0.5681 acc_val: 0.7780 time: 0.0277s
Epoch: 0187 loss_train: 0.0909 acc_train: 0.9833 loss_val: 0.5663 acc_val: 0.7800 time: 0.0276s
Epoch: 0188 loss_train: 0.0489 acc_train: 1.0000 loss_val: 0.5653 acc_val: 0.7820 time: 0.0276s
Epoch: 0189 loss_train: 0.0620 acc_train: 1.0000 loss_val: 0.5651 acc_val: 0.7820 time: 0.0291s
Epoch: 0190 loss_train: 0.0971 acc_train: 1.0000 loss_val: 0.5652 acc_val: 0.7820 time: 0.0278s
Epoch: 0191 loss_train: 0.0744 acc_train

RuntimeError: Can't call numpy() on Tensor that requires grad. Use tensor.detach().numpy() instead.

# Tensorflow Implementation 

In [61]:
import numpy as np
import pickle as pkl
import networkx as nx
import scipy.sparse as sp
from scipy.sparse.linalg.eigen.arpack import eigsh
import sys


def parse_index_file(filename):
    """Parse index file."""
    index = []
    for line in open(filename):
        index.append(int(line.strip()))
    return index


def sample_mask(idx, l):
    """Create mask."""
    mask = np.zeros(l)
    mask[idx] = 1
    return np.array(mask, dtype=bool)


PLANETOIDS = {"cora": "Cora", "citeseer": "CiteSeer", "pubmed": "PubMed"}

def load_data(dataset_str):
    """
    Loads input data from gcn/data directory

    ind.dataset_str.x => the feature vectors of the training instances as scipy.sparse.csr.csr_matrix object;
    ind.dataset_str.tx => the feature vectors of the test instances as scipy.sparse.csr.csr_matrix object;
    ind.dataset_str.allx => the feature vectors of both labeled and unlabeled training instances
        (a superset of ind.dataset_str.x) as scipy.sparse.csr.csr_matrix object;
    ind.dataset_str.y => the one-hot labels of the labeled training instances as numpy.ndarray object;
    ind.dataset_str.ty => the one-hot labels of the test instances as numpy.ndarray object;
    ind.dataset_str.ally => the labels for instances in ind.dataset_str.allx as numpy.ndarray object;
    ind.dataset_str.graph => a dict in the format {index: [index_of_neighbor_nodes]} as collections.defaultdict
        object;
    ind.dataset_str.test.index => the indices of test instances in graph, for the inductive setting as list object.

    All objects above must be saved using python pickle module.

    :param dataset_str: Dataset name
    :return: All data input files loaded (as well the training/test data).
    """
    names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
    objects = []
    for i in range(len(names)):
        with open("/cluster/home/kamara/Explain/data/{}/raw/ind.{}.{}".format(PLANETOIDS[dataset_str], dataset_str, names[i]), 'rb') as f:
            if sys.version_info > (3, 0):
                objects.append(pkl.load(f, encoding='latin1'))
            else:
                objects.append(pkl.load(f))

    x, y, tx, ty, allx, ally, graph = tuple(objects)
    test_idx_reorder = parse_index_file("/cluster/home/kamara/Explain/data/{}/raw/ind.{}.test.index".format(PLANETOIDS[dataset_str], dataset_str))
    test_idx_range = np.sort(test_idx_reorder)

    if dataset_str == 'citeseer':
        # Fix citeseer dataset (there are some isolated nodes in the graph)
        # Find isolated nodes, add them as zero-vecs into the right position
        test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder)+1)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range-min(test_idx_range), :] = tx
        tx = tx_extended
        ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
        ty_extended[test_idx_range-min(test_idx_range), :] = ty
        ty = ty_extended

    features = sp.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))

    labels = np.vstack((ally, ty))
    labels[test_idx_reorder, :] = labels[test_idx_range, :]

    idx_test = test_idx_range.tolist()
    idx_train = range(len(y))
    idx_val = range(len(y), len(y)+500)

    train_mask = sample_mask(idx_train, labels.shape[0])
    val_mask = sample_mask(idx_val, labels.shape[0])
    test_mask = sample_mask(idx_test, labels.shape[0])

    y_train = np.zeros(labels.shape)
    y_val = np.zeros(labels.shape)
    y_test = np.zeros(labels.shape)
    y_train[train_mask, :] = labels[train_mask, :]
    y_val[val_mask, :] = labels[val_mask, :]
    y_test[test_mask, :] = labels[test_mask, :]

    return adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask


def sparse_to_tuple(sparse_mx):
    """Convert sparse matrix to tuple representation."""
    def to_tuple(mx):
        if not sp.isspmatrix_coo(mx):
            mx = mx.tocoo()
        coords = np.vstack((mx.row, mx.col)).transpose()
        values = mx.data
        shape = mx.shape
        return coords, values, shape

    if isinstance(sparse_mx, list):
        for i in range(len(sparse_mx)):
            sparse_mx[i] = to_tuple(sparse_mx[i])
    else:
        sparse_mx = to_tuple(sparse_mx)

    return sparse_mx


def preprocess_features(features):
    """Row-normalize feature matrix and convert to tuple representation"""
    rowsum = np.array(features.sum(1))
    r_inv = np.power(rowsum, -1).flatten()
    r_inv[np.isinf(r_inv)] = 0.
    r_mat_inv = sp.diags(r_inv)
    features = r_mat_inv.dot(features)
    return sparse_to_tuple(features)


def normalize_adj(adj):
    """Symmetrically normalize adjacency matrix."""
    adj = sp.coo_matrix(adj)
    rowsum = np.array(adj.sum(1))
    d_inv_sqrt = np.power(rowsum, -0.5).flatten()
    d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.
    d_mat_inv_sqrt = sp.diags(d_inv_sqrt)
    return adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt).tocoo()


def preprocess_adj(adj):
    """Preprocessing of adjacency matrix for simple GCN model and conversion to tuple representation."""
    adj_normalized = normalize_adj(adj + sp.eye(adj.shape[0]))
    return sparse_to_tuple(adj_normalized)


def construct_feed_dict(features, support, labels, labels_mask, placeholders):
    """Construct feed dictionary."""
    feed_dict = dict()
    feed_dict.update({placeholders['labels']: labels})
    feed_dict.update({placeholders['labels_mask']: labels_mask})
    feed_dict.update({placeholders['features']: features})
    feed_dict.update({placeholders['support'][i]: support[i] for i in range(len(support))})
    feed_dict.update({placeholders['num_features_nonzero']: features[1].shape})
    return feed_dict


def chebyshev_polynomials(adj, k):
    """Calculate Chebyshev polynomials up to order k. Return a list of sparse matrices (tuple representation)."""
    print("Calculating Chebyshev polynomials up to order {}...".format(k))

    adj_normalized = normalize_adj(adj)
    laplacian = sp.eye(adj.shape[0]) - adj_normalized
    largest_eigval, _ = eigsh(laplacian, 1, which='LM')
    scaled_laplacian = (2. / largest_eigval[0]) * laplacian - sp.eye(adj.shape[0])

    t_k = list()
    t_k.append(sp.eye(adj.shape[0]))
    t_k.append(scaled_laplacian)

    def chebyshev_recurrence(t_k_minus_one, t_k_minus_two, scaled_lap):
        s_lap = sp.csr_matrix(scaled_lap, copy=True)
        return 2 * s_lap.dot(t_k_minus_one) - t_k_minus_two

    for i in range(2, k+1):
        t_k.append(chebyshev_recurrence(t_k[-1], t_k[-2], scaled_laplacian))

    return sparse_to_tuple(t_k)

In [62]:
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()
import numpy as np


def uniform(shape, scale=0.05, name=None):
    """Uniform init."""
    initial = tf.random_uniform(shape, minval=-scale, maxval=scale, dtype=tf.float32)
    return tf.Variable(initial, name=name)


def glorot(shape, name=None):
    """Glorot & Bengio (AISTATS 2010) init."""
    init_range = np.sqrt(6.0/(shape[0]+shape[1]))
    initial = tf.random_uniform(shape, minval=-init_range, maxval=init_range, dtype=tf.float32)
    return tf.Variable(initial, name=name)


def zeros(shape, name=None):
    """All zeros."""
    initial = tf.zeros(shape, dtype=tf.float32)
    return tf.Variable(initial, name=name)


def ones(shape, name=None):
    """All ones."""
    initial = tf.ones(shape, dtype=tf.float32)
    return tf.Variable(initial, name=name)

In [63]:
# global unique layer ID dictionary for layer name assignment
_LAYER_UIDS = {}


def get_layer_uid(layer_name=''):
    """Helper function, assigns unique layer IDs."""
    if layer_name not in _LAYER_UIDS:
        _LAYER_UIDS[layer_name] = 1
        return 1
    else:
        _LAYER_UIDS[layer_name] += 1
        return _LAYER_UIDS[layer_name]


def sparse_dropout(x, keep_prob, noise_shape):
    """Dropout for sparse tensors."""
    random_tensor = keep_prob
    random_tensor += tf.random_uniform(noise_shape)
    dropout_mask = tf.cast(tf.floor(random_tensor), dtype=tf.bool)
    pre_out = tf.sparse_retain(x, dropout_mask)
    return pre_out * (1./keep_prob)


def dot(x, y, sparse=False):
    """Wrapper for tf.matmul (sparse vs dense)."""
    if sparse:
        res = tf.sparse_tensor_dense_matmul(x, y)
    else:
        res = tf.matmul(x, y)
    return res


class Layer(object):
    """Base layer class. Defines basic API for all layer objects.
    Implementation inspired by keras (http://keras.io).

    # Properties
        name: String, defines the variable scope of the layer.
        logging: Boolean, switches Tensorflow histogram logging on/off

    # Methods
        _call(inputs): Defines computation graph of layer
            (i.e. takes input, returns output)
        __call__(inputs): Wrapper for _call()
        _log_vars(): Log all variables
    """

    def __init__(self, **kwargs):
        allowed_kwargs = {'name', 'logging'}
        for kwarg in kwargs.keys():
            assert kwarg in allowed_kwargs, 'Invalid keyword argument: ' + kwarg
        name = kwargs.get('name')
        if not name:
            layer = self.__class__.__name__.lower()
            name = layer + '_' + str(get_layer_uid(layer))
        self.name = name
        self.vars = {}
        logging = kwargs.get('logging', False)
        self.logging = logging
        self.sparse_inputs = False

    def _call(self, inputs):
        return inputs

    def __call__(self, inputs):
        with tf.name_scope(self.name):
            if self.logging and not self.sparse_inputs:
                tf.summary.histogram(self.name + '/inputs', inputs)
            outputs = self._call(inputs)
            if self.logging:
                tf.summary.histogram(self.name + '/outputs', outputs)
            return outputs

    def _log_vars(self):
        for var in self.vars:
            tf.summary.histogram(self.name + '/vars/' + var, self.vars[var])


class Dense(Layer):
    """Dense layer."""
    def __init__(self, input_dim, output_dim, placeholders, dropout=0., sparse_inputs=False,
                 act=tf.nn.relu, bias=False, featureless=False, **kwargs):
        super(Dense, self).__init__(**kwargs)

        if dropout:
            self.dropout = placeholders['dropout']
        else:
            self.dropout = 0.

        self.act = act
        self.sparse_inputs = sparse_inputs
        self.featureless = featureless
        self.bias = bias

        # helper variable for sparse dropout
        self.num_features_nonzero = placeholders['num_features_nonzero']

        with tf.variable_scope(self.name + '_vars'):
            self.vars['weights'] = glorot([input_dim, output_dim],
                                          name='weights')
            if self.bias:
                self.vars['bias'] = zeros([output_dim], name='bias')

        if self.logging:
            self._log_vars()

    def _call(self, inputs):
        x = inputs

        # dropout
        if self.sparse_inputs:
            x = sparse_dropout(x, 1-self.dropout, self.num_features_nonzero)
        else:
            x = tf.nn.dropout(x, 1-self.dropout)

        # transform
        output = dot(x, self.vars['weights'], sparse=self.sparse_inputs)

        # bias
        if self.bias:
            output += self.vars['bias']

        return self.act(output)


class GraphConvolution(Layer):
    """Graph convolution layer."""
    def __init__(self, input_dim, output_dim, placeholders, dropout=0.,
                 sparse_inputs=False, act=tf.nn.relu, bias=False,
                 featureless=False, **kwargs):
        super(GraphConvolution, self).__init__(**kwargs)

        if dropout:
            self.dropout = placeholders['dropout']
        else:
            self.dropout = 0.

        self.act = act
        self.support = placeholders['support']
        self.sparse_inputs = sparse_inputs
        self.featureless = featureless
        self.bias = bias

        # helper variable for sparse dropout
        self.num_features_nonzero = placeholders['num_features_nonzero']

        with tf.variable_scope(self.name + '_vars'):
            for i in range(len(self.support)):
                self.vars['weights_' + str(i)] = glorot([input_dim, output_dim],
                                                        name='weights_' + str(i))
            if self.bias:
                self.vars['bias'] = zeros([output_dim], name='bias')

        if self.logging:
            self._log_vars()

    def _call(self, inputs):
        x = inputs

        # dropout
        if self.sparse_inputs:
            x = sparse_dropout(x, 1-self.dropout, self.num_features_nonzero)
        else:
            x = tf.nn.dropout(x, 1-self.dropout)

        # convolve
        supports = list()
        for i in range(len(self.support)):
            if not self.featureless:
                pre_sup = dot(x, self.vars['weights_' + str(i)],
                              sparse=self.sparse_inputs)
            else:
                pre_sup = self.vars['weights_' + str(i)]
            support = dot(self.support[i], pre_sup, sparse=True)
            supports.append(support)
        output = tf.add_n(supports)

        # bias
        if self.bias:
            output += self.vars['bias']

        return self.act(output)

In [64]:
def masked_softmax_cross_entropy(preds, labels, mask):
    """Softmax cross-entropy loss with masking."""
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=preds, labels=labels)
    mask = tf.cast(mask, dtype=tf.float32)
    mask /= tf.reduce_mean(mask)
    loss *= mask
    return tf.reduce_mean(loss)


def masked_accuracy(preds, labels, mask):
    """Accuracy with masking."""
    correct_prediction = tf.equal(tf.argmax(preds, 1), tf.argmax(labels, 1))
    accuracy_all = tf.cast(correct_prediction, tf.float32)
    mask = tf.cast(mask, dtype=tf.float32)
    mask /= tf.reduce_mean(mask)
    accuracy_all *= mask
    return tf.reduce_mean(accuracy_all)

In [65]:
class Model(object):
    def __init__(self, **kwargs):
        allowed_kwargs = {'name', 'logging'}
        for kwarg in kwargs.keys():
            assert kwarg in allowed_kwargs, 'Invalid keyword argument: ' + kwarg
        name = kwargs.get('name')
        if not name:
            name = self.__class__.__name__.lower()
        self.name = name

        logging = kwargs.get('logging', False)
        self.logging = logging

        self.vars = {}
        self.placeholders = {}

        self.layers = []
        self.activations = []

        self.inputs = None
        self.outputs = None

        self.loss = 0
        self.accuracy = 0
        self.optimizer = None
        self.opt_op = None

    def _build(self):
        raise NotImplementedError

    def build(self):
        """ Wrapper for _build() """
        with tf.variable_scope(self.name):
            self._build()

        # Build sequential layer model
        self.activations.append(self.inputs)
        for layer in self.layers:
            hidden = layer(self.activations[-1])
            self.activations.append(hidden)
        self.outputs = self.activations[-1]

        # Store model variables for easy access
        variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.name)
        self.vars = {var.name: var for var in variables}

        # Build metrics
        self._loss()
        self._accuracy()

        self.opt_op = self.optimizer.minimize(self.loss)

    def predict(self):
        pass

    def _loss(self):
        raise NotImplementedError

    def _accuracy(self):
        raise NotImplementedError

    def save(self, sess=None):
        if not sess:
            raise AttributeError("TensorFlow session not provided.")
        saver = tf.train.Saver(self.vars)
        save_path = saver.save(sess, "tmp/%s.ckpt" % self.name)
        print("Model saved in file: %s" % save_path)

    def load(self, sess=None):
        if not sess:
            raise AttributeError("TensorFlow session not provided.")
        saver = tf.train.Saver(self.vars)
        save_path = "tmp/%s.ckpt" % self.name
        saver.restore(sess, save_path)
        print("Model restored from file: %s" % save_path)


class MLP(Model):
    def __init__(self, placeholders, input_dim, **kwargs):
        super(MLP, self).__init__(**kwargs)

        self.inputs = placeholders['features']
        self.input_dim = input_dim
        # self.input_dim = self.inputs.get_shape().as_list()[1]  # To be supported in future Tensorflow versions
        self.output_dim = placeholders['labels'].get_shape().as_list()[1]
        self.placeholders = placeholders

        self.optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)

        self.build()

    def _loss(self):
        # Weight decay loss
        for var in self.layers[0].vars.values():
            self.loss += FLAGS.weight_decay * tf.nn.l2_loss(var)

        # Cross entropy error
        self.loss += masked_softmax_cross_entropy(self.outputs, self.placeholders['labels'],
                                                  self.placeholders['labels_mask'])

    def _accuracy(self):
        self.accuracy = masked_accuracy(self.outputs, self.placeholders['labels'],
                                        self.placeholders['labels_mask'])

    def _build(self):
        self.layers.append(Dense(input_dim=self.input_dim,
                                 output_dim=FLAGS.hidden1,
                                 placeholders=self.placeholders,
                                 act=tf.nn.relu,
                                 dropout=True,
                                 sparse_inputs=True,
                                 logging=self.logging))

        self.layers.append(Dense(input_dim=FLAGS.hidden1,
                                 output_dim=self.output_dim,
                                 placeholders=self.placeholders,
                                 act=lambda x: x,
                                 dropout=True,
                                 logging=self.logging))

    def predict(self):
        return tf.nn.softmax(self.outputs)


class GCN(Model):
    def __init__(self, placeholders, input_dim, **kwargs):
        super(GCN, self).__init__(**kwargs)

        self.inputs = placeholders['features']
        self.input_dim = input_dim
        # self.input_dim = self.inputs.get_shape().as_list()[1]  # To be supported in future Tensorflow versions
        self.output_dim = placeholders['labels'].get_shape().as_list()[1]
        self.placeholders = placeholders

        self.optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)

        self.build()

    def _loss(self):
        # Weight decay loss
        for var in self.layers[0].vars.values():
            self.loss += FLAGS.weight_decay * tf.nn.l2_loss(var)

        # Cross entropy error
        self.loss += masked_softmax_cross_entropy(self.outputs, self.placeholders['labels'],
                                                  self.placeholders['labels_mask'])

    def _accuracy(self):
        self.accuracy = masked_accuracy(self.outputs, self.placeholders['labels'],
                                        self.placeholders['labels_mask'])

    def _build(self):

        self.layers.append(GraphConvolution(input_dim=self.input_dim,
                                            output_dim=FLAGS.hidden1,
                                            placeholders=self.placeholders,
                                            act=tf.nn.relu,
                                            dropout=True,
                                            sparse_inputs=True,
                                            logging=self.logging))

        self.layers.append(GraphConvolution(input_dim=FLAGS.hidden1,
                                            output_dim=self.output_dim,
                                            placeholders=self.placeholders,
                                            act=lambda x: x,
                                            dropout=True,
                                            logging=self.logging))

    def predict(self):
        return tf.nn.softmax(self.outputs)

In [70]:
from __future__ import division
from __future__ import print_function

import time

# Set random seed
seed = 41
np.random.seed(seed)
tf.set_random_seed(seed)

# Settings
FLAGS = {'dataset': 'pubmed', 'model':'gcn', 'learning_rate':0.01, 'epochs':200, 'hidden1':16, 'dropout':0.5,
        'weight_decay': 5e-4, 'early_stopping':10, 'max_degree':3}

class AttributeDict(dict):
    __slots__ = () 
    __getattr__ = dict.__getitem__
    __setattr__ = dict.__setitem__
    
    
FLAGS = AttributeDict(FLAGS)

# Load data
adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data(FLAGS.dataset)

# Some preprocessing
features = preprocess_features(features)
if FLAGS.model == 'gcn':
    support = [preprocess_adj(adj)]
    num_supports = 1
    model_func = GCN
elif FLAGS.model == 'gcn_cheby':
    support = chebyshev_polynomials(adj, FLAGS.max_degree)
    num_supports = 1 + FLAGS.max_degree
    model_func = GCN
elif FLAGS.model == 'dense':
    support = [preprocess_adj(adj)]  # Not used
    num_supports = 1
    model_func = MLP
else:
    raise ValueError('Invalid argument for model: ' + str(FLAGS.model))

# Define placeholders
placeholders = {
    'support': [tf.sparse_placeholder(tf.float32) for _ in range(num_supports)],
    'features': tf.sparse_placeholder(tf.float32, shape=tf.constant(features[2], dtype=tf.int64)),
    'labels': tf.placeholder(tf.float32, shape=(None, y_train.shape[1])),
    'labels_mask': tf.placeholder(tf.int32),
    'dropout': tf.placeholder_with_default(0., shape=()),
    'num_features_nonzero': tf.placeholder(tf.int32)  # helper variable for sparse dropout
}

# Create model
model = model_func(placeholders, input_dim=features[2][1], logging=True)

# Initialize session
sess = tf.Session()


# Define model evaluation function
def evaluate(features, support, labels, mask, placeholders):
    t_test = time.time()
    feed_dict_val = construct_feed_dict(features, support, labels, mask, placeholders)
    outs_val = sess.run([model.loss, model.accuracy], feed_dict=feed_dict_val)
    return outs_val[0], outs_val[1], (time.time() - t_test)


# Init variables
sess.run(tf.global_variables_initializer())

cost_val = []

# Train model
for epoch in range(FLAGS.epochs):

    t = time.time()
    # Construct feed dictionary
    feed_dict = construct_feed_dict(features, support, y_train, train_mask, placeholders)
    feed_dict.update({placeholders['dropout']: FLAGS.dropout})

    # Training step
    outs = sess.run([model.opt_op, model.loss, model.accuracy], feed_dict=feed_dict)

    # Validation
    cost, acc, duration = evaluate(features, support, y_val, val_mask, placeholders)
    cost_val.append(cost)

    # Print results
    print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(outs[1]),
          "train_acc=", "{:.5f}".format(outs[2]), "val_loss=", "{:.5f}".format(cost),
          "val_acc=", "{:.5f}".format(acc), "time=", "{:.5f}".format(time.time() - t))

    if epoch > FLAGS.early_stopping and cost_val[-1] > np.mean(cost_val[-(FLAGS.early_stopping+1):-1]):
        print("Early stopping...")
        break

print("Optimization Finished!")

# Testing
test_cost, test_acc, test_duration = evaluate(features, support, y_test, test_mask, placeholders)
print("Test set results:", "cost=", "{:.5f}".format(test_cost),
      "accuracy=", "{:.5f}".format(test_acc), "time=", "{:.5f}".format(test_duration))

Epoch: 0001 train_loss= 1.10520 train_acc= 0.41667 val_loss= 1.10238 val_acc= 0.58000 time= 0.23617
Epoch: 0002 train_loss= 1.09978 train_acc= 0.61667 val_loss= 1.09789 val_acc= 0.68400 time= 0.12964
Epoch: 0003 train_loss= 1.09100 train_acc= 0.75000 val_loss= 1.09320 val_acc= 0.68600 time= 0.13018
Epoch: 0004 train_loss= 1.08477 train_acc= 0.78333 val_loss= 1.08842 val_acc= 0.71200 time= 0.13211
Epoch: 0005 train_loss= 1.07603 train_acc= 0.80000 val_loss= 1.08370 val_acc= 0.73000 time= 0.14324
Epoch: 0006 train_loss= 1.06793 train_acc= 0.81667 val_loss= 1.07887 val_acc= 0.73000 time= 0.13176
Epoch: 0007 train_loss= 1.06125 train_acc= 0.78333 val_loss= 1.07407 val_acc= 0.74000 time= 0.13217
Epoch: 0008 train_loss= 1.04720 train_acc= 0.81667 val_loss= 1.06930 val_acc= 0.74000 time= 0.12774
Epoch: 0009 train_loss= 1.04453 train_acc= 0.85000 val_loss= 1.06452 val_acc= 0.73600 time= 0.13147
Epoch: 0010 train_loss= 1.02949 train_acc= 0.86667 val_loss= 1.05968 val_acc= 0.73200 time= 0.12790


Epoch: 0083 train_loss= 0.48599 train_acc= 0.93333 val_loss= 0.77970 val_acc= 0.80200 time= 0.12943
Epoch: 0084 train_loss= 0.48562 train_acc= 0.95000 val_loss= 0.77862 val_acc= 0.80000 time= 0.12852
Epoch: 0085 train_loss= 0.47162 train_acc= 0.95000 val_loss= 0.77748 val_acc= 0.80000 time= 0.12953
Epoch: 0086 train_loss= 0.49713 train_acc= 0.95000 val_loss= 0.77597 val_acc= 0.80000 time= 0.13119
Epoch: 0087 train_loss= 0.46257 train_acc= 0.95000 val_loss= 0.77386 val_acc= 0.79800 time= 0.12933
Epoch: 0088 train_loss= 0.44254 train_acc= 0.95000 val_loss= 0.77184 val_acc= 0.80000 time= 0.12827
Epoch: 0089 train_loss= 0.46816 train_acc= 0.95000 val_loss= 0.76982 val_acc= 0.80400 time= 0.12867
Epoch: 0090 train_loss= 0.47945 train_acc= 0.95000 val_loss= 0.76780 val_acc= 0.80600 time= 0.12805
Epoch: 0091 train_loss= 0.47900 train_acc= 0.93333 val_loss= 0.76607 val_acc= 0.80400 time= 0.12896
Epoch: 0092 train_loss= 0.47117 train_acc= 0.96667 val_loss= 0.76432 val_acc= 0.80400 time= 0.12905


Epoch: 0165 train_loss= 0.31391 train_acc= 0.96667 val_loss= 0.70229 val_acc= 0.79600 time= 0.12912
Epoch: 0166 train_loss= 0.35556 train_acc= 0.98333 val_loss= 0.70155 val_acc= 0.79600 time= 0.12965
Epoch: 0167 train_loss= 0.28608 train_acc= 1.00000 val_loss= 0.70065 val_acc= 0.79600 time= 0.12879
Epoch: 0168 train_loss= 0.29982 train_acc= 0.98333 val_loss= 0.70010 val_acc= 0.80400 time= 0.12901
Epoch: 0169 train_loss= 0.33092 train_acc= 0.95000 val_loss= 0.69958 val_acc= 0.80400 time= 0.12866
Epoch: 0170 train_loss= 0.32059 train_acc= 0.98333 val_loss= 0.69892 val_acc= 0.80400 time= 0.12871
Epoch: 0171 train_loss= 0.32503 train_acc= 0.98333 val_loss= 0.69851 val_acc= 0.80000 time= 0.13016
Epoch: 0172 train_loss= 0.32099 train_acc= 0.98333 val_loss= 0.69858 val_acc= 0.80400 time= 0.12761
Epoch: 0173 train_loss= 0.33926 train_acc= 1.00000 val_loss= 0.69867 val_acc= 0.80000 time= 0.12909
Epoch: 0174 train_loss= 0.34405 train_acc= 0.95000 val_loss= 0.69891 val_acc= 0.79800 time= 0.13018
