In [11]:
import pickle as pkl
import sys

import networkx as nx
import numpy as np
import pandas as pd
import scipy.sparse as sp
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import optimizers
from scipy.sparse.linalg.eigen.arpack import eigsh

In [2]:
# Hyperparameter cell. NOTE(review): the GCN class below reads `args.hidden1`,
# `args.dropout` and `args.weight_decay`, not these module-level names -- confirm
# which of the two is meant to be authoritative.
learning_rate = 0.01  # presumably the optimizer step size; no optimizer is built in the visible cells
epochs = 200  # presumably the number of training epochs; no training loop is visible here
hidden1 = 16  # presumably the width of the first graph-convolution layer
dropout = 0.5  # presumably the dropout rate
weight_decay = 5e-4  # presumably the L2 penalty coefficient
early_stopping = 10  # presumably an early-stopping patience; not referenced in the visible cells
max_degree = 3  # presumably the Chebyshev polynomial order; not referenced in the visible cells

In [4]:
def uniform(shape, scale=0.05, name=None):
    """Return a float32 ``tf.Variable`` of ``shape`` sampled uniformly from [-scale, scale)."""
    return tf.Variable(
        tf.random.uniform(shape, minval=-scale, maxval=scale, dtype=tf.float32),
        name=name,
    )


def glorot(shape, name=None):
    """Return a float32 ``tf.Variable`` with Glorot-style uniform initialisation.

    Values are sampled uniformly from [-r, r) with
    ``r = sqrt(6 / (shape[0] + shape[1]))``.
    """
    fan_in, fan_out = shape[0], shape[1]
    limit = np.sqrt(6.0 / (fan_in + fan_out))
    samples = tf.random.uniform(shape, minval=-limit, maxval=limit, dtype=tf.float32)
    return tf.Variable(samples, name=name)


def zeros(shape, name=None):
    """Return a float32 ``tf.Variable`` of ``shape`` filled with zeros."""
    return tf.Variable(tf.zeros(shape, dtype=tf.float32), name=name)


def ones(shape, name=None):
    """Return a float32 ``tf.Variable`` of ``shape`` filled with ones."""
    return tf.Variable(tf.ones(shape, dtype=tf.float32), name=name)

In [5]:
def sparse_dropout(x, rate, noise_shape):
    """
    Dropout for sparse tensors.

    Each stored value of ``x`` is kept with probability ``1 - rate`` and the
    surviving values are rescaled by ``1 / (1 - rate)``. ``noise_shape`` is
    the shape of the random draw that decides which values are kept.
    """
    keep_prob = 1 - rate
    # floor(keep_prob + U[0, 1)) is 1 with probability keep_prob and 0 otherwise.
    noise = keep_prob + tf.random.uniform(noise_shape)
    keep_mask = tf.cast(tf.floor(noise), dtype=tf.bool)
    kept = tf.sparse.retain(x, keep_mask)
    return kept * (1./keep_prob)


def dot(x, y, sparse=False):
    """
    Matrix product ``x @ y``.

    Uses ``tf.sparse.sparse_dense_matmul`` when ``sparse`` is truthy and
    ``tf.matmul`` otherwise.
    """
    matmul = tf.sparse.sparse_dense_matmul if sparse else tf.matmul
    return matmul(x, y)




class Dense(layers.Layer):
    """Fully connected layer with optional input dropout and bias.

    Args:
        input_dim: size of the last axis of the incoming features.
        output_dim: number of output units.
        placeholders: mapping that must contain 'num_features_nonzero' (the
            noise shape used for sparse dropout) and, when ``dropout`` is
            truthy, 'dropout' (the dropout rate to apply).
        dropout: truthy to enable dropout using ``placeholders['dropout']``.
        sparse_inputs: True when the layer input is a sparse tensor.
        act: activation applied to the affine output.
        bias: whether a learned bias vector is added.
        featureless: stored on the instance; not used by this layer.
    """
    def __init__(self, input_dim, output_dim, placeholders, dropout=0., sparse_inputs=False,
                 act=tf.nn.relu, bias=False, featureless=False, **kwargs):
        super(Dense, self).__init__(**kwargs)

        if dropout:
            self.dropout = placeholders['dropout']
        else:
            self.dropout = 0.

        self.act = act
        self.sparse_inputs = sparse_inputs
        self.featureless = featureless
        self.bias = bias

        # helper variable for sparse dropout
        self.num_features_nonzero = placeholders['num_features_nonzero']

        # The TF1 ``tf.variable_scope`` / ``self.logging`` machinery does not
        # exist on a TF2 Keras layer; create the parameters with ``add_weight``
        # so Keras tracks them, and keep the ``self.vars`` lookup table.
        self.vars = {}
        self.vars['weights'] = self.add_weight(name='weights',
                                               shape=(input_dim, output_dim),
                                               initializer='glorot_uniform')
        if self.bias:
            self.vars['bias'] = self.add_weight(name='bias',
                                                shape=(output_dim,),
                                                initializer='zeros')

    def call(self, inputs, training=None):
        """Keras entry point; delegates to ``_call``."""
        return self._call(inputs, training=training)

    def _call(self, inputs, training=None):
        """Apply dropout (unless ``training`` is False), the affine map and the activation."""
        x = inputs

        # dropout -- both helpers take the drop *rate*, not the keep probability
        if training is not False:
            if self.sparse_inputs:
                x = sparse_dropout(x, self.dropout, self.num_features_nonzero)
            else:
                x = tf.nn.dropout(x, self.dropout)

        # transform
        output = dot(x, self.vars['weights'], sparse=self.sparse_inputs)

        # bias
        if self.bias:
            output += self.vars['bias']

        return self.act(output)


class GraphConvolution(layers.Layer):
    """
    Graph convolution layer.

    Computes ``activation(sum_i support[i] @ (x @ W_i) [+ b])``.

    Args:
        input_dim: number of input features per node.
        output_dim: number of output features per node.
        num_features_nonzero: noise shape passed to ``sparse_dropout``.
        dropout: dropout rate applied to the input.
        is_sparse_inputs: True when ``x`` is a sparse tensor.
        activation: callable applied to the aggregated output.
        bias: whether a learned bias vector is added.
        featureless: when True, ``x`` is ignored and ``W_i`` is used directly.
        num_supports: number of support matrices, i.e. how many weight
            matrices to create (one per support).
    """
    def __init__(self, input_dim, output_dim, num_features_nonzero,
                 dropout=0.,
                 is_sparse_inputs=False,
                 activation=tf.nn.relu,
                 bias=False,
                 featureless=False,
                 num_supports=1, **kwargs):
        super(GraphConvolution, self).__init__(**kwargs)

        self.dropout = dropout
        self.activation = activation
        self.is_sparse_inputs = is_sparse_inputs
        self.featureless = featureless
        # Keep the on/off flag separate from the variable: testing the truth
        # value of a non-scalar variable raises.
        self.use_bias = bool(bias)
        self.bias = bias
        self.num_features_nonzero = num_features_nonzero

        # ``add_weight`` replaces the deprecated ``add_variable`` alias; the
        # default initializer is left unchanged.
        self.weights_ = []
        for i in range(num_supports):
            w = self.add_weight(name='weight' + str(i), shape=(input_dim, output_dim))
            self.weights_.append(w)
        if self.use_bias:
            self.bias = self.add_weight(name='bias', shape=(output_dim,))

    def call(self, inputs, training=None):
        """Run the layer on ``inputs = (x, supports)``.

        Dropout is applied unless ``training`` is explicitly False.

        Raises:
            ValueError: if more supports are passed than weights were created.
        """
        x, support_ = inputs

        # dropout
        if training is not False:
            if self.is_sparse_inputs:
                x = sparse_dropout(x, self.dropout, self.num_features_nonzero)
            else:
                x = tf.nn.dropout(x, self.dropout)

        if len(support_) > len(self.weights_):
            raise ValueError(
                'got {} supports but the layer was built with num_supports={}'.format(
                    len(support_), len(self.weights_)))

        # convolve
        supports = list()
        for i in range(len(support_)):
            if not self.featureless:  # if it has features x
                pre_sup = dot(x, self.weights_[i], sparse=self.is_sparse_inputs)
            else:
                pre_sup = self.weights_[i]

            support = dot(support_[i], pre_sup, sparse=True)
            supports.append(support)

        output = tf.add_n(supports)

        # bias
        if self.use_bias:
            output += self.bias

        return self.activation(output)

In [6]:
def masked_softmax_cross_entropy(preds, labels, mask):
    """
    Softmax cross-entropy loss with masking.

    The mask is cast to float32 and divided by its own mean before it weights
    the per-row losses, so the returned mean equals the average loss over the
    rows where the mask is set.
    """
    per_row = tf.nn.softmax_cross_entropy_with_logits(logits=preds, labels=labels)
    weights = tf.cast(mask, dtype=tf.float32)
    weights = weights / tf.reduce_mean(weights)
    return tf.reduce_mean(per_row * weights)


def masked_accuracy(preds, labels, mask):
    """
    Accuracy with masking.

    A row counts as correct when the argmax of ``preds`` equals the argmax of
    ``labels``; rows are weighted by the mask divided by its own mean.
    """
    hits = tf.equal(tf.argmax(preds, 1), tf.argmax(labels, 1))
    hits = tf.cast(hits, tf.float32)
    weights = tf.cast(mask, dtype=tf.float32)
    weights = weights / tf.reduce_mean(weights)
    return tf.reduce_mean(hits * weights)

In [7]:
class GCN(keras.Model):
    """Two-layer graph convolutional network.

    The hidden width, dropout rate and weight-decay coefficient come from the
    notebook-level ``hidden1``, ``dropout`` and ``weight_decay`` settings
    (the previous ``args`` namespace is not defined in this notebook).

    Args:
        input_dim: number of input features per node.
        output_dim: number of output classes.
        num_features_nonzero: noise shape forwarded to the layers for sparse dropout.
    """

    def __init__(self, input_dim, output_dim, num_features_nonzero, **kwargs):
        super(GCN, self).__init__(**kwargs)

        self.input_dim = input_dim # 1433
        self.output_dim = output_dim

        print('input dim:', input_dim)
        print('output dim:', output_dim)
        print('num_features_nonzero:', num_features_nonzero)

        self.layers_ = []
        # First layer consumes the sparse feature matrix.
        self.layers_.append(GraphConvolution(input_dim=self.input_dim, # 1433
                                            output_dim=hidden1, # 16
                                            num_features_nonzero=num_features_nonzero,
                                            activation=tf.nn.relu,
                                            dropout=dropout,
                                            is_sparse_inputs=True))

        # Second layer emits raw logits (identity activation).
        self.layers_.append(GraphConvolution(input_dim=hidden1, # 16
                                            output_dim=self.output_dim, # 7
                                            num_features_nonzero=num_features_nonzero,
                                            activation=lambda x: x,
                                            dropout=dropout))

        for p in self.trainable_variables:
            print(p.name, p.shape)

    def call(self, inputs, training=None):
        """
        Run the network and compute the masked loss and accuracy.

        :param inputs: tuple ``(x, label, mask, support)``
        :param training: forwarded to every layer
        :return: ``(loss, acc)``; the loss is the masked cross-entropy plus an
            L2 penalty on the first layer's variables
        """
        x, label, mask, support = inputs

        # Iterate the layers this class created rather than Keras' ``self.layers``.
        output = x
        for layer in self.layers_:
            output = layer((output, support), training)

        # Weight decay loss (first layer only)
        loss = tf.zeros([])
        for var in self.layers_[0].trainable_variables:
            loss += weight_decay * tf.nn.l2_loss(var)

        # Cross entropy error
        loss += masked_softmax_cross_entropy(output, label, mask)

        acc = masked_accuracy(output, label, mask)

        return loss, acc

    def predict(self):
        # NOTE(review): this shadows ``keras.Model.predict`` and reads
        # ``self.outputs``, which nothing in this class assigns -- confirm the
        # intended behaviour before relying on it.
        return tf.nn.softmax(self.outputs)

In [8]:
def parse_index_file(filename):
    """
    Parse index file.

    Reads one integer per line from ``filename`` and returns them as a list
    in file order. Blank lines are skipped, and the file is closed before
    returning.
    """
    with open(filename) as handle:
        return [int(line.strip()) for line in handle if line.strip()]


def sample_mask(idx, l):
    """
    Create mask.

    Returns a boolean array of length ``l`` that is True at the positions in
    ``idx`` and False elsewhere. Uses the builtin ``bool`` dtype because the
    ``np.bool`` alias has been removed from NumPy.
    """
    mask = np.zeros(l, dtype=bool)
    mask[idx] = True
    return mask


def load_data(dataset_str):
    """Load the pickled ``data/ind.<dataset_str>.*`` files and build the splits.

    Returns ``(adj, features, y_train, y_val, y_test, train_mask, val_mask,
    test_mask)``. Each ``y_*`` array has the shape of the full label matrix
    and is zero outside the rows selected by its mask.
    """
    is_py3 = sys.version_info > (3, 0)
    loaded = []
    for suffix in ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']:
        with open("data/ind.{}.{}".format(dataset_str, suffix), 'rb') as f:
            # NOTE(review): unpickling runs arbitrary code; only use trusted files.
            loaded.append(pkl.load(f, encoding='latin1') if is_py3 else pkl.load(f))
    x, y, tx, ty, allx, ally, graph = loaded

    test_idx_reorder = parse_index_file("data/ind.{}.test.index".format(dataset_str))
    test_idx_range = np.sort(test_idx_reorder)

    if dataset_str == 'citeseer':
        # Fix citeseer dataset (there are some isolated nodes in the graph)
        # Find isolated nodes, add them as zero-vecs into the right position
        full_len = len(range(min(test_idx_reorder), max(test_idx_reorder)+1))
        offsets = test_idx_range-min(test_idx_range)
        tx_padded = sp.lil_matrix((full_len, x.shape[1]))
        tx_padded[offsets, :] = tx
        ty_padded = np.zeros((full_len, y.shape[1]))
        ty_padded[offsets, :] = ty
        tx, ty = tx_padded, ty_padded

    # Stack training and test rows, then move the test rows to the positions
    # listed in the index file.
    features = sp.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))

    labels = np.vstack((ally, ty))
    labels[test_idx_reorder, :] = labels[test_idx_range, :]

    n_rows = labels.shape[0]
    n_labeled = len(y)
    train_mask = sample_mask(range(n_labeled), n_rows)
    val_mask = sample_mask(range(n_labeled, n_labeled+500), n_rows)
    test_mask = sample_mask(test_idx_range.tolist(), n_rows)

    def _labels_under(mask):
        # Copy of `labels` that is zero outside the masked rows.
        selected = np.zeros(labels.shape)
        selected[mask, :] = labels[mask, :]
        return selected

    y_train = _labels_under(train_mask)
    y_val = _labels_under(val_mask)
    y_test = _labels_under(test_mask)

    return adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask


def sparse_to_tuple(sparse_mx):
    """
    Convert sparse matrix to tuple representation.

    A single matrix becomes ``(coords, values, shape)``, where ``coords`` is
    an (nnz, 2) array of (row, col) pairs. A list of matrices is converted
    element by element *in place*, and the same list object is returned.
    """
    def _as_triplet(matrix):
        coo = matrix if sp.isspmatrix_coo(matrix) else matrix.tocoo()
        return np.vstack((coo.row, coo.col)).transpose(), coo.data, coo.shape

    if not isinstance(sparse_mx, list):
        return _as_triplet(sparse_mx)

    for position, matrix in enumerate(sparse_mx):
        sparse_mx[position] = _as_triplet(matrix)
    return sparse_mx


def preprocess_features(features):
    """
    Row-normalize feature matrix and convert to tuple representation.

    Each row is divided by its sum; rows that sum to zero are left as zeros.
    Returns the ``(coords, values, shape)`` triple from ``sparse_to_tuple``.
    """
    rowsum = np.array(features.sum(1))  # row sums, shape [n_rows, 1]
    if not np.issubdtype(rowsum.dtype, np.floating):
        # NumPy refuses to raise integer arrays to a negative integer power.
        rowsum = rowsum.astype(np.float64)
    with np.errstate(divide='ignore'):
        # 1/0 -> inf is expected for empty rows and zeroed on the next line.
        r_inv = np.power(rowsum, -1).flatten()  # 1/rowsum, shape [n_rows]
    r_inv[np.isinf(r_inv)] = 0.
    r_mat_inv = sp.diags(r_inv)  # sparse diagonal matrix, [n_rows, n_rows]
    features = r_mat_inv.dot(features)  # D^-1 [n_rows, n_rows] @ X [n_rows, n_features]
    return sparse_to_tuple(features)


def normalize_adj(adj):
    """Symmetrically normalize adjacency matrix.

    Computes ``(A D^-0.5)^T D^-0.5`` with ``D`` the diagonal matrix of row
    sums, which equals ``D^-0.5 A D^-0.5`` when ``A`` is symmetric. Rows that
    sum to zero get a scaling factor of 0. The result is a COO matrix.
    """
    coo = sp.coo_matrix(adj)
    degree = np.array(coo.sum(1))  # D
    inv_sqrt_degree = np.power(degree, -0.5).flatten()  # D^-0.5
    inv_sqrt_degree[np.isinf(inv_sqrt_degree)] = 0.
    scaling = sp.diags(inv_sqrt_degree)
    right_scaled = coo.dot(scaling)  # A D^-0.5
    return right_scaled.transpose().dot(scaling).tocoo()


def preprocess_adj(adj):
    """Add self-loops to ``adj``, normalize it with ``normalize_adj`` and return the tuple form."""
    with_self_loops = adj + sp.eye(adj.shape[0])
    return sparse_to_tuple(normalize_adj(with_self_loops))





def chebyshev_polynomials(adj, k):
    """
    Calculate Chebyshev polynomials up to order k. Return a list of sparse matrices (tuple representation).

    The list always starts with T_0 = I and T_1 = the rescaled Laplacian
    ``(2 / lambda_max) * L - I``; higher orders follow
    ``T_i = 2 * L_scaled @ T_(i-1) - T_(i-2)``.
    """
    print("Calculating Chebyshev polynomials up to order {}...".format(k))

    n_nodes = adj.shape[0]
    laplacian = sp.eye(n_nodes) - normalize_adj(adj)
    largest_eigval, _ = eigsh(laplacian, 1, which='LM')
    scaled_laplacian = (2. / largest_eigval[0]) * laplacian - sp.eye(n_nodes)

    polys = [sp.eye(n_nodes), scaled_laplacian]
    for _order in range(2, k+1):
        s_lap = sp.csr_matrix(scaled_laplacian, copy=True)
        polys.append(2 * s_lap.dot(polys[-1]) - polys[-2])

    return sparse_to_tuple(polys)

In [112]:
# Load the Citeseer splits via load_data.
# NOTE(review): `adj` is rebound by the Gangnam adjacency cell further down.
adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data('citeseer')

In [77]:
y_test.shape, features.shape, y_train.shape

((3327, 6), (3327, 3703), (3327, 6))

In [81]:
test_mask

array([False, False, False, ...,  True,  True,  True])

In [14]:
adj

<3327x3327 sparse matrix of type '<class 'numpy.intc'>'
	with 9228 stored elements in Compressed Sparse Row format>

In [15]:
features

<3327x3703 sparse matrix of type '<class 'numpy.float64'>'
	with 105165 stored elements in List of Lists format>

In [68]:
y_train.shape

(3327, 6)

In [72]:
# Locations of the Gangnam speed table and adjacency pickle, relative to the notebook.
mpath = '../Data/Gangnam/'
data_path = mpath + 'speed_gangnam.csv'
adj_path = mpath + 'adj_mx_gangnam.pkl'

In [73]:
# Load the speed table and drop the CSV's saved index column ('Unnamed: 0').
# pandas is imported in the notebook's top import cell.
data = pd.read_csv(data_path).drop('Unnamed: 0', axis=1)
data

Unnamed: 0,1210007700,1210008500,1210009500,1210010300,1210011300,1210013700,1220011900,1220016300,1220021100,1220025100,...,1210013800,1210013000,1210012200,1210012300,1210013100,1210013900,1210014900,1210015900,1210017100,1210018300
0,32.70,21.00,33.54,43.21,24.31,26.54,35.91,29.00,28.78,21.76,...,24.98,24.54,32.76,43.75,32.36,32.13,35.47,22.96,26.62,14.00
1,31.50,38.00,25.61,43.89,22.75,24.37,30.73,29.07,25.92,22.66,...,26.22,25.14,32.15,43.75,25.00,35.04,26.00,16.00,27.22,29.58
2,22.00,38.00,47.00,54.00,19.00,24.36,35.75,24.63,21.96,22.67,...,21.55,25.16,34.37,43.75,25.00,33.00,36.32,22.42,32.40,11.00
3,31.00,34.39,32.70,44.15,23.64,27.76,40.06,41.00,28.65,21.68,...,25.33,24.11,33.41,28.61,29.88,34.93,37.35,21.35,29.07,32.34
4,39.20,37.65,30.75,43.66,22.76,28.04,28.10,23.25,23.00,19.92,...,24.44,24.15,34.94,28.11,33.88,33.53,39.30,30.68,27.68,30.03
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2875,27.50,36.33,29.53,39.86,26.15,20.05,25.37,18.27,21.88,18.25,...,28.17,24.28,38.21,21.95,29.47,28.47,31.52,32.13,28.22,15.96
2876,25.51,37.62,30.48,41.88,26.52,23.00,31.94,16.56,24.29,19.14,...,28.65,24.49,32.52,27.44,31.76,33.00,44.68,24.89,33.04,11.00
2877,29.40,28.26,31.81,39.68,26.45,24.28,28.92,18.00,23.41,21.11,...,32.26,23.88,32.02,26.94,30.75,31.10,17.00,24.70,30.21,18.65
2878,24.00,28.21,29.02,41.93,25.49,25.38,25.57,17.61,23.63,21.70,...,27.08,27.32,34.00,26.27,27.32,29.02,37.90,27.58,30.23,15.41


In [74]:
# The notebook imports pickle under the alias `pkl`; the bare name `pickle` is undefined.
# NOTE(review): unpickling runs arbitrary code; only load adjacency files from a trusted source.
with open(adj_path, 'rb') as file:
    temp = pkl.load(file)
adj = temp[2]  # the third element of the pickled object is the adjacency matrix
adj.shape, adj

((506, 506),
 array([[1.        , 0.40327677, 0.        , ..., 0.        , 0.        ,
         0.        ],
        [0.40327677, 1.        , 0.91133416, ..., 0.        , 0.        ,
         0.        ],
        [0.        , 0.91133416, 1.        , ..., 0.        , 0.        ,
         0.        ],
        ...,
        [0.        , 0.        , 0.        , ..., 1.        , 0.38495174,
         0.        ],
        [0.        , 0.        , 0.        , ..., 0.38495174, 1.        ,
         0.        ],
        [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
         1.        ]], dtype=float32))

In [75]:
def preprocess_data(data, time_len, rate, seq_len, pre_len):
    """Split ``data`` along its first axis and cut both parts into sliding windows.

    The first ``int(time_len * rate)`` rows are the training part and the rows
    up to ``time_len`` are the test part. Every window contributes ``seq_len``
    input rows followed by ``pre_len`` target rows.

    Returns ``(trainX, trainY, testX, testY)`` as NumPy arrays.
    """
    split_at = int(time_len * rate)
    span = seq_len + pre_len

    def _make_windows(series):
        inputs, targets = [], []
        # NOTE(review): the window starting at len(series) - span is not
        # generated -- confirm whether skipping it is intended.
        for start in range(len(series) - span):
            chunk = series[start: start + span]
            inputs.append(chunk[:seq_len])
            targets.append(chunk[seq_len:span])
        return np.array(inputs), np.array(targets)

    trainX1, trainY1 = _make_windows(data[0:split_at])
    testX1, testY1 = _make_windows(data[split_at:time_len])
    return trainX1, trainY1, testX1, testY1

In [91]:
# Windowing configuration passed to preprocess_data.
time_len = data.shape[0]  # number of rows in `data`
rate = 0.8  # fraction of rows assigned to the training part
seq_len = 12  # rows per input window
pre_len = 3  # rows per target window
trainX, trainY, testX, testY = preprocess_data(data, time_len, rate, seq_len, pre_len)

In [92]:
trainX.shape

(2289, 12, 506)

In [94]:
# Manual train/test split sizes, computed independently of preprocess_data.
length = data.shape[0]
test_length = round(length * (1-rate))  # rows held out for testing
test_length

576

In [95]:
# Remaining rows form the training part.
train_length = length - test_length
train_length

2304

In [101]:
# Row-wise split of the speed table using the sizes computed above.
train_data = data[0:train_length]
test_data = data[train_length:]

In [104]:
train_data.values.shape

(2304, 506)

In [105]:
test_data.values.shape

(576, 506)

In [106]:
features.shape

(3327, 3703)

In [None]:
# Citeseer: nodes = 3327, sequence (time_len) = 3703  -- from features.shape == (3327, 3703)

In [None]:
# Gangnam speed data: nodes = 506, sequence (time_len) = 2880  -- data has 2880 rows and 506 columns

In [114]:
y_train.shape, y_test.shape, train_mask.shape, test_mask.shape

((3327, 6), (3327, 6), (3327,), (3327,))

In [82]:
# Inlined copy of load_data's body, kept so the intermediate values
# (test_idx_range, labels, ...) can be inspected in the cells below.
# `dataset_str` is defined here because no earlier cell sets it.
dataset_str = 'citeseer'
names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
objects = []
for i in range(len(names)):
    with open("data/ind.{}.{}".format(dataset_str, names[i]), 'rb') as f:
        # NOTE(review): unpickling runs arbitrary code; only use trusted files.
        if sys.version_info > (3, 0):
            objects.append(pkl.load(f, encoding='latin1'))
        else:
            objects.append(pkl.load(f))

x, y, tx, ty, allx, ally, graph = tuple(objects)
test_idx_reorder = parse_index_file("data/ind.{}.test.index".format(dataset_str))
test_idx_range = np.sort(test_idx_reorder)

if dataset_str == 'citeseer':
    # Fix citeseer dataset (there are some isolated nodes in the graph)
    # Find isolated nodes, add them as zero-vecs into the right position
    test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder)+1)
    tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
    tx_extended[test_idx_range-min(test_idx_range), :] = tx
    tx = tx_extended
    ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
    ty_extended[test_idx_range-min(test_idx_range), :] = ty
    ty = ty_extended

# Stack training and test rows, then move the test rows to the positions
# listed in the index file.
features = sp.vstack((allx, tx)).tolil()
features[test_idx_reorder, :] = features[test_idx_range, :]
adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))

labels = np.vstack((ally, ty))
labels[test_idx_reorder, :] = labels[test_idx_range, :]

idx_test = test_idx_range.tolist()
idx_train = range(len(y))
idx_val = range(len(y), len(y)+500)

train_mask = sample_mask(idx_train, labels.shape[0])
val_mask = sample_mask(idx_val, labels.shape[0])
test_mask = sample_mask(idx_test, labels.shape[0])

# Each y_* array is zero outside the rows selected by its mask.
y_train = np.zeros(labels.shape)
y_val = np.zeros(labels.shape)
y_test = np.zeros(labels.shape)
y_train[train_mask, :] = labels[train_mask, :]
y_val[val_mask, :] = labels[val_mask, :]
y_test[test_mask, :] = labels[test_mask, :]

In [85]:
test_idx_range

array([2312, 2313, 2314, 2315, 2316, 2317, 2318, 2319, 2320, 2321, 2322,
       2323, 2324, 2325, 2326, 2327, 2328, 2329, 2330, 2331, 2332, 2333,
       2334, 2335, 2336, 2337, 2338, 2339, 2340, 2341, 2342, 2343, 2344,
       2345, 2346, 2347, 2348, 2349, 2350, 2351, 2352, 2353, 2354, 2355,
       2356, 2357, 2358, 2359, 2360, 2361, 2362, 2363, 2364, 2365, 2366,
       2367, 2368, 2369, 2370, 2371, 2372, 2373, 2374, 2375, 2376, 2377,
       2378, 2379, 2380, 2381, 2382, 2383, 2384, 2385, 2386, 2387, 2388,
       2389, 2390, 2391, 2392, 2393, 2394, 2395, 2396, 2397, 2398, 2399,
       2400, 2401, 2402, 2403, 2404, 2405, 2406, 2408, 2409, 2410, 2411,
       2412, 2413, 2414, 2415, 2416, 2417, 2418, 2419, 2420, 2421, 2422,
       2423, 2424, 2425, 2426, 2427, 2428, 2429, 2430, 2431, 2432, 2433,
       2434, 2435, 2436, 2437, 2438, 2439, 2440, 2441, 2442, 2443, 2444,
       2445, 2446, 2447, 2448, 2449, 2450, 2451, 2452, 2453, 2454, 2455,
       2456, 2457, 2458, 2459, 2460, 2461, 2462, 24

In [90]:
y_test[2311:]

array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.],
       ...,
       [0., 0., 0., 1., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1.]])

In [107]:
# Reload the raw pickled pieces so their shapes can be inspected individually.
dataset_str = 'citeseer'
names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
objects = []
for name in names:
    with open("data/ind.{}.{}".format(dataset_str, name), 'rb') as f:
        # latin1 decoding is only passed on Python 3.
        load_kwargs = {'encoding': 'latin1'} if sys.version_info > (3, 0) else {}
        objects.append(pkl.load(f, **load_kwargs))

x, y, tx, ty, allx, ally, graph = objects

In [108]:
x.toarray().shape

(120, 3703)

In [109]:
x.toarray().shape

(120, 3703)

In [69]:
y.shape

(120, 6)

In [51]:
tx.toarray().shape

(1000, 3703)

In [52]:
ty.shape

(1000, 6)

In [53]:
allx.toarray().shape

(2312, 3703)

In [55]:
ally.shape

(2312, 6)

In [56]:
graph

defaultdict(list,
            {0: [628],
             1: [158, 2919, 2933, 1097, 486],
             2: [3285],
             3: [3219, 1431],
             4: [467],
             5: [648],
             6: [1501],
             7: [2137, 1833],
             8: [178, 1033],
             9: [1007],
             10: [2622, 1670],
             11: [2034],
             12: [2487,
              1622,
              794,
              966,
              113,
              557,
              677,
              1357,
              1097,
              2474,
              839,
              1760],
             13: [1894, 2711, 1493, 1871, 1167],
             14: [1248, 146],
             15: [2521],
             16: [2415,
              314,
              2416,
              616,
              1013,
              2417,
              2418,
              2166,
              1422,
              2597,
              1567,
              622,
              1605,
              1708],
             17: [879, 21

In [66]:
nx.adjacency_matrix(nx.from_dict_of_lists(graph)).toarray().shape

(3327, 3327)

In [76]:
# NOTE(review): `test_label` is not defined anywhere in this notebook, so this
# cell raises NameError -- define it first or remove the cell.
test_label

NameError: name 'test_label' is not defined