In [1]:
import numpy as np
import scipy.sparse as sp
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [2]:
# Location of the dataset files and the dataset's base name; the loaders
# below build "<path><dataset>.content" and "<path><dataset>.cites" from them.
path = "./data/cora/"
dataset = "cora"

In [3]:
def encode_onehot(labels):
    """One-hot encode a sequence of class labels.

    Args:
        labels: iterable of hashable, mutually comparable labels
            (e.g. a 1-D array of strings).

    Returns:
        An int32 array with one row per input label and one column per
        distinct class. Columns follow the sorted order of the distinct
        labels, so the encoding is the same on every run (iterating a bare
        ``set`` gives an arbitrary order).
    """
    classes = sorted(set(labels))
    # Build the identity once; row i is the one-hot vector of class i.
    identity = np.identity(len(classes), dtype=np.int32)
    class_index = {c: i for i, c in enumerate(classes)}
    row_indices = np.array([class_index[c] for c in labels], dtype=np.intp)
    labels_onehot = identity[row_indices]
    return labels_onehot

In [4]:
# Read the node table as strings. Later cells use column 0 as the node id,
# the last column as the class label and everything in between as features.
content_path = "{}{}.content".format(path, dataset)
idx_features_labels = np.genfromtxt(content_path, dtype=np.dtype(str))
# One-hot encode the class label stored in the last column.
node_classes = idx_features_labels[:, -1]
labels = encode_onehot(node_classes)

In [5]:
# Node ids as int32, taken from the first column of the node table.
idx = idx_features_labels[:, 0].astype(np.int32)
# Map each node id to its row position in the node table.
idx_map = dict(zip(idx, range(len(idx))))

In [6]:
# Edge list expressed in raw node ids, one edge per row.
cites_path = "{}{}.cites".format(path, dataset)
edges_unordered = np.genfromtxt(cites_path, dtype=np.int32)
# Translate every node id into its row position via idx_map, then restore
# the original (n_edges, 2) layout.
edge_positions = [idx_map.get(node_id) for node_id in edges_unordered.flatten()]
edges = np.array(edge_positions, dtype=np.int32).reshape(edges_unordered.shape)
# Row 0 holds the first endpoint of every edge, row 1 the second.
edges_transpose = edges.T

In [7]:
# Dense adjacency matrix: A[s, t] = 1 for every row (s, t) of `edges`.
# The edges are written as given, so A is not symmetrised here.
n_nodes = idx.shape[0]
A = np.zeros(shape=(n_nodes, n_nodes))
A[edges[:, 0], edges[:, 1]] = 1  # one fancy-indexed write instead of a Python loop
# Feature columns are read from file as strings; convert them to float32 once
# here rather than relying on an implicit cast when the tensor is built.
X = idx_features_labels[:, 1:-1].astype(np.float32)
Y = labels

In [8]:
# Normalised adjacency A_t = D^{-1/2} (A + I) D^{-1/2}.
n_nodes = idx.shape[0]
I = np.eye(n_nodes)
# deg[i] = number of edges whose first endpoint is node i, counted in one
# pass (the per-edge np.sum it replaces was quadratic in the edge count).
deg = np.bincount(edges[:, 0], minlength=n_nodes).astype(np.float64)
# Nodes that never appear as a first endpoint keep degree 1, matching the
# np.eye initialisation this replaces, so D stays invertible.
deg[deg == 0] = 1.0
D = np.diag(deg)
AI = A + I  # add self-loops
# D is diagonal, so its inverse and inverse square root are element-wise.
D_inv = np.diag(1.0 / deg)
inv_sqrt_deg = 1.0 / np.sqrt(deg)
D_half = np.diag(inv_sqrt_deg)  # note: despite the name this is D^{-1/2}
# Row/column scaling is equivalent to D_half . AI . D_half without two dense
# N x N matrix products.
A_t = (AI * inv_sqrt_deg[:, np.newaxis]) * inv_sqrt_deg[np.newaxis, :]

In [9]:
# Deg[i] is the number of edges terminating on vertex i, i.e. np.sum(edges_transpose[0] == i)
# D is diag(Deg[i]) $\forall$ i
# AI = A + I, i.e. every node i is also connected to itself

In [10]:
# X is N x D, Y is N x K, A_t is N x N .. good to go; we need the degree matrix Deg

In [11]:
# Sanity check of the propagation-matrix and feature-matrix shapes.
# Formatting into one string prints the same text under Python 2 and 3.
print("{} {}".format(A_t.shape, X.shape))

(2708, 2708) (2708, 1433)


In [12]:
# Output width of the network. It must equal the number of one-hot label
# columns, because the loss takes a dot product of the flattened labels with
# the flattened output; derive it instead of hard-coding 7.
F = labels.shape[1]
H = 32  # the dimension of the hidden layer
# Number of input features per node.
# NOTE: this rebinds D, which an earlier cell uses for the degree matrix.
D = X.shape[1]
# W0 would be D X H, W1 would be H X F

In [13]:
# Trainable weights: w0 is D x H (input -> hidden), w1 is H x F (hidden -> output).
# Operation-level seeds make the uniform [0, 1) initial values repeatable
# across runs; the two seeds differ so the matrices are not correlated.
w0 = tf.Variable(tf.random_uniform([D, H], seed=0), trainable=True)
w1 = tf.Variable(tf.random_uniform([H, F], seed=1), trainable=True)

# Constant float32 tensors built from the NumPy arrays prepared above.
A_t_tf = tf.convert_to_tensor(A_t, dtype=tf.float32)
labels_tf = tf.convert_to_tensor(labels, dtype=tf.float32)
AI_tf = tf.convert_to_tensor(AI, dtype=tf.float32)
X_tf = tf.convert_to_tensor(X, dtype=tf.float32)

In [14]:
# First layer: z1 = A_t . (X . w0), a1 = relu(z1).
# X is multiplied by w0 first so the N x N matrix A_t is applied to an N x H
# matrix. Grouping it as (A_t . X) . w0 gives the same result mathematically
# but forms an N x D intermediate through a much larger product on every
# training step.
z1 = tf.matmul(A_t_tf, tf.matmul(X_tf, w0))
a1 = tf.nn.relu(z1)

In [15]:
# Second layer: z2 = A_t . (a1 . w1), followed by a row-wise softmax.
# As in the first layer, the small product a1 . w1 is formed first so A_t is
# applied to the narrower N x F matrix.
z2 = tf.matmul(A_t_tf, tf.matmul(a1, w1))
a2 = tf.nn.softmax(z2)


# Final assignment to Z; it has to be N x F.
Z = a2

In [16]:
# Cross-entropy summed over every node and class:
#   loss = -sum(labels * log(Z + eps))
# eps keeps the logarithm finite when a predicted probability is exactly 0.
LOG_EPS = 0.000001
lnZ = tf.log(Z + LOG_EPS)
# Flatten both operands to vectors so the loss is a single dot product.
lnZf = tf.cast(tf.reshape(lnZ, [-1]), tf.float32)
labelsf = tf.cast(tf.reshape(labels, [-1]), tf.float32)
loss = tf.negative(tf.tensordot(labelsf, lnZf, 1))

In [17]:
# Training op: one Adam step on `loss` each time `opt` is run.
LEARNING_RATE = 0.01
optimizer = tf.train.AdamOptimizer(LEARNING_RATE)
opt = optimizer.minimize(loss)

In [18]:
# Train the network and keep the outputs of the final step for later cells:
#   out  - class probabilities Z, one row per node (N x F)
#   out2 - hidden pre-activations z1, one row per node (N x H)
#   lo   - loss value after the most recent update
N_STEPS = 200
out = np.zeros(shape=(A.shape[0], F))
# This second buffer was previously assigned to `out` by mistake.
out2 = np.zeros(shape=(A.shape[0], H))
lo = 0
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(N_STEPS):
        # Apply one optimiser step, then evaluate everything in a single
        # forward pass so the saved outputs and the printed loss all belong
        # to the same weights.
        sess.run(opt)
        lo, out, out2 = sess.run([loss, Z, z1])
        print(i, lo)

(0, 23544.213)
(1, 21941.018)
(2, 20515.045)
(3, 14147.339)
(4, 16242.314)
(5, 16512.807)
(6, 13599.493)
(7, 10936.191)
(8, 10576.314)
(9, 11971.71)
(10, 8824.292)
(11, 7213.5493)
(12, 7252.3413)
(13, 7494.5376)
(14, 7392.6294)
(15, 7096.497)
(16, 6280.9595)
(17, 5087.1143)
(18, 4368.6123)
(19, 4531.8457)
(20, 4783.696)
(21, 4544.169)
(22, 4007.4885)
(23, 3356.8628)
(24, 2679.0994)
(25, 2183.6301)
(26, 1984.7971)
(27, 2061.8635)
(28, 2315.9766)
(29, 2341.6516)
(30, 1996.4263)
(31, 1661.1329)
(32, 1484.6058)
(33, 1427.2498)
(34, 1433.724)
(35, 1443.3417)
(36, 1415.364)
(37, 1340.6346)
(38, 1244.6692)
(39, 1165.4774)
(40, 1113.3202)
(41, 1077.0139)
(42, 1038.3207)
(43, 993.75305)
(44, 952.18634)
(45, 919.94507)
(46, 895.30493)
(47, 872.8122)
(48, 851.2612)
(49, 830.0018)
(50, 805.80566)
(51, 776.7281)
(52, 744.08716)
(53, 712.1535)
(54, 685.3738)
(55, 665.29315)
(56, 650.8974)
(57, 639.98145)
(58, 629.7059)
(59, 617.1365)
(60, 600.72314)
(61, 581.46515)
(62, 562.20184)
(63, 545.9562)
(64

In [None]:
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import pandas as pd
import numpy as np
import matplotlib.cm as cm
from collections import defaultdict
from itertools import count
from functools import partial

# Project the hidden-layer outputs saved by the training cell to 2-D.
# A separate name is used so the feature matrix X is not overwritten.
hidden_repr = out2

print("Embedding...")
# A fixed random_state makes the t-SNE layout repeatable between runs.
X_embedded = TSNE(n_components=2, random_state=0).fit_transform(hidden_repr)
print("Done!")

# Colour palette; label numbers start at 1, so entry 0 is never used.
colors = cm.rainbow(np.linspace(0, 1, 10))

# Re-read the node table for the class names, using the same path/dataset
# settings as the loader cell; the class label is the last column.
node_data = pd.read_table("{}{}.content".format(path, dataset), header=None)
ids = node_data.iloc[:, -1].astype("category")

print("Plotting...")
# Plotting dataset
# Each distinct label is numbered 1, 2, 3, ... in order of first appearance.
label_to_number = defaultdict(partial(next, count(1)))
point_colors = colors[[label_to_number[label] for label in ids]]

plt.scatter(X_embedded[:, 0], X_embedded[:, 1], color=point_colors, s=2)
plt.savefig("GCN_hidden")
plt.show()


Embedding...


In [85]:
# Spot-check one node: its predicted class probabilities next to its
# one-hot label.
i = 40
sample_prediction, sample_label = out[i], labels[i]
print(sample_prediction, sample_label)

(array([5.4477439e-08, 6.0824561e-11, 3.1893135e-03, 9.9681026e-01,
       3.6469464e-07, 7.1694589e-10, 7.9001050e-10], dtype=float32), array([1, 0, 0, 0, 0, 0, 0], dtype=int32))


In [19]:
# Count the nodes whose predicted class (arg-max of the network output)
# differs from the true class (arg-max of the one-hot label).
# The count covers every node, and the loss was computed over every node,
# so this is an error on the training data.
# Local names are distinct from a1/a2 so the layer tensors defined earlier
# are not overwritten, and no node count is hard-coded.
predicted_classes = np.argmax(out, axis=1)
true_classes = np.argmax(labels, axis=1)
a = int(np.sum(predicted_classes != true_classes))
print(a)

29
