<a href="https://colab.research.google.com/github/JHyunjun/SNU/blob/main/Graph%20Convolutional%20Network.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [57]:
import numpy as np
import tensorflow as tf
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from tqdm import tqdm

from tensorflow.keras import Sequential
from tensorflow.keras import layers as ly
from tensorflow.keras import Model

In [58]:
cora_content = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/snu/w4/실습자료/7-29 gcn-cora/cora/cora.content', sep='\t', header=None)
cora_content.head()

#cora/cora.cites : <ID of cited paper> <ID of citing paper>
#cora/cora.content : <paper id> <word attributes> + <class label>

#2708개의 publish된 논문들을 5429개의 link로 연결되어있으며 1433개의 unique words dictionary로 표현되어있다.
#이 2708개의 논문들을 7개의 Publications중에 어디에 포함되는지에 대해 정리된 데이터
#각 노드의 연결(edge)는 0/1로 되어있으며, 이것은 그단어가 있다/없다를 의미한다.

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1425,1426,1427,1428,1429,1430,1431,1432,1433,1434
0,31336,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,Neural_Networks
1,1061127,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,Rule_Learning
2,1106406,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Reinforcement_Learning
3,13195,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Reinforcement_Learning
4,37879,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Probabilistic_Methods


In [59]:
ids = cora_content[0].values # paper(node) ids
vecs = cora_content[cora_content.columns[1:1434]].values # node features
labels = cora_content[1434].values # node label

print(np.unique(labels))

['Case_Based' 'Genetic_Algorithms' 'Neural_Networks'
 'Probabilistic_Methods' 'Reinforcement_Learning' 'Rule_Learning' 'Theory']


In [60]:
print(labels)
print(labels.shape)

['Neural_Networks' 'Rule_Learning' 'Reinforcement_Learning' ...
 'Genetic_Algorithms' 'Case_Based' 'Neural_Networks']
(2708,)


In [61]:
# one hot encode node labels
# get_dummies 자체가 모든 instance들에 대해 one-hot encoding을 해준다.
labels_onehot = pd.get_dummies(labels)

In [62]:
print(labels_onehot.shape)
print(labels_onehot)

(2708, 7)
      Case_Based  Genetic_Algorithms  Neural_Networks  Probabilistic_Methods  \
0              0                   0                1                      0   
1              0                   0                0                      0   
2              0                   0                0                      0   
3              0                   0                0                      0   
4              0                   0                0                      1   
...          ...                 ...              ...                    ...   
2703           0                   1                0                      0   
2704           0                   1                0                      0   
2705           0                   1                0                      0   
2706           1                   0                0                      0   
2707           0                   0                1                      0   

      Reinforcement_Learning 

In [63]:
inds = np.arange(ids.shape[0]) # use index at identifying each node
x = vecs
print("x : ",x.shape)
y = labels_onehot
print("y : ", y.shape)
print(ids.shape, x.shape, y.shape)

x :  (2708, 1433)
y :  (2708, 7)
(2708,) (2708, 1433) (2708, 7)


In [64]:
num_classes = 7
num_per_train = 10
num_per_test = 100

x_train, x_test, y_train, y_test, idx_train, idx_test = train_test_split(x, y, inds, stratify=y,
                                                    train_size=num_classes*num_per_train,
                                                    test_size=num_classes*num_per_test,
                                                    random_state=42)

x_train, x_valid, y_train, y_valid, idx_train, idx_valid = train_test_split(x_train, y_train, idx_train,
                                                      stratify=y_train,
                                                      train_size=int(num_classes*num_per_train*0.8),
                                                      test_size=int(num_classes*num_per_train*0.2),
                                                      random_state=42)

print(idx_train.shape, x_train.shape, y_train.shape) # 10 examples per class
print(idx_valid.shape, x_valid.shape, y_valid.shape) # 10 examples per class
print(idx_test.shape, x_test.shape, y_test.shape) # 100 examples per class

(56,) (56, 1433) (56, 7)
(14,) (14, 1433) (14, 7)
(700,) (700, 1433) (700, 7)


## model

In [65]:
dnn = Sequential([
    ly.Dense(units=1024, activation='relu', kernel_initializer='he_normal'),
    ly.Dense(units=256, activation='relu', kernel_initializer='he_normal'),
    ly.Dense(units=256, activation='relu', kernel_initializer='he_normal'),
    ly.Dense(units=256*4, activation='relu', kernel_initializer='he_normal'),
    ly.Dense(units=256, activation='relu', kernel_initializer='he_normal'),
    ly.Dense(units=256, activation='relu', kernel_initializer='he_normal'),
    ly.Dense(units=num_classes, kernel_initializer='he_normal')    
])

In [66]:
loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
dnn.compile(optimizer='adam', loss=loss_fn, metrics=['acc'])
dnn.fit(x = x_train, y = y_train, batch_size=8, epochs=100, verbose=2, 
        validation_data=(x_valid, y_valid))
dnn.summary()

Epoch 1/100
7/7 - 1s - loss: 1.9949 - acc: 0.2321 - val_loss: 1.8227 - val_acc: 0.2857 - 1s/epoch - 147ms/step
Epoch 2/100
7/7 - 0s - loss: 1.5054 - acc: 0.4107 - val_loss: 1.7626 - val_acc: 0.2857 - 130ms/epoch - 19ms/step
Epoch 3/100
7/7 - 0s - loss: 0.8173 - acc: 0.7500 - val_loss: 1.6780 - val_acc: 0.3571 - 139ms/epoch - 20ms/step
Epoch 4/100
7/7 - 0s - loss: 0.2576 - acc: 0.9286 - val_loss: 1.9482 - val_acc: 0.4286 - 157ms/epoch - 22ms/step
Epoch 5/100
7/7 - 0s - loss: 0.0323 - acc: 1.0000 - val_loss: 2.4461 - val_acc: 0.3571 - 143ms/epoch - 20ms/step
Epoch 6/100
7/7 - 0s - loss: 0.0078 - acc: 1.0000 - val_loss: 2.7631 - val_acc: 0.3571 - 134ms/epoch - 19ms/step
Epoch 7/100
7/7 - 0s - loss: 3.0945e-04 - acc: 1.0000 - val_loss: 3.2336 - val_acc: 0.3571 - 164ms/epoch - 23ms/step
Epoch 8/100
7/7 - 0s - loss: 1.2188e-04 - acc: 1.0000 - val_loss: 3.4931 - val_acc: 0.4286 - 140ms/epoch - 20ms/step
Epoch 9/100
7/7 - 0s - loss: 7.2277e-05 - acc: 1.0000 - val_loss: 3.5391 - val_acc: 0.4286

In [67]:
train_loss, train_acc = dnn.evaluate(x_train, y_train, verbose=0)
valid_loss, valid_acc = dnn.evaluate(x_valid, y_valid, verbose=0)
test_loss, test_acc = dnn.evaluate(x_test, y_test, verbose=0)

print("Train accuracy: ", train_acc)
print("Valid accuracy: ", valid_acc)
print("Test accuracy: ", test_acc)

Train accuracy:  1.0
Valid accuracy:  0.5714285969734192
Test accuracy:  0.404285728931427


In [68]:
# make adj matrix from citation information
def get_adj_matrix(ids):
    cora_cites = np.loadtxt('/content/drive/MyDrive/Colab Notebooks/snu/w4/실습자료/7-29 gcn-cora/cora/cora.cites', dtype = np.int32)
    N = ids.shape[0]
    adj_matrix = np.zeros(shape = (N, N), dtype = np.int32)

    for i in range(cora_cites.shape[0]) : 
      node1, node2 = cora_cites[i]
      idx1 = np.where(ids == node1)[0]
      idx2 = np.where(ids ==node2)[0]

      adj_matrix[idx1, idx2] = 1
      adj_matrix[idx2, idx1] = 1

      return adj_matrix

# make DAD(normalization) matrix
def get_norm_matrix(adj_matrix):
      a_tilda = adj_matrix + np.eye(adj_matrix.shape[0])
      d_tilda = np.diag(1 / np.sqrt(np.sum(a_tilda, axis = 1)))
      return np.matmul(np.matmul(d_tilda, a_tilda), d_tilda)


In [72]:
class GCN(Model):
    def __init__(self, A, input_dim=1433, hid_dim=64, num_classes=7, num_nodes=2708):
        super(GCN, self).__init__()
        self.A = tf.cast(A, dtype = 'float32')
        self.hid_dim = hid_dim
        w_init = tf.initializers.he_normal()

        self.W1 = self.add_weight(name = 'W1', shape = (input_dim, self.hid_dim),
                                  initializer = w_init, trainable = True)
        self.W2 = self.add_weight(name = 'W2', shape = (input_dim, self.hid_dim),
                                  initializer = w_init, trainable = True)        
        self.var_list = self.weights

    def call(self, x):
        x = tf.cast(x, "float32")
        L1 = tf.matmul(tf.matmul(self.A, x), self.W1)
        L1 = tf.nn.tanh(L1)

        L2 = tf.matmul(tf.matmul(self.A, L1), self.W2)
        return L2

    def loss_fn(self,logits, labels, indices):
        _labels = tf.gather_nd(labels, indices)
        _logits = tf.gather_nd(logits, indices)
        loss = tf.nn.softmax_cross_entropy_with_logits(labels = _labels, logits = _logits)

        return tf.reduce_mean(loss)
        
    def evaluate(self, x, labels, indices):
        logits = self.call(x)
        loss = self.loss_fn(logits, labels, indices)
        _logits = tf.gather_nd(logits, indices)
        _labels = tf.gather_nd(labels, indices)

        pred = tf.argmax(_logits, axis = 1)
        ans = tf.argmax(_labels, axis = 1)
        correct = tf.equal(pred, ans)
        acc = tf.reduce_mean(tf.cast(correct, tf.float32))
    
    def train(self, x, labels, idx_train, idx_val, optimizer, max_epochs=20):
        for epoch in range(1, max_epochs + 1) :
          with tf.GradientTape() as tape : 
            logits = self.call(x)
            train_loss = self.loss_fn(logits, labels, idx_train)

          grad_list = tape.gradient(train_loss, self.var_list)
          grads_and_vars = zip(grad_list, self.var_list)
          optimizer.apply_gradients(grads_and_vars)
          
          train_loss, train_acc = self.evaluate(x, labels, idx_train)
          valid_loss, valid_acc = self.evaluate(x, labels, idx_val)
          print(f"Epoch {epoch:3d}: {train_loss:.4f}, {train_acc*100:.2f},"
                ,f"{valid_loss:.4f}, {valid_acc*100:.2f}")
 

In [73]:
num_nodes, input_dim = x.shape[0], x.shape[1]

adj_matrix = get_adj_matrix(ids)
norm_matrix = get_norm_matrix(adj_matrix)

gcn = GCN(A = norm_matrix, input_dim = input_dim, hid_dim = 64, 
          num_classes = num_classes, num_nodes = num_nodes)
optimizer = tf.keras.optimizers.Adam(learning_rate = 1e-2)
_idx_train = np.expand_dims(idx_train, axis = 1)
_idx_val = np.expand_dims(idx_valid, axis = 1)

gcn.train(x = x, labels = y, idx_train = _idx_train, idx_val = _idx_val, optimizer = optimizer, max_epochs = 20)


InvalidArgumentError: ignored

In [None]:
test_loss , test_acc = gcn.evaluate(x, y, np.expand_dims(idx_test, axis = 1))
print("Test Accuracy : ", test_acc)

In [None]:
gcn_loss, gcn_acc = gcn.evaluate(x, y, np.expand_dims(idx_test, axis = 1))
dnn_loss, dnn_acc = dnn.evaluate(x_test, y_test, verbose = 0)

print(f"[GCN] test loss : {gcn_loss:.4f}, test acc : {gcn_acc*100:.2f}")
print(f"[DNN] test loss : {dnn_loss:.4f}, test acc : {dnn_acc*100:.2f}")