# Code the paper

## Overview
- Inspired by the actual implementation
- Implemention using ``Pytorch``
- Application on the ``Cora`` dataset
    - Nodes $\rightarrow$ papers
    - edges $\rightarrow$ citations
    - Node features $\rightarrow$ bag of the most frequent words
    - Labels $\rightarrow$ class of the paper (e.g. Neural_Networks, Probabilistic_Methods, ...)
 

## Architecture

- Graph Convolution Network with two layers:

$$ Z = logSoftmax(\hat{A} \times Relu(\hat{A} \times X \times W^{(0)}) \times W^{(1)}) $$


## Note

- In the implementation, we used the same parameters like the paper
    - Number of hidden units
    - Optimizer and learning rate
    - Number of epochs

In [1]:
# change working directory
import os

os.chdir("../")
os.getcwd()

'c:\\Users\\HP\\Desktop\\DS_DL\\pygcn'

In [2]:
# load data
from pygcn.utils import load_data

adj, features, labels, idx_train, idx_val, idx_test = load_data(
    path="./data/cora/"
)

Loading cora dataset...


In [3]:
# setups
from pygcn.models import GCN
import torch.optim as optim


# define model
model = GCN(
    nfeat=features.shape[1],
    # paper uses 32 hidden units
    nhid=32,
    nclass=labels.max().item() + 1,
    # no dropout in paper
    dropout=0
)

# setup optimizer
optimizer = optim.Adam(
    model.parameters(),
    lr=0.01,
)

In [4]:
# data structure tracker
# to save meta data during train
{ 
    "epoch": {
        "loss_train":"",
        "loss_val": "",
        "acc_train": "",
        "acc_val": "",
    },
    ###
}

tracker = {}

In [5]:
import torch.nn.functional as F
from pygcn.utils import accuracy


def train(epoch):
    # set train phase
    model.train()
    optimizer.zero_grad()

    # forward
    output = model(features, adj)
    # train
    loss_train = F.nll_loss(output[idx_train], labels[idx_train])
    acc_train = accuracy(output[idx_train], labels[idx_train])
    # validation
    loss_val = F.nll_loss(output[idx_val], labels[idx_val])
    acc_val = accuracy(output[idx_val], labels[idx_val])

    # backward
    loss_train.backward()
    optimizer.step()

    # save meta data
    tracker[epoch] = {
        'loss_train': loss_train.item(),
        'acc_train': acc_train.item(),
        'loss_val': loss_val.item(),
        'acc_val': acc_val.item()
    }


def test():
    # set test phase
    model.eval()

    # forward
    output = model(features, adj)
    loss_test = F.nll_loss(output[idx_test], labels[idx_test])
    acc_test = accuracy(output[idx_test], labels[idx_test])

    # print meta data
    print(
        "Test set results:",
        "loss= {:.4f}".format(loss_test.item()),
        "accuracy= {:.4f}".format(acc_test.item())
    )

In [6]:
# training loop
MAX_NB_EPOCHS = 200

for epoch in range(MAX_NB_EPOCHS):
    train(epoch)

In [7]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

fig = make_subplots(
    rows=1, cols=2,
    subplot_titles=["Loss", "Accuracy"],
)


fig.add_traces(
    data=[
        go.Scatter(
            y=[epo_meta[type_error] for epo_meta in tracker.values()],
            name=type_error
        )
    for type_error in ["loss_train", "loss_val"]],
    rows=1,
    cols=1
)

fig.add_traces(
    data=[
        go.Scatter(
            y=[epo_meta[type_acc] for epo_meta in tracker.values()],
            name=type_acc
        )
    for type_acc in ["acc_train", "acc_val"]],
    rows=1,
    cols=2
)

# Update x axis properties
fig.update_xaxes(title_text="epoch", row=1, col=1)
fig.update_xaxes(title_text="epoch", row=1, col=2)


fig.show()

In [8]:
# evaluate model on test set
test()

Test set results: loss= 0.6323 accuracy= 0.8000


In [9]:
model.children

<bound method Module.children of GCN(
  (gc1): GraphConvolution (1433 -> 32)
  (gc2): GraphConvolution (32 -> 7)
)>

In [10]:
list(model.children())[0].forward(features, adj).shape

torch.Size([2708, 32])

In [11]:
# preserve the first layer (embedding layer)
embedding_model = list(model.children())[0]

In [12]:
from sklearn.manifold import TSNE


# compute embedding
X = embedding_model(features, adj).detach().numpy()

# reduce to 2D
X_embedded = TSNE(
    n_components=2, 
    learning_rate='auto',
    init='random'
    ).fit_transform(X)

X_embedded.shape

(2708, 2)

In [13]:
labels.numpy().astype(str)

array(['3', '0', '4', ..., '5', '2', '3'], dtype='<U21')

In [14]:
import plotly.express as px

fig = px.scatter(
    x=X_embedded[:, 0],
    y=X_embedded[:, 1],
    color=labels.numpy().astype(str)
)

fig.update_layout(
    title="Projection of nodes representations in a 2D space"
)

fig.show()