In [17]:
import torch.optim as optim
from torch_geometric.datasets import Planetoid
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.transforms import NormalizeFeatures
from torch_geometric.data import DataLoader
# Raise the number of items shown at each edge of a dimension when a printed
# tensor is summarized (PyTorch's default is 3), so tensor dumps show more values.
torch.set_printoptions(edgeitems=500)

In [18]:
# Load the graph through the `read_data` module (presumably a project-local
# helper -- its implementation is not visible in this notebook).
import read_data
data = read_data.read()

In [19]:
# Display the loaded graph object's summary (field names and tensor shapes).
data

Data(edge_index=[2, 21398], num_classes=2, test_mask=[1835], train_mask=[1835], x=[1835, 1835], y=[1835])

In [20]:
# NOTE(review): `dataset` is rebound to a DataLoader in a later cell, and no cell
# shown in between reads it, so this alias has no lasting effect.
dataset = data
# Disabled experiment: mark every node whose label is >= 0 as a training node.
# data.train_mask = data.y >= 0

In [21]:
# data.test_mask = data.y >= 0

In [22]:
# from torch_geometric.data import DataLoader
#
# loader = DataLoader(data, batch_size=32, shuffle=True)

In [23]:
# Wrap the single graph in a list so it can be handed to DataLoader in the next cell.
data_list = [data]

In [24]:
# Build a loader over the one-element list. Note that `dataset` now names a
# DataLoader, not the graph object it was bound to earlier.
dataset = DataLoader(data_list, batch_size=4)

In [25]:
# Copy the graph's feature and class counts onto the loader object as ad-hoc attributes.
# NOTE(review): none of the cells shown in this notebook read these attributes back.
dataset.num_node_features = data.num_node_features
dataset.num_classes = data.num_classes

In [26]:
from general_GNN import GNNStack
# Build a GNNStack sized from the loaded graph and print its layer layout.
# NOTE(review): train() further down constructs its own GNNStack, and the run cell
# rebinds `model` to train()'s return value, so in the cells shown here this
# instance is only printed and moved to the selected device.
model = GNNStack(data.num_node_features, hidden_dim=32, output_dim=data.num_classes)
print(model)

GNNStack(
  (convs): ModuleList(
    (0): GCNConv(1835, 32)
    (1): GCNConv(32, 32)
    (2): GCNConv(32, 32)
  )
  (lns): ModuleList(
    (0): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
    (1): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
  )
  (post_mp): Sequential(
    (0): Linear(in_features=32, out_features=32, bias=True)
    (1): Dropout(p=0.25, inplace=False)
    (2): Linear(in_features=32, out_features=2, bias=True)
  )
)


In [27]:
# Device selection: CUDA is used only when it is both available and opted into
# through `use_GPU`; otherwise everything stays on the CPU.
use_GPU = False
if torch.cuda.is_available() and use_GPU:
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Move the model and the graph to the selected device.
model = model.to(device)
data = data.to(device)

In [28]:
# Display the device that was selected in the previous cell.
device


device(type='cpu')

In [29]:
# torch.cuda.empty_cache()

In [30]:
def test(loader, model, is_validation=False):
    model.eval()

    correct = 0
    for data in loader:
        with torch.no_grad():
            emb, pred = model(data.x, data.edge_index, data.batch)
            pred = pred.argmax(dim=1)
            label = data.y

        if model.task == 'node':
            mask = data.val_mask if is_validation else data.test_mask
            # node classification: only evaluate on nodes in test set
            pred = pred[mask]
            label = data.y[mask]

        correct += pred.eq(label).sum().item()

    if model.task == 'graph':
        total = len(loader.dataset)
    else:
        total = 0
        for data in loader.dataset:
            total += torch.sum(data.test_mask).item()
    return correct / total

In [31]:
def train(dataset, task, writer, epochs=401, lr=0.01, hidden_dim=32):
    """Train a fresh ``GNNStack`` on ``dataset`` and log progress to ``writer``.

    The model is sized from ``dataset`` itself (its first element and, when
    present, a dataset-level ``num_classes``), so the function does not depend
    on any notebook-level variable.

    Args:
        dataset: Non-empty, indexable collection of graph objects (the notebook
            passes a one-element list). For ``task == 'graph'`` it must also
            support slicing.
        task: ``'graph'`` trains on the first 80% of ``dataset`` and evaluates
            on the rest. Any other value uses one loader over the whole
            ``dataset`` for both; with ``'node'`` the loss is restricted to
            ``train_mask``.
        writer: Logger exposing ``add_scalar`` and ``add_embedding``
            (a tensorboardX ``SummaryWriter`` in this notebook).
        epochs: Number of training epochs.
        lr: Learning rate passed to Adam.
        hidden_dim: Hidden width passed to ``GNNStack``.

    Returns:
        The trained ``GNNStack`` instance.

    Raises:
        ValueError: If ``dataset`` is empty.
    """
    if len(dataset) == 0:
        raise ValueError("train() needs a non-empty dataset")

    if task == 'graph':
        split = int(len(dataset) * 0.8)
        loader = DataLoader(dataset[:split], batch_size=64, shuffle=True)
        test_loader = DataLoader(dataset[split:], batch_size=64, shuffle=True)
    else:
        test_loader = loader = DataLoader(dataset, batch_size=4, shuffle=False)

    # Size the model from the data that was passed in. A dataset-level
    # `num_classes` wins when it exists; a plain list falls back to the value
    # stored on its first graph.
    sample = dataset[0]
    num_classes = getattr(dataset, 'num_classes', None)
    if num_classes is None:
        num_classes = sample.num_classes

    # build model
    model = GNNStack(max(sample.num_node_features, 1), hidden_dim, num_classes, task=task)
    opt = optim.Adam(model.parameters(), lr=lr)

    # train
    for epoch in range(epochs):
        total_loss = 0
        model.train()
        for batch in loader:
            opt.zero_grad()
            embedding, pred = model(batch.x, batch.edge_index, batch.batch)
            label = batch.y
            if task == 'node':
                # Only training nodes contribute to the loss.
                pred = pred[batch.train_mask]
                label = label[batch.train_mask]
            loss = model.loss(pred, label)
            loss.backward()
            opt.step()
            # Weight each batch loss by its graph count so the epoch value is a
            # per-graph average.
            total_loss += loss.item() * batch.num_graphs
        total_loss /= len(loader.dataset)
        writer.add_scalar("loss", total_loss, epoch)

        if epoch % 10 == 0:
            test_acc = test(test_loader, model)
            print("Epoch {}. Loss: {:.4f}. Test accuracy: {:.4f}".format(
                epoch, total_loss, test_acc))
            writer.add_scalar("test accuracy", test_acc, epoch)

        if epoch % 20 == 0:
            # Logs the embedding and labels of the last training batch of this epoch.
            name = 'epoch' + str(epoch)
            writer.add_embedding(embedding, global_step=epoch, tag=name, metadata=batch.y)

    return model


### How to run tensorboard
Run the following commands to start TensorBoard:
```
cd src
tensorboard --logdir log
```

In [32]:
from datetime import datetime
from tensorboardX import SummaryWriter

# One log directory per run under ./log/, named by the start time (YYYYmmdd-HHMMSS).
writer = SummaryWriter("./log/" + datetime.now().strftime("%Y%m%d-%H%M%S"))

# Train on the single graph as a node-level task; this rebinds `model` to the
# network returned by train().
model = train([data], 'node', writer)

Epoch 0. Loss: 0.6800. Test accuracy: 0.6213
Epoch 10. Loss: 0.6502. Test accuracy: 0.4905
Epoch 20. Loss: 0.4620. Test accuracy: 0.5123
Epoch 30. Loss: 0.3981. Test accuracy: 0.5477
Epoch 40. Loss: 0.3308. Test accuracy: 0.5804
Epoch 50. Loss: 0.3685. Test accuracy: 0.5531
Epoch 60. Loss: 0.2975. Test accuracy: 0.5695
Epoch 70. Loss: 0.2638. Test accuracy: 0.5613
Epoch 80. Loss: 0.2587. Test accuracy: 0.5204
Epoch 90. Loss: 0.2287. Test accuracy: 0.5559
Epoch 100. Loss: 0.2135. Test accuracy: 0.5477
Epoch 110. Loss: 0.2754. Test accuracy: 0.5450
Epoch 120. Loss: 0.1869. Test accuracy: 0.5368
Epoch 130. Loss: 0.1696. Test accuracy: 0.5395
Epoch 140. Loss: 0.1679. Test accuracy: 0.5341
Epoch 150. Loss: 0.1607. Test accuracy: 0.5586
Epoch 160. Loss: 0.1619. Test accuracy: 0.5422
Epoch 170. Loss: 0.1426. Test accuracy: 0.5422
Epoch 180. Loss: 0.1428. Test accuracy: 0.5422
Epoch 190. Loss: 0.1246. Test accuracy: 0.5531
Epoch 200. Loss: 0.1254. Test accuracy: 0.5586
Epoch 210. Loss: 0.1164.