In [1]:
import torch.optim as optim
from torch_geometric.datasets import Planetoid
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.transforms import NormalizeFeatures
from torch_geometric.data import DataLoader

# Print up to 500 items at the start and end of each tensor dimension before
# PyTorch abbreviates the output with "...", so tensors can be inspected in full.
torch.set_printoptions(edgeitems=500)

In [2]:
# `read_data` is a project-local module; its `read()` result is used below as a
# single graph object (the next cell's repr shows x, edge_index, y and
# train/test masks).
import read_data

# NOTE(review): this call emits a pandas SettingWithCopyWarning (see the cell
# output) for a chained assignment on `autism_df` -- presumably inside
# `read_data`; the `.loc[mask, 'label'] = 3` form would avoid it. TODO confirm.
data = read_data.read()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  autism_df['label'][autism_df['confidence'] == 0.5] = 3


In [3]:
data

Data(edge_index=[2, 21398], num_classes=4, test_mask=[1835], train_mask=[1835], x=[1835, 1835], y=[1835])

In [4]:
# dataset = data
# data.train_mask = data.y >= 0

In [5]:
# data.test_mask = data.y >= 0

In [6]:
# from torch_geometric.data import DataLoader
#
# loader = DataLoader(data, batch_size=32, shuffle=True)


In [7]:
# Wrap the single graph in a list so it can be handed to a DataLoader.
data_list = [data]

In [8]:
# NOTE(review): despite its name, `dataset` is a DataLoader over `data_list`.
# `train()` below builds its own loader and is called with `[data]`, so this
# object does not take part in the training run shown in this notebook.
dataset = DataLoader(data_list, batch_size=4)

In [9]:
# Copy the feature and class counts from the graph onto the loader object as
# ad-hoc attributes.
# NOTE(review): no cell visible here reads these attributes; the model below is
# sized from `data` directly.
dataset.num_node_features = data.num_node_features
dataset.num_classes = data.num_classes

In [10]:
# build model
# `GNNStack` comes from the project-local `GNN` module.
from GNN import GNNStack

# Input width is the node-feature count and output width is the class count,
# both taken from the loaded graph; the printed repr below shows the resulting
# layer stack.
model = GNNStack(data.num_node_features, hidden_dim1=128, hidden_dim2=64, output_dim=data.num_classes)
print(model)


GNNStack(
  (convs): ModuleList(
    (0): GCNConv(1835, 128)
    (1): GCNConv(128, 64)
    (2): GCNConv(64, 64)
  )
  (lns): ModuleList(
    (0): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
    (1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
  )
  (post_mp): Sequential(
    (0): Linear(in_features=64, out_features=64, bias=True)
    (1): Dropout(p=0.25, inplace=False)
    (2): Linear(in_features=64, out_features=4, bias=True)
  )
)


In [11]:
# Choose the compute device: CUDA when it is both available and requested,
# otherwise fall back to the CPU.
use_GPU = True
if torch.cuda.is_available() and use_GPU:
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Move the model and the graph onto the selected device.
model = model.to(device)
data = data.to(device)

In [12]:
device


device(type='cuda')

In [13]:
# torch.cuda.empty_cache()

In [14]:
def test(loader, model, is_validation=False):
    model.eval()

    correct = 0
    for data in loader:
        with torch.no_grad():
            emb, pred = model(data.x, data.edge_index)
            pred = pred.argmax(dim=1)
            label = data.y

        mask = data.val_mask if is_validation else data.test_mask
        # node classification: only evaluate on nodes in test set
        pred = pred[mask]
        label = data.y[mask]

        correct += pred.eq(label).sum().item()
    total = 0
    for data in loader.dataset:
        total += torch.sum(data.test_mask).item()
    return correct / total

In [15]:
def train(dataset, writer, model, epoch_num):
    """Train ``model`` on the training nodes of ``dataset`` and log progress.

    Args:
        dataset: Indexable collection of graphs accepted by ``DataLoader``.
            Each graph must expose ``x``, ``edge_index``, ``y``,
            ``train_mask`` and ``test_mask``.
        writer: TensorBoard-style writer providing ``add_graph``,
            ``add_scalar`` and ``add_embedding``.
        model: Module called as ``model(x, edge_index)`` that returns an
            ``(embedding, prediction)`` pair and offers ``loss(pred, label)``.
        epoch_num: Index of the last epoch; epochs ``0..epoch_num`` inclusive
            are run.

    Returns:
        The same ``model`` object after training.
    """
    # The same loader serves both training and the periodic evaluation.
    test_loader = loader = DataLoader(dataset, batch_size=4, shuffle=False)

    opt = optim.Adam(model.parameters(), lr=0.01)

    # Trace the model on a graph taken from the `dataset` argument rather than
    # from a notebook-global variable, so the function does not depend on
    # hidden kernel state.
    sample = loader.dataset[0]
    writer.add_graph(model, (sample.x, sample.edge_index))

    # train
    for epoch in range(epoch_num + 1):
        total_loss = 0
        model.train()
        for batch in loader:
            opt.zero_grad()
            embedding, pred = model(batch.x, batch.edge_index)
            # Only the nodes in the training split contribute to the loss.
            pred = pred[batch.train_mask]
            label = batch.y[batch.train_mask]
            loss = model.loss(pred, label)
            loss.backward()
            opt.step()
            # Weight by graphs per batch so the epoch loss is a per-graph mean.
            total_loss += loss.item() * batch.num_graphs
        total_loss /= len(loader.dataset)
        writer.add_scalar("loss", total_loss, epoch)

        if epoch % 10 == 0:
            test_acc = test(test_loader, model)
            print("Epoch {}. Loss: {:.4f}. Test accuracy: {:.4f}".format(
                epoch, total_loss, test_acc))
            writer.add_scalar("test accuracy", test_acc, epoch)

        if epoch % 20 == 0:
            name = 'epoch' + str(epoch)
            # Log the embedding of the last batch of this epoch. The labels are
            # passed as a plain list so each one is written as a bare number
            # rather than as a tensor repr.
            writer.add_embedding(embedding.detach(), global_step=epoch, tag=name,
                                 metadata=batch.y.tolist())

    return model

### Visualization using tensorboard
To view the logged loss, accuracy, and embeddings, run TensorBoard from the command line:
```
cd src
tensorboard --logdir log
```

In [16]:
from datetime import datetime
from tensorboardX import SummaryWriter

# Each run logs to its own timestamped directory under ./log so that runs can
# be compared side by side in TensorBoard.
writer = SummaryWriter("./log/" + datetime.now().strftime("%Y%m%d-%H%M%S"))

try:
    model = train([data], writer, model, epoch_num=400)
finally:
    # Flush and close the event file even if training is interrupted, so the
    # scalars and embeddings logged so far are not lost.
    writer.close()

With rtol=1e-05 and atol=1e-05, found 1624 element(s) (out of 117440) whose difference(s) exceeded the margin of error (including 0 nan comparisons). The greatest difference was 6.152689456939697e-05 (0.23520348966121674 vs. 0.23526501655578613), which occurred at index (446, 7).
  _check_trace(
With rtol=1e-05 and atol=1e-05, found 27 element(s) (out of 7340) whose difference(s) exceeded the margin of error (including 0 nan comparisons). The greatest difference was 9.644031524658203e-05 (-1.2237502336502075 vs. -1.223846673965454), which occurred at index (788, 2).
  _check_trace(


Epoch 0. Loss: 1.3206. Test accuracy: 0.6076
Epoch 10. Loss: 0.9658. Test accuracy: 0.6049
Epoch 20. Loss: 0.7664. Test accuracy: 0.5668
Epoch 30. Loss: 0.7197. Test accuracy: 0.4959
Epoch 40. Loss: 0.5877. Test accuracy: 0.4823
Epoch 50. Loss: 0.5289. Test accuracy: 0.3869
Epoch 60. Loss: 0.4911. Test accuracy: 0.4687
Epoch 70. Loss: 0.3828. Test accuracy: 0.4714
Epoch 80. Loss: 0.3979. Test accuracy: 0.4850
Epoch 90. Loss: 0.4708. Test accuracy: 0.4332
Epoch 100. Loss: 0.3099. Test accuracy: 0.4605
Epoch 110. Loss: 0.3345. Test accuracy: 0.4605
Epoch 120. Loss: 0.3096. Test accuracy: 0.4523
Epoch 130. Loss: 0.2267. Test accuracy: 0.4360
Epoch 140. Loss: 0.2030. Test accuracy: 0.4578
Epoch 150. Loss: 0.1580. Test accuracy: 0.4387
Epoch 160. Loss: 0.1573. Test accuracy: 0.4496
Epoch 170. Loss: 0.1671. Test accuracy: 0.4632
Epoch 180. Loss: 0.1311. Test accuracy: 0.4605
Epoch 190. Loss: 0.0995. Test accuracy: 0.4469
Epoch 200. Loss: 0.1116. Test accuracy: 0.4550
Epoch 210. Loss: 0.0771.