In [19]:
import torch.optim as optim
from torch_geometric.datasets import Planetoid
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.transforms import NormalizeFeatures
from torch_geometric.data import DataLoader

# Widen tensor printing: keep up to 500 items at the start and end of each
# dimension instead of PyTorch's short default summary.
torch.set_printoptions(edgeitems=500)

# seed for reproducibility
# (only PyTorch's generator is seeded here; other RNG sources are untouched)
torch.manual_seed(0)

<torch._C.Generator at 0x266a3cfb490>

### Data class info:
- class 0: without autism associations
- class 1: autism genes
- class 2: 0.75 confidence
- class 3: 0.5 confidence
- class 4: unlabeled nodes

In [20]:
import read_data

# Build the graph `data` object via the project-local read_data module.
# NOTE(review): this call emits a pandas SettingWithCopyWarning (see the cell
# output) caused by a chained assignment on autism_df['label'] inside
# read_data. The write of label 3 for confidence == 0.5 may land on a copy;
# consider autism_df.loc[autism_df['confidence'] == 0.5, 'label'] = 3 there
# and verify the resulting class counts.
data = read_data.read()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  autism_df['label'][autism_df['confidence'] == 0.5] = 3


In [21]:
data

Data(edge_index=[2, 811236], num_classes=5, test_mask=[23472], train_mask=[23472], x=[23472, 23472], y=[23472])

In [22]:
# Structural sanity checks on the graph before training.
# NOTE(review): newer PyTorch Geometric releases appear to rename the first two
# helpers to has_isolated_nodes() / has_self_loops() - confirm against the
# installed version before upgrading.
print(f'Contains isolated nodes: {data.contains_isolated_nodes()}')
print(f'Contains self-loops: {data.contains_self_loops()}')
print(f'Is undirected: {data.is_undirected()}')
# dataset = data
# data.train_mask = data.y >= 0

Contains isolated nodes: False
Contains self-loops: False
Is undirected: True


In [23]:
data.train_mask.shape

torch.Size([23472])

In [24]:
data.train_mask.sum()

tensor(1468)

In [25]:
data.test_mask.sum()
# data.test_mask = data.y >= 0

tensor(367)

In [26]:
data.y[data.train_mask].shape


torch.Size([1468])

In [27]:
data.y[data.test_mask].shape

torch.Size([367])

In [28]:
# from torch_geometric.data import DataLoader
#
# loader = DataLoader(data, batch_size=32, shuffle=True)


In [29]:
# data_list = [data]
#

In [30]:
# dataset = DataLoader(data_list)
#

In [31]:
# dataset.num_node_features = data.num_node_features
# dataset.num_classes = data.num_classes


### Visualizing the Model with TensorBoard
Run TensorBoard from the command line:
```
cd src
tensorboard --logdir log
```

In [32]:
# build model
from GCN import GCNStack

model = GCNStack(data.num_node_features, hidden_dim1=128, hidden_dim2=64, output_dim=data.num_classes)
print(model)


GCNStack(
  (convs): ModuleList(
    (0): GCNConv(23472, 128)
    (1): GCNConv(128, 64)
    (2): GCNConv(64, 64)
  )
  (lns): ModuleList(
    (0): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
    (1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
  )
  (post_mp): Sequential(
    (0): Linear(in_features=64, out_features=64, bias=True)
    (1): Linear(in_features=64, out_features=5, bias=True)
  )
)


In [33]:
# Running on GPU or CPU
use_GPU = True
device = torch.device('cuda' if torch.cuda.is_available() and use_GPU else 'cpu')
model, data = model.to(device), data.to(device)

In [34]:
device


device(type='cuda')

In [35]:
# torch.cuda.empty_cache()

In [36]:
def test(loader, model, is_validation=False):
    ''' Testing Code of the Model '''
    model.eval()

    correct = 0
    for data in loader:
        with torch.no_grad():
            emb, pred = model(data.x, data.edge_index)
            pred = pred.argmax(dim=1)
            label = data.y

        mask = data.val_mask if is_validation else data.test_mask
        # node classification: only evaluate on nodes in test set
        pred = pred[mask]
        label = data.y[mask]

        correct += pred.eq(label).sum().item()
    total = 0
    for data in loader.dataset:
        total += torch.sum(data.test_mask).item()
    return correct / total

def train(dataset, writer, model, epoch_num, lr, weight_decay):
    """Fit ``model`` on ``dataset`` with momentum SGD, logging to ``writer``.

    Runs epochs ``0`` through ``epoch_num`` inclusive. Every epoch logs the
    mean training loss; every 10th epoch evaluates with ``test`` and
    prints/logs the test accuracy; every 20th epoch writes the embeddings of
    the last processed batch to ``writer``.

    Args:
        dataset: Collection of graphs handed to ``DataLoader``.
        writer: Summary writer providing ``add_scalar`` and ``add_embedding``.
        model: Network returning ``(embedding, scores)`` and exposing ``loss``.
        epoch_num: Index of the last epoch to run.
        lr: SGD learning rate.
        weight_decay: SGD weight decay.

    Returns:
        The ``model`` object that was passed in, after training.
    """
    # One un-shuffled loader is shared by the training and evaluation passes.
    train_loader = DataLoader(dataset, shuffle=False)
    eval_loader = train_loader

    optimizer = optim.SGD(model.parameters(), lr=lr, weight_decay=weight_decay, momentum=0.9)

    for epoch in range(epoch_num + 1):
        model.train()
        running_loss = 0
        for graph_batch in train_loader:
            optimizer.zero_grad()
            node_emb, scores = model(graph_batch.x, graph_batch.edge_index)
            # The loss only sees nodes selected by the training mask.
            train_nodes = graph_batch.train_mask
            loss = model.loss(scores[train_nodes], graph_batch.y[train_nodes])
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * graph_batch.num_graphs
        mean_loss = running_loss / len(train_loader.dataset)
        writer.add_scalar("loss", mean_loss, epoch)

        # Periodic evaluation on the test split.
        if epoch % 10 == 0:
            accuracy = test(eval_loader, model)
            print("Epoch {}. Loss: {:.4f}. Test accuracy: {:.4f}".format(
                epoch, mean_loss, accuracy))
            writer.add_scalar("test accuracy", accuracy, epoch)

        # Periodic embedding snapshot, tagged with the epoch number.
        if epoch % 20 == 0:
            snapshot_tag = 'epoch' + str(epoch)
            writer.add_embedding(node_emb, global_step=epoch, tag=snapshot_tag, metadata=graph_batch.y)

    return model

from datetime import datetime
from tensorboardX import SummaryWriter

# Each run logs into its own directory under ./log, named by launch timestamp.
writer = SummaryWriter("./log/" + datetime.now().strftime("%Y%m%d-%H%M%S"))

# Train on the single graph and keep the returned model.
model = train(
    [data],
    writer,
    model,
    epoch_num=400,
    lr=1e-2,
    weight_decay=1e-3,
)

Epoch 0. Loss: 1.6208. Test accuracy: 0.5804
Epoch 10. Loss: 1.2671. Test accuracy: 0.5804
Epoch 20. Loss: 1.1724. Test accuracy: 0.5804
Epoch 30. Loss: 1.1058. Test accuracy: 0.5804
Epoch 40. Loss: 1.0539. Test accuracy: 0.5804
Epoch 50. Loss: 1.0124. Test accuracy: 0.5940
Epoch 60. Loss: 0.9791. Test accuracy: 0.5913
Epoch 70. Loss: 0.9529. Test accuracy: 0.5995
Epoch 80. Loss: 0.9325. Test accuracy: 0.5940
Epoch 90. Loss: 0.9171. Test accuracy: 0.5967
Epoch 100. Loss: 0.9056. Test accuracy: 0.6049
Epoch 110. Loss: 0.8972. Test accuracy: 0.6049
Epoch 120. Loss: 0.8912. Test accuracy: 0.6076
Epoch 130. Loss: 0.8869. Test accuracy: 0.6076
Epoch 140. Loss: 0.8838. Test accuracy: 0.6131
Epoch 150. Loss: 0.8815. Test accuracy: 0.6104
Epoch 160. Loss: 0.8799. Test accuracy: 0.6104
Epoch 170. Loss: 0.8787. Test accuracy: 0.6131
Epoch 180. Loss: 0.8777. Test accuracy: 0.6131
Epoch 190. Loss: 0.8770. Test accuracy: 0.6104
Epoch 200. Loss: 0.8765. Test accuracy: 0.6104
Epoch 210. Loss: 0.8760.