In [1]:
import torch.optim as optim
from torch_geometric.datasets import Planetoid
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import numpy as np
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.transforms import NormalizeFeatures
from torch_geometric.data import DataLoader

# Show up to 500 items at each end of every tensor dimension before PyTorch
# abbreviates printed tensors with "...".
torch.set_printoptions(edgeitems=500)

# seed for reproducibility
# Both the PyTorch and the NumPy global random generators are seeded with 0.
torch.manual_seed(0)
np.random.seed(0)

### Data class info

- class 0: without autism associations
- class 1: autism genes
- class 2: 0.75 confidence
- class 3: 0.5 confidence
- class 4: unlabeled nodes

In [2]:
import read_data

# Build the graph object used throughout the notebook via the project-local read_data module.
# NOTE(review): running this cell surfaces a pandas SettingWithCopyWarning for the chained
# assignment `autism_df['label'][autism_df['confidence'] == 0.5] = 3` (see cell output);
# presumably that line lives in read_data and should use `.loc[mask, 'label'] = 3` — confirm there.
data = read_data.read()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  autism_df['label'][autism_df['confidence'] == 0.5] = 3


In [3]:
# Display the graph object's summary repr (attribute names with their tensor shapes).
data

Data(edge_index=[2, 811236], num_classes=5, test_mask=[23472], train_mask=[23472], x=[23472, 23472], y=[23472])

In [4]:
# Structural sanity checks on the graph before training.
# NOTE(review): newer PyTorch Geometric releases expose the first two checks as
# has_isolated_nodes() / has_self_loops() — confirm against the installed version before renaming.
print(f'Contains isolated nodes: {data.contains_isolated_nodes()}')
print(f'Contains self-loops: {data.contains_self_loops()}')
print(f'Is undirected: {data.is_undirected()}')
# dataset = data
# data.train_mask = data.y >= 0

Contains isolated nodes: False
Contains self-loops: False
Is undirected: True


In [5]:
# The training mask has one entry per node (23472 in the recorded output).
data.train_mask.shape

torch.Size([23472])

In [6]:
# Number of nodes selected for training (assumes a boolean/0-1 mask, so the sum is a count).
data.train_mask.sum()

tensor(1468)

In [7]:
# Number of nodes selected for testing (assumes a boolean/0-1 mask, so the sum is a count).
data.test_mask.sum()
# data.test_mask = data.y >= 0

tensor(367)

In [8]:
# Labels restricted to the training nodes; the length should match the training-mask count above.
data.y[data.train_mask].shape


torch.Size([1468])

In [9]:
# Labels restricted to the test nodes; the length should match the test-mask count above.
data.y[data.test_mask].shape

torch.Size([367])

In [10]:
# from torch_geometric.data import DataLoader
#
# loader = DataLoader(data, batch_size=32, shuffle=True)


In [11]:
# data_list = [data]
#

In [12]:
# dataset = DataLoader(data_list)
#

In [13]:
# dataset.num_node_features = data.num_node_features
# dataset.num_classes = data.num_classes


### Visualizing the Model with TensorBoard
Run TensorBoard from the command line:
```
cd src
tensorboard --logdir log
```

In [14]:
# build model
from GCN import GCNStack

# Instantiate the project-local GCNStack: input size is the graph's node-feature width,
# hidden sizes are 128 and 64, and the output size is the number of classes.
model = GCNStack(data.num_node_features, hidden_dim1=128, hidden_dim2=64, output_dim=data.num_classes)
# Print the layer summary to verify the resulting architecture.
print(model)


GCNStack(
  (convs): ModuleList(
    (0): GCNConv(23472, 128)
    (1): GCNConv(128, 64)
    (2): GCNConv(64, 64)
  )
  (lns): ModuleList(
    (0): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
    (1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
  )
  (post_mp): Sequential(
    (0): Linear(in_features=64, out_features=64, bias=True)
    (1): Linear(in_features=64, out_features=5, bias=True)
  )
)


In [15]:
# Select the compute device: CUDA when it is available and use_GPU is enabled, else CPU.
use_GPU = True
if torch.cuda.is_available() and use_GPU:
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Move both the network and the graph onto the selected device.
model = model.to(device)
data = data.to(device)

In [16]:
# Show which device was selected.
device


device(type='cuda')

In [17]:
# torch.cuda.empty_cache()

In [18]:
def test(loader, model, is_validation=False):
    ''' Testing Code of the Model '''
    model.eval()

    correct = 0
    for data in loader:
        with torch.no_grad():
            emb, pred = model(data.x, data.edge_index)
            pred = pred.argmax(dim=1)
            label = data.y

        mask = data.val_mask if is_validation else data.test_mask
        # node classification: only evaluate on nodes in test set
        pred = pred[mask]
        label = data.y[mask]

        correct += pred.eq(label).sum().item()
    total = 0
    for data in loader.dataset:
        total += torch.sum(data.test_mask).item()
    return correct / total

def train(dataset, writer, model, epoch_num, lr, weight_decay):
    ''' Training code of the model

    Optimises ``model`` with SGD (momentum 0.9) on the training-mask nodes of
    every graph in ``dataset`` for ``epoch_num + 1`` epochs (0..epoch_num).

    Logging to ``writer``:
        * "loss" every epoch,
        * "test accuracy" every 10 epochs (also printed),
        * the embedding of the last processed batch every 20 epochs.

    Args:
        dataset: list of graphs, wrapped in a non-shuffling DataLoader.
        writer: summary writer providing ``add_scalar`` / ``add_embedding``.
        model: network returning ``(embedding, prediction)`` and exposing
            ``loss(pred, label)``.
        epoch_num: index of the final epoch.
        lr: SGD learning rate.
        weight_decay: SGD weight decay.

    Returns:
        The trained model (the same object that was passed in).
    '''
    # The same loader serves training and periodic evaluation.
    graph_loader = DataLoader(dataset, shuffle=False)
    eval_loader = graph_loader

    optimizer = optim.SGD(
        model.parameters(),
        lr=lr,
        weight_decay=weight_decay,
        momentum=0.9,
    )

    for epoch in range(epoch_num + 1):
        model.train()
        running_loss = 0
        for batch in graph_loader:
            optimizer.zero_grad()
            embedding, pred = model(batch.x, batch.edge_index)
            # Only nodes in the training mask contribute to the loss.
            train_nodes = batch.train_mask
            loss = model.loss(pred[train_nodes], batch.y[train_nodes])
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * batch.num_graphs
        total_loss = running_loss / len(graph_loader.dataset)
        writer.add_scalar("loss", total_loss, epoch)

        if epoch % 10 == 0:
            test_acc = test(eval_loader, model)
            print("Epoch {}. Loss: {:.4f}. Test accuracy: {:.4f}".format(
                epoch, total_loss, test_acc))
            writer.add_scalar("test accuracy", test_acc, epoch)

        if epoch % 20 == 0:
            # `embedding` and `batch` come from the last batch of this epoch.
            writer.add_embedding(
                embedding,
                global_step=epoch,
                tag='epoch' + str(epoch),
                metadata=batch.y,
            )

    return model

from datetime import datetime
from tensorboardX import SummaryWriter

# Log this run into its own timestamped sub-directory of ./log.
writer = SummaryWriter("./log/" + datetime.now().strftime("%Y%m%d-%H%M%S"))

# Train on the single full graph; it is passed as a one-element list because train()
# wraps its `dataset` argument in a DataLoader. train() iterates range(epoch_num + 1),
# so epoch_num=400 means 401 epochs (0..400).
# NOTE(review): in the recorded log the loss keeps falling while test accuracy stays at
# 0.8529/0.8501 — possibly predictions dominated by a single class; verify class balance.
model = train([data], writer, model, epoch_num=400, lr=1e-2, weight_decay=1e-3)

Epoch 0. Loss: 1.5893. Test accuracy: 0.8529
Epoch 10. Loss: 0.8344. Test accuracy: 0.8529
Epoch 20. Loss: 0.7785. Test accuracy: 0.8529
Epoch 30. Loss: 0.7447. Test accuracy: 0.8529
Epoch 40. Loss: 0.7108. Test accuracy: 0.8529
Epoch 50. Loss: 0.6859. Test accuracy: 0.8529
Epoch 60. Loss: 0.6636. Test accuracy: 0.8529
Epoch 70. Loss: 0.6451. Test accuracy: 0.8529
Epoch 80. Loss: 0.6303. Test accuracy: 0.8529
Epoch 90. Loss: 0.6191. Test accuracy: 0.8529
Epoch 100. Loss: 0.6111. Test accuracy: 0.8501
Epoch 110. Loss: 0.6052. Test accuracy: 0.8501
Epoch 120. Loss: 0.6009. Test accuracy: 0.8501
Epoch 130. Loss: 0.5976. Test accuracy: 0.8501
Epoch 140. Loss: 0.5949. Test accuracy: 0.8501
Epoch 150. Loss: 0.5927. Test accuracy: 0.8501
Epoch 160. Loss: 0.5908. Test accuracy: 0.8501
Epoch 170. Loss: 0.5891. Test accuracy: 0.8501
Epoch 180. Loss: 0.5876. Test accuracy: 0.8501
Epoch 190. Loss: 0.5863. Test accuracy: 0.8501
Epoch 200. Loss: 0.5851. Test accuracy: 0.8501
Epoch 210. Loss: 0.5840.