# Libraries Model GNN

In [33]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import sklearn.metrics as metrics
import matplotlib.pyplot as plt 

# Loading MNIST dataset

In [34]:
BATCH_SIZE = 32

# transformations
transform = transforms.Compose([transforms.ToTensor()])

# download and loading training dataset
trainset = torchvision.datasets.MNIST(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE,
                                          shuffle=True, num_workers=2)

# download and loading test dataset
testset = torchvision.datasets.MNIST(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE,
                                         shuffle=False, num_workers=2)

# Constructing Model GNN

In [35]:
class MyModel(nn.Module):
    
    def __init__(self):
        super(MyModel, self).__init__()
        
        # converting 28x28x1 --> 26x26x32
        
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3)
        self.d1 = nn.Linear(26 * 26 * 32, 128)
        self.d2 = nn.Linear(128,10)
        
    def forward(self, x):
        
        # converting 32 x 1 x 28 x 28 --> 32 x 32 x 26 x 26
        
        x = self.conv1(x)
        x = F.relu(x)
        
        # flatten --> 32 x (32 * 26 * 26)
        
        x = x.flatten(start_dim = 1)
        
        # 32 x (32 * 26 * 26) --> 32 x 128
        x = self.d1(x)
        x = F.relu(x)
        
        # logits --> 32 x 10
        logits = self.d2(x)
        out = F.softmax(logits, dim = 1)
        
        return out

# Training model 

In [36]:
learning_rate = 0.001
num_epochs = 5

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = MyModel()
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),lr=learning_rate)

In [37]:
for epoch in range(num_epochs):
    train_running_loss = 0.0
    train_acc = 0.0
    
    ## training step
    for i, (images, labels) in enumerate(trainloader):
        
        images = images.to(device)
        labels = labels.to(device)
        
        # forward + backpropagation + loss
        
        logits = model(images)
        loss = criterion(logits, labels)
        optimizer.zero_grad()
        loss.backward()
        
        # update model parameters
        optimizer.step()
        
        train_running_loss += loss.detach().item()
        train_acc += (torch.argmax(logits, 1).flatten() == labels).type(torch.float).mean().item()
        
    print("Epoch: %d | Loss: %.4f | Train Accuracy: %.2f" \
         %(epoch, train_running_loss / i, train_acc/i))

Epoch: 0 | Loss: 1.6540 | Train Accuracy: 0.81
Epoch: 1 | Loss: 1.4907 | Train Accuracy: 0.97
Epoch: 2 | Loss: 1.4801 | Train Accuracy: 0.98
Epoch: 3 | Loss: 1.4758 | Train Accuracy: 0.99
Epoch: 4 | Loss: 1.4730 | Train Accuracy: 0.99


In [38]:
test_acc = 0.0
for i, (images, labels) in enumerate(testloader, 0):
    images = images.to(device)
    labaels = labels.to(device)
    outputs = model(images)
    test_acc += (torch.argmax(outputs, 1).flatten() == labels).type(torch.float).mean().item()
    preds = torch.argmax(outputs, 1).flatten().cpu().numpy()
    
print("Test Accuracy: %.2f"%(test_acc/i))

Test Accuracy: 0.99


# Libraries Graphing GNN

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import torch_geometric.nn as pyg_nn
import torch_geometric.utils as pyg_utils

import time
from datetime import datetime

import networkx as nx
import numpy as np
import torch
import torch.optim as optim

from torch_geometric.datasets import TUDataset
from torch_geometric.datasets import Planetoid
from torch_geometric.data import DataLoader

import torch_geometric.transforms as T

from tensorboardX import SummaryWriter
import sklearn.manifold
import matplotlib.pyplot as plt


# Defining model for GNN Graphs

In [13]:
class GNNStack(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, task='node'):
        super(GNNStack, self).__init__()
        self.task = task
        self.convs = nn.ModuleList()
        self.convs.append(self.build_conv_model(input_dim, hidden_dim))
        self.lns = nn.ModuleList()
        self.lns.append(nn.LayerNorm(hidden_dim))
        self.lns.append(nn.LayerNorm(hidden_dim))
        for l in range(2):
            self.convs.append(self.build_conv_model(hidden_dim, hidden_dim))

        # post-message-passing
        self.post_mp = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim), nn.Dropout(0.25), 
            nn.Linear(hidden_dim, output_dim))
        if not (self.task == 'node' or self.task == 'graph'):
            raise RuntimeError('Unknown task.')

        self.dropout = 0.25
        self.num_layers = 3

    def build_conv_model(self, input_dim, hidden_dim):
        # refer to pytorch geometric nn module for different implementation of GNNs.
        if self.task == 'node':
            return pyg_nn.GCNConv(input_dim, hidden_dim)
        else:
            return pyg_nn.GINConv(nn.Sequential(nn.Linear(input_dim, hidden_dim),
                                  nn.ReLU(), nn.Linear(hidden_dim, hidden_dim)))

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch
        if data.num_node_features == 0:
          x = torch.ones(data.num_nodes, 1)

        for i in range(self.num_layers):
            x = self.convs[i](x, edge_index)
            emb = x
            x = F.relu(x)
            x = F.dropout(x, p=self.dropout, training=self.training)
            if not i == self.num_layers - 1:
                x = self.lns[i](x)

        if self.task == 'graph':
            x = pyg_nn.global_mean_pool(x, batch)

        x = self.post_mp(x)

        return emb, F.log_softmax(x, dim=1)

    def loss(self, pred, label):
        return F.nll_loss(pred, label)

## Training setup 

In [14]:
def train(dataset, task, writer):
    if task == 'graph':
        data_size = len(dataset)
        loader = DataLoader(dataset[:int(data_size * 0.8)], batch_size=64, shuffle=True)
        test_loader = DataLoader(dataset[int(data_size * 0.8):], batch_size=64, shuffle=True)
    else:
        test_loader = loader = DataLoader(dataset, batch_size=64, shuffle=True)

    # build model
    model = GNNStack(max(dataset.num_node_features, 1), 32, dataset.num_classes, task=task)
    opt = optim.Adam(model.parameters(), lr=0.01)
    
    # train
    for epoch in range(200):
        total_loss = 0
        model.train()
        for batch in loader:
            #print(batch.train_mask, '----')
            opt.zero_grad()
            embedding, pred = model(batch)
            label = batch.y
            if task == 'node':
                pred = pred[batch.train_mask]
                label = label[batch.train_mask]
            loss = model.loss(pred, label)
            loss.backward()
            opt.step()
            total_loss += loss.item() * batch.num_graphs
        total_loss /= len(loader.dataset)
        writer.add_scalar("loss", total_loss, epoch)

        if epoch % 10 == 0:
            test_acc = test(test_loader, model)
            print("Epoch {}. Loss: {:.4f}. Test accuracy: {:.4f}".format(
                epoch, total_loss, test_acc))
            writer.add_scalar("test accuracy", test_acc, epoch)

    return model


# Test setup 

In [15]:
def test(loader, model, is_validation=False):
    model.eval()

    correct = 0
    for data in loader:
        with torch.no_grad():
            emb, pred = model(data)
            pred = pred.argmax(dim=1)
            label = data.y

        if model.task == 'node':
            mask = data.val_mask if is_validation else data.test_mask
            # node classification: only evaluate on nodes in test set
            pred = pred[mask]
            label = data.y[mask]
            
        correct += pred.eq(label).sum().item()
    
    if model.task == 'graph':
        total = len(loader.dataset) 
    else:
        total = 0
        for data in loader.dataset:
            total += torch.sum(data.test_mask).item()
    return correct / total

## Training the model 

In [29]:
# Tensorboard http://localhost:6006/
%reload_ext tensorboard
!rm -rf ./logs/

In [30]:
writer = SummaryWriter("./log/" + datetime.now().strftime("%Y%m%d-%H%M%S"))

dataset = TUDataset(root='/tmp/ENZYMES', name='ENZYMES')
dataset = dataset.shuffle()
task = 'graph'

model = train(dataset, task, writer)

Epoch 0. Loss: 1.8290. Test accuracy: 0.1667
Epoch 10. Loss: 1.7348. Test accuracy: 0.2167
Epoch 20. Loss: 1.7236. Test accuracy: 0.2500
Epoch 30. Loss: 1.6919. Test accuracy: 0.2083
Epoch 40. Loss: 1.6730. Test accuracy: 0.2667
Epoch 50. Loss: 1.6934. Test accuracy: 0.2583
Epoch 60. Loss: 1.6716. Test accuracy: 0.2667
Epoch 70. Loss: 1.6721. Test accuracy: 0.2667
Epoch 80. Loss: 1.6234. Test accuracy: 0.3083
Epoch 90. Loss: 1.6522. Test accuracy: 0.2583
Epoch 100. Loss: 1.6506. Test accuracy: 0.2750
Epoch 110. Loss: 1.5910. Test accuracy: 0.3083
Epoch 120. Loss: 1.6359. Test accuracy: 0.2667
Epoch 130. Loss: 1.5770. Test accuracy: 0.2333
Epoch 140. Loss: 1.6646. Test accuracy: 0.2167
Epoch 150. Loss: 1.5855. Test accuracy: 0.3083
Epoch 160. Loss: 1.5285. Test accuracy: 0.3083
Epoch 170. Loss: 1.6050. Test accuracy: 0.2667
Epoch 180. Loss: 1.5086. Test accuracy: 0.3250
Epoch 190. Loss: 1.5082. Test accuracy: 0.3000


In [31]:
%tensorboard --logdir "./log"