In [12]:
# !pip install --upgrade torch-scatter
# !pip install --upgrade torch-sparse
# !pip install --upgrade torch-cluster
# !pip install --upgrade torch-spline-conv 
# !pip install torch-geometric

![alt text](https://raw.githubusercontent.com/rusty1s/pytorch_geometric/master/docs/source/_static/img/pyg_logo_text.svg?sanitize=true)

# Intorduction

PyTorch Geometric [PyG](https://github.com/rusty1s/pytorch_geometric) is a geometric deep learning (GDN) extension library for PyTorch. In general GDN is used to generalize deep learning for non-Ecludian data. For the most part, CNN doesn't work very good for 3D shapes, point clouds and graph structures. Moreover, many real life datasets are inherently non-ecludian like social communicatin datasets, molecular structures, network traffic . etc ... 

Graph convolutional networks (GCN) come to the rescue to generalize CNNs to work for non-ecludian datasets. The basic architecture is illustrated below 

![alt text](https://tkipf.github.io/graph-convolutional-networks/images/gcn_web.png)

where the input is a graph $G = (V,E)$ represented as 

*   Feature repsentation for each node $N \times D$ where N is the number of nodes in the graph and $D$ is the number of features per node. 
*   A matrix repsentation of the graph in the form $2\times L$ where $L$ is the number of edges in the graph. Each column in the matrix represents an edge between two nodes. 
*  Edge attributes of the form $L \times R$ where R is the number of features per each edge. 

The output is of form $N \times F$ where $F$ is the number of features per each node in the graph. 




In [1]:
import numpy as np
import os.path as osp
import torch
import torch.nn.functional as F
from torch_geometric.nn import SplineConv
from torch_geometric.data import Data
from random import shuffle, randint
import networkx as nx
import matplotlib.pyplot as plt
import random 

# Dataset

We will simulate a spammer vs non-spammer graph network. Given a node which represents a client that can send emails to different node (another client). 

Spammers have some similarities 

*   More likely to send lots of emails (more edges)
*   More likely to send lots of data through email (we will represent an edge feature is the number of bytes where the value [0, 1] where 1 represents more bytes sent)
*   Each spammer has an associated trust value which is given by the server. If the node is more likely to be a spammer then the value will be closer to 1. 

Non-spammers have the opposite features. In the next code snippet will try to simulate all of these features through randomization



In [2]:
labels = []
N = 1000 
nodes = range(0, N)
node_features = []
edge_features = []

for node in nodes:
  
  #spammer 
    if random.random() > 0.5:
        #more likely to have many connections with a maximum of 1/5 of the nodes in the graph 
        nb_nbrs = int(random.random() * (N/5))
        #more likely to have sent many bytes
        node_features.append((random.random()+1) / 2.)
        #more likely to have a high trust value 
        edge_features += [(random.random()+2)/3.] * nb_nbrs
        #associate a label 
        labels.append(1)
    
  #non-spammer 
    else:
        #at most connected to 10 nbrs 
        nb_nbrs = int(random.random() * 10 + 1)
        #associate more bytes and random bytes 
        node_features.append(random.random())
        edge_features += [random.random()] * nb_nbrs
        labels.append(0)
  
  #connect to some random nodes 
    nbrs = np.random.choice(nodes, size = nb_nbrs)
    nbrs = nbrs.reshape((1, nb_nbrs))
  
  #add the edges of nbrs 
    node_edges = np.concatenate([np.ones((1, nb_nbrs), dtype = np.int32) * node, nbrs], axis = 0)
  
  #add the overall edges 
    if node == 0:
        edges = node_edges
    else:
        edges = np.concatenate([edges, node_edges], axis = 1)

In [4]:
len(node_features)

1000

In [5]:
len(edge_features)

55324

In [9]:
len(labels)

1000

In [11]:
edges

array([[  0,   0,   0, ..., 999, 999, 999],
       [182,  73, 347, ..., 565, 841, 806]])

Create a data structure 

In [None]:
x = torch.tensor(np.expand_dims(node_features, 1), dtype=torch.float)
y = torch.tensor(labels, dtype=torch.long)

edge_index = torch.tensor(edges, dtype=torch.long)
edge_attr = torch.tensor(np.expand_dims(edge_features, 1), dtype=torch.float)

data = Data(x = x, edge_index=edge_index, y =y, edge_attr=edge_attr )
print(data)

Data(edge_attr=[49077, 1], edge_index=[2, 49077], x=[1000, 1], y=[1000])


We will create a trian/test mask where we split the data into training and test. This is necessary because during optimizing the loss when training we don't want to include the nodes part of the testing process 

In [None]:
data.train_mask = torch.zeros(data.num_nodes, dtype=torch.uint8)
data.train_mask[:int(0.8 * data.num_nodes)] = 1 #train only on the 80% nodes
data.test_mask = torch.zeros(data.num_nodes, dtype=torch.uint8) #test on 20 % nodes 
data.test_mask[- int(0.2 * data.num_nodes):] = 1

# Deep GCN

We will use [SplineConv](https://arxiv.org/abs/1711.08920) layer for the convolution. We will illsue exponential ReLU as an activation function and dropout for regulaization

In [None]:
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = SplineConv(1, 16, dim=1, kernel_size=5)
        self.conv2 = SplineConv(16, 32, dim=1, kernel_size=5)
        self.conv3 = SplineConv(32, 64, dim=1, kernel_size=7)
        self.conv4 = SplineConv(64, 128, dim=1, kernel_size=7)
        self.conv5 = SplineConv(128, 128, dim=1, kernel_size=11)
        self.conv6 = SplineConv(128, 2, dim=1, kernel_size=11)

    def forward(self):
        x, edge_index, edge_attr = data.x, data.edge_index, data.edge_attr
        x = F.elu(self.conv1(x, edge_index, edge_attr))
        x = self.conv2(x, edge_index, edge_attr)
        x = F.elu(self.conv3(x, edge_index, edge_attr))
        x = self.conv4(x, edge_index, edge_attr)
        x = F.elu(self.conv5(x, edge_index, edge_attr))
        x = self.conv6(x, edge_index, edge_attr)
        x = F.dropout(x, training = self.training)
        return F.log_softmax(x, dim=1)

# Optimization 

We will use nll_loss which can be used for classification of arbitrary classes

In [None]:
def evaluate_loss(mode = 'train'):
  
  #use masking for loss evaluation 
  if mode == 'train':
    loss = F.nll_loss(model()[data.train_mask], data.y[data.train_mask])
  else:
    loss = F.nll_loss(model()[data.test_mask], data.y[data.test_mask])
  return loss

def train():
  #training 
  model.train()
  optimizer.zero_grad()
  loss = evaluate_loss()
  loss.backward()
  optimizer.step()
  return loss.detach().cpu().numpy() 

def test():
  #testing 
  model.eval()
  logits, accs = model(), []
  loss = evaluate_loss(mode = 'test').detach().cpu().numpy() 

  for _, mask in data('train_mask', 'test_mask'):
      pred = logits[mask].max(1)[1]
      acc = pred.eq(data.y[mask]).sum().item() / mask.sum().item()
      accs.append(acc)
  return [loss] + accs

# Setup the model 
We will create the model and setup training using adam optimizer

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model, data = Net().to(device), data.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Training and Testing

In [None]:
losses = []
for epoch in range(1, 200):
  train_loss = train()
  log = 'Epoch: {:03d}, train_loss: {:.3f}, test_loss:{:.3f}, train_acc: {:.2f}, test_acc: {:.2f}'
  test_loss = test()[0]
  losses.append([train_loss,test_loss])
  print(log.format(epoch, train_loss, *test()))

Epoch: 001, train_loss: 0.692, test_loss:0.687, train_acc: 0.54, test_acc: 0.51
Epoch: 002, train_loss: 0.686, test_loss:0.680, train_acc: 0.80, test_acc: 0.81
Epoch: 003, train_loss: 0.680, test_loss:0.670, train_acc: 0.82, test_acc: 0.83
Epoch: 004, train_loss: 0.671, test_loss:0.656, train_acc: 0.82, test_acc: 0.84
Epoch: 005, train_loss: 0.657, test_loss:0.635, train_acc: 0.82, test_acc: 0.84
Epoch: 006, train_loss: 0.639, test_loss:0.606, train_acc: 0.82, test_acc: 0.84
Epoch: 007, train_loss: 0.613, test_loss:0.570, train_acc: 0.82, test_acc: 0.84
Epoch: 008, train_loss: 0.585, test_loss:0.525, train_acc: 0.82, test_acc: 0.84
Epoch: 009, train_loss: 0.554, test_loss:0.477, train_acc: 0.82, test_acc: 0.84
Epoch: 010, train_loss: 0.513, test_loss:0.433, train_acc: 0.82, test_acc: 0.84
Epoch: 011, train_loss: 0.503, test_loss:0.403, train_acc: 0.82, test_acc: 0.84
Epoch: 012, train_loss: 0.524, test_loss:0.388, train_acc: 0.82, test_acc: 0.84
Epoch: 013, train_loss: 0.501, test_loss

# References
[1] https://github.com/rusty1s/pytorch_geometric

[2] https://rusty1s.github.io/pytorch_geometric/build/html/notes/introduction.html

[3] https://tkipf.github.io/graph-convolutional-networks/