# Graph classification with Graph Convolutional Networks in PyTorch


In [14]:
import numpy as np
import torch
import torch.nn.functional as F
from torch import nn
import torchnet as tnt
import networkx as nx

## Load data and graph utils

Here we load the MUTAG dataset as a list of `networkx` graphs and transform it into a PyTorch dataset. Each node carries a label from 0 to 6, which is one-hot encoded and used as the node's feature vector. Of the 188 graphs, we use the first 150 for training and the remaining 38 for validation. Each graph belongs to one of two classes.

In [15]:
import os
import sys
# Directory the notebook is executed from; the MUTAG path below is built from it.
cwd = os.getcwd()
# Append the MUTAG folder (one level above the working directory) to the
# module search path.
sys.path.append(cwd + '/../MUTAG')


""" Download MUTAG dataset"""
""" Extra graph utils and data loading stuff"""

def indices_to_one_hot(number, nb_classes, label_dummy=-1):
    """Return the one-hot encoding of a class index.

    Args:
        number: Class index to encode. It is compared against ``label_dummy``
            and otherwise used to select a row of an identity matrix, so for
            an integer it must be a valid index into ``nb_classes`` rows.
        nb_classes: Number of classes, i.e. the length of the encoding.
        label_dummy: Sentinel value meaning "no label". When ``number`` equals
            it, an all-zero vector is returned instead of a one-hot row.

    Returns:
        A float NumPy array of length ``nb_classes``: row ``number`` of the
        ``nb_classes x nb_classes`` identity matrix, or only zeros for the
        dummy label.
    """
    # Guard clause: the dummy label has no "hot" position at all.
    if number == label_dummy:
        return np.zeros(nb_classes)

    # Row `number` of the identity matrix is exactly the one-hot vector.
    identity = np.eye(nb_classes)
    return identity[number]

def get_graph_signal(nx_graph):
    """Collect the per-node feature vectors of a graph into one array.

    Args:
        nx_graph: Graph whose ``nodes`` mapping yields ``(node, attributes)``
            pairs through ``.items()``. Every attribute dict must store the
            node's feature vector under the key ``'attr_dict'``.

    Returns:
        ``np.ndarray`` built from the feature vectors, one entry per node, in
        the iteration order of ``nx_graph.nodes``.

    Raises:
        KeyError: If a node has no ``'attr_dict'`` attribute.
    """
    # The node-to-position map and running counter that used to be built here
    # were never read, so only the feature vectors are gathered.
    return np.array([attrs['attr_dict'] for _, attrs in nx_graph.nodes.items()])


def _iter_nonblank_lines(file_path):
    """Yield each non-blank line of a text file with surrounding whitespace removed."""
    with open(file_path, "r") as f:
        for line in f:
            line = line.strip()
            if line:
                yield line


def load_data(path, ds_name, use_node_labels=True, max_node_label=10):
    """Read a graph-classification dataset from text files into networkx graphs.

    The following files are read from ``path``:

    * ``{ds_name}_graph_indicator.txt`` - line ``i`` holds the id of the graph
      that node ``i`` belongs to (nodes are numbered from 1 in file order).
    * ``{ds_name}_A.txt`` - one comma-separated node pair per line; the edge is
      added to the graph that owns the first node.
    * ``{ds_name}_node_labels.txt`` - line ``i`` holds the integer label of
      node ``i`` (only read when ``use_node_labels`` is true).
    * ``{ds_name}_graph_labels.txt`` - one integer label per graph.

    Args:
        path: Directory that contains the four files.
        ds_name: Dataset name used as the file-name prefix.
        use_node_labels: When true, every node gets its one-hot encoded label
            stored under the node attribute ``'attr_dict'``.
        max_node_label: Number of classes used for the one-hot encoding.

    Returns:
        A list of ``(graph, label)`` pairs, one per graph, in file order.

    Notes:
        A new graph is started whenever a node's graph id differs from the
        number of graphs created so far, so the indicator file is expected to
        use consecutive graph ids starting at 1, grouped by graph.
    """
    path_graph_indicator = os.path.join(path, f"{ds_name}_graph_indicator.txt")
    path_adj = os.path.join(path, f"{ds_name}_A.txt")
    path_node_lab = os.path.join(path, f"{ds_name}_node_labels.txt")
    path_labels = os.path.join(path, f"{ds_name}_graph_labels.txt")

    node2graph = {}  # node id -> id of the graph that owns the node
    Gs = []          # graphs, in order of first appearance

    # Lines are stripped instead of sliced with [:-1]: slicing silently drops
    # the last digit of a final line that has no trailing newline.
    for node_id, text in enumerate(_iter_nonblank_lines(path_graph_indicator), start=1):
        graph_id = int(text)
        node2graph[node_id] = graph_id
        if graph_id != len(Gs):
            # The id moved past the graphs created so far: start the next one.
            Gs.append(nx.Graph())
        Gs[-1].add_node(node_id)

    # Each adjacency line is "u, v"; int() tolerates the blank after the comma.
    for text in _iter_nonblank_lines(path_adj):
        fields = text.split(",")
        src, dst = int(fields[0]), int(fields[1])
        Gs[node2graph[src] - 1].add_edge(src, dst)

    # Attach the one-hot encoded node label as the feature vector of each node.
    if use_node_labels:
        for node_id, text in enumerate(_iter_nonblank_lines(path_node_lab), start=1):
            node_label = indices_to_one_hot(int(text), max_node_label)
            Gs[node2graph[node_id] - 1].add_node(node_id, attr_dict=node_label)

    labels = [int(text) for text in _iter_nonblank_lines(path_labels)]

    return list(zip(Gs, labels))

def create_loaders(dataset, batch_size, split_id, offset=-1):
    """Split ``dataset`` at ``split_id`` and wrap both halves in data loaders.

    Args:
        dataset: Sliceable sequence of samples accepted by
            ``to_pytorch_dataset``.
        batch_size: Batch size forwarded to both loaders.
        split_id: Index of the first validation sample; everything before it
            is used for training.
        offset: Value forwarded to ``to_pytorch_dataset`` as ``label_offset``.

    Returns:
        The pair ``(train_loader, val_loader)``.
    """
    train_loader = to_pytorch_dataset(dataset[:split_id], offset, batch_size)
    val_loader = to_pytorch_dataset(dataset[split_id:], offset, batch_size)
    return train_loader, val_loader

def to_pytorch_dataset(dataset, label_offset=0, batch_size=1):
    """Turn ``(graph, label)`` pairs into a shuffling PyTorch ``DataLoader``.

    Args:
        dataset: Iterable of ``(networkx graph, integer label)`` pairs.
        label_offset: Value added to every label after a label of ``-1`` has
            been remapped to ``0``.
        batch_size: Batch size of the returned loader.

    Returns:
        A ``torch.utils.data.DataLoader`` (``shuffle=True``) whose samples are
        ``(node_features, adjacency, label)`` with both tensors cast to float.
    """
    samples = []
    for graph, label in dataset:
        # Local names deliberately avoid `F`, which is the module-level alias
        # of torch.nn.functional.
        node_features = get_graph_signal(graph)
        # nx.to_numpy_matrix was removed in networkx 3.0; to_numpy_array
        # returns the same dense adjacency as a plain ndarray.
        adjacency = nx.to_numpy_array(graph)

        features_tensor = torch.from_numpy(node_features).float()
        adjacency_tensor = torch.from_numpy(adjacency).float()

        # fix labels to zero-indexing
        if label == -1:
            label = 0

        label += label_offset

        samples.append((features_tensor, adjacency_tensor, label))

    dataset_tnt = tnt.dataset.ListDataset(samples)
    data_loader = torch.utils.data.DataLoader(dataset_tnt, shuffle=True, batch_size=batch_size)
    return data_loader



# Parse the MUTAG text files into a list of (networkx graph, graph label) pairs.
dataset = load_data(
    path='../MUTAG',
    ds_name='MUTAG',
    use_node_labels=True,
    max_node_label=7,
)

# Despite the `_dataset` suffix, both names below are DataLoaders. Each batch
# they yield is (node-feature tensor, adjacency tensor, label). The first 150
# graphs go to training and the remaining ones to validation.
train_dataset, val_dataset = create_loaders(dataset, split_id=150, batch_size=1, offset=0)
print('Data are ready')

Data are ready


## GCN Layer

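A GCN layer is little more than a chain of matrix multiplications between the normalized adjacency matrix with self-loops, $L_{norm} = D^{-1/2}(A + I)D^{-1/2}$, the node-feature matrix $X$, and a learnable weight matrix $W$: $H = L_{norm} X W$.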

In [16]:
def device_as(x,y):
    """Return ``x`` placed on the device that ``y`` lives on."""
    target_device = y.device
    return x.to(target_device)

# tensor operations now support batched inputs
def calc_degree_matrix_norm(a):
    """Build the diagonal matrix D^(-1/2) from an adjacency tensor.

    Steps:
        1. ``a.sum(dim=-1)`` sums the elements along the last dimension,
           giving one value per row (the result has one dimension fewer
           than ``a``).
        2. ``torch.pow(..., -0.5)`` raises every row sum to the power -0.5,
           element-wise.
        3. ``torch.diag_embed(...)`` places those values on the diagonal of
           an otherwise zero-filled matrix; leading (batch) dimensions are
           kept.

    Args:
        a: Adjacency tensor whose last dimension is summed to obtain the
            row sums (degrees).

    Returns:
        Tensor with the same leading dimensions as ``a`` and the values
        ``row_sum ** -0.5`` on the diagonal of its last two dimensions.

    Note:
        A row that sums to zero yields ``inf`` on the diagonal.
    """
    degrees = a.sum(dim=-1)
    return torch.diag_embed(torch.pow(degrees, -0.5)) #D^(-1/2)
    
def create_graph_lapl_norm(a): #L_norm
    """Compute L_norm = D^(-1/2) (A + I) D^(-1/2) for a batch of graphs.

    Args:
        a: Batched adjacency tensor; it is passed to ``torch.bmm``, so it
            must be 3-D with square matrices in the last two dimensions.

    Returns:
        A new tensor holding the normalized matrix for every graph in the
        batch. The input ``a`` is left untouched.
    """
    size = a.shape[-1]
    # Add the self-loops out of place. The previous `a += I` modified the
    # caller's tensor, so a model that passes the same adjacency to several
    # layers ended up with A + I, A + 2I, A + 3I, ...
    a_hat = a + device_as(torch.eye(size), a) #A_norm = A + I
    D_norm = calc_degree_matrix_norm(a_hat) #D^(-1/2) --> diagonal
    L_norm = torch.bmm(torch.bmm(D_norm, a_hat), D_norm) #L_norm = D^(-1/2)*A_norm*D^(-1/2)
    return L_norm

#1. BUILD YOUR GCN LAYER
class GCN_Layer(nn.Module):
    """
    Single graph-convolution layer: a linear map applied to the node features,
    followed by propagation with the normalized matrix built from the adjacency.
    """

    def __init__(self, in_features, out_features, bias=True):
        super(GCN_Layer, self).__init__()
        # Learnable projection of the node features: X*W (+ bias)
        self.linear = nn.Linear(in_features=in_features,
                                out_features=out_features,
                                bias=bias)

    def forward(self, X, A):
        """
        X: graph signal (node features)
        A: adjacency matrix

        Both are combined with torch.bmm, so they must be batched 3-D tensors.
        """
        laplacian = create_graph_lapl_norm(A)  # D^(-1/2) (A + I) D^(-1/2)
        projected = self.linear(X)
        return torch.bmm(laplacian, projected)
       
        


## Graph Neural Network

Now let's stack three `GCN_Layer`s to construct a full Graph Neural Network. The node embeddings produced by the last layer are averaged, and a final `Linear` layer outputs the classification scores for the 2 categories.

In [17]:
import torch
import torch.nn as nn

class GNN(nn.Module):
    """Three stacked GCN layers, a mean over dim 1 and a linear classifier."""

    def __init__(self,
                    in_features = 7,
                    hidden_dim = 64,
                    classes = 2,
                    dropout = 0.5):
        super().__init__()

        # Graph convolutions: input -> hidden -> hidden -> hidden
        self.conv1 = GCN_Layer(in_features, hidden_dim)
        self.conv2 = GCN_Layer(hidden_dim, hidden_dim)
        self.conv3 = GCN_Layer(hidden_dim, hidden_dim)
        # Classification head applied to the pooled graph embedding
        self.fc = nn.Linear(hidden_dim, classes)
        # Dropout probability, applied in forward() only while training
        self.dropout = dropout

    def forward(self, x,A):
        # The first two convolutions are each followed by a ReLU.
        for conv in (self.conv1, self.conv2):
            x = F.relu(conv(x, A))
        # The third convolution has no activation; dropout follows it.
        x = self.conv3(x, A)
        x = F.dropout(x, p=self.dropout, training=self.training)
        # Aggregate the node embeddings into one vector per graph.
        pooled = x.mean(dim=1)
        # Final classification layer.
        return self.fc(pooled)


## Training loop


In [22]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f'Training on {device}')

# Loss for the two-class graph classification task.
criterion = nn.CrossEntropyLoss()

# Build the network and move its parameters to the chosen device.
model = GNN(in_features=7, hidden_dim=128, classes=2)
model = model.to(device)

# Plain SGD over all model parameters.
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

def train(train_loader):
    """Run one optimisation pass over ``train_loader``.

    Uses the module-level ``model``, ``optimizer``, ``criterion`` and
    ``device``. Every batch is expected to unpack into
    ``(X, A, labels)``; nothing is returned.
    """
    model.train()

    for X, A, labels in train_loader:
        # Move the whole batch to the device the model lives on.
        X = X.to(device)
        A = A.to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()
        # Forward pass and graph classification loss.
        loss = criterion(model(X, A), labels)
        # Back-propagate and update the parameters.
        loss.backward()
        optimizer.step()

def test(loader):
    """Return the classification accuracy of ``model`` on ``loader``.

    Uses the module-level ``model`` and ``device``. Every batch is expected
    to unpack into ``(X, A, labels)``.

    Args:
        loader: Data loader to evaluate; its ``dataset`` attribute must
            support ``len()``.

    Returns:
        Number of correct predictions divided by ``len(loader.dataset)``.
    """
    model.eval()
    correct = 0
    # Evaluation needs no gradients: disabling autograd avoids building a
    # computation graph for every batch.
    with torch.no_grad():
        for data in loader:
            X, A, labels = data
            X, A, labels = X.to(device), A.to(device), labels.to(device)
            # Forward pass.
            out = model(X, A)
            # Take the index of the class with the highest score.
            pred = out.argmax(dim=1)
            # Compare with ground-truth labels.
            correct += int((pred == labels).sum())
    return correct / len(loader.dataset)

# Start below any reachable accuracy so the first epoch always sets the best score.
best_val = -1
for epoch in range(1, 241):
    train(train_dataset)
    train_acc = test(train_dataset)
    val_acc = test(val_dataset)

    # Remember the best validation accuracy so far and the epoch it occurred in.
    if best_val < val_acc:
        best_val, epoch_best = val_acc, epoch

    # Report progress on every 10th epoch only.
    if epoch % 10 != 0:
        continue
    print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f} || Best Val Score: {best_val:.4f} (Epoch {epoch_best:03d}) ')

Training on cuda


RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument mat1 in method wrapper_CUDA_addmm)