<a href="https://colab.research.google.com/github/AbhiJeet70/PowerfulGNNs/blob/main/SubStructureGNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@inproceedings{zeng2023substructure,
#title={Substructure aware graph neural networks},
#author={Zeng, Dingyi and Liu, Wanlong and Chen, Wenyu and Zhou, Li and Zhang, Malu and Qu, Hong},
#booktitle={Proc. of AAAI},
#volume={37},
#number={9},
#pages={11129--11137},
#year={2023}
#}

#SAGNN-Substructure-Aware-Graph-Neural-Networks

!pip install torch torch-geometric networkx

import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import subgraph
from torch_geometric.data import Data
from torch_geometric.datasets import Planetoid
from torch_geometric.loader import DataLoader
import networkx as nx



class SubstructureAwareGNN(nn.Module):
    """Node classifier that combines three views of every node.

    * an *ego* view: features averaged over the node's k-hop ego network and
      then passed through one message-passing layer on the full graph;
    * a *cut* view: node features passed through one message-passing layer on
      the graph with the highest edge-betweenness half of its edges removed;
    * a *global* view: a plain linear projection of the raw features.

    The three encodings are concatenated and mapped to class log-probabilities.

    Args:
        in_channels: Size of each input node feature vector.
        hidden_channels: Size of each of the three intermediate encodings.
        out_channels: Number of output classes.
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.ego_gnn = MessagePassingLayer(in_channels, hidden_channels)
        self.cut_gnn = MessagePassingLayer(in_channels, hidden_channels)
        self.global_encoder = nn.Linear(in_channels, hidden_channels)
        self.final_fc = nn.Linear(3 * hidden_channels, out_channels)
        # (copy of the last edge_index seen, its pruned counterpart). The cut
        # depends only on graph structure, so it is computed once per graph
        # instead of once per forward pass.
        self._cut_cache = None

    def forward(self, x, edge_index):
        """Return per-node class log-probabilities.

        Args:
            x: Node feature matrix with one row per node.
            edge_index: ``[2, num_edges]`` tensor of (source, target) indices.

        Returns:
            Tensor with one row per node holding ``log_softmax`` scores over
            the output classes.
        """
        # Extract subgraph views
        ego_features = self.extract_ego_subgraph(x, edge_index)
        cut_features = self.extract_cut_subgraph(x, edge_index)
        cut_edge_index = self._cut_edge_index(edge_index)

        # Apply GNN layers. The cut branch propagates over the pruned edges;
        # running it over the full edge_index would make the cut a no-op.
        ego_encoded = self.ego_gnn(ego_features, edge_index)
        cut_encoded = self.cut_gnn(cut_features, cut_edge_index)
        global_encoded = self.global_encoder(x)

        # Concatenate and pass through the final layer
        combined_features = torch.cat(
            [ego_encoded, cut_encoded, global_encoded], dim=-1
        )
        output = self.final_fc(combined_features)
        return F.log_softmax(output, dim=1)

    def extract_ego_subgraph(self, x, edge_index, k=2):
        """Average every node's features over its k-hop ego network.

        The ego network of node ``i`` contains ``i`` itself plus every node
        from which ``i`` can be reached by following at most ``k`` edges in
        their (source -> target) direction. This is the node set that
        ``torch_geometric.utils.k_hop_subgraph`` returns with its default
        flow, computed here for all nodes at once with sparse matrix products
        instead of one Python-level call per node.

        Args:
            x: 2-D node feature matrix with one row per node.
            edge_index: ``[2, num_edges]`` tensor of (source, target) indices.
            k: Number of hops (default 2).

        Returns:
            Tensor shaped like ``x`` whose row ``i`` is the mean feature
            vector of the ego network of node ``i``.
        """
        num_nodes = x.size(0)
        if num_nodes == 0:
            return torch.zeros_like(x)
        device = x.device
        size = (num_nodes, num_nodes)

        def pattern(indices, dtype=torch.float32):
            # Sparse 0/1 matrix with ones at the given (row, col) positions.
            ones = torch.ones(indices.size(1), dtype=dtype, device=device)
            return torch.sparse_coo_tensor(indices, ones, size).coalesce()

        node_ids = torch.arange(num_nodes, device=device)
        self_loops = torch.stack([node_ids, node_ids])
        # Row = target, column = source: row i lists the nodes that feed i.
        incoming = torch.stack([edge_index[1], edge_index[0]]).to(device)
        one_hop = pattern(torch.cat([self_loops, incoming], dim=1))

        # Grow the membership pattern one hop at a time. Only the positions of
        # the non-zeros matter, so values are reset to one after each product
        # to keep path counts from growing.
        reach_idx = self_loops
        for _ in range(k):
            product = torch.sparse.mm(pattern(reach_idx), one_hop)
            reach_idx = product.coalesce().indices()

        members = pattern(reach_idx, dtype=x.dtype)
        # Every row contains at least the node itself, so sizes are >= 1.
        sizes = torch.bincount(reach_idx[0], minlength=num_nodes)
        return torch.sparse.mm(members, x) / sizes.to(x.dtype).unsqueeze(1)

    def extract_cut_subgraph(self, x, edge_index):
        """Return the node features used by the cut branch.

        Rows of nodes that appear as an endpoint of at least one edge are
        copied from ``x``; rows of nodes that appear in no edge are zero.
        The edge pruning itself is applied by :meth:`forward` through the
        edge set the cut branch propagates over, because removing edges does
        not change which nodes are present.

        Args:
            x: Node feature matrix with one row per node.
            edge_index: ``[2, num_edges]`` tensor of (source, target) indices.

        Returns:
            Tensor shaped like ``x``.
        """
        cut_features = torch.zeros_like(x)
        endpoints = torch.unique(edge_index).to(x.device)
        cut_features[endpoints] = x[endpoints]
        return cut_features

    def _cut_edge_index(self, edge_index):
        """Return ``edge_index`` without the highest-betweenness half of edges.

        Edge betweenness is computed with networkx on the undirected graph
        built from ``edge_index``. The top ``num_undirected_edges // 2`` edges
        are dropped, in both directions when both are present. The result is
        cached and reused while the incoming ``edge_index`` has the same
        device, shape and contents as the one it was computed for.
        """
        cache = getattr(self, "_cut_cache", None)
        if cache is not None:
            cached_key, cached_pruned = cache
            if (
                cached_key.device == edge_index.device
                and cached_key.shape == edge_index.shape
                and torch.equal(cached_key, edge_index)
            ):
                return cached_pruned

        pairs = edge_index.t().tolist()
        graph = nx.Graph()
        graph.add_edges_from(pairs)
        betweenness = nx.edge_betweenness_centrality(graph)
        ranked = sorted(betweenness, key=betweenness.get, reverse=True)
        removed = {frozenset(edge) for edge in ranked[: len(ranked) // 2]}

        keep = torch.tensor(
            [frozenset(pair) not in removed for pair in pairs],
            dtype=torch.bool,
            device=edge_index.device,
        )
        pruned = edge_index[:, keep]
        # Store a copy as the key so later in-place edits of the caller's
        # tensor are detected by the equality check above.
        self._cut_cache = (edge_index.clone(), pruned)
        return pruned

class MessagePassingLayer(MessagePassing):
    """Single message-passing layer: linear projection, propagate, ReLU.

    The base class is configured with ``aggr="add"``.

    Args:
        in_channels: Size of each input node feature vector.
        out_channels: Size of each output node feature vector.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__(aggr="add")
        self.linear = nn.Linear(in_channels, out_channels)

    def forward(self, x, edge_index):
        """Project ``x`` with the linear layer, then propagate over ``edge_index``."""
        projected = self.linear(x)
        return self.propagate(edge_index, x=projected)

    def message(self, x_j):
        """Pass the neighbour's projected features through unchanged."""
        return x_j

    def update(self, aggr_out):
        """Apply ReLU to the aggregated messages."""
        return torch.relu(aggr_out)

# Training and Evaluation
if __name__ == "__main__":
    # Check for GPU availability
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")

    # Upper bound on training epochs; early stopping may end a run sooner.
    num_epochs = 200

    # Load Planetoid datasets and train/evaluate a fresh model on each one.
    datasets = ['Cora', 'CiteSeer', 'Pubmed']
    for dataset_name in datasets:
        dataset = Planetoid(root=f'./data/{dataset_name}', name=dataset_name)
        data = dataset[0].to(device)

        model = SubstructureAwareGNN(
            in_channels=dataset.num_node_features,
            hidden_channels=16,
            out_channels=dataset.num_classes
        ).to(device)

        optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
        # The model returns log-probabilities, so NLLLoss is the matching loss.
        criterion = nn.NLLLoss()

        # Early stopping: stop after `patience` consecutive epochs without a
        # new best training loss.
        # NOTE(review): this monitors the training loss, not a validation
        # loss, so it detects convergence rather than overfitting.
        best_loss = float('inf')
        patience = 10
        patience_counter = 0

        model.train()
        for epoch in range(num_epochs):
            optimizer.zero_grad()
            out = model(data.x, data.edge_index)
            loss = criterion(out[data.train_mask], data.y[data.train_mask])
            loss.backward()
            optimizer.step()

            # Read the scalar once per epoch instead of once per use.
            loss_value = loss.item()

            if loss_value < best_loss:
                best_loss = loss_value
                patience_counter = 0
            else:
                patience_counter += 1

            if epoch % 10 == 0 or epoch == num_epochs - 1:
                print(f"Dataset: {dataset_name}, Epoch: {epoch}, Loss: {loss_value:.4f}")

            if patience_counter >= patience:
                print(f"Early stopping on epoch {epoch} with best loss {best_loss:.4f}")
                break

        model.eval()
        # No gradients are needed for evaluation; skip building the graph.
        with torch.no_grad():
            _, pred = model(data.x, data.edge_index).max(dim=1)
        correct = int((pred[data.test_mask] == data.y[data.test_mask]).sum())
        acc = correct / int(data.test_mask.sum())
        print(f"Accuracy on {dataset_name}: {acc:.4f}")


Using device: cuda
Dataset: Cora, Epoch: 0, Loss: 1.9545
Dataset: Cora, Epoch: 10, Loss: 0.0416
Dataset: Cora, Epoch: 20, Loss: 0.0008
Dataset: Cora, Epoch: 30, Loss: 0.0002
Dataset: Cora, Epoch: 40, Loss: 0.0003
Early stopping on epoch 43 with best loss 0.0002
Accuracy on Cora: 0.7630


Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.test.index
Processing...
Done!


Dataset: CiteSeer, Epoch: 0, Loss: 1.7947
Dataset: CiteSeer, Epoch: 10, Loss: 0.0029
Dataset: CiteSeer, Epoch: 20, Loss: 0.0000
Dataset: CiteSeer, Epoch: 30, Loss: 0.0000
Early stopping on epoch 35 with best loss 0.0000
Accuracy on CiteSeer: 0.6520
