In [1]:
!pip install torch_geometric

Collecting torch_geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/63.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
Downloading torch_geometric-2.6.1-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m18.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch_geometric
Successfully installed torch_geometric-2.6.1


In [2]:
import networkx as nx
from torch_geometric.data import Data, Dataset

# Generate synthetic graphs
def generate_graphs(num_samples=100, seed=None):
    """Generate a labelled synthetic dataset made of three graph families.

    For each family, ``num_samples`` graphs are created and appended in order:
    Erdős-Rényi (label 0), Barabási-Albert (label 1) and a 5x4 2D grid (label 2).
    The grid is built from fixed arguments only, so every grid sample is
    constructed identically.

    Args:
        num_samples: Number of graphs to generate per class.
        seed: Optional seed for the two random families. ``None`` (the default)
            keeps the previous behaviour of relying on the library's default
            randomness; any other value makes repeated calls return the same
            sequence of random graphs.

    Returns:
        A ``(graphs, labels)`` tuple of two lists of length ``3 * num_samples``,
        where ``labels[i]`` is the integer class of ``graphs[i]``.
    """
    import random  # local import: only needed to build the shared generator

    # One generator shared by all random graphs, so a single seed fixes the
    # whole dataset while successive graphs still differ from each other.
    rng = random.Random(seed) if seed is not None else None

    graphs = []
    labels = []

    # Class 0: Erdős-Rényi (random)
    for _ in range(num_samples):
        graphs.append(nx.erdos_renyi_graph(n=20, p=0.15, seed=rng))
        labels.append(0)

    # Class 1: Barabási-Albert (scale-free)
    for _ in range(num_samples):
        graphs.append(nx.barabasi_albert_graph(n=20, m=3, seed=rng))
        labels.append(1)

    # Class 2: ordered 5x4 grid (no randomness involved)
    for _ in range(num_samples):
        graphs.append(nx.grid_2d_graph(5, 4))
        labels.append(2)

    return graphs, labels

# Build the synthetic dataset with the default of 100 graphs per class.
graphs, labels = generate_graphs()

# Convert graphs to PyTorch Geometric Format

Add node features to each graph and convert it to a PyG `Data` object.

https://pytorch-geometric.readthedocs.io/en/latest/

In [3]:
from torch_geometric.utils import from_networkx
import torch

def preprocess_graph(G, label):
    """Convert a NetworkX graph into a PyG data object with features and a label.

    Args:
        G: The NetworkX graph to convert.
        label: Integer class label stored as the graph-level target.

    Returns:
        The object returned by ``from_networkx(G)`` with two attributes set:
        ``x``, a float tensor with one ``[degree, clustering coefficient]`` row
        per node (in ``G.nodes()`` order), and ``y``, a long tensor holding
        ``label``.
    """
    # Convert to PyG Data
    data = from_networkx(G)

    # Compute all clustering coefficients in one call instead of one call per node.
    clustering = nx.clustering(G)
    features = [[G.degree(node), clustering[node]] for node in G.nodes()]

    # reshape keeps the (num_nodes, 2) layout even when the graph has no nodes;
    # without it an empty graph would produce a 1-D tensor of shape (0,).
    data.x = torch.tensor(features, dtype=torch.float).reshape(-1, 2)

    # Add graph label
    data.y = torch.tensor([label], dtype=torch.long)

    return data

# Convert every (graph, label) pair into a PyG data object with node features and a graph label.
dataset = [preprocess_graph(G, label) for G, label in zip(graphs, labels)]

# Define GNN Classifier

Use a 2-layer or 3-layer GCN with global pooling.

Other GNN architectures can be used as well.

In [4]:
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, global_mean_pool

class GraphClassifier(torch.nn.Module):
    """Two GCN layers, mean pooling over each graph, then a linear classifier.

    Args:
        hidden_dim: Output width of both graph-convolution layers.
        num_classes: Number of classes produced by the final linear layer.
        num_node_features: Number of input features per node. Defaults to 2,
            matching the (degree, clustering coefficient) features used in
            this notebook.
    """

    def __init__(self, hidden_dim=64, num_classes=3, num_node_features=2):
        super().__init__()
        self.conv1 = GCNConv(num_node_features, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, hidden_dim)
        self.fc = torch.nn.Linear(hidden_dim, num_classes)

    def forward(self, data):
        """Return log-probabilities over the classes for the graphs in ``data``.

        ``data`` must expose ``x``, ``edge_index`` and ``batch``; node embeddings
        are pooled with ``global_mean_pool`` using ``data.batch`` before the
        linear layer, and ``log_softmax`` is taken along dimension 1.
        """
        x, edge_index, batch = data.x, data.edge_index, data.batch
        x = self.conv1(x, edge_index).relu()
        x = self.conv2(x, edge_index)  # no activation between the last conv and pooling
        x = global_mean_pool(x, batch)  # Graph-level embedding
        return F.log_softmax(self.fc(x), dim=1)

# Train the Model

Split data and train the model.

In [5]:
from torch_geometric.loader import DataLoader
from sklearn.model_selection import train_test_split


# accuracy plotting

import matplotlib.pyplot as plt


# Split dataset (fixed random_state so the train/test split is reproducible)
train_data, test_data = train_test_split(dataset, test_size=0.2, random_state=42)

# Create data loaders
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(test_data, batch_size=32)

# Initialize model and optimizer
model = GraphClassifier()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.NLLLoss()  # the model already returns log-probabilities

# Accuracy on the held-out split after each epoch; read by the plot below.
# Without this list the plotting code fails with a NameError on a fresh kernel.
val_accuracies = []

# Training loop
for epoch in range(100):
    model.train()
    for batch in train_loader:
        optimizer.zero_grad()
        out = model(batch)
        loss = criterion(out, batch.y)
        loss.backward()
        optimizer.step()

    # Validate (no gradients are needed while scoring the held-out split)
    model.eval()
    correct = 0
    with torch.no_grad():
        for batch in test_loader:
            pred = model(batch).argmax(dim=1)
            correct += (pred == batch.y).sum().item()
    accuracy = correct / len(test_loader.dataset)
    val_accuracies.append(accuracy)
    print(f'Epoch {epoch}, Accuracy: {accuracy:.2f}')


# Plotting
plt.figure(figsize=(10, 5))
plt.plot(range(1, len(val_accuracies)+1), val_accuracies, label='Validation Accuracy', marker='o')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Validation Accuracy per Epoch')
plt.legend()
plt.grid(True)
plt.show()

Epoch 0, Accuracy: 0.37
Epoch 1, Accuracy: 0.27
Epoch 2, Accuracy: 0.65
Epoch 3, Accuracy: 0.78
Epoch 4, Accuracy: 0.77
Epoch 5, Accuracy: 0.87
Epoch 6, Accuracy: 0.87
Epoch 7, Accuracy: 0.98
Epoch 8, Accuracy: 0.95
Epoch 9, Accuracy: 0.97
Epoch 10, Accuracy: 0.93
Epoch 11, Accuracy: 1.00
Epoch 12, Accuracy: 1.00
Epoch 13, Accuracy: 1.00
Epoch 14, Accuracy: 0.93
Epoch 15, Accuracy: 0.95
Epoch 16, Accuracy: 1.00
Epoch 17, Accuracy: 1.00
Epoch 18, Accuracy: 1.00
Epoch 19, Accuracy: 1.00
Epoch 20, Accuracy: 1.00
Epoch 21, Accuracy: 1.00
Epoch 22, Accuracy: 1.00
Epoch 23, Accuracy: 1.00
Epoch 24, Accuracy: 1.00
Epoch 25, Accuracy: 1.00
Epoch 26, Accuracy: 1.00
Epoch 27, Accuracy: 1.00
Epoch 28, Accuracy: 1.00
Epoch 29, Accuracy: 1.00
Epoch 30, Accuracy: 1.00
Epoch 31, Accuracy: 1.00
Epoch 32, Accuracy: 1.00
Epoch 33, Accuracy: 1.00
Epoch 34, Accuracy: 1.00
Epoch 35, Accuracy: 1.00
Epoch 36, Accuracy: 1.00
Epoch 37, Accuracy: 1.00
Epoch 38, Accuracy: 1.00
Epoch 39, Accuracy: 1.00
Epoch 40, 

The accuracy values above are plotted per epoch to show at which point the GNN actually 'learned' to distinguish the graph structures.



In [12]:

'''
# accuracy plotting

import matplotlib.pyplot as plt

# Lists to track metrics
train_losses = []
val_accuracies = []

# Training loop
for epoch in range(100):
    model.train()
    epoch_loss = 0
    for batch in train_loader:
        optimizer.zero_grad()
        out = model(batch)
        loss = criterion(out, batch.y)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    train_losses.append(epoch_loss / len(train_loader))

    # Validation
    model.eval()
    correct = 0
    for batch in test_loader:
        pred = model(batch).argmax(dim=1)
        correct += (pred == batch.y).sum().item()
    accuracy = correct / len(test_loader.dataset)
    val_accuracies.append(accuracy)

    print(f'Epoch {epoch+1}, Loss: {train_losses[-1]:.4f}, Acc: {val_accuracies[-1]:.4f}')

# Plotting
plt.figure(figsize=(10, 5))
plt.plot(range(1, len(val_accuracies)+1), val_accuracies, label='Validation Accuracy', marker='o')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Validation Accuracy per Epoch')
plt.legend()
plt.grid(True)
plt.show()

plt.figure(figsize=(10, 5))
plt.plot(train_losses, label='Training Loss', color='red')
plt.twinx()
plt.plot(val_accuracies, label='Validation Accuracy', color='blue')
plt.title('Training Loss & Validation Accuracy')
plt.legend()
plt.show()
'''


"\n# accuracy plotting \n\nimport matplotlib.pyplot as plt\n\n# Lists to track metrics\ntrain_losses = []\nval_accuracies = []\n\n# Training loop\nfor epoch in range(100):\n    model.train()\n    epoch_loss = 0\n    for batch in train_loader:\n        optimizer.zero_grad()\n        out = model(batch)\n        loss = criterion(out, batch.y)\n        loss.backward()\n        optimizer.step()\n        epoch_loss += loss.item()\n    train_losses.append(epoch_loss / len(train_loader))\n    \n    # Validation\n    model.eval()\n    correct = 0\n    for batch in test_loader:\n        pred = model(batch).argmax(dim=1)\n        correct += (pred == batch.y).sum().item()\n    accuracy = correct / len(test_loader.dataset)\n    val_accuracies.append(accuracy)\n    \n    print(f'Epoch {epoch+1}, Loss: {train_losses[-1]:.4f}, Acc: {val_accuracies[-1]:.4f}')\n\n# Plotting\nplt.figure(figsize=(10, 5))\nplt.plot(range(1, len(val_accuracies)+1), val_accuracies, label='Validation Accuracy', marker='o')\np

In [9]:
def classify_graph(model, G):
    """Predict which graph family ``G`` belongs to and return its name.

    The graph is run through ``preprocess_graph`` (with a placeholder label) so
    that it carries the same features as the training data, every node is
    assigned to batch 0, and the model is evaluated without gradient tracking.

    Args:
        model: Classifier called on the preprocessed graph.
        G: The graph to classify.

    Returns:
        One of 'Erdős-Rényi', 'Barabási-Albert' or 'Grid'.
    """
    class_names = ['Erdős-Rényi', 'Barabási-Albert', 'Grid']

    # The label is never read at inference time; -1 is only a placeholder.
    sample = preprocess_graph(G, label=-1)
    # All nodes belong to the one graph being classified.
    sample.batch = torch.zeros(sample.num_nodes, dtype=torch.long)

    model.eval()
    with torch.no_grad():
        scores = model(sample)

    predicted_index = scores.argmax().item()
    return class_names[predicted_index]

# Example usage
#new_graph = nx.barabasi_albert_graph(n=20, m=3)  # Should predict BA

new_graph = nx.erdos_renyi_graph(n=20, p=0.1
                                )  # Should predict Erdős-Rényi

# Observation: Erdős-Rényi is predicted for sparse graphs such as p=0.1, but not for dense ones (p > 0.5).
# NOTE(review): presumably because every ER training graph uses p=0.15 — confirm.

print(classify_graph(model, new_graph))  # Output recorded for this run: "Erdős-Rényi"

Erdős-Rényi
