In [1]:
!pip install torch_geometric

Collecting torch_geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/63.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
Downloading torch_geometric-2.6.1-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m24.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch_geometric
Successfully installed torch_geometric-2.6.1


In [2]:
import pandas as pd
import numpy as np

# Folder holding every input CSV; change this one constant to relocate the data.
DATA_DIR = '/content/drive/MyDrive/London_heat/Connections'

# Load multigraph edges (pairwise connections, one edge per row)
spatial_weights_edges1 = pd.read_csv(f'{DATA_DIR}/spatial_weights_connections.csv')
waterway_connections_edges2 = pd.read_csv(f'{DATA_DIR}/waterway_connections.csv')

# Load hyperedge memberships. Later cells read these frames through their 'source' and
# 'target' columns and group the 'source' values that share a 'target' into one hyperedge,
# so each row is a single (member node, hyperedge) pair rather than a full hyperedge.
LC_hyperedges1 = pd.read_csv(f'{DATA_DIR}/LC_area_connections.csv')
LOAC_hyperedges2 = pd.read_csv(f'{DATA_DIR}/LOAC_area_connections.csv')

# Load node features and labels
nodes = pd.read_csv(f'{DATA_DIR}/data_features_class.csv')

# Ensure the column names are correctly mapped.
# Assigning `.columns` raises if a CSV does not have exactly this many columns.
spatial_weights_edges1.columns = ['source', 'target']
waterway_connections_edges2.columns = ['source', 'target']

# Layout relied on downstream: first column is the node id, last column is the class
# label, everything in between is a numeric feature.
nodes.columns = [
    'MSOA_CODE',
    'road', 'sidewalk', 'building', 'wall', 'fence', 'pole',
    'traffic light', 'traffic sign', 'vegetation', 'terrain', 'sky',
    'person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle', 'bicycle',
    'subset_vulunability-socio_ZH_VULN_IN_class',
]


Build the MultiGraph first.

In [3]:

import networkx as nx
import numpy as np

# Create a MultiGraph
G = nx.MultiDiGraph()  # or nx.MultiGraph() for an undirected graph

# Each relation gets its own edge key, so a node pair linked in both CSVs keeps two
# parallel edges, while a pair repeated inside one CSV collapses onto one keyed edge.
for relation_key, edge_frame in (('relation1', spatial_weights_edges1),
                                 ('relation2', waterway_connections_edges2)):
    for source, target in zip(edge_frame['source'], edge_frame['target']):
        G.add_edge(source, target, key=relation_key)

# Add node features and labels
default_features = np.zeros(nodes.shape[1] - 2)  # Default features if a node doesn't have them
default_label = -1  # Default label if a node doesn't have it

# Index the node table once instead of scanning it for every graph node.
# keep='first' mirrors the previous "take the first matching row" behaviour.
node_table = nodes.drop_duplicates(subset='MSOA_CODE', keep='first').set_index('MSOA_CODE')
# After set_index the id column is gone, so "all but the last column" are the features.
features_by_code = dict(zip(node_table.index, node_table.iloc[:, :-1].to_numpy(dtype=float)))
label_by_code = node_table['subset_vulunability-socio_ZH_VULN_IN_class'].to_dict()

# Every node receives both attributes here, so no second "fill in missing" pass is needed.
for node in G.nodes():
    if node in features_by_code:
        G.nodes[node]['features'] = features_by_code[node]
        G.nodes[node]['label'] = label_by_code[node]
    else:
        # Copy so that nodes without data do not all share one mutable array.
        G.nodes[node]['features'] = default_features.copy()
        G.nodes[node]['label'] = default_label


Hypergraph

In [4]:
from sklearn.preprocessing import LabelEncoder

# Encode node IDs to integers.
# A single encoder is fitted on every id that appears in the node table or in either
# hyperedge frame, so the same raw id always maps to the same integer.
le = LabelEncoder()
all_nodes = pd.concat([
    nodes['MSOA_CODE'],
    LC_hyperedges1['source'],
    LC_hyperedges1['target'],
    LOAC_hyperedges2['source'],
    LOAC_hyperedges2['target'],
])
le.fit(all_nodes)

# NOTE(review): the raw ids are overwritten in place, so re-running this cell refits the
# encoder on already-encoded integers — confirm this is acceptable.
nodes['MSOA_CODE'] = le.transform(nodes['MSOA_CODE'])
for hyperedge_frame in (LC_hyperedges1, LOAC_hyperedges2):
    for endpoint in ('source', 'target'):
        hyperedge_frame[endpoint] = le.transform(hyperedge_frame[endpoint])

import numpy as np

def create_hyperedge_incidence_matrix(edges, num_nodes):
    """Build a dense node-by-hyperedge incidence matrix.

    Every distinct value of ``edges['target']`` defines one hyperedge (one column,
    numbered in order of first appearance); every ``edges['source']`` value on a row
    with that target is a member node (row index) of that hyperedge.

    Parameters
    ----------
    edges : pandas.DataFrame
        Must provide integer-valued 'source' (node row index) and 'target'
        (hyperedge identifier) columns. Other columns are ignored.
        Rows whose 'target' is missing are skipped.
    num_nodes : int
        Number of rows of the returned matrix.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(num_nodes, num_hyperedges)`` holding 1 where the node
        belongs to the hyperedge and 0 elsewhere.

    Raises
    ------
    IndexError
        If a 'source' value is not an integer in ``[0, num_nodes)``. Negative values
        are rejected rather than wrapping around to the last rows.
    """
    # Read the columns directly: row-wise iteration would upcast integer ids to float
    # whenever the frame carries any float column, and floats cannot index an array.
    member_rows = edges['source'].to_numpy()
    # factorize numbers the targets by first appearance; missing targets get code -1.
    hyperedge_cols, hyperedge_ids = pd.factorize(edges['target'])
    incidence_matrix = np.zeros((num_nodes, len(hyperedge_ids)))

    has_target = hyperedge_cols >= 0
    member_rows = member_rows[has_target]
    hyperedge_cols = hyperedge_cols[has_target]
    if member_rows.size == 0:
        return incidence_matrix

    if not np.issubdtype(member_rows.dtype, np.integer):
        # Accept integer-valued floats/objects (e.g. 3.0) but nothing fractional or NaN.
        numeric_rows = member_rows.astype(float)
        if not np.isfinite(numeric_rows).all() or (numeric_rows != np.round(numeric_rows)).any():
            raise IndexError("'source' must contain integer node indices")
        member_rows = numeric_rows
    member_rows = member_rows.astype(np.intp)

    if member_rows.min() < 0 or member_rows.max() >= num_nodes:
        raise IndexError(
            f"'source' indices must lie in [0, {num_nodes}); "
            f"got range [{member_rows.min()}, {member_rows.max()}]"
        )

    incidence_matrix[member_rows, hyperedge_cols] = 1
    return incidence_matrix

# One incidence-matrix row per row of the node table.
# NOTE(review): this presumes every encoded 'source' id is below len(nodes) — confirm.
num_nodes = len(nodes)
incidence_matrix1, incidence_matrix2 = (
    create_hyperedge_incidence_matrix(hyperedge_frame, num_nodes)
    for hyperedge_frame in (LC_hyperedges1, LOAC_hyperedges2)
)


In [5]:
from torch_geometric.utils import from_networkx
from torch_geometric.data import Data
from sklearn.model_selection import train_test_split
import torch
# Convert the NetworkX multigraph to PyTorch Geometric format.
# NOTE(review): the integer node ids in the resulting edge_index presumably follow G's
# own node iteration order rather than the row order of `nodes` — confirm before pairing
# this edge_index with features built from `nodes`.
multigraph_data = from_networkx(G)



def incidence_matrix_to_hyperedge_index(incidence_matrix):
    """Return the non-zero entries of a 2-D incidence matrix as a (2, nnz) index array.

    Row 0 of the result holds the matrix row (node) of each non-zero entry and row 1
    holds its column (hyperedge), listed in row-major order.
    """
    node_idx, hyperedge_idx = np.nonzero(incidence_matrix)
    return np.stack([node_idx, hyperedge_idx])

# Convert incidence matrices to hyperedge indices
hyperedge_index1 = incidence_matrix_to_hyperedge_index(incidence_matrix1)
hyperedge_index2 = incidence_matrix_to_hyperedge_index(incidence_matrix2)

# Incidence rows are addressed by label-encoded node id, whereas the feature matrix below
# is laid out in `nodes` row order. Translate encoded id -> row position so that every
# index stored on `data` refers to the same row of data.x. Ids without a feature row map to -1.
encoded_ids = nodes['MSOA_CODE'].to_numpy()
id_to_row = np.full(len(le.classes_), -1, dtype=np.int64)
id_to_row[encoded_ids] = np.arange(len(encoded_ids))


def _align_hyperedge_rows(hyperedge_index):
    """Rewrite row 0 from encoded node ids to feature-row positions, dropping unknown nodes."""
    node_rows = id_to_row[hyperedge_index[0]]
    known = node_rows >= 0
    return np.vstack((node_rows[known], hyperedge_index[1][known]))


# Convert to PyTorch tensors
hyperedge_index1 = torch.tensor(_align_hyperedge_rows(hyperedge_index1), dtype=torch.long)
hyperedge_index2 = torch.tensor(_align_hyperedge_rows(hyperedge_index2), dtype=torch.long)

# Verify shapes
print(hyperedge_index1.shape)  # Should be (2, num_edges1)
print(hyperedge_index2.shape)  # Should be (2, num_edges2)

# Extract node features and labels
node_features = torch.tensor(nodes.iloc[:, 1:-1].values, dtype=torch.float)
node_labels = torch.tensor(nodes['subset_vulunability-socio_ZH_VULN_IN_class'].values, dtype=torch.long)

# Create PyTorch Geometric data object
data = Data(x=node_features, y=node_labels)
data.hyperedge_index1 = hyperedge_index1
data.hyperedge_index2 = hyperedge_index2

# The multigraph's edge_index is numbered by G's node order, not by `nodes` row order.
# Map each graph node (raw id) -> encoded id -> feature row, and drop edges that touch a
# node without a feature row. Clamping out-of-range ids instead would silently rewire
# those edges onto the last node.
class_to_id = {raw_id: encoded for encoded, raw_id in enumerate(le.classes_)}
graph_pos_to_row = torch.tensor(
    [id_to_row[class_to_id[node]] if node in class_to_id else -1 for node in G.nodes()],
    dtype=torch.long,
)
aligned_edges = graph_pos_to_row[multigraph_data.edge_index]
edge_keep = (aligned_edges >= 0).all(dim=0)
data.edge_index = aligned_edges[:, edge_keep]
print(f"Multigraph edges kept: {int(edge_keep.sum())}/{edge_keep.numel()}")

# Keep edge attributes (if the converter produced any) in step with the filtered edges.
edge_attr = multigraph_data.edge_attr
data.edge_attr = edge_attr[edge_keep] if torch.is_tensor(edge_attr) else edge_attr

# Split the dataset. Despite their names, train_mask/test_mask hold node *indices*, not
# boolean masks. A fixed random_state makes the split reproducible across runs.
train_mask, test_mask = train_test_split(
    range(data.num_nodes), test_size=0.2, stratify=data.y.numpy(), random_state=42
)
train_mask = torch.tensor(train_mask, dtype=torch.long)
test_mask = torch.tensor(test_mask, dtype=torch.long)

torch.Size([2, 388454])
torch.Size([2, 41997])


In [6]:
def compute_accuracy(logits, labels, mask):
    """Fraction of the selected nodes whose arg-max prediction equals the label.

    Parameters
    ----------
    logits : torch.Tensor
        Per-node class scores; the predicted class is the arg-max over dim 1.
    labels : torch.Tensor
        Per-node integer class labels.
    mask : torch.Tensor
        Selects the nodes to score. May be either a tensor of node indices or a
        boolean mask over all nodes.

    Returns
    -------
    float
        ``correct / selected``; 0.0 when the selection is empty.
    """
    preds = logits[mask].argmax(dim=1)
    # Count the rows actually selected: mask.size(0) is only correct for index tensors,
    # for a boolean mask it would be the total number of nodes.
    total = preds.numel()
    if total == 0:
        return 0.0
    correct = (preds == labels[mask]).sum().item()
    return correct / total

In [19]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, HypergraphConv

class CombinedGNN(torch.nn.Module):
    """Node classifier that fuses a graph-convolution branch with a hypergraph branch.

    Each branch stacks two convolutions with a ReLU in between. The graph branch uses
    ``data.edge_index`` in both layers; the hypergraph branch uses
    ``data.hyperedge_index1`` in its first layer and ``data.hyperedge_index2`` in its
    second. The two branch outputs are concatenated and passed through a linear layer.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # Layers are registered in the order gcn1, hyperconv1, gcn2, hyperconv2, fc.
        self.gcn1 = GCNConv(input_dim, hidden_dim)
        self.hyperconv1 = HypergraphConv(input_dim, hidden_dim)
        self.gcn2 = GCNConv(hidden_dim, hidden_dim)
        self.hyperconv2 = HypergraphConv(hidden_dim, hidden_dim)
        # The classifier sees both branches side by side, hence twice the hidden width.
        self.fc = torch.nn.Linear(2 * hidden_dim, output_dim)

    def forward(self, data):
        """Return per-node log-probabilities of shape (num_nodes, output_dim)."""
        features = data.x

        # Graph branch: conv -> ReLU -> conv over the pairwise edges.
        graph_hidden = F.relu(self.gcn1(features, data.edge_index))
        graph_branch = self.gcn2(graph_hidden, data.edge_index)

        # Hypergraph branch: first layer on hypergraph 1, second layer on hypergraph 2.
        hyper_hidden = F.relu(self.hyperconv1(features, data.hyperedge_index1))
        hyper_branch = self.hyperconv2(hyper_hidden, data.hyperedge_index2)

        # Fuse both views of each node and classify.
        scores = self.fc(torch.cat((graph_branch, hyper_branch), dim=1))
        return F.log_softmax(scores, dim=1)

# Seed before the model is built so weight initialisation is reproducible across runs.
torch.manual_seed(42)

input_dim = node_features.shape[1]
hidden_dim = 16
output_dim = 3  # Assuming labels are integers starting from 0

model = CombinedGNN(input_dim, hidden_dim, output_dim)
optimizer = torch.optim.AdamW(model.parameters(), lr=0.01)
# CombinedGNN.forward already returns log-probabilities (log_softmax), so the matching
# criterion is NLLLoss; CrossEntropyLoss would apply log_softmax a second time.
criterion = torch.nn.NLLLoss()

def train():
    """Run one full-batch optimisation step on the training nodes.

    Uses the module-level ``model``, ``optimizer``, ``criterion``, ``data`` and
    ``train_mask``. Returns ``(loss, out)`` where ``loss`` is the Python float of the
    training loss and ``out`` is the model output for all nodes, computed before the
    optimizer step.
    """
    model.train()
    optimizer.zero_grad()

    predictions = model(data)
    step_loss = criterion(predictions[train_mask], data.y[train_mask])

    step_loss.backward()
    optimizer.step()
    return step_loss.item(), predictions

best_train_acc = 0
best_epoch = 0
best_model_state = None
patience = 50  # epochs without improvement tolerated before stopping
counter = 0
max_epochs = 5000

# NOTE(review): model selection and early stopping are driven by *training* accuracy;
# no validation split is visible here — confirm that this is intended.
for epoch in range(max_epochs):
    loss, _ = train()

    # train() returns outputs computed before optimizer.step(). Score the updated weights
    # instead, so the accuracy recorded below belongs to the state that gets snapshotted.
    model.eval()
    with torch.no_grad():
        train_acc = compute_accuracy(model(data), data.y, train_mask)
    print(f"Epoch {epoch + 1}: Train Accuracy: {train_acc:.4f}")

    # Check for best train accuracy
    if train_acc > best_train_acc:
        best_train_acc = train_acc
        best_epoch = epoch
        # state_dict() hands back references to the live parameters; clone them, otherwise
        # the "best" snapshot keeps changing as training continues.
        best_model_state = {
            name: tensor.detach().clone() for name, tensor in model.state_dict().items()
        }
        counter = 0  # Reset patience counter
    else:
        counter += 1

    # Stop if patience is exceeded
    if counter >= patience:
        print("Early stopping at epoch", epoch + 1)
        break

# Load the best model state (absent only if accuracy never rose above its initial value)
if best_model_state is not None:
    model.load_state_dict(best_model_state)

# Evaluate on the test set using the best model
model.eval()
with torch.no_grad():
    test_out = model(data)
    final_test_acc = compute_accuracy(test_out, data.y, test_mask)

print(f"Best Epoch: {best_epoch + 1}, Train Accuracy: {best_train_acc:.4f}")
print("Final Test Accuracy:", final_test_acc)


Epoch 1/500, Loss: 1.0984, Train Accuracy: 0.4714
Epoch 2/500, Loss: 1.0606, Train Accuracy: 0.4714
Epoch 3/500, Loss: 1.0483, Train Accuracy: 0.4714
Epoch 4/500, Loss: 1.0517, Train Accuracy: 0.4714
Epoch 5/500, Loss: 1.0537, Train Accuracy: 0.4714
Epoch 6/500, Loss: 1.0492, Train Accuracy: 0.4714
Epoch 7/500, Loss: 1.0419, Train Accuracy: 0.4714
Epoch 8/500, Loss: 1.0352, Train Accuracy: 0.4714
Epoch 9/500, Loss: 1.0302, Train Accuracy: 0.4714
Epoch 10/500, Loss: 1.0266, Train Accuracy: 0.4857
Epoch 11/500, Loss: 1.0236, Train Accuracy: 0.5000
Epoch 12/500, Loss: 1.0198, Train Accuracy: 0.5000
Epoch 13/500, Loss: 1.0145, Train Accuracy: 0.5000
Epoch 14/500, Loss: 1.0083, Train Accuracy: 0.5143
Epoch 15/500, Loss: 1.0019, Train Accuracy: 0.5143
Epoch 16/500, Loss: 0.9956, Train Accuracy: 0.5286
Epoch 17/500, Loss: 0.9893, Train Accuracy: 0.5571
Epoch 18/500, Loss: 0.9834, Train Accuracy: 0.5571
Epoch 19/500, Loss: 0.9779, Train Accuracy: 0.5429
Epoch 20/500, Loss: 0.9725, Train Accura