In [None]:
import os

import numpy as np
import torch
import torch.nn.functional as F
from rdflib import Graph
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv

In [None]:
# Read the triples from disk into an in-memory rdflib graph.
# NOTE(review): the file carries an .nt extension but is parsed with the
# Turtle parser; presumably this works because N-Triples is a subset of
# Turtle -- confirm whether format="nt" was intended.
rdf_graph = Graph()
rdf_graph.parse(source="14_graph.nt", format="turtle")
print("Graph loaded")

In [None]:
# Create node and edge lists.
# Every distinct subject or object string becomes one node. The node list is
# sorted so that the node -> index mapping is the same on every run; iterating
# a plain set of strings can yield a different order after a kernel restart,
# which would make the rows of a saved embedding table refer to different
# nodes each time.
nodes = sorted(
    {str(s) for s in rdf_graph.subjects()} | {str(o) for o in rdf_graph.objects()}
)
node_idx = {node: i for i, node in enumerate(nodes)}

# One directed edge (subject -> object) per triple; the predicate is ignored.
# All subjects and objects are in node_idx by construction, so no membership
# filter is needed here.
edges = [(node_idx[str(s)], node_idx[str(o)]) for s, _, o in rdf_graph]

# Convert edge list to a (2, num_edges) tensor. reshape(-1, 2) keeps the
# result two-dimensional -- shape (2, 0) -- even when the graph has no triples.
edge_index = torch.tensor(edges, dtype=torch.long).reshape(-1, 2).t().contiguous()

# Number of nodes
num_nodes = len(nodes)

# Generate random labels for a classification task (for demonstration purposes).
# A dedicated, seeded generator makes the labels reproducible without touching
# the global torch RNG state.
num_classes = 2
LABEL_SEED = 0
label_rng = torch.Generator().manual_seed(LABEL_SEED)
node_labels = torch.randint(
    0, num_classes, (num_nodes,), generator=label_rng, dtype=torch.long
)

# Create a Data object from torch_geometric. num_nodes is passed explicitly so
# it does not have to be inferred from edge_index (there is no feature matrix).
data = Data(edge_index=edge_index, y=node_labels, num_nodes=num_nodes)

# Define a simple GCN model with an embedding layer
class GCN(torch.nn.Module):
    """Two-layer GCN whose input features are a learned per-node embedding table.

    Args:
        num_nodes: Number of rows in the embedding table (one per graph node).
        embedding_dim: Size of each learned node embedding.
        hidden_channels: Output width of the first GCNConv layer.
        out_channels: Output width of the second GCNConv layer.
    """

    def __init__(self, num_nodes, embedding_dim, hidden_channels, out_channels):
        super().__init__()
        self.embedding = torch.nn.Embedding(num_nodes, embedding_dim)
        self.conv1 = GCNConv(embedding_dim, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, data):
        """Run both graph convolutions over all node embeddings.

        Args:
            data: Object exposing ``edge_index``; only that attribute is read.

        Returns:
            The output of the second GCNConv layer, with no activation
            applied after it.
        """
        # Take the node count from the model's own embedding table rather than
        # from a notebook-level global, so the module is self-contained and
        # always looks up exactly the rows it owns.
        node_ids = torch.arange(
            self.embedding.num_embeddings, device=data.edge_index.device
        )
        x = self.embedding(node_ids)
        edge_index = data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        return x

# Pick the Apple-silicon MPS backend when torch reports it as available,
# otherwise stay on the CPU; then place both the graph and the model there.
if torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')
data = data.to(device)
model = GCN(
    num_nodes=num_nodes,
    embedding_dim=16,
    hidden_channels=32,
    out_channels=num_classes,
)
model = model.to(device)

# Adam with L2 regularisation (weight decay) over every model parameter.
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.01,
    weight_decay=5e-4,
)

# Full-batch training: each epoch is one forward/backward pass over the whole
# graph, followed by a single optimizer step.
model.train()
for epoch in range(200):
    optimizer.zero_grad()
    out = model(data)
    # Cross-entropy between the per-node outputs and the node labels.
    loss = F.cross_entropy(input=out, target=data.y)
    loss.backward()
    optimizer.step()
    print(f'Epoch {epoch+1}, Loss: {loss.item()}')

print("Training finished")

In [None]:
# Persist the trained weights. torch.save does not create missing parent
# directories, so make sure the target folder exists first.
os.makedirs('models', exist_ok=True)
torch.save(model.state_dict(), 'models/gcn_embedding.pth')