In [2]:
import os

import numpy as np
import torch
import torch.nn.functional as F
from rdflib import Graph
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv

In [3]:
# Read the serialized triples from disk into an in-memory rdflib graph.
# NOTE(review): the ".nt" extension suggests N-Triples, yet the Turtle parser is
# requested here -- confirm which serialization the file actually uses.
rdf_graph = Graph()
rdf_graph.parse(source="14_graph.nt", format="turtle")

print("Graph loaded")

Graph loaded


In [4]:
# Create node and edge lists.
# Every subject and object term of the RDF graph becomes one node. The
# de-duplicated names are sorted so each node keeps the same integer index
# across kernel restarts; iterating a plain set of strings follows hash order,
# which is randomized per process and would silently re-map the rows of any
# per-node tensor between runs.
nodes = sorted({str(term) for s, _, o in rdf_graph for term in (s, o)})
node_idx = {node: i for i, node in enumerate(nodes)}

# One directed edge (subject -> object) per triple; the predicate is ignored.
# No membership filter is needed: every object is in node_idx by construction.
edges = [(node_idx[str(s)], node_idx[str(o)]) for s, _, o in rdf_graph]

# Convert edge list to a torch tensor of shape (2, num_edges). The reshape
# keeps that layout even when the graph contains no triples at all.
edge_index = torch.tensor(edges, dtype=torch.long).reshape(-1, 2).t().contiguous()

# Number of nodes
num_nodes = len(nodes)

# Generate random labels for a classification task (for demonstration purposes).
# A dedicated, seeded generator makes the labels reproducible without touching
# the global torch RNG state.
num_classes = 2
LABEL_SEED = 0
label_rng = torch.Generator().manual_seed(LABEL_SEED)
node_labels = torch.randint(
    0, num_classes, (num_nodes,), generator=label_rng, dtype=torch.long
)

# Create a Data object from torch_geometric. num_nodes is passed explicitly so
# it does not have to be inferred from edge_index.
data = Data(edge_index=edge_index, y=node_labels, num_nodes=num_nodes)

# Define a simple GCN model with an embedding layer
class GCN(torch.nn.Module):
    """Two-layer graph convolutional network over learned node embeddings.

    The graph carries no input features, so every node gets a trainable
    embedding vector that serves as the input to the first convolution.

    Args:
        num_nodes: Number of rows in the embedding table (one per node).
        embedding_dim: Size of each node embedding.
        hidden_channels: Output width of the first GCNConv layer.
        out_channels: Output width of the second GCNConv layer.
    """

    def __init__(self, num_nodes, embedding_dim, hidden_channels, out_channels):
        super().__init__()
        self.embedding = torch.nn.Embedding(num_nodes, embedding_dim)
        self.conv1 = GCNConv(embedding_dim, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, data):
        """Run both graph convolutions over all node embeddings.

        Args:
            data: Object exposing an ``edge_index`` tensor; only that
                attribute is read here.

        Returns:
            The output of the second convolution applied to every node's
            embedding.
        """
        edge_index = data.edge_index
        # Size the lookup from the model's own embedding table rather than a
        # module-level variable, so the model works regardless of notebook state.
        node_ids = torch.arange(
            self.embedding.num_embeddings, device=edge_index.device
        )
        x = self.embedding(node_ids)
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        return x

# Hyperparameters for this demonstration run
EMBEDDING_DIM = 16
HIDDEN_CHANNELS = 32
LEARNING_RATE = 0.01
WEIGHT_DECAY = 5e-4
NUM_EPOCHS = 200

# Prefer the MPS (Metal Performance Shaders) backend when it is available,
# otherwise stay on the CPU
if torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')

# Place both the graph data and the model on the selected device
data = data.to(device)
model = GCN(
    num_nodes=num_nodes,
    embedding_dim=EMBEDDING_DIM,
    hidden_channels=HIDDEN_CHANNELS,
    out_channels=num_classes,
)
model = model.to(device)

# Adam with L2 regularisation applied through weight decay
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY,
)

# Full-batch training: every epoch is a single forward/backward pass over the
# whole graph, scored with cross-entropy against the node labels
model.train()
for epoch in range(NUM_EPOCHS):
    optimizer.zero_grad()
    out = model(data)
    loss = F.cross_entropy(out, data.y)
    loss.backward()
    optimizer.step()
    print(f'Epoch {epoch+1}, Loss: {loss.item()}')

print("Training finished")

  edge_index = edge_index[:, mask]


Epoch 1, Loss: 0.7463042736053467
Epoch 2, Loss: 0.7281002998352051
Epoch 3, Loss: 0.7209431529045105
Epoch 4, Loss: 0.7125515341758728
Epoch 5, Loss: 0.7060894966125488
Epoch 6, Loss: 0.7037792205810547
Epoch 7, Loss: 0.7030373215675354
Epoch 8, Loss: 0.7016439437866211
Epoch 9, Loss: 0.6998331546783447
Epoch 10, Loss: 0.6986168026924133
Epoch 11, Loss: 0.6983292102813721
Epoch 12, Loss: 0.6982117891311646
Epoch 13, Loss: 0.6976706385612488
Epoch 14, Loss: 0.6968595385551453
Epoch 15, Loss: 0.69623863697052
Epoch 16, Loss: 0.6960387229919434
Epoch 17, Loss: 0.6960309147834778
Epoch 18, Loss: 0.6959192752838135
Epoch 19, Loss: 0.6956188082695007
Epoch 20, Loss: 0.6952473521232605
Epoch 21, Loss: 0.6949694752693176
Epoch 22, Loss: 0.6948443055152893
Epoch 23, Loss: 0.6947757005691528
Epoch 24, Loss: 0.6946514248847961
Epoch 25, Loss: 0.6944448947906494
Epoch 26, Loss: 0.6942089200019836
Epoch 27, Loss: 0.6940168738365173
Epoch 28, Loss: 0.693903923034668
Epoch 29, Loss: 0.69384467601776

In [5]:
# Persist the trained weights. torch.save does not create missing parent
# directories, so make sure the target folder exists first.
# NOTE(review): embedding rows are addressed through node_idx; consider saving
# that mapping alongside the weights if the embeddings are to be reused.
os.makedirs('models', exist_ok=True)
torch.save(model.state_dict(), 'models/gcn_embedding.pth')