# 1. Modeling Relational Data with Graph Convolutional Networks

### 1.1. Import necessary libraries

In [25]:
import argparse
import os.path as osp
import time

import torch
import torch.nn.functional as F

from torch_geometric.datasets import Entities
# from torch_geometric.nn import FastRGCNConv, RGCNConv
from torch_geometric.utils import k_hop_subgraph
!pip install rdflib
# parser = argparse.ArgumentParser()
# parser.add_argument('--dataset', type=str, default='AIFB',choices=
dataset = 'AIFB'



In [None]:
# Import necessary modules for typing and PyTorch geometric functionality
from typing import Optional, Tuple, Union
import torch
from torch import Tensor
from torch.nn import Parameter

# Import backend utilities and components for geometric operations
import torch_geometric.backend
import torch_geometric.typing
from torch_geometric import is_compiling
from torch_geometric.index import index2ptr
from torch_geometric.nn.conv import MessagePassing
from torch_geometric.nn.inits import glorot, zeros
from torch_geometric.typing import (
    Adj,  # Type alias for adjacency representation
    OptTensor,  # Optional tensor type alias
    SparseTensor,  # Sparse tensor type alias
    pyg_lib,  # Library for advanced geometric operations
    torch_sparse,  # Support for sparse tensor operations
)
from torch_geometric.utils import index_sort, one_hot, scatter, spmm

# Define a utility function to apply a mask to edge indices
def masked_edge_index(edge_index: Adj, edge_mask: Tensor) -> Adj:
    """
    Filters the edge index based on the provided edge mask.
    Supports both dense and sparse adjacency formats.
    """
    if isinstance(edge_index, Tensor):
        return edge_index[:, edge_mask]  # Mask edges for dense adjacency
    return torch_sparse.masked_select_nnz(edge_index, edge_mask, layout='coo')  # For sparse adjacency

# Define a Relational Graph Convolutional Network (RGCN) convolution layer
class RGCNConv(MessagePassing):
    def __init__(
        self,
        in_channels: Union[int, Tuple[int, int]],  # Input channels for source and target nodes
        out_channels: int,  # Number of output channels
        num_relations: int,  # Number of distinct edge types (relations)
        num_bases: Optional[int] = None,  # Number of bases for weight decomposition (optional)
        num_blocks: Optional[int] = None,  # Number of blocks for weight decomposition (optional)
        aggr: str = 'mean',  # Aggregation strategy for message passing
        root_weight: bool = True,  # Whether to use a root node weight
        is_sorted: bool = False,  # Whether edge indices are sorted
        bias: bool = True,  # Whether to use a bias parameter
        **kwargs,  # Additional arguments for the parent class
    ):
        kwargs.setdefault('aggr', aggr)  # Set default aggregation if not provided
        super().__init__(node_dim=0, **kwargs)  # Initialize the base MessagePassing class

        # Validate input parameters to ensure no conflicting decomposition strategies
        if num_bases is not None and num_blocks is not None:
            raise ValueError('Can not apply both basis-decomposition and '
                             'block-diagonal-decomposition at the same time.')

        # Save initialization parameters
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.num_relations = num_relations
        self.num_bases = num_bases
        self.num_blocks = num_blocks
        self.is_sorted = is_sorted

        # Support for tuple input format
        if isinstance(in_channels, int):
            in_channels = (in_channels, in_channels)
        self.in_channels_l = in_channels[0]

        # Attribute for segment matmul heuristic during runtime
        self._use_segment_matmul_heuristic_output: torch.jit.Attribute(
            None, Optional[float])

        # Initialize weights based on decomposition strategy
        if num_bases is not None:  # Basis-decomposition
            self.weight = Parameter(
                torch.empty(num_bases, in_channels[0], out_channels))
            self.comp = Parameter(torch.empty(num_relations, num_bases))

        elif num_blocks is not None:  # Block-diagonal-decomposition
            assert (in_channels[0] % num_blocks == 0
                    and out_channels % num_blocks == 0)
            self.weight = Parameter(
                torch.empty(num_relations, num_blocks,
                            in_channels[0] // num_blocks,
                            out_channels // num_blocks))
            self.register_parameter('comp', None)  # No composition matrix

        else:  # Default dense weight
            self.weight = Parameter(
                torch.empty(num_relations, in_channels[0], out_channels))
            self.register_parameter('comp', None)

        # Root node weight initialization
        if root_weight:
            self.root = Parameter(torch.empty(in_channels[1], out_channels))
        else:
            self.register_parameter('root', None)

        # Bias initialization
        if bias:
            self.bias = Parameter(torch.empty(out_channels))
        else:
            self.register_parameter('bias', None)

        self.reset_parameters()  # Reset weights and biases

    def reset_parameters(self):
        """
        Resets all learnable parameters using standard initialization techniques.
        """
        super().reset_parameters()
        glorot(self.weight)  # Glorot initialization for weights
        glorot(self.comp)  # Glorot initialization for composition matrix
        glorot(self.root)  # Glorot initialization for root weights
        zeros(self.bias)  # Zero initialization for biases

    def forward(self, x: Union[OptTensor, Tuple[OptTensor, Tensor]],
                edge_index: Adj, edge_type: OptTensor = None):
        """
        Performs a forward pass on the input data using relational graph convolution.
        """
        # Prepare input features for source and target nodes
        x_l: OptTensor = None
        if isinstance(x, tuple):
            x_l = x[0]
        else:
            x_l = x
        if x_l is None:  # Fallback to default indices
            x_l = torch.arange(self.in_channels_l, device=self.weight.device)

        x_r: Tensor = x_l  # Target node features
        if isinstance(x, tuple):
            x_r = x[1]

        size = (x_l.size(0), x_r.size(0))  # Define input-output sizes

        # For sparse edge indices, extract edge types from storage
        if isinstance(edge_index, SparseTensor):
            edge_type = edge_index.storage.value()
        assert edge_type is not None

        # Initialize output tensor
        out = torch.zeros(x_r.size(0), self.out_channels, device=x_r.device)

        # Adjust weights based on decomposition strategy
        weight = self.weight
        if self.num_bases is not None:  # Basis-decomposition
            weight = (self.comp @ weight.view(self.num_bases, -1)).view(
                self.num_relations, self.in_channels_l, self.out_channels)

        if self.num_blocks is not None:  # Block-diagonal-decomposition
            # Ensure compatibility with non-floating-point input
            if not torch.is_floating_point(x_r):
                raise ValueError('Block-diagonal decomposition not supported '
                                 'for non-continuous input features.')

            # Process each relation individually
            for i in range(self.num_relations):
                tmp = masked_edge_index(edge_index, edge_type == i)
                h = self.propagate(tmp, x=x_l, edge_type_ptr=None, size=size)
                h = h.view(-1, weight.size(1), weight.size(2))
                h = torch.einsum('abc,bcd->abd', h, weight[i])  # Matrix multiplication
                out = out + h.contiguous().view(-1, self.out_channels)

        else:  # Default decomposition
            ...
        # Other processing omitted for brevity
        return out

# Additional methods (`message`, `message_and_aggregate`, etc.) omitted for brevity.


In [27]:
class FastRGCNConv(RGCNConv):
    def forward(self, x: Union[OptTensor, Tuple[OptTensor, Tensor]],
                edge_index: Adj, edge_type: OptTensor = None):

        self.fuse = False
        assert self.aggr in ['add', 'sum', 'mean']

        # Convert input features to a pair of node features or node indices.
        x_l: OptTensor = None
        if isinstance(x, tuple):
            x_l = x[0]
        else:
            x_l = x
        if x_l is None:
            x_l = torch.arange(self.in_channels_l, device=self.weight.device)

        x_r: Tensor = x_l
        if isinstance(x, tuple):
            x_r = x[1]

        size = (x_l.size(0), x_r.size(0))

        # propagate_type: (x: Tensor, edge_type: OptTensor)
        out = self.propagate(edge_index, x=x_l, edge_type=edge_type, size=size)

        root = self.root
        if root is not None:
            if not torch.is_floating_point(x_r):
                out = out + root[x_r]
            else:
                out = out + x_r @ root

        if self.bias is not None:
            out = out + self.bias

        return out


    def message(self, x_j: Tensor, edge_type: Tensor,
                edge_index_j: Tensor) -> Tensor:
        weight = self.weight
        if self.num_bases is not None:  # Basis-decomposition =================
            weight = (self.comp @ weight.view(self.num_bases, -1)).view(
                self.num_relations, self.in_channels_l, self.out_channels)

        if self.num_blocks is not None:  # Block-diagonal-decomposition =======
            if not torch.is_floating_point(x_j):
                raise ValueError('Block-diagonal decomposition not supported '
                                 'for non-continuous input features.')

            weight = weight[edge_type].view(-1, weight.size(2), weight.size(3))
            x_j = x_j.view(-1, 1, weight.size(1))
            return torch.bmm(x_j, weight).view(-1, self.out_channels)

        else:  # No regularization/Basis-decomposition ========================
            if not torch.is_floating_point(x_j):
                weight_index = edge_type * weight.size(1) + edge_index_j
                return weight.view(-1, self.out_channels)[weight_index]

            return torch.bmm(x_j.unsqueeze(-2), weight[edge_type]).squeeze(-2)

    def aggregate(self, inputs: Tensor, edge_type: Tensor, index: Tensor,
                  dim_size: Optional[int] = None) -> Tensor:

        # Compute normalization in separation for each `edge_type`.
        if self.aggr == 'mean':
            norm = one_hot(edge_type, self.num_relations, dtype=inputs.dtype)
            norm = scatter(norm, index, dim=0, dim_size=dim_size)[index]
            norm = torch.gather(norm, 1, edge_type.view(-1, 1))
            norm = 1. / norm.clamp_(1.)
            inputs = norm * inputs

        return scatter(inputs, index, dim=self.node_dim, dim_size=dim_size)

### 1.2. Trade memory consumption for faster computation

In [28]:
# 
if dataset in ['AIFB', 'MUTAG']:
    Conv = FastRGCNConv
else:
    Conv = RGCNConv


### 1.3. Dowload dataset

In [29]:
path = osp.join('data', 'Entities')
dataset = Entities(path, dataset)
data = dataset[0]

### 1.4. Split dataset

In [30]:
node_idx = torch.cat([data.train_idx, data.test_idx], dim=0)
node_idx, edge_index, mapping, edge_mask = k_hop_subgraph(
    node_idx, 2, data.edge_index, relabel_nodes=True)

data.num_nodes = node_idx.size(0)
data.edge_index = edge_index
data.edge_type = data.edge_type[edge_mask]
data.train_idx = mapping[:data.train_idx.size(0)]
data.test_idx = mapping[data.train_idx.size(0):]

### 1.5. Build rGCN class

In [31]:
class Net(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = Conv(data.num_nodes, 16, dataset.num_relations,
                          num_bases=30)
        self.conv2 = Conv(16, dataset.num_classes, dataset.num_relations,
                          num_bases=30)

    def forward(self, edge_index, edge_type):
        x = F.relu(self.conv1(None, edge_index, edge_type))
        x = self.conv2(x, edge_index, edge_type)
        return F.log_softmax(x, dim=1)

In [32]:
if torch.cuda.is_available():
    device = torch.device('cuda')
elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')

In [33]:
device = torch.device('cpu') if dataset == 'AM' else device
model, data = Net().to(device), data.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=0.0005)

### 1.6. Define the train and test functions

In [34]:
def train():
    model.train()
    optimizer.zero_grad()
    out = model(data.edge_index, data.edge_type)
    loss = F.nll_loss(out[data.train_idx], data.train_y)
    loss.backward()
    optimizer.step()
    return float(loss)

@torch.no_grad()
def test():
    model.eval()
    pred = model(data.edge_index, data.edge_type).argmax(dim=-1)
    train_acc = float((pred[data.train_idx] == data.train_y).float().mean())
    test_acc = float((pred[data.test_idx] == data.test_y).float().mean())
    return train_acc, test_acc

In [35]:
times = []
for epoch in range(1, 10):
    start = time.time()
    loss = train()
    train_acc, test_acc = test()
    print(f'Epoch: {epoch:02d}, Loss: {loss:.4f}, Train: {train_acc:.4f} '
          f'Test: {test_acc:.4f}')
    times.append(time.time() - start)
print(f"Median time per epoch: {torch.tensor(times).median():.4f}s")

Epoch: 01, Loss: 1.3997, Train: 0.9643 Test: 0.8333
Epoch: 02, Loss: 0.7475, Train: 0.9643 Test: 0.8889
Epoch: 03, Loss: 0.2929, Train: 0.9714 Test: 0.9167
Epoch: 04, Loss: 0.1242, Train: 0.9714 Test: 0.9167
Epoch: 05, Loss: 0.0789, Train: 0.9714 Test: 0.9167
Epoch: 06, Loss: 0.0565, Train: 0.9786 Test: 0.9444
Epoch: 07, Loss: 0.0342, Train: 0.9929 Test: 0.9444
Epoch: 08, Loss: 0.0166, Train: 1.0000 Test: 0.9444
Epoch: 09, Loss: 0.0074, Train: 1.0000 Test: 0.9444
Median time per epoch: 0.0053s


## 2. Translating Embeddings for Modeling Multi-relational Data


### 2.1. Import libraries

In [4]:
import argparse
import os.path as osp

import torch
import torch.optim as optim

from torch_geometric.datasets import FB15k_237
from torch_geometric.nn import RotatE

### 2.2. TransE

In [54]:
import math

import torch
import torch.nn.functional as F
from torch import Tensor

from torch_geometric.nn.kge import KGEModel


class TransE(KGEModel):
    def __init__(
        self,
        num_nodes: int,
        num_relations: int,
        hidden_channels: int,
        margin: float = 1.0,
        p_norm: float = 1.0,
        sparse: bool = False,
    ):
        super().__init__(num_nodes, num_relations, hidden_channels, sparse)

        self.p_norm = p_norm
        self.margin = margin

        self.reset_parameters()

    def reset_parameters(self):
        bound = 6. / math.sqrt(self.hidden_channels)
        torch.nn.init.uniform_(self.node_emb.weight, -bound, bound)
        torch.nn.init.uniform_(self.rel_emb.weight, -bound, bound)
        F.normalize(self.rel_emb.weight.data, p=self.p_norm, dim=-1,
                    out=self.rel_emb.weight.data)

    def forward(
        self,
        head_index: Tensor,
        rel_type: Tensor,
        tail_index: Tensor,
    ) -> Tensor:

        head = self.node_emb(head_index)
        rel = self.rel_emb(rel_type)
        tail = self.node_emb(tail_index)

        head = F.normalize(head, p=self.p_norm, dim=-1)
        tail = F.normalize(tail, p=self.p_norm, dim=-1)

        # Calculate *negative* TransE norm:
        return -((head + rel) - tail).norm(p=self.p_norm, dim=-1)

    def loss(
        self,
        head_index: Tensor,
        rel_type: Tensor,
        tail_index: Tensor,
    ) -> Tensor:

        pos_score = self(head_index, rel_type, tail_index)
        neg_score = self(*self.random_sample(head_index, rel_type, tail_index))

        return F.margin_ranking_loss(
            pos_score,
            neg_score,
            target=torch.ones_like(pos_score),
            margin=self.margin,
        )

In [55]:
model_map = {
    'transe': TransE,
}

In [56]:
parser = argparse.ArgumentParser()
parser.add_argument('--model', choices=model_map.keys(), type=str.lower, required=False)
# Add this to avoid issues with Jupyter Notebook's internal arguments
parser.add_argument("-f", required=False, help="Dummy argument to prevent Jupyter errors.")
args = parser.parse_args([])  # Passing an empty list ensures no CLI arguments are processed.

device = 'cuda' if torch.cuda.is_available() else 'cpu'
path = osp.join('data', 'FB15k')

train_data = FB15k_237(path, split='train')[0].to(device)
val_data = FB15k_237(path, split='val')[0].to(device)
test_data = FB15k_237(path, split='test')[0].to(device)

### 2.3. TransE model

In [57]:
model_arg_map = {'rotate': {'margin': 9.0}}
args.model = "transe"
model = model_map[args.model](
    num_nodes=train_data.num_nodes,
    num_relations=train_data.num_edge_types,
    hidden_channels=50,
    **model_arg_map.get(args.model, {}),
).to(device)

loader = model.loader(
    head_index=train_data.edge_index[0],
    rel_type=train_data.edge_type,
    tail_index=train_data.edge_index[1],
    batch_size=1000,
    shuffle=True,
)

### 2.4. Optimizers

In [None]:
optimizer_map = {
    'transe': optim.Adam(model.parameters(), lr=0.01),
    'rotate': optim.Adam(model.parameters(), lr=1e-3),
}
optimizer = optimizer_map[args.model]

### 2.5. Train

In [None]:
def train():
    model.train()
    total_loss = total_examples = 0
    for head_index, rel_type, tail_index in loader:
        optimizer.zero_grad()
        loss = model.loss(head_index, rel_type, tail_index)
        loss.backward()
        optimizer.step()
        total_loss += float(loss) * head_index.numel()
        total_examples += head_index.numel()
    return total_loss / total_examples

### 2.6. Test

In [None]:
@torch.no_grad()
def test(data):
    model.eval()
    return model.test(
        head_index=data.edge_index[0],
        rel_type=data.edge_type,
        tail_index=data.edge_index[1],
        batch_size=20000,
        k=10,
    )

for epoch in range(1, 5):
    loss = train()
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')
    if epoch % 25 == 0:
        rank, mrr, _ = test(val_data)
        print(f'Epoch: {epoch:03d}, Val Mean Rank: {rank:.2f}, Val MRR: {mrr:.4f}')

# Run
rank, mrr, _ = test(test_data)
print(f'Test Mean Rank: {rank:.2f}, Test MRR: {mrr:.4f}')


Epoch: 001, Loss: 0.7630
Epoch: 002, Loss: 0.5538
Epoch: 003, Loss: 0.4318
Epoch: 004, Loss: 0.3476


100%|██████████| 20466/20466 [00:05<00:00, 3490.59it/s]

Test Mean Rank: 812.74, Test MRR: 0.1884





## 3. Learning Entity and Relation Embeddings for Knowledge Graph Completion

### 3.1. TransR

In [1]:
import torch
import torch.nn as nn

# Define the TransR class, a relation-specific embedding model for knowledge graphs
class TransR(nn.Module):
    def __init__(self, num_nodes, num_rels, hidden_channels, rel_channels, p=1):
        """
        Initializes the TransR model.
        
        Args:
        - num_nodes: Number of entities in the knowledge graph.
        - num_rels: Number of relation types in the knowledge graph.
        - hidden_channels: Dimensionality of entity embeddings.
        - rel_channels: Dimensionality of relation embeddings.
        - p: Norm degree for computing the score (default is 1 for Manhattan norm).
        """
        super(TransR, self).__init__()
        
        # Embedding layer for nodes (entities)
        self.node_emb = nn.Embedding(num_nodes, hidden_channels)
        
        # Embedding layer for relations
        self.rel_emb = nn.Embedding(num_rels, rel_channels)
        
        # Relation-specific projection matrix embeddings
        self.rel_project = nn.Embedding(num_rels, hidden_channels * rel_channels)
        
        # Store hyperparameters
        self.hidden_channels = hidden_channels
        self.rel_channels = rel_channels
        self.p = p

        # Initialize model parameters
        self.reset_parameters()

    def reset_parameters(self):
        """
        Initializes the embeddings using a uniform distribution with bounds
        proportional to the size of the hidden channels.
        """
        bound = 6. / math.sqrt(self.hidden_channels)  # Calculate bound
        torch.nn.init.uniform_(self.node_emb.weight, -bound, bound)  # Initialize node embeddings
        torch.nn.init.uniform_(self.rel_emb.weight, -bound, bound)  # Initialize relation embeddings
        torch.nn.init.uniform_(self.rel_project.weight, -bound, bound)  # Initialize projection matrices

    def forward(self, head_index, rel_type, tail_index):
        """
        Computes the score for a triple (head, relation, tail).
        
        Args:
        - head_index: Indices of head entities.
        - rel_type: Indices of relations.
        - tail_index: Indices of tail entities.
        
        Returns:
        - Negative score for the triple, based on the TransR scoring function.
        """
        # Look up embeddings for the head, relation, and tail
        head = self.node_emb(head_index)
        rel = self.rel_emb(rel_type)
        proj_rel = self.rel_project(rel_type).view(-1, self.hidden_channels, self.rel_channels)  # Reshape relation projection matrix
        tail = self.node_emb(tail_index)

        # Project the head and tail embeddings to the relation-specific space
        head_proj = (head.unsqueeze(1) @ proj_rel).squeeze(1)  # Relation-specific projection for head
        tail_proj = (tail.unsqueeze(1) @ proj_rel).squeeze(1)  # Relation-specific projection for tail

        # Compute the negative score using the p-norm
        return -torch.norm(head_proj + rel - tail_proj, p=self.p, dim=-1)

    def loss(self, head_index, rel_type, tail_index):
        """
        Computes the margin ranking loss for a batch of triples.
        
        Args:
        - head_index: Indices of head entities.
        - rel_type: Indices of relations.
        - tail_index: Indices of tail entities.
        
        Returns:
        - Margin ranking loss for the batch.
        """
        # Compute positive scores for given triples
        pos_score = self(head_index, rel_type, tail_index)
        
        # Compute negative scores for randomly sampled negative triples
        neg_score = self(*self.random_sample(head_index, rel_type, tail_index))

        # Compute and return margin ranking loss
        return F.margin_ranking_loss(
            pos_score,  # Positive scores
            neg_score,  # Negative scores
            target=torch.ones_like(pos_score),  # Target indicating positive > negative
            margin=1.0,  # Margin for the loss
        )

    def random_sample(self, head_index, rel_type, tail_index):
        """
        Generates negative samples by corrupting the tail entity.
        
        Args:
        - head_index: Indices of head entities.
        - rel_type: Indices of relations.
        - tail_index: Indices of tail entities.
        
        Returns:
        - A corrupted triple (head, relation, negative tail).
        """
        # Replace the tail index with random indices to create negative samples
        neg_tail_index = torch.randint(0, self.node_emb.num_embeddings, tail_index.size(), device=tail_index.device)
        return head_index, rel_type, neg_tail_index


In [72]:
model_map = {
    'transe': TransE,
    'transr': TransR,
}

### 3.2. Load dataset

In [73]:
parser = argparse.ArgumentParser()
parser.add_argument('--model', choices=model_map.keys(), type=str.lower, required=False)
# Add this to avoid issues with Jupyter Notebook's internal arguments
parser.add_argument("-f", required=False, help="Dummy argument to prevent Jupyter errors.")
args = parser.parse_args([])  # Passing an empty list ensures no CLI arguments are processed.

device = 'cuda' if torch.cuda.is_available() else 'cpu'
path = osp.join('data', 'FB15k')

train_data = FB15k_237(path, split='train')[0].to(device)
val_data = FB15k_237(path, split='val')[0].to(device)
test_data = FB15k_237(path, split='test')[0].to(device)

### 3.3. Create Loader

In [75]:
from torch.utils.data import DataLoader, TensorDataset

def create_loader(head_index, rel_type, tail_index, batch_size, shuffle):
    dataset = TensorDataset(head_index, rel_type, tail_index)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)
    return loader

In [76]:
model_arg_map = {'rotate': {'margin': 9.0}}
args.model = "transr"
model = model_map[args.model](
    num_nodes=train_data.num_nodes,
    num_rels=train_data.num_edge_types,
    hidden_channels=50,
    rel_channels=30,
    p=2,
).to(device)

loader = create_loader(
    head_index=train_data.edge_index[0],
    rel_type=train_data.edge_type,
    tail_index=train_data.edge_index[1],
    batch_size=1000,
    shuffle=True,
)

### 3.4. Train

In [78]:
for epoch in range(1, 5):
    model.train()
    total_loss = total_examples = 0
    for head_index, rel_type, tail_index in loader:
        head_index, rel_type, tail_index = head_index.to(device), rel_type.to(device), tail_index.to(device)
        optimizer.zero_grad()
        loss = model.loss(head_index, rel_type, tail_index)
        loss.backward()
        optimizer.step()
        total_loss += float(loss) * head_index.size(0)
        total_examples += head_index.size(0)
    print(f'Epoch: {epoch:03d}, Loss: {total_loss / total_examples:.4f}')

@torch.no_grad()
def test(data):
    model.eval()
    loader = create_loader(
        head_index=data.edge_index[0],
        rel_type=data.edge_type,
        tail_index=data.edge_index[1],
        batch_size=20000,
        shuffle=False,
    )
    total_rank = total_mrr = total_examples = 0
    for head_index, rel_type, tail_index in loader:
        head_index, rel_type, tail_index = head_index.to(device), rel_type.to(device), tail_index.to(device)
        rank, mrr, _ = model.test(head_index, rel_type, tail_index, k=10)
        total_rank += rank
        total_mrr += mrr
        total_examples += head_index.size(0)
    return total_rank / total_examples, total_mrr / total_examples


Epoch: 001, Loss: 1.5354
Epoch: 002, Loss: 1.5340
Epoch: 003, Loss: 1.5325
Epoch: 004, Loss: 1.5339


### 3.5. Test

In [79]:
@torch.no_grad()
def test(model, data, k=10):
    model.eval()
    loader = create_loader(
        head_index=data.edge_index[0],
        rel_type=data.edge_type,
        tail_index=data.edge_index[1],
        batch_size=20000,
        shuffle=False,
    )

    ranks = []
    mrr = 0.0
    for head_index, rel_type, tail_index in loader:
        head_index, rel_type, tail_index = head_index.to(device), rel_type.to(device), tail_index.to(device)

        # Scores for true triplets
        true_scores = model(head_index, rel_type, tail_index)

        # Evaluate corrupted triplets (e.g., corrupt tail entity)
        all_entities = torch.arange(model.node_emb.num_embeddings, device=device)
        corrupt_tail_scores = []
        for tail in all_entities:
            corrupt_scores = model(head_index, rel_type, tail.repeat(head_index.size(0)))
            corrupt_tail_scores.append(corrupt_scores.unsqueeze(-1))
        corrupt_tail_scores = torch.cat(corrupt_tail_scores, dim=-1)

        # Rank the true triplets among corrupted triplets
        for i, true_score in enumerate(true_scores):
            all_scores = torch.cat([true_score.unsqueeze(0), corrupt_tail_scores[i]])
            rank = (all_scores > true_score).sum().item() + 1  # Rank starts from 1
            ranks.append(rank)
            mrr += 1 / rank

    mean_rank = sum(ranks) / len(ranks)
    mean_reciprocal_rank = mrr / len(ranks)
    return mean_rank, mean_reciprocal_rank


In [80]:
rank, mrr = test(model, test_data, k=10)
print(f'Test Mean Rank: {rank:.2f}, Test MRR: {mrr:.4f}')


Test Mean Rank: 7060.62, Test MRR: 0.0051


## 4. Knowledge Graph Embedding by Translating on Hyperplanes

### 4.1. TransH

In [2]:
# Import commonly used modules for file paths, PyTorch tensors, and neural networks
import os.path as osp
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset

# Define a PyTorch model class for the TransH algorithm
class TransH(nn.Module):
    def __init__(self, num_nodes, num_rels, embedding_dim):
        # Initialize the parent nn.Module class
        super(TransH, self).__init__()
        
        # Define entity and relation embeddings
        self.entity_emb = nn.Embedding(num_nodes, embedding_dim)  # Embeddings for entities
        self.relation_emb = nn.Embedding(num_rels, embedding_dim)  # Embeddings for relations
        self.relation_norm = nn.Embedding(num_rels, embedding_dim)  # Embedding for relation-specific normal vectors

        # Initialize embedding parameters
        self.reset_parameters()

    def reset_parameters(self):
        # Initialize embeddings with Xavier uniform distribution
        nn.init.xavier_uniform_(self.entity_emb.weight)
        nn.init.xavier_uniform_(self.relation_emb.weight)
        nn.init.xavier_uniform_(self.relation_norm.weight)

    def forward(self, head_idx, rel_idx, tail_idx):
        # Fetch embeddings for the head entity, relation, and tail entity
        head = self.entity_emb(head_idx)  # Head entity embeddings
        rel = self.relation_emb(rel_idx)  # Relation embeddings
        norm = self.relation_norm(rel_idx)  # Relation-specific normal vector embeddings
        tail = self.entity_emb(tail_idx)  # Tail entity embeddings

        # Normalize the normal vector for the hyperplane (L2 norm)
        norm = F.normalize(norm, p=2, dim=-1)

        # Project head and tail embeddings onto the relation-specific hyperplane
        head_proj = head - (head * norm).sum(dim=-1, keepdim=True) * norm
        tail_proj = tail - (tail * norm).sum(dim=-1, keepdim=True) * norm

        # Compute the score for the triple (head, relation, tail)
        score = head_proj + rel - tail_proj
        return -torch.norm(score, p=2, dim=-1)  # Return the negative L2 norm of the score

    def loss(self, head_idx, rel_idx, tail_idx, neg_tail_idx, margin=1.0):
        # Calculate the positive and negative scores for the triples
        pos_score = self(head_idx, rel_idx, tail_idx)  # Positive triple score
        neg_score = self(head_idx, rel_idx, neg_tail_idx)  # Negative triple score

        # Use margin ranking loss to ensure positive scores are greater than negative scores by a margin
        return F.margin_ranking_loss(
            pos_score, 
            neg_score, 
            target=torch.ones_like(pos_score),  # Target label (+1) to indicate positive > negative
            margin=margin  # Margin value
        )


### 4.2. Create Loader

In [3]:
def create_loader(edge_index, edge_type, num_nodes, batch_size, shuffle=True):
    head_idx = edge_index[0]
    tail_idx = edge_index[1]
    dataset = TensorDataset(head_idx, edge_type, tail_idx)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)


### 4.3. Load dataset

In [115]:
# Load FB15k-237 dataset
from torch_geometric.datasets import FB15k_237

device = 'cuda' if torch.cuda.is_available() else 'cpu'
path = osp.join('data', 'FB15k-237')
train_data = FB15k_237(path, split='train')[0].to(device)
val_data = FB15k_237(path, split='val')[0].to(device)
test_data = FB15k_237(path, split='test')[0].to(device)


Downloading https://raw.githubusercontent.com/villmow/datasets_knowledge_embedding/master/FB15k-237/train.txt
Downloading https://raw.githubusercontent.com/villmow/datasets_knowledge_embedding/master/FB15k-237/valid.txt
Downloading https://raw.githubusercontent.com/villmow/datasets_knowledge_embedding/master/FB15k-237/test.txt
Processing...
Done!


### 4.4. Optimizers

In [116]:
# Initialize the model
num_nodes = train_data.num_nodes
num_rels = train_data.num_edge_types
model = TransH(num_nodes=num_nodes, num_rels=num_rels, embedding_dim=50).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


In [117]:
train_loader = create_loader(train_data.edge_index, train_data.edge_type, num_nodes, batch_size=256)

### 4.5. Train

In [118]:
# Training loop
for epoch in range(1, 6):
    model.train()
    total_loss = 0
    for head, rel, tail in train_loader:
        head, rel, tail = head.to(device), rel.to(device), tail.to(device)
        neg_tail = torch.randint(0, num_nodes, tail.size(), device=device)

        optimizer.zero_grad()
        loss = model.loss(head, rel, tail, neg_tail)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch}, Loss: {total_loss / len(train_loader):.4f}")


Epoch 1, Loss: 0.5315
Epoch 2, Loss: 0.1710
Epoch 3, Loss: 0.1004
Epoch 4, Loss: 0.0783
Epoch 5, Loss: 0.0664


### 4.6. Test

In [119]:
# Evaluation
@torch.no_grad()
def test(model, data, num_nodes, k=10):
    model.eval()
    head_idx = data.edge_index[0]
    rel_idx = data.edge_type
    tail_idx = data.edge_index[1]

    ranks, mrr = [], 0.0
    for head, rel, tail in zip(head_idx, rel_idx, tail_idx):
        head, rel, tail = head.to(device), rel.to(device), tail.to(device)

        # True score
        true_score = model(head.unsqueeze(0), rel.unsqueeze(0), tail.unsqueeze(0))

        # Corrupted tail scores
        all_entities = torch.arange(num_nodes, device=device)
        scores = model(head.repeat(num_nodes), rel.repeat(num_nodes), all_entities)

        # Rank calculation
        rank = (scores > true_score).sum().item() + 1
        ranks.append(rank)
        mrr += 1 / rank

    mean_rank = sum(ranks) / len(ranks)
    mean_reciprocal_rank = mrr / len(ranks)
    return mean_rank, mean_reciprocal_rank

In [120]:
# Test the model
mean_rank, mean_reciprocal_rank = test(model, test_data, num_nodes)
print(f"Mean Rank: {mean_rank:.2f}, Mean Reciprocal Rank: {mean_reciprocal_rank:.4f}")

Mean Rank: 285.20, Mean Reciprocal Rank: 0.2399


## 5. Embedding Entities and Relations for Learning and Inference in Knowledge Bases

In [None]:
import argparse
import os.path as osp

import torch
import torch.optim as optim

from torch_geometric.datasets import FB15k_237
# from torch_geometric.nn import DistMult

model_map = {
    'distmult': DistMult,
    'rotate': RotatE,
}


### 5.1. DistMult

In [None]:
import torch
import torch.nn.functional as F
from torch import Tensor

# Import the Knowledge Graph Embedding (KGE) base class
from torch_geometric.nn.kge import KGEModel

# Define a DistMult class inheriting from the KGEModel
class DistMult(KGEModel):
    def __init__(
        self,
        num_nodes: int,         # Number of entities (nodes) in the knowledge graph
        num_relations: int,     # Number of relation types in the knowledge graph
        hidden_channels: int,   # Dimensionality of the embedding space
        margin: float = 1.0,    # Margin for margin ranking loss
        sparse: bool = False,   # Whether embeddings are stored sparsely
    ):
        # Initialize the parent KGEModel with node and relation embeddings
        super().__init__(num_nodes, num_relations, hidden_channels, sparse)

        # Set the margin value for the loss function
        self.margin = margin

        # Reset embeddings to initialize their values
        self.reset_parameters()

    def reset_parameters(self):
        # Initialize node embeddings with Xavier uniform distribution
        torch.nn.init.xavier_uniform_(self.node_emb.weight)
        # Initialize relation embeddings with Xavier uniform distribution
        torch.nn.init.xavier_uniform_(self.rel_emb.weight)

    def forward(
        self,
        head_index: Tensor,  # Indices of head entities
        rel_type: Tensor,    # Indices of relation types
        tail_index: Tensor,  # Indices of tail entities
    ) -> Tensor:
        # Retrieve embeddings for head entities
        head = self.node_emb(head_index)
        # Retrieve embeddings for relations
        rel = self.rel_emb(rel_type)
        # Retrieve embeddings for tail entities
        tail = self.node_emb(tail_index)

        # Compute the DistMult score: element-wise multiplication and summation
        return (head * rel * tail).sum(dim=-1)

    def loss(
        self,
        head_index: Tensor,  # Indices of positive head entities
        rel_type: Tensor,    # Indices of positive relations
        tail_index: Tensor,  # Indices of positive tail entities
    ) -> Tensor:
        # Compute scores for positive triples
        pos_score = self(head_index, rel_type, tail_index)
        # Compute scores for negative triples (randomly sampled)
        neg_score = self(*self.random_sample(head_index, rel_type, tail_index))

        # Apply margin ranking loss to separate positive and negative scores
        return F.margin_ranking_loss(
            pos_score,                        # Positive scores
            neg_score,                        # Negative scores
            target=torch.ones_like(pos_score), # Target: 1 for positive ranking
            margin=self.margin,               # Margin for the loss
        )


### 5.2. Load dataset

In [129]:
parser = argparse.ArgumentParser()
parser.add_argument('--model', choices=model_map.keys(), type=str.lower, required=False)
# Add this to avoid issues with Jupyter Notebook's internal arguments
parser.add_argument("-f", required=False, help="Dummy argument to prevent Jupyter errors.")
args = parser.parse_args([])  # Passing an empty list ensures no CLI arguments are processed.

device = 'cuda' if torch.cuda.is_available() else 'cpu'
path = osp.join('data', 'FB15k')

train_data = FB15k_237(path, split='train')[0].to(device)
val_data = FB15k_237(path, split='val')[0].to(device)
test_data = FB15k_237(path, split='test')[0].to(device)

In [130]:
model_arg_map = {'rotate': {'margin': 9.0}}
args.model = "distmult"
model = model_map[args.model](
    num_nodes=train_data.num_nodes,
    num_relations=train_data.num_edge_types,
    hidden_channels=50,
    **model_arg_map.get(args.model, {}),
).to(device)

loader = model.loader(
    head_index=train_data.edge_index[0],
    rel_type=train_data.edge_type,
    tail_index=train_data.edge_index[1],
    batch_size=1000,
    shuffle=True,
)

In [None]:
optimizer_map = {
    'distmult': optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-6),
    'rotate': optim.Adam(model.parameters(), lr=1e-3),
}
optimizer = optimizer_map[args.model]

### 5.3. Train

In [None]:
def train():
    model.train()
    total_loss = total_examples = 0
    for head_index, rel_type, tail_index in loader:
        optimizer.zero_grad()
        loss = model.loss(head_index, rel_type, tail_index)
        loss.backward()
        optimizer.step()
        total_loss += float(loss) * head_index.numel()
        total_examples += head_index.numel()
    return total_loss / total_examples




### 5.4. Test

In [137]:
@torch.no_grad()
def test(data):
    model.eval()
    result = model.test(
        head_index=data.edge_index[0],
        rel_type=data.edge_type,
        tail_index=data.edge_index[1],
        batch_size=20000,
        k=10,
    )
    print(result)  # Inspect the result
    return result


In [None]:
for epoch in range(1, 11):
    loss = train()
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')
    if epoch % 25 == 0:
        rank, mrr = test(val_data)
        print(f'Epoch: {epoch:03d}, Val Mean Rank: {rank:.2f}')

rank, mrr, _ = test(test_data) 
print(f'Test Mean Rank: {rank:.2f}, Test MRR: {mrr:.4f}')


Epoch: 001, Loss: 0.2411
Epoch: 002, Loss: 0.2315
Epoch: 003, Loss: 0.2206
Epoch: 004, Loss: 0.2123
Epoch: 005, Loss: 0.2032
Epoch: 006, Loss: 0.1961
Epoch: 007, Loss: 0.1887
Epoch: 008, Loss: 0.1829
Epoch: 009, Loss: 0.1773
Epoch: 010, Loss: 0.1716


100%|██████████| 20466/20466 [00:04<00:00, 4869.25it/s]

(539.7003173828125, 0.22544750571250916, 0.3587413270790579)
Test Mean Rank: 539.70, Test MRR: 0.2254



