### 1) Relational GCN model
### 2) TransE model
### 3) distmult model
--------------------------------------------------------

### 1) Relational GCN model

#### Import necessary Libraries

In [1]:
import argparse
import os.path as osp
import time

import torch
import torch.nn.functional as F

from torch_geometric.datasets import Entities
from torch_geometric.nn import FastRGCNConv, RGCNConv
from torch_geometric.utils import k_hop_subgraph
!pip install rdflib
# parser = argparse.ArgumentParser()
# parser.add_argument('--dataset', type=str, default='AIFB',choices=['AIFB', 'MUTAG', 'BGS', 'AM'])
# args = parser.parse_args()
dataset = 'AIFB'



#### Trade memory consumption for faster computation.

In [2]:
# 
if dataset in ['AIFB', 'MUTAG']:
    Conv = FastRGCNConv
else:
    Conv = RGCNConv


#### Dowloading dataset

In [3]:
path = osp.join('data', 'Entities')
dataset = Entities(path, dataset)
data = dataset[0]

#### Spliting dataset

In [4]:
node_idx = torch.cat([data.train_idx, data.test_idx], dim=0)
node_idx, edge_index, mapping, edge_mask = k_hop_subgraph(
    node_idx, 2, data.edge_index, relabel_nodes=True)

data.num_nodes = node_idx.size(0)
data.edge_index = edge_index
data.edge_type = data.edge_type[edge_mask]
data.train_idx = mapping[:data.train_idx.size(0)]
data.test_idx = mapping[data.train_idx.size(0):]

#### Building rGCN class

In [5]:

class Net(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = Conv(data.num_nodes, 16, dataset.num_relations,
                          num_bases=30)
        self.conv2 = Conv(16, dataset.num_classes, dataset.num_relations,
                          num_bases=30)

    def forward(self, edge_index, edge_type):
        x = F.relu(self.conv1(None, edge_index, edge_type))
        x = self.conv2(x, edge_index, edge_type)
        return F.log_softmax(x, dim=1)

In [6]:

if torch.cuda.is_available():
    device = torch.device('cuda')
elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')

In [7]:
device = torch.device('cpu') if dataset == 'AM' else device
model, data = Net().to(device), data.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=0.0005)

#### Defining the train and test functions

In [8]:

def train():
    model.train()
    optimizer.zero_grad()
    out = model(data.edge_index, data.edge_type)
    loss = F.nll_loss(out[data.train_idx], data.train_y)
    loss.backward()
    optimizer.step()
    return float(loss)

@torch.no_grad()
def test():
    model.eval()
    pred = model(data.edge_index, data.edge_type).argmax(dim=-1)
    train_acc = float((pred[data.train_idx] == data.train_y).float().mean())
    test_acc = float((pred[data.test_idx] == data.test_y).float().mean())
    return train_acc, test_acc

In [9]:
times = []
for epoch in range(1, 10):
    start = time.time()
    loss = train()
    train_acc, test_acc = test()
    print(f'Epoch: {epoch:02d}, Loss: {loss:.4f}, Train: {train_acc:.4f} '
          f'Test: {test_acc:.4f}')
    times.append(time.time() - start)
print(f"Median time per epoch: {torch.tensor(times).median():.4f}s")

Epoch: 01, Loss: 1.3949, Train: 0.9000 Test: 0.7778
Epoch: 02, Loss: 0.7562, Train: 0.9429 Test: 0.8611
Epoch: 03, Loss: 0.3254, Train: 0.9714 Test: 0.8889
Epoch: 04, Loss: 0.1400, Train: 0.9857 Test: 0.9167
Epoch: 05, Loss: 0.0714, Train: 0.9857 Test: 0.9167
Epoch: 06, Loss: 0.0429, Train: 0.9857 Test: 0.9444
Epoch: 07, Loss: 0.0260, Train: 1.0000 Test: 0.9444
Epoch: 08, Loss: 0.0138, Train: 1.0000 Test: 0.9444
Epoch: 09, Loss: 0.0064, Train: 1.0000 Test: 0.9444
Median time per epoch: 0.0070s


### * TransE and distmult model

In [10]:
import argparse
import os.path as osp

import torch
import torch.optim as optim

from torch_geometric.datasets import FB15k_237
from torch_geometric.nn import ComplEx, DistMult, RotatE, TransE

model_map = {
    'transe': TransE,
    'complex': ComplEx,
    'distmult': DistMult,
    'rotate': RotatE,
}


In [11]:

parser = argparse.ArgumentParser()
parser.add_argument('--model', choices=model_map.keys(), type=str.lower,required=False)
parser.add_argument("-f", required=False)
args = parser.parse_args()

device = 'cuda' if torch.cuda.is_available() else 'cpu'
path = osp.join(  'data', 'FB15k')

train_data = FB15k_237(path, split='train')[0].to(device)
val_data = FB15k_237(path, split='val')[0].to(device)
test_data = FB15k_237(path, split='test')[0].to(device)

### 2. TransE model

In [12]:
model_arg_map = {'rotate': {'margin': 9.0}}
args.model = "transe"
model = model_map[args.model](
    num_nodes=train_data.num_nodes,
    num_relations=train_data.num_edge_types,
    hidden_channels=50,
    **model_arg_map.get(args.model, {}),
).to(device)

loader = model.loader(
    head_index=train_data.edge_index[0],
    rel_type=train_data.edge_type,
    tail_index=train_data.edge_index[1],
    batch_size=1000,
    shuffle=True,
)

In [13]:

loader = model.loader(
    head_index=train_data.edge_index[0],
    rel_type=train_data.edge_type,
    tail_index=train_data.edge_index[1],
    batch_size=1000,
    shuffle=True,
)

optimizer_map = {
    'transe': optim.Adam(model.parameters(), lr=0.01),
    'complex': optim.Adagrad(model.parameters(), lr=0.001, weight_decay=1e-6),
    'distmult': optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-6),
    'rotate': optim.Adam(model.parameters(), lr=1e-3),
}
optimizer = optimizer_map[args.model]

In [14]:

def train():
    model.train()
    total_loss = total_examples = 0
    for head_index, rel_type, tail_index in loader:
        optimizer.zero_grad()
        loss = model.loss(head_index, rel_type, tail_index)
        loss.backward()
        optimizer.step()
        total_loss += float(loss) * head_index.numel()
        total_examples += head_index.numel()
    return total_loss / total_examples




In [15]:
@torch.no_grad()
def test(data):
    model.eval()
    return model.test(
        head_index=data.edge_index[0],
        rel_type=data.edge_type,
        tail_index=data.edge_index[1],
        batch_size=20000,
        k=10,
    )

In [16]:

for epoch in range(1, 5):
    loss = train()
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')
    if epoch % 25 == 0:
        rank, mrr = test(val_data)
        print(f'Epoch: {epoch:03d}, Val Mean Rank: {rank:.2f}')

rank, mrr = test(test_data)
print(f'Test Mean Rank: {rank:.2f}, Test MRR: {mrr:.4f}')

Epoch: 001, Loss: 0.7601
Epoch: 002, Loss: 0.5543
Epoch: 003, Loss: 0.4286
Epoch: 004, Loss: 0.3375


100%|██████████████████████████████████████████████████████████████████████████| 20466/20466 [00:09<00:00, 2203.05it/s]

Test Mean Rank: 767.96, Test MRR: 0.3485





### 3. distmult model

In [17]:
model_arg_map = {'rotate': {'margin': 9.0}}
args.model = "distmult"
model = model_map[args.model](
    num_nodes=train_data.num_nodes,
    num_relations=train_data.num_edge_types,
    hidden_channels=50,
    **model_arg_map.get(args.model, {}),
).to(device)

loader = model.loader(
    head_index=train_data.edge_index[0],
    rel_type=train_data.edge_type,
    tail_index=train_data.edge_index[1],
    batch_size=1000,
    shuffle=True,
)

In [18]:

loader = model.loader(
    head_index=train_data.edge_index[0],
    rel_type=train_data.edge_type,
    tail_index=train_data.edge_index[1],
    batch_size=1000,
    shuffle=True,
)

optimizer_map = {
    'transe': optim.Adam(model.parameters(), lr=0.01),
    'complex': optim.Adagrad(model.parameters(), lr=0.001, weight_decay=1e-6),
    'distmult': optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-6),
    'rotate': optim.Adam(model.parameters(), lr=1e-3),
}
optimizer = optimizer_map[args.model]

In [19]:
for epoch in range(1, 11):
    loss = train()
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')
    if epoch % 25 == 0:
        rank, mrr = test(val_data)
        print(f'Epoch: {epoch:03d}, Val Mean Rank: {rank:.2f}')

rank, mrr = test(test_data)
print(f'Test Mean Rank: {rank:.2f}, Test MRR: {mrr:.4f}')

Epoch: 001, Loss: 1.0000
Epoch: 002, Loss: 1.0000
Epoch: 003, Loss: 0.9999
Epoch: 004, Loss: 0.9996
Epoch: 005, Loss: 0.9984
Epoch: 006, Loss: 0.9943
Epoch: 007, Loss: 0.9850
Epoch: 008, Loss: 0.9680
Epoch: 009, Loss: 0.9416
Epoch: 010, Loss: 0.9044


100%|██████████████████████████████████████████████████████████████████████████| 20466/20466 [00:08<00:00, 2489.78it/s]

Test Mean Rank: 778.73, Test MRR: 0.3035



