In [1]:
import torch
import torch_geometric.transforms as T
from torch_geometric.datasets import WordNet18RR


In [2]:
# Load the WordNet18RR dataset rooted at `data_dir`, with NormalizeFeatures
# registered as the per-access transform.
# NOTE(review): the Data object displayed by `dataset[0]` lists no `x`
# attribute, so NormalizeFeatures presumably has nothing to normalise here;
# confirm whether the transform is actually needed.
data_dir = 'data'
dataset = WordNet18RR(
    root=data_dir,
    transform=T.NormalizeFeatures(),
)
            

In [3]:
# Fetch the graph through the public indexing API instead of `dataset.data`:
# `.data` exposes the dataset's internal storage and skips the configured
# transform, whereas `dataset[0]` returns the Data object as consumers are
# meant to see it.
data = dataset[0]
print('Number of nodes: {:,}'.format(data.num_nodes))
print('Number of edges: {:,}'.format(data.num_edges))

Number of nodes: 40,943
Number of edges: 93,003


In [4]:
import numpy as np
# Count the distinct edge-type ids; this count doubles as the exclusive upper
# bound in the range check below.
num_relations = len(np.unique(data.edge_type))
num_nodes = data.num_nodes
print('Number of relations: {:,}'.format(num_relations))

# Sanity checks: relation ids must lie in [0, num_relations) and both rows of
# edge_index must hold valid node ids in [0, num_nodes).
assert ((data.edge_type >= 0) & (data.edge_type < num_relations)).all(), 'edge_type must be in [0, num_relations)'
for endpoints in (data.edge_index[0], data.edge_index[1]):
    assert ((endpoints >= 0) & (endpoints < data.num_nodes)).all(), 'edge_index must be in [0, num_nodes)'

Number of relations: 11


In [5]:
from dataset import WN18RR, Triple
import numpy as np

# Per-edge views of the graph: transposing edge_index gives one
# (head, tail) row per edge, aligned with the relation id in edge_type.
stores = data.node_stores
edge_index = data.edge_index.t()
edge_type = data.edge_type

# Split masks taken from the data object (one entry per edge, per the shapes
# shown by `dataset[0]`).
train_mask, test_mask, val_mask = data.train_mask, data.test_mask, data.val_mask

# Build one Triple per edge.
# NOTE(review): Triple is constructed as (head, tail, relation) while
# train_model unpacks batches as (h, r, t); confirm that Triple / WN18RR map
# the fields to the order the model expects.
triples = [
    Triple(head, tail, relation)
    for (head, tail), relation in zip(edge_index, edge_type)
]



In [6]:
import numpy as np

# Convert the list of triples to a NumPy array so the split masks can index it.
triples = np.array(triples)

# Select each split with its mask.
train_triples = triples[train_mask]
test_triples = triples[test_mask]
val_triples = triples[val_mask]

# Wrap every split in the project's WN18RR dataset class.
train = WN18RR(train_triples)
test = WN18RR(test_triples)
val = WN18RR(val_triples)

In [7]:
# Enable IPython's autoreload extension so that edits to imported modules are
# picked up without restarting the kernel (mode 2 reloads all modules before
# executing code). Comments stay on their own lines: text after a line magic
# would be passed to the magic as arguments.
%load_ext autoreload
%autoreload 2

In [46]:
# Inspect the dtype of the first element of the first training batch.
# Requires `train_loader`, which is created in a cell further down, so this
# cell fails on a fresh top-to-bottom run.
next(iter(train_loader))[0].dtype

torch.int64

In [71]:
from model import TransE
from torch import Tensor
import torch

# Smoke test: build the TransE model and evaluate its loss on an all-zero
# batch of three indices used for every argument.
# NOTE(review): 64 is presumably the embedding dimension; confirm against
# the TransE constructor in model.py.
transe = TransE(num_nodes, num_relations, 64)
zeroes = torch.zeros(3, dtype=torch.int64)
transe.loss(zeroes, zeroes, zeroes).item()

1.0

In [62]:
from torch.utils.data import DataLoader
# One loader per split, all sharing the same batch size. Only the training
# loader shuffles; the evaluation loaders rely on DataLoader's default of
# shuffle=False.
BATCH_SIZE = 32
train_loader = DataLoader(train, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test, batch_size=BATCH_SIZE)
val_loader = DataLoader(val, batch_size=BATCH_SIZE)

In [63]:
def train_model(model, train_loader, val_loader, optimizer, num_epochs):
    """Train ``model`` and record the mean batch loss of every epoch.

    Args:
        model: Object exposing ``train()``, ``eval()`` and ``loss(h, r, t)``;
            the loss must return a scalar tensor supporting ``backward()``
            and ``item()``.
        train_loader: Sized iterable of ``(h, r, t)`` batches used for the
            optimisation steps.
        val_loader: Sized iterable of ``(h, r, t)`` batches used only to
            measure the loss.
        optimizer: Optimizer exposing ``zero_grad()`` and ``step()``.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        Tuple ``(train_history, val_history)``: two lists with one mean batch
        loss per epoch.

    Raises:
        ZeroDivisionError: If either loader has length zero.
    """
    train_history = []
    val_history = []
    for epoch in range(num_epochs):
        model.train()  # Enter train mode

        loss_accum = 0
        for h, r, t in train_loader:
            loss_value = model.loss(h, r, t)

            optimizer.zero_grad()
            loss_value.backward()
            optimizer.step()

            loss_accum += loss_value.item()

        train_history.append(loss_accum / len(train_loader))

        model.eval()  # Enter evaluation mode
        loss_accum = 0
        # Validation only reads the loss value, so disable gradient tracking
        # to avoid building an autograd graph for every batch.
        with torch.no_grad():
            for h, r, t in val_loader:
                loss_accum += model.loss(h, r, t).item()

        val_history.append(loss_accum / len(val_loader))

        print(f"Train loss: {train_history[-1]:.4f}, Val loss: {val_history[-1]:.4f}")

    return train_history, val_history



In [68]:
import torch.optim as optim

import torch
# Anomaly detection is a debugging aid that adds overhead to every backward
# pass; turn it off once training runs cleanly.
torch.autograd.set_detect_anomaly(True)

# Keep the optimizer under its own name: assigning it to `optim` would rebind
# the `torch.optim` module alias imported at the top of this cell.
optimizer = optim.Adam(transe.parameters(), lr=0.001)
train_history, val_history = train_model(transe, train_loader, val_loader, optimizer, 10)

TypeError: mean() received an invalid combination of arguments - got (axis=NoneType, dtype=NoneType, out=NoneType, ), but expected one of:
 * (*, torch.dtype dtype)
 * (tuple of ints dim, bool keepdim, *, torch.dtype dtype)
 * (tuple of names dim, bool keepdim, *, torch.dtype dtype)


In [9]:
# Display the dataset's first element; as the cell's last expression, its
# repr is rendered as the cell output.
dataset[0]

Data(edge_index=[2, 93003], edge_type=[93003], train_mask=[93003], val_mask=[93003], test_mask=[93003], num_nodes=40943)