In [None]:
%pip install torch --extra-index-url https://download.pytorch.org/whl/cu128

In [1]:
import torch
import pykeen
import pandas as pd
from pykeen import predict
from pykeen.datasets import Nations
from pykeen.pipeline import pipeline
from pykeen.triples import TriplesFactory

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [3]:
file_path = "data/all-triples.tsv"
tf = TriplesFactory.from_path(file_path)
training, testing = tf.split()

result = pipeline(
    training=training,
    testing=testing,
    model="TransE",
    model_kwargs=dict(
        embedding_dim=128,
    ),
    training_kwargs=dict(num_epochs=20),
    optimizer_kwargs=dict(
        lr=0.01,
    ),
    negative_sampler_kwargs=dict(
        num_negs_per_pos=1,
    ),
    random_seed=2025,
    device=device,
)

using automatically assigned random_state=595865306
Training epochs on cuda:0: 100%|██████████| 20/20 [09:59<00:00, 29.97s/epoch, loss=0.827, prev_loss=0.848]
Evaluating on cuda:0: 100%|██████████| 125k/125k [49:44<00:00, 41.9triple/s]    
INFO:pykeen.evaluation.evaluator:Evaluation took 3089.84s seconds


In [None]:
evaluation = result.metric_results.to_dict()
evaluation["both"]["realistic"]

{'arithmetic_mean_rank': 46402.50390625,
 'adjusted_arithmetic_mean_rank_index': 0.5973290901397814,
 'harmonic_mean_rank': 56.25301742553711,
 'inverse_arithmetic_mean_rank': 2.1550560632022098e-05,
 'z_arithmetic_mean_rank': 516.8924445406119,
 'median_absolute_deviation': 23518.51953125,
 'standard_deviation': 62948.8046875,
 'inverse_geometric_mean_rank': 0.00010474642476765439,
 'adjusted_inverse_harmonic_mean_rank': 0.01772143769065738,
 'adjusted_geometric_mean_rank_index': 0.8871474310432702,
 'adjusted_arithmetic_mean_rank': 0.4026760934198168,
 'count': 250368.0,
 'geometric_mean_rank': 9546.865234375,
 'variance': 3962552064.0,
 'median_rank': 16897.0,
 'inverse_median_rank': 5.9182104450883344e-05,
 'z_inverse_harmonic_mean_rank': 3309.2226051967723,
 'inverse_harmonic_mean_rank': 0.01777682453393936,
 'z_geometric_mean_rank': 443.01781981447516,
 'hits_at_1': 0.010200984151329244,
 'hits_at_3': 0.016539653629856852,
 'hits_at_5': 0.0218478399795501,
 'hits_at_10': 0.032456

In [5]:
model_name = "models/TransE-citations"
result.save_to_directory(model_name)

INFO:pykeen.triples.triples_factory:Stored TriplesFactory(num_entities=233568, num_relations=31, create_inverse_triples=False, num_triples=500732, path="C:\Users\mirxm\Storage\Work\MDS\S2\SDM\MDS-SDM-KnowledgeGraphs\data\all-triples.tsv") to file:///C:/Users/mirxm/Storage/Work/MDS/S2/SDM/MDS-SDM-KnowledgeGraphs/models/TransE-citations/training_triples
INFO:pykeen.pipeline.api:Saved to directory: C:\Users\mirxm\Storage\Work\MDS\S2\SDM\MDS-SDM-KnowledgeGraphs\models\TransE-citations


In [None]:
p = "http://localhost:7200/academia-sdm#"
paper = f"<{p}d5db5bea38363c0d5cdee5800d4ad8f8ace7e223>"
paperCites = f"<{p}paperCites>"
entity_embeddings = result.model.entity_representations[0]
relation_embeddings = result.model.relation_representations[0]
rel_id = result.training.relation_to_id[paperCites]
rel_rep = entity_embeddings(indices=torch.as_tensor([rel_id], device=device))
sub_id = result.training.entity_to_id[paper]
sub_rep = entity_embeddings(indices=torch.as_tensor([sub_id], device=device))

tensor([[ 0.1002,  0.0997,  0.0768,  0.1002, -0.1002, -0.0997,  0.0763, -0.0975,
         -0.1002,  0.0997,  0.0997, -0.0997,  0.0763, -0.0764,  0.0997,  0.1001,
         -0.0763,  0.0997,  0.0768, -0.1002, -0.0970,  0.0763,  0.0763, -0.0763,
         -0.0768,  0.0768, -0.1002, -0.0763,  0.1002, -0.0997, -0.0763, -0.0997,
         -0.0997,  0.0763, -0.0763, -0.0764, -0.0768,  0.0763, -0.1002,  0.1002,
          0.0768, -0.0997,  0.0997, -0.1002, -0.0997, -0.0763, -0.1002, -0.0996,
         -0.0790, -0.0997, -0.0790, -0.0997, -0.1001,  0.0768,  0.0997, -0.0975,
         -0.1002,  0.0763,  0.0768, -0.0768, -0.0768,  0.0763, -0.1002,  0.1002,
          0.0769, -0.1002, -0.0763, -0.0768, -0.0763,  0.1002, -0.0997,  0.0763,
         -0.0997, -0.1002,  0.0763, -0.0768,  0.0763,  0.0768,  0.0997, -0.0768,
         -0.0795, -0.0997, -0.0768, -0.0763,  0.1002, -0.1002,  0.0997,  0.0768,
          0.0763,  0.0763, -0.0997,  0.0763, -0.0768,  0.0763, -0.0763,  0.0763,
         -0.0768, -0.0996,  

In [26]:
help(result.model)

Help on TransE in module pykeen.models.unimodal.trans_e object:

class TransE(pykeen.models.nbase.ERModel)
 |  TransE(
 |      *,
 |      embedding_dim: int = 50,
 |      scoring_fct_norm: int = 1,
 |      power_norm: bool = False,
 |      entity_initializer: Union[str, collections.abc.Callable[[torch.Tensor], torch.Tensor], NoneType] = <function xavier_uniform_ at 0x000002A61172EAC0>,
 |      entity_constrainer: Union[str, collections.abc.Callable[[torch.Tensor], torch.Tensor], NoneType] = <function normalize at 0x000002A65C791580>,
 |      relation_initializer: Union[str, collections.abc.Callable[[torch.Tensor], torch.Tensor], NoneType] = <pykeen.utils.compose object at 0x000002A611720980>,
 |      relation_constrainer: Union[str, collections.abc.Callable[[torch.Tensor], torch.Tensor], NoneType] = None,
 |      regularizer: Union[str, pykeen.regularizers.Regularizer, type[pykeen.regularizers.Regularizer], NoneType] = None,
 |      regularizer_kwargs: Optional[collections.abc.Mapping[