In [1]:
import networkx as nx
from pykeen.pipeline import pipeline
from pykeen.datasets import Nations, get_dataset
import matplotlib.pyplot as plt
import matplotlib as mpl
import torch
from pykeen.models import predict
from pykeen.evaluation import evaluate, RankBasedEvaluator
from pykeen.metrics.ranking import HitsAtK
import pandas as pd


import logging
from pathlib import Path

import click
import more_click
import torch
from pykeen.evaluation import RankBasedEvaluator
from pykeen.losses import NSSALoss
from pykeen.models.inductive import InductiveNodePiece, InductiveNodePieceGNN
from pykeen.trackers import ConsoleResultTracker, WANDBResultTracker
from pykeen.training import SLCWATrainingLoop
from pykeen.typing import TESTING, TRAINING, VALIDATION
from pykeen.utils import resolve_device, set_random_seed
from torch.optim import Adam


from pykeen.metrics.ranking import HitsAtK

from pathlib import Path

from pykeen.datasets.inductive.base import DisjointInductivePathDataset
from typing_extensions import Literal
import os
from pykeen.hpo import hpo_pipeline
from pykeen.triples import TriplesFactory
from pykeen.models import InductiveNodePiece
from pykeen.typing import TESTING, TRAINING, VALIDATION

seed = 1234

In [2]:
class InductiveLPDataset(DisjointInductivePathDataset):
    """An inductive link prediction dataset for the ILPC 2022 Challenge."""

    
    
    
    def __init__(self , **kwargs):
        """Initialize the inductive link prediction dataset.

        :param size: "small" or "large"
        :param kwargs: keyword arguments to forward to the base dataset class, cf. DisjointInductivePathDataset
        """
        DATA_TYPE = "_fully_inductive.tsv"
        TRAIN_PATH = "MSCallGraph_0_train" + DATA_TYPE
        TEST_PATH = "MSCallGraph_0_test" + DATA_TYPE
        VALIDATE_PATH = "MSCallGraph_0_validation" + DATA_TYPE
        INFERENCE_PATH = "MSCallGraph_0_inference" + DATA_TYPE


        super().__init__(
            transductive_training_path=os.getcwd()+"/"+TRAIN_PATH,
            inductive_inference_path=os.getcwd()+"/"+INFERENCE_PATH,
            inductive_validation_path=os.getcwd()+"/"+VALIDATE_PATH,
            inductive_testing_path=os.getcwd()+"/"+TEST_PATH,
            create_inverse_triples=True,
            eager=True,
            **kwargs
        )


In [3]:
def show_metrics(dictionary):
    for key in dictionary.keys():
        print(key)
        display(pd.DataFrame(dictionary[key]))

In [4]:
dataset = InductiveLPDataset()


In [5]:
model = InductiveNodePieceGNN(
        triples_factory=dataset.transductive_training,
        inference_factory=dataset.inductive_inference,
    ).to(resolve_device())
print(f"Number of parameters: {sum(p.numel() for p in model.parameters())}")
print(f"Space occupied: {model.num_parameter_bytes} bytes")

No random seed is specified. This may lead to non-reproducible results.


sampling:   0%|          | 0.00/3.79k [00:00<?, ?it/s]

No symbolic computation of output shape.


sampling:   0%|          | 0.00/1.49k [00:00<?, ?it/s]

No symbolic computation of output shape.
No cuda devices were available. The model runs on CPU


Number of parameters: 34112
Space occupied: 136448 bytes


In [6]:
tracker = ConsoleResultTracker()
# default training regime is negative sampling (SLCWA)
# you can also use the 1-N regime with the LCWATrainingLoop
# the LCWA loop does not need negative sampling kwargs, but accepts label_smoothing in the .train() method
training_loop = SLCWATrainingLoop(
        triples_factory=dataset.transductive_training,
        model=model,
        mode=TRAINING,  # must be specified for the inductive setup
        result_tracker=tracker,

    )

In [7]:
metrics = ['meanreciprocalrank', HitsAtK(1),
                 HitsAtK(3), HitsAtK(5), HitsAtK(10)]

train_evaluator = RankBasedEvaluator(
        mode=TRAINING,
        # metrics=["hits_at_k"]*5,
        # metrics_kwargs=[dict(k=k) for k in (1, 3, 5, 10, 100)],
        metrics=metrics,
        add_defaults=False,
    )
# specifying hits@k values: 1, 3, 5, 10, 100
valid_evaluator = RankBasedEvaluator(
        mode=VALIDATION,
        # metrics=["hits_at_k"]*5,
        # metrics_kwargs=[dict(k=k) for k in (1, 3, 5, 10, 100)],
        metrics=metrics,
        add_defaults=False,
    )
test_evaluator = RankBasedEvaluator(
        mode=TESTING,
        # metrics=["hits_at_k"] * 5,
        # metrics_kwargs=[dict(k=k) for k in (1, 3, 5, 10, 100)],
        metrics = metrics,
        add_defaults=False
    )

In [8]:
training_loop.train(
        triples_factory=dataset.transductive_training,
        num_epochs=2,
        callbacks="evaluation",
        callback_kwargs=dict(
            evaluator=valid_evaluator,
            evaluation_triples=dataset.inductive_validation.mapped_triples,
            prefix="validation",
            frequency=1,
            additional_filter_triples=dataset.inductive_inference.mapped_triples,
        ),
        
    )

Training epochs on cpu:   0%|          | 0/2 [00:00<?, ?epoch/s]

Training batches on cpu:   0%|          | 0/51 [00:00<?, ?batch/s]

Evaluating on cpu:   0%|          | 0.00/994 [00:00<?, ?triple/s]

Step: 1
Metric: validation.head.optimistic.inverse_harmonic_mean_rank = 0.05756291548889489
Metric: validation.tail.optimistic.inverse_harmonic_mean_rank = 0.09075002048017283
Metric: validation.both.optimistic.inverse_harmonic_mean_rank = 0.07415646798453386
Metric: validation.head.realistic.inverse_harmonic_mean_rank = 0.054140739142894745
Metric: validation.tail.realistic.inverse_harmonic_mean_rank = 0.0907246470451355
Metric: validation.both.realistic.inverse_harmonic_mean_rank = 0.07243268936872482
Metric: validation.head.pessimistic.inverse_harmonic_mean_rank = 0.05384391134861308
Metric: validation.tail.pessimistic.inverse_harmonic_mean_rank = 0.09070417015576884
Metric: validation.both.pessimistic.inverse_harmonic_mean_rank = 0.07227404075219097
Metric: validation.head.optimistic.hits_at_1 = 0.013078470824949699
Metric: validation.tail.optimistic.hits_at_1 = 0.01609657947686117
Metric: validation.both.optimistic.hits_at_1 = 0.014587525150905433
Metric: validation.head.realistic

Training batches on cpu:   0%|          | 0/51 [00:00<?, ?batch/s]

Evaluating on cpu:   0%|          | 0.00/994 [00:00<?, ?triple/s]

Step: 2
Metric: validation.head.optimistic.inverse_harmonic_mean_rank = 0.06005765760611049
Metric: validation.tail.optimistic.inverse_harmonic_mean_rank = 0.14612802665727304
Metric: validation.both.optimistic.inverse_harmonic_mean_rank = 0.10309284213169176
Metric: validation.head.realistic.inverse_harmonic_mean_rank = 0.059953220188617706
Metric: validation.tail.realistic.inverse_harmonic_mean_rank = 0.14607557654380798
Metric: validation.both.realistic.inverse_harmonic_mean_rank = 0.10301439464092255
Metric: validation.head.pessimistic.inverse_harmonic_mean_rank = 0.059890152314389135
Metric: validation.tail.pessimistic.inverse_harmonic_mean_rank = 0.14603534305952678
Metric: validation.both.pessimistic.inverse_harmonic_mean_rank = 0.10296274768695794
Metric: validation.head.optimistic.hits_at_1 = 0.02012072434607646
Metric: validation.tail.optimistic.hits_at_1 = 0.05130784708249497
Metric: validation.both.optimistic.hits_at_1 = 0.03571428571428571
Metric: validation.head.realistic

[0.6057493330216875, 0.27081094214729234]

In [9]:
# train
show_metrics(train_evaluator.evaluate(
        model=model,
        mapped_triples=dataset.transductive_training.mapped_triples,
        additional_filter_triples=[
        dataset.transductive_training.mapped_triples,
    ]
    ).to_dict())

Evaluating on cpu:   0%|          | 0.00/6.64k [00:00<?, ?triple/s]

head


Unnamed: 0,optimistic,realistic,pessimistic
inverse_harmonic_mean_rank,0.0783,0.076869,0.076233
hits_at_1,0.045194,0.041579,0.041579
hits_at_3,0.07442,0.07442,0.073817
hits_at_5,0.095661,0.095661,0.095661
hits_at_10,0.132721,0.132721,0.132721


tail


Unnamed: 0,optimistic,realistic,pessimistic
inverse_harmonic_mean_rank,0.143599,0.142142,0.14149
hits_at_1,0.059054,0.055438,0.055438
hits_at_3,0.174902,0.174902,0.174299
hits_at_5,0.227328,0.227026,0.227026
hits_at_10,0.297228,0.297228,0.297228


both


Unnamed: 0,optimistic,realistic,pessimistic
inverse_harmonic_mean_rank,0.11095,0.109506,0.108862
hits_at_1,0.052124,0.048509,0.048509
hits_at_3,0.124661,0.124661,0.124058
hits_at_5,0.161494,0.161344,0.161344
hits_at_10,0.214974,0.214974,0.214974


In [10]:
# validation
show_metrics(valid_evaluator.evaluate(
        model=model,
        mapped_triples=dataset.inductive_validation.mapped_triples,
        additional_filter_triples=[
            # filtering of other positive triples
            dataset.inductive_inference.mapped_triples
        ],
    ).to_dict())

Evaluating on cpu:   0%|          | 0.00/994 [00:00<?, ?triple/s]

head


Unnamed: 0,optimistic,realistic,pessimistic
inverse_harmonic_mean_rank,0.060058,0.059953,0.05989
hits_at_1,0.020121,0.020121,0.020121
hits_at_3,0.045272,0.045272,0.045272
hits_at_5,0.076459,0.076459,0.076459
hits_at_10,0.118712,0.118712,0.117706


tail


Unnamed: 0,optimistic,realistic,pessimistic
inverse_harmonic_mean_rank,0.146128,0.146076,0.146035
hits_at_1,0.051308,0.051308,0.051308
hits_at_3,0.171026,0.171026,0.171026
hits_at_5,0.216298,0.216298,0.216298
hits_at_10,0.32495,0.323944,0.323944


both


Unnamed: 0,optimistic,realistic,pessimistic
inverse_harmonic_mean_rank,0.103093,0.103014,0.102963
hits_at_1,0.035714,0.035714,0.035714
hits_at_3,0.108149,0.108149,0.108149
hits_at_5,0.146378,0.146378,0.146378
hits_at_10,0.221831,0.221328,0.220825


In [11]:
# result on the test set
show_metrics(test_evaluator.evaluate(
        model=model,
        mapped_triples=dataset.inductive_testing.mapped_triples,
        additional_filter_triples=[
            # filtering of other positive triples
            dataset.inductive_inference.mapped_triples,
            dataset.inductive_validation.mapped_triples,
        ],
    ).to_dict())

Evaluating on cpu:   0%|          | 0.00/1.26k [00:00<?, ?triple/s]

head


Unnamed: 0,optimistic,realistic,pessimistic
inverse_harmonic_mean_rank,0.057384,0.056134,0.055482
hits_at_1,0.021344,0.018182,0.018182
hits_at_3,0.043478,0.043478,0.043478
hits_at_5,0.071937,0.071937,0.071937
hits_at_10,0.114625,0.114625,0.112253


tail


Unnamed: 0,optimistic,realistic,pessimistic
inverse_harmonic_mean_rank,0.16167,0.161004,0.160633
hits_at_1,0.056126,0.054545,0.054545
hits_at_3,0.179447,0.179447,0.179447
hits_at_5,0.242688,0.242688,0.242688
hits_at_10,0.36917,0.366798,0.366798


both


Unnamed: 0,optimistic,realistic,pessimistic
inverse_harmonic_mean_rank,0.109527,0.108569,0.108057
hits_at_1,0.038735,0.036364,0.036364
hits_at_3,0.111462,0.111462,0.111462
hits_at_5,0.157312,0.157312,0.157312
hits_at_10,0.241897,0.240711,0.239526
