In [1]:
from pykeen.pipeline import pipeline
from pykeen.datasets import Nations, get_dataset
import torch
from pykeen.evaluation import evaluate, RankBasedEvaluator
from pykeen.metrics.ranking import HitsAtK
import pandas as pd


import logging
from pathlib import Path

import click
import more_click
import torch
from pykeen.evaluation import RankBasedEvaluator
from pykeen.losses import NSSALoss,CrossEntropyLoss
from pykeen.models.inductive import InductiveNodePiece, InductiveNodePieceGNN
from pykeen.trackers import ConsoleResultTracker, WANDBResultTracker, FileResultTracker
from pykeen.training import SLCWATrainingLoop
from pykeen.typing import TESTING, TRAINING, VALIDATION
from pykeen.utils import resolve_device, set_random_seed
from torch.optim import Adam


from pykeen.metrics.ranking import HitsAtK

from pathlib import Path

from pykeen.datasets.inductive.base import DisjointInductivePathDataset
from typing_extensions import Literal
import os
from pykeen.hpo import hpo_pipeline
from pykeen.triples import TriplesFactory
from pykeen.models import InductiveNodePiece
from pykeen.typing import TESTING, TRAINING, VALIDATION

import time

import platform

import sys

import cpuinfo

import psutil

import subprocess

import zipfile

seed = 1234

In [2]:
class InductiveLPDataset(DisjointInductivePathDataset):
    """An inductive link prediction dataset for the ILPC 2022 Challenge."""

    
    
    
    def __init__(self , **kwargs):
        """Initialize the inductive link prediction dataset.

        :param size: "small" or "large"
        :param kwargs: keyword arguments to forward to the base dataset class, cf. DisjointInductivePathDataset
        """
        DATA_TYPE = "_fully_inductive.tsv"
        TRAIN_PATH = "MSCallGraph_0_train" + DATA_TYPE
        TEST_PATH = "MSCallGraph_0_test" + DATA_TYPE
        VALIDATE_PATH = "MSCallGraph_0_validation" + DATA_TYPE
        INFERENCE_PATH = "MSCallGraph_0_inference" + DATA_TYPE


        super().__init__(
            transductive_training_path=os.getcwd()+"/"+TRAIN_PATH,
            inductive_inference_path=os.getcwd()+"/"+INFERENCE_PATH,
            inductive_validation_path=os.getcwd()+"/"+VALIDATE_PATH,
            inductive_testing_path=os.getcwd()+"/"+TEST_PATH,
            create_inverse_triples=True,
            eager=True,
            **kwargs
        )


In [3]:
def show_metrics(dictionary,model_name,csv_name):
    for key in dictionary.keys():
        print(key)
        df = pd.DataFrame(dictionary[key])
        df.to_csv(f"{model_name}/{model_name}_{csv_name}_{key}.csv")
        print(df)

In [4]:
dataset = InductiveLPDataset()

In [5]:
model_name = 'nodepiece_0_inductive'

In [6]:
tracker = ConsoleResultTracker()

In [7]:
loss = NSSALoss() #used by RotatE and NodePiece
num_tokens = 20
embedding_dim = 200

In [8]:
model = InductiveNodePiece(
        triples_factory=dataset.transductive_training,
        inference_factory=dataset.inductive_inference,
        random_seed = seed,
        loss = loss,
        num_tokens = num_tokens,
        embedding_dim = embedding_dim
    ).to(resolve_device())
print(f"Number of parameters: {sum(p.numel() for p in model.parameters())}")
print(f"Space occupied: {model.num_parameter_bytes} bytes")

sampling:   0%|          | 0.00/3.83k [00:00<?, ?it/s]

No symbolic computation of output shape.


sampling:   0%|          | 0.00/1.30k [00:00<?, ?it/s]

No symbolic computation of output shape.


Number of parameters: 2600
Space occupied: 10400 bytes


In [9]:
directory = model_name

if not os.path.exists(directory):
    os.makedirs(directory)
    print(f'Directory {directory} created successfully!')
else:
    print(f'Directory {directory} already exists.')

Directory nodepiece_0_inductive created successfully!


In [10]:
learning_rate = 1e-3
optimizer = Adam(params=model.parameters(), lr=learning_rate)
num_epochs = 2000
patience = 20

In [11]:
metrics = ['meanreciprocalrank', HitsAtK(1),
                 HitsAtK(3), HitsAtK(5), HitsAtK(10)]

train_evaluator = RankBasedEvaluator(
        mode=TRAINING,
        metrics=metrics,
        add_defaults=False,
    )
valid_evaluator = RankBasedEvaluator(
        mode=VALIDATION,
        metrics=metrics,
        add_defaults=False,
    )
test_evaluator = RankBasedEvaluator(
        mode=TESTING,
        metrics = metrics,
        add_defaults=False
    )

In [12]:
from pykeen.stoppers import EarlyStopper

stopper = EarlyStopper(
    model = model,
    metric='meanreciprocalrank',
    patience=patience,
    frequency=1,
    evaluator = valid_evaluator,
    training_triples_factory = dataset.inductive_inference,
    evaluation_triples_factory = dataset.inductive_validation,
    result_tracker = tracker

)



In [13]:
# default training regime is negative sampling (SLCWA)
# you can also use the 1-N regime with the LCWATrainingLoop
# the LCWA loop does not need negative sampling kwargs, but accepts label_smoothing in the .train() method
training_loop = SLCWATrainingLoop(
        triples_factory=dataset.transductive_training,
        model=model,
        mode=TRAINING,  # must be specified for the inductive setup
        result_tracker=tracker,
        optimizer=optimizer
)

In [14]:
training_start = time.time()
train_epoch =  training_loop.train(
        triples_factory=dataset.transductive_training,
        num_epochs=num_epochs,
        stopper = stopper
        
    )
training_duration = time.time() - training_start

Training epochs on cuda:0:   0%|          | 0/2000 [00:00<?, ?epoch/s]

Training batches on cuda:0:   0%|          | 0/1 [00:00<?, ?batch/s]

Step: 1
Metric: loss = 12.901205062866211
Step: 1
Metric: validation.head.optimistic.inverse_harmonic_mean_rank = 0.5896898456255687
Metric: validation.tail.optimistic.inverse_harmonic_mean_rank = 0.015428139623942901
Metric: validation.both.optimistic.inverse_harmonic_mean_rank = 0.3025589926247558
Metric: validation.head.realistic.inverse_harmonic_mean_rank = 0.006515270099043846
Metric: validation.tail.realistic.inverse_harmonic_mean_rank = 0.01326643954962492
Metric: validation.both.realistic.inverse_harmonic_mean_rank = 0.009890854358673096
Metric: validation.head.pessimistic.inverse_harmonic_mean_rank = 0.00374044890460881
Metric: validation.tail.pessimistic.inverse_harmonic_mean_rank = 0.012272027742143152
Metric: validation.both.pessimistic.inverse_harmonic_mean_rank = 0.008006238323375981
Metric: validation.head.optimistic.hits_at_1 = 0.5865921787709497
Metric: validation.tail.optimistic.hits_at_1 = 0.0
Metric: validation.both.optimistic.hits_at_1 = 0.29329608938547486
Metric:

Training batches on cuda:0:   0%|          | 0/1 [00:00<?, ?batch/s]

Step: 2
Metric: loss = 12.735873222351074
Step: 2
Metric: validation.head.optimistic.inverse_harmonic_mean_rank = 0.5886404832690219
Metric: validation.tail.optimistic.inverse_harmonic_mean_rank = 0.013009073003718704
Metric: validation.both.optimistic.inverse_harmonic_mean_rank = 0.3008247781363702
Metric: validation.head.realistic.inverse_harmonic_mean_rank = 0.006515019107609987
Metric: validation.tail.realistic.inverse_harmonic_mean_rank = 0.012610717676579952
Metric: validation.both.realistic.inverse_harmonic_mean_rank = 0.009562868624925613
Metric: validation.head.pessimistic.inverse_harmonic_mean_rank = 0.0037419778410842136
Metric: validation.tail.pessimistic.inverse_harmonic_mean_rank = 0.012297007923482577
Metric: validation.both.pessimistic.inverse_harmonic_mean_rank = 0.008019492882283397
Metric: validation.head.optimistic.hits_at_1 = 0.5854748603351956
Metric: validation.tail.optimistic.hits_at_1 = 0.0
Metric: validation.both.optimistic.hits_at_1 = 0.2927374301675978
Metri

Training batches on cuda:0:   0%|          | 0/1 [00:00<?, ?batch/s]

Step: 3
Metric: loss = 12.634761810302734
Step: 3
Metric: validation.head.optimistic.inverse_harmonic_mean_rank = 0.5880390091104453
Metric: validation.tail.optimistic.inverse_harmonic_mean_rank = 0.014482309522839493
Metric: validation.both.optimistic.inverse_harmonic_mean_rank = 0.3012606593166423
Metric: validation.head.realistic.inverse_harmonic_mean_rank = 0.006538759917020798
Metric: validation.tail.realistic.inverse_harmonic_mean_rank = 0.01356790214776993
Metric: validation.both.realistic.inverse_harmonic_mean_rank = 0.010053331032395363
Metric: validation.head.pessimistic.inverse_harmonic_mean_rank = 0.003779598497646011
Metric: validation.tail.pessimistic.inverse_harmonic_mean_rank = 0.013085251187140287
Metric: validation.both.pessimistic.inverse_harmonic_mean_rank = 0.00843242484239315
Metric: validation.head.optimistic.hits_at_1 = 0.5843575418994413
Metric: validation.tail.optimistic.hits_at_1 = 0.0
Metric: validation.both.optimistic.hits_at_1 = 0.29217877094972067
Metric:

Training batches on cuda:0:   0%|          | 0/1 [00:00<?, ?batch/s]

Step: 4
Metric: loss = 12.510793685913086
Step: 4
Metric: validation.head.optimistic.inverse_harmonic_mean_rank = 0.5880110226098448
Metric: validation.tail.optimistic.inverse_harmonic_mean_rank = 0.013986773110654034
Metric: validation.both.optimistic.inverse_harmonic_mean_rank = 0.30099889786024947
Metric: validation.head.realistic.inverse_harmonic_mean_rank = 0.006535984110087156
Metric: validation.tail.realistic.inverse_harmonic_mean_rank = 0.01294588390737772
Metric: validation.both.realistic.inverse_harmonic_mean_rank = 0.009740933775901794
Metric: validation.head.pessimistic.inverse_harmonic_mean_rank = 0.0037783970437248364
Metric: validation.tail.pessimistic.inverse_harmonic_mean_rank = 0.012320637742491152
Metric: validation.both.pessimistic.inverse_harmonic_mean_rank = 0.008049517393107993
Metric: validation.head.optimistic.hits_at_1 = 0.5843575418994413
Metric: validation.tail.optimistic.hits_at_1 = 0.0
Metric: validation.both.optimistic.hits_at_1 = 0.29217877094972067
Metr

Training batches on cuda:0:   0%|          | 0/1 [00:00<?, ?batch/s]

Step: 5
Metric: loss = 12.349310874938965
Step: 5
Metric: validation.head.optimistic.inverse_harmonic_mean_rank = 0.5879948942547908
Metric: validation.tail.optimistic.inverse_harmonic_mean_rank = 0.013743480299038784
Metric: validation.both.optimistic.inverse_harmonic_mean_rank = 0.30086918727691475
Metric: validation.head.realistic.inverse_harmonic_mean_rank = 0.006546834018081426
Metric: validation.tail.realistic.inverse_harmonic_mean_rank = 0.013002001680433748
Metric: validation.both.realistic.inverse_harmonic_mean_rank = 0.009774417616426945
Metric: validation.head.pessimistic.inverse_harmonic_mean_rank = 0.003790195109245238
Metric: validation.tail.pessimistic.inverse_harmonic_mean_rank = 0.012485352612800238
Metric: validation.both.pessimistic.inverse_harmonic_mean_rank = 0.00813777386102274
Metric: validation.head.optimistic.hits_at_1 = 0.5843575418994413
Metric: validation.tail.optimistic.hits_at_1 = 0.0
Metric: validation.both.optimistic.hits_at_1 = 0.29217877094972067
Metri

Training batches on cuda:0:   0%|          | 0/1 [00:00<?, ?batch/s]

Step: 6
Metric: loss = 12.218953132629395
Step: 6
Metric: validation.head.optimistic.inverse_harmonic_mean_rank = 0.5701451046294114
Metric: validation.tail.optimistic.inverse_harmonic_mean_rank = 0.015143121073145896
Metric: validation.both.optimistic.inverse_harmonic_mean_rank = 0.29264411285127867
Metric: validation.head.realistic.inverse_harmonic_mean_rank = 0.00653638131916523
Metric: validation.tail.realistic.inverse_harmonic_mean_rank = 0.013749366626143456
Metric: validation.both.realistic.inverse_harmonic_mean_rank = 0.010142873972654343
Metric: validation.head.pessimistic.inverse_harmonic_mean_rank = 0.0037973545774873264
Metric: validation.tail.pessimistic.inverse_harmonic_mean_rank = 0.013090530168126458
Metric: validation.both.pessimistic.inverse_harmonic_mean_rank = 0.008443942372806892
Metric: validation.head.optimistic.hits_at_1 = 0.5541899441340782
Metric: validation.tail.optimistic.hits_at_1 = 0.0
Metric: validation.both.optimistic.hits_at_1 = 0.2770949720670391
Metri

Training batches on cuda:0:   0%|          | 0/1 [00:00<?, ?batch/s]

Step: 7
Metric: loss = 12.101510047912598
Step: 7
Metric: validation.head.optimistic.inverse_harmonic_mean_rank = 0.5701519499284672
Metric: validation.tail.optimistic.inverse_harmonic_mean_rank = 0.01357374500459655
Metric: validation.both.optimistic.inverse_harmonic_mean_rank = 0.2918628474665319
Metric: validation.head.realistic.inverse_harmonic_mean_rank = 0.006547576282173394
Metric: validation.tail.realistic.inverse_harmonic_mean_rank = 0.012785827741026878
Metric: validation.both.realistic.inverse_harmonic_mean_rank = 0.009666701778769493
Metric: validation.head.pessimistic.inverse_harmonic_mean_rank = 0.003808632430911086
Metric: validation.tail.pessimistic.inverse_harmonic_mean_rank = 0.012267463888399046
Metric: validation.both.pessimistic.inverse_harmonic_mean_rank = 0.008038048159655066
Metric: validation.head.optimistic.hits_at_1 = 0.5541899441340782
Metric: validation.tail.optimistic.hits_at_1 = 0.0
Metric: validation.both.optimistic.hits_at_1 = 0.2770949720670391
Metric:

Training batches on cuda:0:   0%|          | 0/1 [00:00<?, ?batch/s]

Step: 8
Metric: loss = 11.964476585388184
Step: 8
Metric: validation.head.optimistic.inverse_harmonic_mean_rank = 0.5637749100692067
Metric: validation.tail.optimistic.inverse_harmonic_mean_rank = 0.01657380847445076
Metric: validation.both.optimistic.inverse_harmonic_mean_rank = 0.29017435927182866
Metric: validation.head.realistic.inverse_harmonic_mean_rank = 0.0065390742383897305
Metric: validation.tail.realistic.inverse_harmonic_mean_rank = 0.013242524117231369
Metric: validation.both.realistic.inverse_harmonic_mean_rank = 0.009890799410641193
Metric: validation.head.pessimistic.inverse_harmonic_mean_rank = 0.003809246724576496
Metric: validation.tail.pessimistic.inverse_harmonic_mean_rank = 0.012012690531830045
Metric: validation.both.pessimistic.inverse_harmonic_mean_rank = 0.00791096862820327
Metric: validation.head.optimistic.hits_at_1 = 0.5541899441340782
Metric: validation.tail.optimistic.hits_at_1 = 0.0
Metric: validation.both.optimistic.hits_at_1 = 0.2770949720670391
Metric

Training batches on cuda:0:   0%|          | 0/1 [00:00<?, ?batch/s]

Step: 9
Metric: loss = 11.83508586883545
Step: 9
Metric: validation.head.optimistic.inverse_harmonic_mean_rank = 0.5535456427277161
Metric: validation.tail.optimistic.inverse_harmonic_mean_rank = 0.01637206597920394
Metric: validation.both.optimistic.inverse_harmonic_mean_rank = 0.28495885435346
Metric: validation.head.realistic.inverse_harmonic_mean_rank = 0.006530072074383496
Metric: validation.tail.realistic.inverse_harmonic_mean_rank = 0.013054620474576952
Metric: validation.both.realistic.inverse_harmonic_mean_rank = 0.009792345575988293
Metric: validation.head.pessimistic.inverse_harmonic_mean_rank = 0.003805735168897068
Metric: validation.tail.pessimistic.inverse_harmonic_mean_rank = 0.011848805506721162
Metric: validation.both.pessimistic.inverse_harmonic_mean_rank = 0.007827270337809115
Metric: validation.head.optimistic.hits_at_1 = 0.5407821229050279
Metric: validation.tail.optimistic.hits_at_1 = 0.0
Metric: validation.both.optimistic.hits_at_1 = 0.27039106145251396
Metric: v

Training batches on cuda:0:   0%|          | 0/1 [00:00<?, ?batch/s]

Step: 10
Metric: loss = 11.736677169799805
Step: 10
Metric: validation.head.optimistic.inverse_harmonic_mean_rank = 0.5334129494328296
Metric: validation.tail.optimistic.inverse_harmonic_mean_rank = 0.014441306845703064
Metric: validation.both.optimistic.inverse_harmonic_mean_rank = 0.2739271281392663
Metric: validation.head.realistic.inverse_harmonic_mean_rank = 0.006517251953482628
Metric: validation.tail.realistic.inverse_harmonic_mean_rank = 0.012491573579609394
Metric: validation.both.realistic.inverse_harmonic_mean_rank = 0.009504413232207298
Metric: validation.head.pessimistic.inverse_harmonic_mean_rank = 0.003802460476306876
Metric: validation.tail.pessimistic.inverse_harmonic_mean_rank = 0.011617839818530996
Metric: validation.both.pessimistic.inverse_harmonic_mean_rank = 0.007710150147418935
Metric: validation.head.optimistic.hits_at_1 = 0.5139664804469274
Metric: validation.tail.optimistic.hits_at_1 = 0.0
Metric: validation.both.optimistic.hits_at_1 = 0.2569832402234637
Metr

Training batches on cuda:0:   0%|          | 0/1 [00:00<?, ?batch/s]

Step: 11
Metric: loss = 11.566774368286133
Step: 11
Metric: validation.head.optimistic.inverse_harmonic_mean_rank = 0.4838449255788369
Metric: validation.tail.optimistic.inverse_harmonic_mean_rank = 0.012450856798231722
Metric: validation.both.optimistic.inverse_harmonic_mean_rank = 0.24814789118853436
Metric: validation.head.realistic.inverse_harmonic_mean_rank = 0.006481528747826814
Metric: validation.tail.realistic.inverse_harmonic_mean_rank = 0.011922922916710377
Metric: validation.both.realistic.inverse_harmonic_mean_rank = 0.009202225133776665
Metric: validation.head.pessimistic.inverse_harmonic_mean_rank = 0.0038093918254588836
Metric: validation.tail.pessimistic.inverse_harmonic_mean_rank = 0.011600503068857527
Metric: validation.both.pessimistic.inverse_harmonic_mean_rank = 0.007704947447158206
Metric: validation.head.optimistic.hits_at_1 = 0.45363128491620114
Metric: validation.tail.optimistic.hits_at_1 = 0.0
Metric: validation.both.optimistic.hits_at_1 = 0.22681564245810057


Training batches on cuda:0:   0%|          | 0/1 [00:00<?, ?batch/s]

Step: 12
Metric: loss = 11.433897018432617
Step: 12
Metric: validation.head.optimistic.inverse_harmonic_mean_rank = 0.46823011981284657
Metric: validation.tail.optimistic.inverse_harmonic_mean_rank = 0.013714744245926148
Metric: validation.both.optimistic.inverse_harmonic_mean_rank = 0.2409724320293864
Metric: validation.head.realistic.inverse_harmonic_mean_rank = 0.006438824348151685
Metric: validation.tail.realistic.inverse_harmonic_mean_rank = 0.01302383467555046
Metric: validation.both.realistic.inverse_harmonic_mean_rank = 0.009731329046189785
Metric: validation.head.pessimistic.inverse_harmonic_mean_rank = 0.003795326397507242
Metric: validation.tail.pessimistic.inverse_harmonic_mean_rank = 0.012536449573578475
Metric: validation.both.pessimistic.inverse_harmonic_mean_rank = 0.008165887985542856
Metric: validation.head.optimistic.hits_at_1 = 0.43798882681564244
Metric: validation.tail.optimistic.hits_at_1 = 0.0
Metric: validation.both.optimistic.hits_at_1 = 0.21899441340782122
Me

Training batches on cuda:0:   0%|          | 0/1 [00:00<?, ?batch/s]

Step: 13
Metric: loss = 11.290412902832031
Step: 13
Metric: validation.head.optimistic.inverse_harmonic_mean_rank = 0.46155598359325456
Metric: validation.tail.optimistic.inverse_harmonic_mean_rank = 0.014181175252629259
Metric: validation.both.optimistic.inverse_harmonic_mean_rank = 0.2378685794229419
Metric: validation.head.realistic.inverse_harmonic_mean_rank = 0.006429302506148815
Metric: validation.tail.realistic.inverse_harmonic_mean_rank = 0.01294006872922182
Metric: validation.both.realistic.inverse_harmonic_mean_rank = 0.009684685617685318
Metric: validation.head.pessimistic.inverse_harmonic_mean_rank = 0.003799458735044807
Metric: validation.tail.pessimistic.inverse_harmonic_mean_rank = 0.012199049975826604
Metric: validation.both.pessimistic.inverse_harmonic_mean_rank = 0.007999254355435706
Metric: validation.head.optimistic.hits_at_1 = 0.4324022346368715
Metric: validation.tail.optimistic.hits_at_1 = 0.0
Metric: validation.both.optimistic.hits_at_1 = 0.21620111731843575
Met

Training batches on cuda:0:   0%|          | 0/1 [00:00<?, ?batch/s]

Step: 14
Metric: loss = 11.200305938720703
Step: 14
Metric: validation.head.optimistic.inverse_harmonic_mean_rank = 0.43977199252421306
Metric: validation.tail.optimistic.inverse_harmonic_mean_rank = 0.014600526134782708
Metric: validation.both.optimistic.inverse_harmonic_mean_rank = 0.22718625932949785
Metric: validation.head.realistic.inverse_harmonic_mean_rank = 0.00637004803866148
Metric: validation.tail.realistic.inverse_harmonic_mean_rank = 0.01361122354865074
Metric: validation.both.realistic.inverse_harmonic_mean_rank = 0.009990634396672249
Metric: validation.head.pessimistic.inverse_harmonic_mean_rank = 0.003790758244097075
Metric: validation.tail.pessimistic.inverse_harmonic_mean_rank = 0.01307135543192532
Metric: validation.both.pessimistic.inverse_harmonic_mean_rank = 0.008431056838011198
Metric: validation.head.optimistic.hits_at_1 = 0.4134078212290503
Metric: validation.tail.optimistic.hits_at_1 = 0.0
Metric: validation.both.optimistic.hits_at_1 = 0.20670391061452514
Metr

Training batches on cuda:0:   0%|          | 0/1 [00:00<?, ?batch/s]

Step: 15
Metric: loss = 11.065949440002441
Step: 15
Metric: validation.head.optimistic.inverse_harmonic_mean_rank = 0.4198131714892308
Metric: validation.tail.optimistic.inverse_harmonic_mean_rank = 0.014222494516115196
Metric: validation.both.optimistic.inverse_harmonic_mean_rank = 0.21701783300267302
Metric: validation.head.realistic.inverse_harmonic_mean_rank = 0.00634099030867219
Metric: validation.tail.realistic.inverse_harmonic_mean_rank = 0.013520678505301476
Metric: validation.both.realistic.inverse_harmonic_mean_rank = 0.009930835105478764
Metric: validation.head.pessimistic.inverse_harmonic_mean_rank = 0.0037884604626945845
Metric: validation.tail.pessimistic.inverse_harmonic_mean_rank = 0.01307335557605333
Metric: validation.both.pessimistic.inverse_harmonic_mean_rank = 0.008430908019373958
Metric: validation.head.optimistic.hits_at_1 = 0.39217877094972065
Metric: validation.tail.optimistic.hits_at_1 = 0.0
Metric: validation.both.optimistic.hits_at_1 = 0.19608938547486032
Me

Training batches on cuda:0:   0%|          | 0/1 [00:00<?, ?batch/s]

Step: 16
Metric: loss = 10.92365837097168
Step: 16
Metric: validation.head.optimistic.inverse_harmonic_mean_rank = 0.4063511702859462
Metric: validation.tail.optimistic.inverse_harmonic_mean_rank = 0.014030874446794399
Metric: validation.both.optimistic.inverse_harmonic_mean_rank = 0.2101910223663703
Metric: validation.head.realistic.inverse_harmonic_mean_rank = 0.006311353761702776
Metric: validation.tail.realistic.inverse_harmonic_mean_rank = 0.013160773552954195
Metric: validation.both.realistic.inverse_harmonic_mean_rank = 0.00973606389015913
Metric: validation.head.pessimistic.inverse_harmonic_mean_rank = 0.0037830255691050935
Metric: validation.tail.pessimistic.inverse_harmonic_mean_rank = 0.012575288740240132
Metric: validation.both.pessimistic.inverse_harmonic_mean_rank = 0.008179157154672613
Metric: validation.head.optimistic.hits_at_1 = 0.3776536312849162
Metric: validation.tail.optimistic.hits_at_1 = 0.0
Metric: validation.both.optimistic.hits_at_1 = 0.1888268156424581
Metri

Training batches on cuda:0:   0%|          | 0/1 [00:00<?, ?batch/s]

Step: 17
Metric: loss = 10.819657325744629
Step: 17
Metric: validation.head.optimistic.inverse_harmonic_mean_rank = 0.40213106514291813
Metric: validation.tail.optimistic.inverse_harmonic_mean_rank = 0.015356968371989512
Metric: validation.both.optimistic.inverse_harmonic_mean_rank = 0.20874401675745374
Metric: validation.head.realistic.inverse_harmonic_mean_rank = 0.00632129656150937
Metric: validation.tail.realistic.inverse_harmonic_mean_rank = 0.013900602236390114
Metric: validation.both.realistic.inverse_harmonic_mean_rank = 0.010110949166119099
Metric: validation.head.pessimistic.inverse_harmonic_mean_rank = 0.0037977391066305365
Metric: validation.tail.pessimistic.inverse_harmonic_mean_rank = 0.013002568120903174
Metric: validation.both.pessimistic.inverse_harmonic_mean_rank = 0.008400153613766856
Metric: validation.head.optimistic.hits_at_1 = 0.37206703910614525
Metric: validation.tail.optimistic.hits_at_1 = 0.0
Metric: validation.both.optimistic.hits_at_1 = 0.18603351955307262


Training batches on cuda:0:   0%|          | 0/1 [00:00<?, ?batch/s]

Step: 18
Metric: loss = 10.689016342163086
Step: 18
Metric: validation.head.optimistic.inverse_harmonic_mean_rank = 0.39525662418860297
Metric: validation.tail.optimistic.inverse_harmonic_mean_rank = 0.014027690638969578
Metric: validation.both.optimistic.inverse_harmonic_mean_rank = 0.20464215741378627
Metric: validation.head.realistic.inverse_harmonic_mean_rank = 0.00626042066141963
Metric: validation.tail.realistic.inverse_harmonic_mean_rank = 0.012813404202461243
Metric: validation.both.realistic.inverse_harmonic_mean_rank = 0.009536911733448505
Metric: validation.head.pessimistic.inverse_harmonic_mean_rank = 0.003776381600731687
Metric: validation.tail.pessimistic.inverse_harmonic_mean_rank = 0.012212782708615135
Metric: validation.both.pessimistic.inverse_harmonic_mean_rank = 0.007994582154673411
Metric: validation.head.optimistic.hits_at_1 = 0.37094972067039106
Metric: validation.tail.optimistic.hits_at_1 = 0.0
Metric: validation.both.optimistic.hits_at_1 = 0.18547486033519553
M

Training batches on cuda:0:   0%|          | 0/1 [00:00<?, ?batch/s]

Step: 19
Metric: loss = 10.542724609375
Step: 19
Metric: validation.head.optimistic.inverse_harmonic_mean_rank = 0.38855289387243047
Metric: validation.tail.optimistic.inverse_harmonic_mean_rank = 0.013575497212160734
Metric: validation.both.optimistic.inverse_harmonic_mean_rank = 0.2010641955422956
Metric: validation.head.realistic.inverse_harmonic_mean_rank = 0.0061659421771764755
Metric: validation.tail.realistic.inverse_harmonic_mean_rank = 0.012757289223372938
Metric: validation.both.realistic.inverse_harmonic_mean_rank = 0.009461616165935993
Metric: validation.head.pessimistic.inverse_harmonic_mean_rank = 0.0037572141715780942
Metric: validation.tail.pessimistic.inverse_harmonic_mean_rank = 0.01222757789918865
Metric: validation.both.pessimistic.inverse_harmonic_mean_rank = 0.007992396035383374
Metric: validation.head.optimistic.hits_at_1 = 0.37094972067039106
Metric: validation.tail.optimistic.hits_at_1 = 0.0
Metric: validation.both.optimistic.hits_at_1 = 0.18547486033519553
Met

Training batches on cuda:0:   0%|          | 0/1 [00:00<?, ?batch/s]

Step: 20
Metric: loss = 10.43466567993164
Step: 20
Metric: validation.head.optimistic.inverse_harmonic_mean_rank = 0.38565977701597903
Metric: validation.tail.optimistic.inverse_harmonic_mean_rank = 0.014932490307457134
Metric: validation.both.optimistic.inverse_harmonic_mean_rank = 0.20029613366171806
Metric: validation.head.realistic.inverse_harmonic_mean_rank = 0.006099728401750327
Metric: validation.tail.realistic.inverse_harmonic_mean_rank = 0.014053122140467167
Metric: validation.both.realistic.inverse_harmonic_mean_rank = 0.010076425038278103
Metric: validation.head.pessimistic.inverse_harmonic_mean_rank = 0.0037420686898587685
Metric: validation.tail.pessimistic.inverse_harmonic_mean_rank = 0.013459659608056876
Metric: validation.both.pessimistic.inverse_harmonic_mean_rank = 0.008600864148957823
Metric: validation.head.optimistic.hits_at_1 = 0.37094972067039106
Metric: validation.tail.optimistic.hits_at_1 = 0.0
Metric: validation.both.optimistic.hits_at_1 = 0.18547486033519553


Training batches on cuda:0:   0%|          | 0/1 [00:00<?, ?batch/s]

Step: 21
Metric: loss = 10.313801765441895
Step: 21
Metric: validation.head.optimistic.inverse_harmonic_mean_rank = 0.3846653832676549
Metric: validation.tail.optimistic.inverse_harmonic_mean_rank = 0.015175542539807676
Metric: validation.both.optimistic.inverse_harmonic_mean_rank = 0.19992046290373133
Metric: validation.head.realistic.inverse_harmonic_mean_rank = 0.006076041609048843
Metric: validation.tail.realistic.inverse_harmonic_mean_rank = 0.013636280782520773
Metric: validation.both.realistic.inverse_harmonic_mean_rank = 0.00985616073012352
Metric: validation.head.pessimistic.inverse_harmonic_mean_rank = 0.0037350114341187673
Metric: validation.tail.pessimistic.inverse_harmonic_mean_rank = 0.012754188848894732
Metric: validation.both.pessimistic.inverse_harmonic_mean_rank = 0.00824460014150675
Metric: validation.head.optimistic.hits_at_1 = 0.37094972067039106
Metric: validation.tail.optimistic.hits_at_1 = 0.0
Metric: validation.both.optimistic.hits_at_1 = 0.18547486033519553
Me

Training batches on cuda:0:   0%|          | 0/1 [00:00<?, ?batch/s]

Step: 22
Metric: loss = 10.152822494506836
Step: 22
Metric: validation.head.optimistic.inverse_harmonic_mean_rank = 0.3831006613481834
Metric: validation.tail.optimistic.inverse_harmonic_mean_rank = 0.013487232869306181
Metric: validation.both.optimistic.inverse_harmonic_mean_rank = 0.1982939471087448
Metric: validation.head.realistic.inverse_harmonic_mean_rank = 0.006031650118529797
Metric: validation.tail.realistic.inverse_harmonic_mean_rank = 0.012972809374332428
Metric: validation.both.realistic.inverse_harmonic_mean_rank = 0.009502229280769825
Metric: validation.head.pessimistic.inverse_harmonic_mean_rank = 0.0037292592687475958
Metric: validation.tail.pessimistic.inverse_harmonic_mean_rank = 0.01263812635246381
Metric: validation.both.pessimistic.inverse_harmonic_mean_rank = 0.008183692810605702
Metric: validation.head.optimistic.hits_at_1 = 0.37094972067039106
Metric: validation.tail.optimistic.hits_at_1 = 0.0
Metric: validation.both.optimistic.hits_at_1 = 0.18547486033519553
Me

Training batches on cuda:0:   0%|          | 0/1 [00:00<?, ?batch/s]

Step: 23
Metric: loss = 10.05725383758545
Step: 23
Metric: validation.head.optimistic.inverse_harmonic_mean_rank = 0.38259136597141435
Metric: validation.tail.optimistic.inverse_harmonic_mean_rank = 0.01315785436578016
Metric: validation.both.optimistic.inverse_harmonic_mean_rank = 0.19787461016859723
Metric: validation.head.realistic.inverse_harmonic_mean_rank = 0.006072917953133583
Metric: validation.tail.realistic.inverse_harmonic_mean_rank = 0.01234837155789137
Metric: validation.both.realistic.inverse_harmonic_mean_rank = 0.009210644289851189
Metric: validation.head.pessimistic.inverse_harmonic_mean_rank = 0.0037807822523998613
Metric: validation.tail.pessimistic.inverse_harmonic_mean_rank = 0.011829872548229396
Metric: validation.both.pessimistic.inverse_harmonic_mean_rank = 0.007805327400314627
Metric: validation.head.optimistic.hits_at_1 = 0.37094972067039106
Metric: validation.tail.optimistic.hits_at_1 = 0.0
Metric: validation.both.optimistic.hits_at_1 = 0.18547486033519553
Me

In [15]:
torch.save(model,f"{model_name}/model.pth")
model = torch.load(f"{model_name}/model.pth")

In [16]:
print("Train error per epoch:")
df = pd.DataFrame(train_epoch)
print(df)
df.to_csv(f"{model_name}/{model_name}_train_error_per_epoch.csv")

Train error per epoch:
            0
0   12.901205
1   12.735873
2   12.634762
3   12.510794
4   12.349311
5   12.218953
6   12.101510
7   11.964477
8   11.835086
9   11.736677
10  11.566774
11  11.433897
12  11.290413
13  11.200306
14  11.065949
15  10.923658
16  10.819657
17  10.689016
18  10.542725
19  10.434666
20  10.313802
21  10.152822
22  10.057254


In [17]:
training_evaluation_start = time.time()
# train
print("Train error")
show_metrics(train_evaluator.evaluate(
        model=model,
        mapped_triples=dataset.transductive_training.mapped_triples,
        additional_filter_triples=[
        dataset.transductive_training.mapped_triples,
    ]
    ).to_dict(),model_name,'train_metrics')
training_evaluation_duration = time.time() - training_evaluation_start

Train error


Evaluating on cuda:0:   0%|          | 0.00/6.72k [00:00<?, ?triple/s]

head
                            optimistic  realistic  pessimistic
inverse_harmonic_mean_rank    0.677439   0.002254      0.00126
hits_at_1                     0.675893   0.000000      0.00000
hits_at_3                     0.676488   0.000000      0.00000
hits_at_5                     0.676488   0.000000      0.00000
hits_at_10                    0.676488   0.000000      0.00000
tail
                            optimistic  realistic  pessimistic
inverse_harmonic_mean_rank    0.008683   0.008333     0.008102
hits_at_1                     0.005208   0.005208     0.005208
hits_at_3                     0.006399   0.006101     0.006101
hits_at_5                     0.006994   0.006696     0.006101
hits_at_10                    0.015030   0.008631     0.006994
both
                            optimistic  realistic  pessimistic
inverse_harmonic_mean_rank    0.343061   0.005294     0.004681
hits_at_1                     0.340551   0.002604     0.002604
hits_at_3                     0.341443  

In [18]:
validation_evaluation_start = time.time()
# validation
print("Validation error")
show_metrics(valid_evaluator.evaluate(
        model=model,
        mapped_triples=dataset.inductive_validation.mapped_triples,
        additional_filter_triples=[
            # filtering of other positive triples
            dataset.inductive_validation.mapped_triples
        ],
    ).to_dict(),model_name,'validation_metrics')
validation_evaluation_duration = time.time() - validation_evaluation_start

Validation error


Evaluating on cuda:0:   0%|          | 0.00/895 [00:00<?, ?triple/s]

head
                            optimistic  realistic  pessimistic
inverse_harmonic_mean_rank    0.588027   0.006420     0.003703
hits_at_1                     0.584358   0.000000     0.000000
hits_at_3                     0.585475   0.000000     0.000000
hits_at_5                     0.585475   0.000000     0.000000
hits_at_10                    0.585475   0.007821     0.000000
tail
                            optimistic  realistic  pessimistic
inverse_harmonic_mean_rank    0.013511   0.012583     0.012113
hits_at_1                     0.000000   0.000000     0.000000
hits_at_3                     0.015642   0.012291     0.012291
hits_at_5                     0.027933   0.021229     0.017877
hits_at_10                    0.029050   0.029050     0.029050
both
                            optimistic  realistic  pessimistic
inverse_harmonic_mean_rank    0.300769   0.009502     0.007908
hits_at_1                     0.292179   0.000000     0.000000
hits_at_3                     0.300559  

In [19]:
testing_evaluation_start = time.time()
# result on the test set
print("Test error")
show_metrics(test_evaluator.evaluate(
        model=model,
        mapped_triples=dataset.inductive_testing.mapped_triples,
        additional_filter_triples=[
            # filtering of other positive triples
            dataset.inductive_validation.mapped_triples,
        ],
    ).to_dict(),model_name,'test_metrics')
testing_evaluation_duration = time.time() - testing_evaluation_start

Test error


Evaluating on cuda:0:   0%|          | 0.00/1.14k [00:00<?, ?triple/s]

head
                            optimistic  realistic  pessimistic
inverse_harmonic_mean_rank    0.566521   0.006970     0.004001
hits_at_1                     0.562390   0.000000     0.000000
hits_at_3                     0.562390   0.000000     0.000000
hits_at_5                     0.562390   0.000000     0.000000
hits_at_10                    0.562390   0.009666     0.000000
tail
                            optimistic  realistic  pessimistic
inverse_harmonic_mean_rank    0.014507   0.013669     0.013232
hits_at_1                     0.004394   0.004394     0.004394
hits_at_3                     0.014938   0.009666     0.009666
hits_at_5                     0.019332   0.016696     0.012302
hits_at_10                    0.023726   0.023726     0.023726
both
                            optimistic  realistic  pessimistic
inverse_harmonic_mean_rank    0.290514   0.010320     0.008617
hits_at_1                     0.283392   0.002197     0.002197
hits_at_3                     0.288664  

In [20]:
infodict = {}
infodict['device'] = model.device
infodict['parameters bytes'] = model.num_parameter_bytes
infodict['number parameters'] = model.num_parameters
infodict['training duration'] = training_duration
infodict['training evaluation duration'] = training_evaluation_duration
infodict['validation evaluation duration'] = validation_evaluation_duration
infodict['testing evaluation duration'] = testing_evaluation_duration
infodict["Operating system name"] = platform.system()
infodict["Operating system version"] = platform.release()
infodict["Processor architecture"] = platform.machine()
infodict["Python version"] = sys.version
infodict["Processor model name"] = cpuinfo.get_cpu_info()['brand_raw']
infodict['Number cpu cores'] = os.cpu_count()
infodict["Total physical memory"] = psutil.virtual_memory().total

In [21]:

output = subprocess.check_output(['nvidia-smi', '--query-gpu=name', '--format=csv'])
output = output.decode('utf-8')  # convert byte string to regular string

# split output into rows and remove header row
rows = output.strip().split('\n')[1:]

# extract GPU names from each row
gpu_names = []
for row in rows:
    name = row.strip()
    gpu_names.append(name)

infodict['GPU'] = gpu_names[0]


In [22]:
infodict['loss'] = NSSALoss
infodict['num_tokens'] = num_tokens
infodict['embedding_dim'] = embedding_dim
infodict['learning_rate'] = learning_rate
infodict['optimizer'] = Adam
infodict['num_epochs'] = num_epochs
infodict['patience'] = patience

In [23]:
info_df = pd.DataFrame(columns=['name','value'], data = infodict.items())
info_df.to_csv(f"{model_name}/{model_name}_information.csv")
print(info_df)

                              name  \
0                           device   
1                 parameters bytes   
2                number parameters   
3                training duration   
4     training evaluation duration   
5   validation evaluation duration   
6      testing evaluation duration   
7            Operating system name   
8         Operating system version   
9           Processor architecture   
10                  Python version   
11            Processor model name   
12                Number cpu cores   
13           Total physical memory   
14                             GPU   
15                            loss   
16                      num_tokens   
17                   embedding_dim   
18                   learning_rate   
19                       optimizer   
20                      num_epochs   
21                        patience   

                                                value  
0                                              cuda:0  
1            

In [24]:
def zip_folder(folder_path, output_path):
    with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, dirs, files in os.walk(folder_path):
            for file in files:
                zipf.write(os.path.join(root, file))

folder_path = model_name
output_path = f'{model_name}.zip'

zip_folder(folder_path, output_path)