In [1]:
import networkx as nx
from pykeen.pipeline import pipeline
from pykeen.datasets import Nations, get_dataset
import matplotlib.pyplot as plt
import matplotlib as mpl
import torch
from pykeen.models import predict
import json


In [4]:
from pykeen.hpo import hpo_pipeline
import os
from pykeen.triples import TriplesFactory


# --- Configuration -----------------------------------------------------------
# Locations of the three triple files, relative to the working directory.
NATIONS_TRAIN_PATH = "train"
NATIONS_TEST_PATH = "test"
NATIONS_VALIDATE_PATH = "validation"

# Training settings, kept together so they are easy to find and tune.
MODEL_NAME = 'TransE'
NUM_EPOCHS = 5  # short run for testing - you should go higher
RANDOM_SEED = 1234
OUTPUT_DIR_NAME = 'prova_transE'

# --- Load the triples --------------------------------------------------------
training = TriplesFactory.from_path(NATIONS_TRAIN_PATH)
# The test and validation factories reuse the label-to-id mappings of the
# training factory, so all three splits share the same entity/relation ids.
testing = TriplesFactory.from_path(
    NATIONS_TEST_PATH,
    entity_to_id=training.entity_to_id,
    relation_to_id=training.relation_to_id,
)
validation = TriplesFactory.from_path(
    NATIONS_VALIDATE_PATH,
    entity_to_id=training.entity_to_id,
    relation_to_id=training.relation_to_id,
)

# --- Train and persist -------------------------------------------------------
pipeline_result = pipeline(
    training=training,
    testing=testing,
    validation=validation,
    model=MODEL_NAME,
    epochs=NUM_EPOCHS,
    random_seed=RANDOM_SEED,
)
# os.path.join uses the platform's path separator and avoids a doubled
# separator when the working directory is the filesystem root.
pipeline_result.save_to_directory(os.path.join(os.getcwd(), OUTPUT_DIR_NAME))

AttributeError: type object 'TriplesFactory' has no attribute 'from_directory_binary'

In [None]:
# Print the built-in help text for the pipeline result object.
# NOTE(review): this is an interactive exploration aid; consider removing it
# before sharing the notebook, since it only dumps documentation.
help(pipeline_result)

In [None]:
# Display the contents of `metric_results` on the pipeline result as a dict
# (shown as the cell output).
pipeline_result.metric_results.to_dict()

In [None]:
# Plot the losses stored on the pipeline result
# (presumably the per-epoch training loss — confirm against the PyKEEN docs).
pipeline_result.plot_losses()

In [None]:
# Trained model taken from the pipeline result; later cells reuse `model`.
model = pipeline_result.model

# Predict tails: the head and the relation are fixed, the tail is left open.
predicted_tails_df = predict.get_prediction_df(
    model=model,
    head_label="user-service",
    relation_label="get_user_id_server",
    triples_factory=pipeline_result.training,
)

predicted_tails_df

In [None]:
# Predict relations: the head and the tail are fixed, the relation is left open.
predicted_relations_df = predict.get_prediction_df(
    model=model,
    head_label="user-service",
    tail_label="user-service",
    triples_factory=pipeline_result.training,
)
predicted_relations_df

In [None]:
# Predict heads: the relation and the tail are fixed, the head is left open.
predicted_heads_df = predict.get_prediction_df(
    model=model,
    relation_label="get_user_id_server",
    tail_label="user-service",
    triples_factory=pipeline_result.training,
)
predicted_heads_df

In [None]:
# Score every triple using the training triples factory.
# Warning: this is memory intensive.
predictions_df = predict.get_all_prediction_df(
    model,
    triples_factory=pipeline_result.training,
)
predictions_df

In [None]:
# Score the triples but request only the top k=10.
# Warning: this is computationally expensive.
top_k_predictions_df = predict.get_all_prediction_df(
    model,
    k=10,
    triples_factory=pipeline_result.training,
)
top_k_predictions_df

In [None]:
# Score an explicit list of (head, relation, tail) label triples.
score_df = predict.predict_triples_df(
    model=model,
    triples=[
        ('user-service', 'user_mmc_get_user_id_client', 'user-service'),
        ('user-service', 'get_user_id_server', 'social-graph-service'),
    ],
    triples_factory=pipeline_result.training,
)
score_df

## Evaluation
If the evaluation results computed below are the same, it means that the traces are the same.

In [None]:
from pykeen.evaluation import evaluate, ClassificationEvaluator

# Evaluate the trained model on the training triples with a
# ClassificationEvaluator and show the result as a dict.
results_training = evaluate(
    model=model,
    mapped_triples=pipeline_result.training.mapped_triples,
    evaluator=ClassificationEvaluator(),
    mode=None,
    additional_filter_triples=[pipeline_result.training.mapped_triples],
)
results_training.to_dict()

In [None]:
# Evaluate the trained model on the validation triples; the training triples
# are passed as additional known triples.
results_validation = evaluate(
    model=model,
    mapped_triples=validation.mapped_triples,
    evaluator=ClassificationEvaluator(),
    mode=None,
    additional_filter_triples=[pipeline_result.training.mapped_triples],
)
results_validation.to_dict()

In [None]:
# Evaluate the trained model on the test triples; the training triples are
# passed as additional known triples.
# NOTE(review): only the training triples are supplied here — confirm whether
# the validation triples should be included as well for the test split.
results_testing = evaluate(
    model=model,
    mapped_triples=testing.mapped_triples,
    evaluator=ClassificationEvaluator(),
    mode=None,
    additional_filter_triples=[pipeline_result.training.mapped_triples],
)
results_testing.to_dict()