In [23]:
import pandas as pd

from pyrdf2vec import RDF2VecTransformer
from pyrdf2vec.embedders import Word2Vec
from pyrdf2vec.graphs import KG
from pyrdf2vec.walkers import RandomWalker
import rdflib
from pykeen.pipeline import pipeline

In [18]:
# Load the tab-separated training set; its "person" column holds the URIs of
# the entities we want to embed and classify.
data = pd.read_csv(
    "../data/trainingSet.tsv",
    sep="\t",
)
entities = list(data["person"])
print(entities)

['http://www.aifb.uni-karlsruhe.de/Personen/viewPersonOWL/id1909instance', 'http://www.aifb.uni-karlsruhe.de/Personen/viewPersonOWL/id2040instance', 'http://www.aifb.uni-karlsruhe.de/Personen/viewPersonOWL/id46instance', 'http://www.aifb.uni-karlsruhe.de/Personen/viewPersonOWL/id3instance', 'http://www.aifb.uni-karlsruhe.de/Personen/viewPersonOWL/id1842instance', 'http://www.aifb.uni-karlsruhe.de/Personen/viewPersonOWL/id1915instance', 'http://www.aifb.uni-karlsruhe.de/Personen/viewPersonOWL/id1992instance', 'http://www.aifb.uni-karlsruhe.de/Personen/viewPersonOWL/id1966instance', 'http://www.aifb.uni-karlsruhe.de/Personen/viewPersonOWL/id2039instance', 'http://www.aifb.uni-karlsruhe.de/Personen/viewPersonOWL/id2065instance', 'http://www.aifb.uni-karlsruhe.de/Personen/viewPersonOWL/id2068instance', 'http://www.aifb.uni-karlsruhe.de/Personen/viewPersonOWL/id2058instance', 'http://www.aifb.uni-karlsruhe.de/Personen/viewPersonOWL/id67instance', 'http://www.aifb.uni-karlsruhe.de/Personen/v

In [19]:
# Load the AIFB knowledge graph from a local N-Triples file (`KG` is imported
# in the imports cell at the top). No predicates are skipped and no literal
# predicate chains are requested.
# NOTE(review): if the predicate carrying the classification target is still
# present in this file, pass it via `skip_predicates` so it cannot leak into
# the walks — confirm against the dataset.
knowledge_graph = KG("../data/aifb_oaff_complete.nt")

In [20]:
# Display the KG's repr to check the location and options it was created with.
knowledge_graph

KG(location='../data/aifb_oaff_complete.nt', skip_predicates=set(), literals=[], fmt=None, mul_req=False, skip_verify=False, cache=TTLCache([], maxsize=1024, currsize=0), _is_remote=False)

In [21]:
# Seed for the walk extraction so that repeated runs sample the same walks.
RANDOM_STATE = 22

# Build the transformer: a Word2Vec embedder trained for 10 epochs, fed by a
# random walker (positional args: max_depth=4, max_walks=10) that runs on
# 2 processes and does not follow reverse edges.
# NOTE(review): Word2Vec training itself may still vary between runs when it
# uses several worker threads; `workers=1` is presumably needed for fully
# deterministic embeddings — confirm against the gensim documentation.
transformer = RDF2VecTransformer(
    Word2Vec(epochs=10),
    walkers=[
        RandomWalker(
            4,
            10,
            with_reverse=False,
            n_jobs=2,
            random_state=RANDOM_STATE,
        )
    ],
)
# Fit on the KG and retrieve the embeddings (and literals) of the entities.
embeddings, literals = transformer.fit_transform(knowledge_graph, entities)
print(embeddings)

[array([-0.02390279,  0.02940556,  0.00421787,  0.01701239,  0.00564786,
       -0.05779158,  0.0284382 ,  0.07839274, -0.011943  , -0.03788786,
        0.01961041, -0.05071007, -0.01616843,  0.02802508, -0.00667117,
       -0.00779884,  0.02588021, -0.00675174, -0.01351   , -0.07503898,
        0.01477689,  0.00304774,  0.03286965, -0.0082777 ,  0.00723569,
        0.0005389 , -0.01414679, -0.00736145, -0.03273418,  0.01169373,
        0.0358769 , -0.00637451,  0.01335901, -0.04826278, -0.01007514,
        0.02339797,  0.03100892, -0.00484382, -0.01532144, -0.02416749,
        0.01658948, -0.03028548, -0.0150673 , -0.0003292 ,  0.01122803,
       -0.01230868, -0.01030884, -0.00759598,  0.0177003 ,  0.03787038,
        0.00145429, -0.01193773, -0.01660687,  0.01216249, -0.01045402,
        0.00623794,  0.02423731, -0.01144106, -0.04445395,  0.01215708,
        0.00162709, -0.00890543,  0.023516  ,  0.00422869, -0.02925676,
        0.02632124,  0.02031859,  0.00826622, -0.03391296,  0.0

In [22]:
# No literal predicate chains were configured on the KG, so an empty list is
# expected here.
print(literals)

[]


In [27]:
# Train a TransE model with PyKEEN and persist the pipeline result.
# NOTE(review): the training/testing inputs are the entity classification
# TSVs, not the knowledge graph. The logged TriplesFactory reports as many
# relations as triples (141/141) and evaluation covers a single triple, which
# suggests these files are not (head, relation, tail) triples — confirm that
# this is the intended training data.
pipeline_result = pipeline(
    model='TransE',
    training='../data/trainingSet.tsv',
    testing='../data/testSet.tsv',
    random_seed=42,  # fixed seed so that training and evaluation are reproducible
)
# NOTE: despite the ".pkl" suffix, this path is created as a directory.
pipeline_result.save_to_directory('../data/aifb_transe.pkl')

INFO:pykeen.pipeline.api:Using device: None
Training epochs on cpu: 100%|██████████| 5/5 [00:02<00:00,  2.42epoch/s, loss=1.09, prev_loss=1.06]
INFO:pykeen.evaluation.evaluator:Currently automatic memory optimization only supports GPUs, but you're using a CPU. Therefore, the batch_size will be set to the default value.
INFO:pykeen.evaluation.evaluator:No evaluation batch_size provided. Setting batch_size to '32'.
Evaluating on cpu: 100%|██████████| 1.00/1.00 [00:00<00:00, 5.52triple/s]
INFO:pykeen.evaluation.evaluator:Evaluation took 0.21s seconds
INFO:pykeen.triples.triples_factory:Stored TriplesFactory(num_entities=146, num_relations=141, create_inverse_triples=False, num_triples=141, path="C:\Users\luisa\Projekte\Masterthesis\AIFB\data\trainingSet.tsv") to file:///C:/Users/luisa/Projekte/Masterthesis/AIFB/data/aifb_transe.pkl/training_triples
INFO:pykeen.pipeline.api:Saved to directory: file:///C:/Users/luisa/Projekte/Masterthesis/AIFB/data/aifb_transe.pkl
