In [1]:
import pandas as pd

from pyrdf2vec import RDF2VecTransformer
from pyrdf2vec.embedders import Word2Vec
from pyrdf2vec.graphs import KG
from pyrdf2vec.walkers import RandomWalker
from pyrdf2vec.graphs import Vertex

from rdflib import Graph
import rdflib

In [3]:
knowledge_graph = KG("../DATA/Query_2020_sortiert_ohneAutor__mitTitel_editiert.ttl", skip_predicates= {"http://prismstandard.org/namespaces/basic/2.0/publicationDate", "http://purl.org/spar/fabio/hasPublicationYear"}, literals=[
    ["http://www.w3.org/2004/02/skos/core#prefLabel"],
])


In [4]:
for subj, pred, obj in rdflib.Graph().parse(
    "../DATA/Query_2020_sortiert_ohneAutor__mitTitel_editiert.ttl", format="ttl"
):
    #editiert deswegen, weil ich die eine Work aus ttl-Datei gelöscht habe, weil sie nicht in CSV war
    #Neuste Version 13.12.22, habe noch die Konzepttitel hinzugefügt (die ttl-Datei von davor ist aber immernoch vorhanden)
    subj = Vertex(str(subj))
    obj = Vertex(str(obj))
    pred = Vertex(str(pred), predicate=True, vprev=subj, vnext=obj)
    knowledge_graph.add_walk(obj, pred, subj)

    #Ausgabe alles:
    #print(subj.name, pred.name, obj.name)

#Ausgabe:
#print(subj.name, pred.name, obj.name)

In [5]:
data = pd.read_csv("../DATA/CSV_2020_sortiert.csv", sep=",")
entities = [entity for entity in data["relatedConceptsWork"]]
entities_distinct = list(dict.fromkeys(entities))
entities_distinct

['https://semopenalex.org/concept/C71924100',
 'https://semopenalex.org/concept/C126322002',
 'https://semopenalex.org/concept/C121608353',
 'https://semopenalex.org/concept/C98274493',
 'https://semopenalex.org/concept/C2983331546',
 'https://semopenalex.org/concept/C2778250585',
 'https://semopenalex.org/concept/C2780234812',
 'https://semopenalex.org/concept/C126838900',
 'https://semopenalex.org/concept/C192562407',
 'https://semopenalex.org/concept/C171250308',
 'https://semopenalex.org/concept/C136229726',
 'https://semopenalex.org/concept/C81288441',
 'https://semopenalex.org/concept/C143753070',
 'https://semopenalex.org/concept/C19527891',
 'https://semopenalex.org/concept/C2779820397',
 'https://semopenalex.org/concept/C2779949491',
 'https://semopenalex.org/concept/C3019816032',
 'https://semopenalex.org/concept/C513720949',
 'https://semopenalex.org/concept/C182606246',
 'https://semopenalex.org/concept/C86803240',
 'https://semopenalex.org/concept/C18903297',
 'https://sem

In [6]:
transformer = RDF2VecTransformer(
    Word2Vec(epochs=10),
    walkers=[RandomWalker(4, 10, with_reverse=False)],
    verbose=1
)

In [7]:
embeddings, literals = transformer.fit_transform(knowledge_graph, entities_distinct)

100%|██████████| 2003/2003 [08:55<00:00,  3.74it/s]


Extracted 19845 walks for 2003 entities (537.2667s)
Fitted 19845 walks (0.7333s)


100%|██████████| 2003/2003 [00:00<00:00, 5831.04it/s]

Extracted 2003 literals for 2003 entities (0.3452s)





In [7]:
embeddings

[array([-7.0613851e-03,  3.1045740e-02,  1.3899602e-02,  2.8346451e-02,
        -2.5895799e-02, -5.3677380e-02,  3.2124545e-02,  1.8032316e-02,
        -3.1993110e-03, -7.7744280e-03,  3.5371304e-02, -9.0992302e-03,
        -3.6969952e-02,  2.6642649e-02, -1.5584906e-02, -2.2991052e-02,
        -9.5244134e-03, -7.9295374e-03, -2.1679984e-02, -5.1287297e-02,
         8.9530386e-03,  3.2559741e-02,  3.6289573e-02, -9.4196834e-03,
         2.2807471e-02, -3.7524778e-02, -1.2806116e-02, -2.3576953e-03,
        -6.4958051e-02, -1.6914275e-02,  3.9585205e-03, -2.4864856e-02,
         2.7333105e-02, -3.5430413e-02, -1.2355444e-02, -2.2161780e-03,
         2.4259787e-02, -2.0871380e-02,  1.0877839e-02, -5.2840926e-02,
        -4.8348054e-02, -1.3689287e-02, -5.2059907e-02, -1.4677966e-02,
         1.5398864e-02, -1.5886402e-02, -3.9751049e-02,  3.2056618e-02,
         2.7665745e-02,  5.2853283e-02, -1.7125741e-02, -1.4777128e-02,
        -1.6441323e-02, -8.7502571e-03,  1.0340982e-02, -3.12018

In [8]:
literals

[[('https://semopenalex.org/work/W2991352583',
   'https://semopenalex.org/work/W3020532242',
   'https://semopenalex.org/work/W3120037813',
   'https://semopenalex.org/work/W3005284711',
   'https://semopenalex.org/work/W3036458784',
   'https://semopenalex.org/work/W3153615334',
   'https://semopenalex.org/work/W3111211578',
   'https://semopenalex.org/work/W3112321047',
   'https://semopenalex.org/work/W3022953708',
   'https://semopenalex.org/work/W2991598862',
   'https://semopenalex.org/work/W3047672403',
   'https://semopenalex.org/work/W3084346496',
   'https://semopenalex.org/work/W3090676142',
   'https://semopenalex.org/work/W3043550608',
   'https://semopenalex.org/work/W3010510895',
   'https://semopenalex.org/work/W3016672077',
   'https://semopenalex.org/work/W2935829974',
   'https://semopenalex.org/work/W3096260105',
   'https://semopenalex.org/work/W3015547080',
   'https://semopenalex.org/work/W3023728791',
   'https://semopenalex.org/work/W3032533822',
   'https://s