In [1]:
%%capture
# Environment setup for a fresh runtime. The %%capture magic above silences this
# cell's output, so a failing install/clone will NOT be visible here; remove it
# temporarily when debugging the environment.

# Remove any previously installed `python-louvain` / `community` distributions
# (presumably to avoid a clash over the `community` import name - TODO confirm).
!pip uninstall -y python-louvain community
# Install the third-party dependencies (versions are not pinned).
!pip install aiohttp nest-asyncio rdflib python-louvain
# Fetch the pyRDF2Vec fork this notebook relies on.
!git clone https://github.com/AlexGerry/pyrdf2vec-for-graph-embeddings
# NOTE(review): `git clone` creates ./pyrdf2vec-for-graph-embeddings by default,
# while the install below targets ./pyRDF2Vec - confirm that this path exists.
# NOTE(review): `--use-feature=in-tree-build` was a transitional pip flag;
# presumably redundant (or rejected) on newer pip releases - verify.
!pip install ./pyRDF2Vec --use-feature=in-tree-build

In [None]:
from pyrdf2vec import RDF2VecTransformer
from pyrdf2vec.graphs import KG
from pyrdf2vec import walkers

In [None]:
# Root entities from which the random walks are started.

# DBpedia resource URI for Italy.
entities_dbpedia = ["http://dbpedia.org/resource/Italy"]

# Wikidata entity URI for Italy (item Q38).
entities_wikidata = ['http://www.wikidata.org/entity/Q38']


In [None]:
# Knowledge graph served remotely by the public DBpedia SPARQL endpoint.
# `literals` lists predicate chains; here a single chain made of wikiPageWikiLink.
# `mul_req=True` presumably enables bundled/concurrent SPARQL requests - confirm
# against the fork's KG documentation.
knowledge_graph_dbpedia = KG(
    "https://dbpedia.org/sparql",
    mul_req=True,
    literals=[["http://dbpedia.org/ontology/wikiPageWikiLink"]],
)

# Knowledge graph served remotely by the Wikidata SPARQL endpoint.
# NOTE(review): `query_string="query"` looks like the request parameter name
# used to send the SPARQL query to this endpoint - verify in the fork.
knowledge_graph_wikidata = KG(
    "https://query.wikidata.org/sparql",
    mul_req=True,
    literals=[["http://www.w3.org/2004/02/skos/core#prefLabel"]],
    query_string="query",
)


In [None]:
# Walking strategy, shared by the DBpedia and the Wikidata runs.
#
# With with_reverse=True a random walk first starts from the provided entity
# (entity -> ... -> ..., up to *max_depth*). Then, for each of those walks,
# another random walk (again with the same max_depth and max_walks) is started
# backwards, i.e. ending at the entity (*max_depth* ... -> ... -> entity).
# So max_walks * max_walks walks of the form
# (*max_depth* ... -> ... -> entity -> ... -> ... *max_depth*) are produced.
#
# NOTE(review): md5_bytes=None presumably keeps the full URIs in the walks
# instead of hashing them - confirm against the fork.
walker = walkers.RandomWalker(
    max_depth=3,
    max_walks=10,
    with_reverse=True,
    md5_bytes=None,
)

# Transformer built around that single walker; no embedder is passed, so the
# library's default is used. The same transformer serves both knowledge graphs.
transformer = RDF2VecTransformer(walkers=[walker], verbose=1)


In [None]:
# Extract the random walks rooted at the DBpedia entities
# (queries the remote DBpedia endpoint configured above).
walks_dbpedia = transformer.get_walks(
    knowledge_graph_dbpedia,
    entities_dbpedia,
)

In [None]:
# Extract the random walks rooted at the Wikidata entities
# (queries the remote Wikidata endpoint configured above).
walks_wikidata = transformer.get_walks(
    knowledge_graph_wikidata,
    entities_wikidata,
)

In [None]:
# Optional step: save the DBpedia walks to a text file, one walk per line with
# hops separated by spaces. Not required to continue - the walks are already
# stored in the git repository.
DBPEDIA_RESOURCE_PREFIX = "http://dbpedia.org/resource/"

# Keep only the hops that are DBpedia resources and strip the namespace from the
# front of each one. The nesting of `walks_dbpedia` (entity -> walk -> hop) is
# preserved. Slicing removes the namespace only where it is a true prefix,
# whereas str.replace would also delete occurrences later in the URI.
resource = [
    [
        [
            hop[len(DBPEDIA_RESOURCE_PREFIX):]
            for hop in walk
            if hop.startswith(DBPEDIA_RESOURCE_PREFIX)
        ]
        for walk in entity_walks
    ]
    for entity_walks in walks_dbpedia
]

# The encoding is pinned to UTF-8: DBpedia resource names may contain non-ASCII
# characters, and the default text encoding of open() depends on the platform
# locale, which can raise UnicodeEncodeError or yield non-portable files.
with open('dbpedia_walks_final.txt', 'wt', encoding='utf-8') as f:
    # Write the walks of every start entity (previously only the first entity's
    # walks were written, and an empty result raised IndexError).
    for entity_walks in resource:
        for w in entity_walks:
            f.write(' '.join(w) + '\n')


In [None]:
# Optional step: save the Wikidata walks to a text file, one walk per line with
# hops separated by spaces. Not required to continue - the walks are already
# stored in the git repository.
WIKIDATA_ENTITY_PREFIX = "http://www.wikidata.org/entity/"

# Keep only the hops that are Wikidata entities and strip the namespace from the
# front of each one. The nesting of `walks_wikidata` (entity -> walk -> hop) is
# preserved. Slicing removes the namespace only where it is a true prefix,
# whereas str.replace would also delete occurrences later in the URI.
resource = [
    [
        [
            hop[len(WIKIDATA_ENTITY_PREFIX):]
            for hop in walk
            if hop.startswith(WIKIDATA_ENTITY_PREFIX)
        ]
        for walk in entity_walks
    ]
    for entity_walks in walks_wikidata
]

# The encoding is pinned to UTF-8 so the file does not depend on the platform
# locale that open() would otherwise fall back to.
with open('wikidata_walks_final.txt', 'wt', encoding='utf-8') as f:
    # Write the walks of every start entity (previously only the first entity's
    # walks were written, and an empty result raised IndexError).
    for entity_walks in resource:
        for w in entity_walks:
            f.write(' '.join(w) + '\n')
