In [6]:
import rdflib
from rdflib import Namespace
from rdflib.namespace import DC, DCTERMS, DOAP, FOAF, SKOS, OWL, RDF, RDFS, VOID, XMLNS, XSD
import os
from os.path import exists
import wget
import urllib

def quote(id):
    """Percent-encode ``id`` so it can be embedded in an IRI.

    Args:
        id: The string to encode.

    Returns:
        ``id`` with characters percent-escaped by ``urllib.parse.quote``;
        the characters ``@~():/`` are passed as ``safe`` and therefore left
        untouched.
    """
    # Import the submodule explicitly: a bare `import urllib` does not load
    # `urllib.parse`, so `urllib.parse.quote` only worked when some other
    # package happened to import it first.
    from urllib.parse import quote as _percent_encode

    return _percent_encode(id, safe='@~():/')

In [7]:
# Locations of the three WN18RR split files inside the local cache directory.
# Built with os.path.join so the separator is correct on every platform
# (a hard-coded backslash only resolves on Windows).
train_path = os.path.join("cache", "train.txt")
test_path = os.path.join("cache", "test.txt")
valid_path = os.path.join("cache", "valid.txt")

In [9]:
# Source of the WordNet definitions archive:
# https://everest.hds.utc.fr/lib/exe/fetch.php?media=en:wordnet-mlj12.tar.gz

import requests
import tarfile

# Everything fetched below is unpacked under ./cache.
os.makedirs('cache', exist_ok=True)

# The WordNet archive is streamed directly into the extractor and is never
# written to disk, so guarding on the .tar.gz would re-download on every run.
# Guard on the extracted definitions file that the notebook reads instead.
if not exists("cache/wordnet-mlj12/wordnet-mlj12-definitions.txt"):
    url = "https://everest.hds.utc.fr/lib/exe/fetch.php?media=en:wordnet-mlj12.tar.gz"
    with requests.get(url, stream=True) as response:
        # Fail loudly on an HTTP error instead of feeding an error page to tarfile.
        response.raise_for_status()
        # "r|gz" reads the gzip stream sequentially, which is what a
        # non-seekable HTTP body requires.
        with tarfile.open(fileobj=response.raw, mode="r|gz") as file:
            file.extractall(path="cache")

# WN18RR is saved to disk first, so the archive itself marks "already fetched".
if not exists("cache/WN18RR.tar.gz"):
    url = "https://github.com/TimDettmers/ConvE/raw/master/WN18RR.tar.gz"
    wget.download(url, 'cache/WN18RR.tar.gz')
    with tarfile.open('cache/WN18RR.tar.gz') as file:
        file.extractall('./cache')



In [7]:
# Read the WordNet definitions, one record per line. Blank lines are dropped
# so a stray empty line cannot break the three-way unpack in the loop below.
with open("./cache/wordnet-mlj12/wordnet-mlj12-definitions.txt") as definitions:
    content = [x.strip() for x in definitions if x.strip()]

ai = Namespace("http://ai-strategies.org/kgc/")

g = rdflib.Graph()

g.bind("ai", ai)
g.bind("rdf", RDF)
g.bind("rdfs", RDFS)

# Part-of-speech tag found in the word field -> one-letter suffix used in the
# wordnet-rdf.princeton.edu resource URL. Defined once, outside the loop.
pos = {
    "NN": "n",
    "VB": "v",
    "JJ": "a",
    "RB": "r"
}

for line in content:
    # Each record has exactly three tab-separated fields; anything else raises
    # ValueError here.
    synset_id, word, comment = line.split("\t")

    # After removing "__" and splitting on "_", the second-to-last token is the
    # POS tag and everything before it forms the label; the last token is unused.
    word = word.replace("__", "").split("_")
    entity = " ".join(word[0:-2])
    entity_type = word[-2]

    subject = ai.term(synset_id)

    g.add((
        subject,
        RDFS.label,
        rdflib.Literal(entity, datatype=XSD.string)
    ))

    # NOTE(review): the object of rdf:type is a plain string literal, not an
    # IRI. Kept as-is so the serialized graph does not change.
    g.add((
        subject,
        RDF.type,
        rdflib.Literal(entity_type, datatype=XSD.string)
    ))

    # An unknown POS tag raises KeyError rather than emitting a malformed URL.
    g.add((
        subject,
        ai.wwwresource,
        rdflib.Literal(f"http://wordnet-rdf.princeton.edu/pwn30/{synset_id}-{pos[entity_type]}", datatype=XSD.string)
    ))

    g.add((
        subject,
        RDFS.comment,
        rdflib.Literal(comment, datatype=XSD.string)
    ))


In [8]:

# Human-readable label for every relation identifier used in the split files.
relations = {
    "_hypernym": "has hypernym",
    "_derivationally_related_form": "has derivationally related form",
    "_instance_hypernym": "has instance hypernym",
    "_also_see": "also see",
    "_member_meronym": "is member of meronym",
    "_synset_domain_topic_of": "is a synset domain topic of",
    "_has_part": "has part",
    "_member_of_domain_usage": "is member of domain usage",
    "_member_of_domain_region": "is member of domain region",
    "_verb_group": "part of same verb group as",
    "_similar_to": "is similar to"
}

# Attach each label to its relation node as an xsd:string rdfs:label.
for relation_key in relations:
    relation_node = ai.term(relation_key)
    label_literal = rdflib.Literal(relations[relation_key], datatype=XSD.string)
    g.add((relation_node, RDFS.label, label_literal))


In [9]:
# Create the output directory first: serializing into a missing directory
# fails on a fresh checkout.
os.makedirs(os.path.abspath(r"workspace/graphs"), exist_ok=True)
g.serialize(os.path.abspath(r"workspace/graphs/wn18rr.ttl"),format="turtle")

In [10]:

def read_set(path, typ):
    """Append one RDF-star split annotation per triple in ``path`` to ``outfile``.

    Args:
        path: Text file holding one tab-separated ``head``, ``relation``,
            ``tail`` triple per line. Blank lines are skipped.
        typ: Split name, written as the ``ai:<typ>`` object of ``ai:split``.

    Raises:
        ValueError: If a non-blank line does not have exactly three
            tab-separated fields.

    Writes to the module-level ``outfile`` handle, which must be open.
    """
    with open(path) as f:
        # Stream the file instead of loading every line into memory first.
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                continue
            head, rel, tail = line.split("\t")
            outfile.write(f"<<<http://ai-strategies.org/kgc/{head}> <http://ai-strategies.org/kgc/{rel}> <http://ai-strategies.org/kgc/{tail}>>> ai:split ai:{typ} . " + "\n")


# Append to the Turtle file; the `with` block closes the handle even if one of
# the reads fails part-way.
# NOTE(review): the written statements use the `ai:` prefix, so the file must
# already declare it - confirm the serialized header contains it.
with open(r"workspace/graphs/wn18rr.ttl", 'a') as outfile:
    read_set(train_path, 'train')
    read_set(test_path, 'test')
    read_set(valid_path, 'valid')