In [8]:
from google.colab import drive
# Mount Google Drive so the dataset files referenced below under /content/drive/ are readable.
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [9]:
import os
import json

**ZSL DATA**
---
Load the KG triples and count them per split or per relation.

In [10]:
def read_triples(dataname, type, data_dir="/content/drive/MyDrive/ISWC_demo/ZS_KGC/data"):
  """
  Function: Count the triples stored in the training, validation or testing split

  Parameter
  --------------------------
  dataname: str
    Dataset Name, either "NELL" or "Wiki".
  type: str
    Split to read: "train" (for training), "dev" (for validation) or "test" (for testing).
  data_dir: str
    Root directory that contains "<dataname>/datasplit/<type>_tasks.json".
    Defaults to the Google Drive location used by this notebook.

  Returns
  --------------------------
    total_triples: int
      Sum of the lengths of all values in the split's JSON object.
  Raises
  --------------------------
    ValueError
      If dataname or type is not one of the supported values.
  Examples
  --------------------------
  >>> read_triples("NELL", "train")
  181053
  """
  # Fail early with a clear message instead of an UnboundLocalError further down.
  if dataname not in ("NELL", "Wiki"):
    raise ValueError('dataname must be "NELL" or "Wiki", got {!r}'.format(dataname))
  if type not in ("train", "dev", "test"):
    raise ValueError('type must be "train", "dev" or "test", got {!r}'.format(type))

  path = os.path.join(data_dir, dataname, "datasplit", type + "_tasks.json")

  # The context manager closes the file handle even if JSON parsing fails.
  with open(path) as task_file:
    tasks = json.load(task_file)

  # Each key maps to a list of triples; the total is the sum of the list lengths.
  return sum(len(triples) for triples in tasks.values())

In [11]:
def read_triples_per_relation(dataname, type, relation_name, data_dir="/content/drive/MyDrive/ISWC_demo/ZS_KGC/data"):
  """
  Function: Load triples of one specified relation

  Parameter
  --------------------------
  dataname: str
    Dataset Name, either "NELL" or "Wiki".
  type: str
    Split to read: "train" (for training), "dev" (for validation) or "test" (for testing).
  relation_name: str
    relation name; it is looked up as a key of the split's JSON object.
  data_dir: str
    Root directory that contains "<dataname>/datasplit/<type>_tasks.json".
    Defaults to the Google Drive location used by this notebook.

  Returns
  --------------------------
    total_per_triples, detail_triple
      Number of triples stored under relation_name and a list copy of them.
      (0, []) is returned when the relation is not present in the split.
  Raises
  --------------------------
    ValueError
      If dataname or type is not one of the supported values.
  Examples
  --------------------------
  >>> total, triples = read_triples_per_relation(
  ...     "NELL", "train", "concept:agriculturalproductcutintogeometricshape")
  >>> total
  173
  """
  # Fail early with a clear message instead of an UnboundLocalError further down.
  if dataname not in ("NELL", "Wiki"):
    raise ValueError('dataname must be "NELL" or "Wiki", got {!r}'.format(dataname))
  if type not in ("train", "dev", "test"):
    raise ValueError('type must be "train", "dev" or "test", got {!r}'.format(type))

  path = os.path.join(data_dir, dataname, "datasplit", type + "_tasks.json")

  # The context manager closes the file handle even if JSON parsing fails.
  with open(path) as task_file:
    tasks = json.load(task_file)

  # Direct lookup; a missing relation yields an empty list rather than leaving
  # the result unbound.
  detail_triple = list(tasks.get(relation_name, []))
  total_per_triples = len(detail_triple)

  return total_per_triples, detail_triple

In [12]:
# Dataset whose splits are counted below.
dataname = "NELL" # "NELL" OR "Wiki"
# Count the triples of each split in the fixed order train, dev, test.
total_train_triples, total_dev_triples, total_test_triples = [
  read_triples(dataname, split_name) for split_name in ("train", "dev", "test")
]
# Report one line per split.
print("total_train_triples in {}:{}".format(dataname, total_train_triples))
print("total_dev_triples in {}:{}".format(dataname, total_dev_triples))
print("total_test_triples in {}:{}".format(dataname, total_test_triples))

total_train_triples in NELL:181053
total_dev_triples in NELL:1856
total_test_triples in NELL:5483


In [13]:
dataname = "NELL" # "NELL" OR "Wiki"
data_type = "train" # "train" or "dev" or "test"
relation_name = "concept:agriculturalproductcutintogeometricshape" # must be a relation present in the chosen dataset and split
# Fetch the triple count and the full triple list for the selected relation.
total_per_triples, detail_triple = read_triples_per_relation(dataname, data_type, relation_name)
print("total_per_triples in {} about {}:{}".format(dataname, relation_name, total_per_triples))
# NOTE: this prints every triple of the relation, which can be a very long line.
print("detail triples in {} about {}:{}".format(dataname, relation_name, detail_triple))

total_per_triples in NELL about concept:agriculturalproductcutintogeometricshape:173
detail triples in NELL about concept:agriculturalproductcutintogeometricshape:[['concept:agriculturalproduct:mango', 'concept:agriculturalproductcutintogeometricshape', 'concept:geometricshape:cubes'], ['concept:agriculturalproduct:value', 'concept:agriculturalproductcutintogeometricshape', 'concept:geometricshape:percent'], ['concept:vegetable:cucumber', 'concept:agriculturalproductcutintogeometricshape', 'concept:geometricshape:dice'], ['concept:vegetable:cucumber', 'concept:agriculturalproductcutintogeometricshape', 'concept:geometricshape:slice'], ['concept:vegetable:pepper', 'concept:agriculturalproductcutintogeometricshape', 'concept:geometricshape:squares'], ['concept:vegetable:pepper', 'concept:agriculturalproductcutintogeometricshape', 'concept:geometricshape:dice'], ['concept:agriculturalproduct:peel', 'concept:agriculturalproductcutintogeometricshape', 'concept:geometricshape:cubes'], ['conc

**Onto DATA**
---
Load the ontological schema triples.

In [14]:
!pip install rdflib

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting rdflib
  Downloading rdflib-6.2.0-py3-none-any.whl (500 kB)
[K     |████████████████████████████████| 500 kB 33.4 MB/s 
Collecting isodate
  Downloading isodate-0.6.1-py2.py3-none-any.whl (41 kB)
[K     |████████████████████████████████| 41 kB 527 kB/s 
Installing collected packages: isodate, rdflib
Successfully installed isodate-0.6.1 rdflib-6.2.0


In [16]:
def load_onto_triples(dataname, triple_type="all", read_format="list",
                      data_dir="/content/drive/MyDrive/ISWC_demo/ZS_KGC/data"):
  """
  Function: load Ontological Schemas (mapped to RDF graphs) with different type

  Parameter
  --------------------------
  dataname: str
    Dataset Name. Used as the sub-directory name under data_dir; when
    read_format is "turtle" or "xml" it must be "NELL" or "Wiki".
  triple_type: str
    Triple Type, one of
    "rdfs" (triples of RDFS semantics, i.e., rdfs:subPropertyOf, rdfs:domain, rdfs:range, rdfs:subClassOf),
    "literal" (textual information, i.e., textual descriptions and literal names for KG relation and entity type),
    "owl" (triples of OWL semantics),
    or the default "all", which loads the triples of all the above types.
    Any other value loads nothing.
  read_format: str
    Return Format. "turtle" or "xml" return a serialized RDF graph; any other
    value (default "list") returns the triple list.
  data_dir: str
    Root directory that contains "<dataname>/Onto_triples/".
    Defaults to the Google Drive location used by this notebook.

  Returns
  --------------------------
    List of (head, relation, tail) tuples, or the result of rdflib's
    Graph.serialize (for xml and turtle)
  Raises
  --------------------------
    ValueError
      If read_format is "turtle"/"xml" and dataname is neither "NELL" nor "Wiki".
  Examples
  --------------------------
  >>> triples = load_onto_triples("NELL", triple_type="owl")
  >>> turtle_text = load_onto_triples("Wiki", triple_type="literal", read_format="turtle")
  """
  data_path = os.path.join(data_dir, dataname, "Onto_triples")
  as_graph = read_format in ["turtle", "xml"]

  if as_graph:
    # rdflib is imported only when a graph is requested, so the plain-list
    # format keeps working without rdflib installed.
    from rdflib import Graph, Literal, URIRef, Namespace #basic RDF handling
    from rdflib.namespace import RDF, RDFS, OWL, XSD #most common namespaces
    if dataname == 'NELL':
      namespace = 'http://www.semanticweb.org/ontologies/NELL#'
    elif dataname == 'Wiki':
      namespace = 'http://www.semanticweb.org/ontologies/Wikidata#'
    else:
      # Previously this fell through and failed later with an UnboundLocalError.
      raise ValueError('dataname must be "NELL" or "Wiki" for read_format {!r}, got {!r}'.format(read_format, dataname))
    namespace = Namespace(namespace)
    g = Graph()

  def readTriples(file_name):
    """Parse a tab-separated file with three fields per line.

    Returns (unique first/third fields, unique second fields, triples in file order).
    """
    triples, entities, relations = [], set(), set()
    with open(file_name, 'r') as triple_file:
      for line in triple_file:
        # Strip only the newline: slicing off the last character would corrupt
        # a final line that has no trailing newline.
        line = line.rstrip('\n')
        if not line.strip():
          continue  # tolerate blank lines instead of raising IndexError
        fields = line.split('\t')
        h, r, t = fields[0], fields[1], fields[2]
        entities.update((h, t))
        relations.add(r)
        triples.append((h, r, t))
    return list(entities), list(relations), triples

  results = list()
  if triple_type == 'all' or triple_type == 'rdfs':
    # load triples of RDFS semantics
    ents, rels, triples = readTriples(os.path.join(data_path, 'rdfs_triples.txt'))
    results.extend(triples)
    print("RDFS #concepts: %d, #meta-relations: %d, #triples:%d"%(len(ents), len(rels), len(triples)))

    if as_graph:
      rdfs_predicates = {
        'rdfs:subPropertyOf': RDFS.subPropertyOf,
        'rdfs:subClassOf': RDFS.subClassOf,
        'rdfs:domain': RDFS.domain,
        'rdfs:range': RDFS.range,
      }
      for (h, r, t) in triples:
        # Keep only the part after the first ':' (the prefix is dropped).
        h_content = h.split(':')[1]
        t_content = t.split(':')[1]
        predicate = rdfs_predicates.get(r)
        if predicate is not None:
          g.add((URIRef(namespace + h_content), predicate, URIRef(namespace + t_content)))

  if triple_type == 'all' or triple_type == 'literal':
    # load triples of textual information
    ents, rels, triples = readTriples(os.path.join(data_path, 'literals.txt'))
    results.extend(triples)
    print("LITERAL #concepts: %d, #meta-relations: %d, #triples:%d"%(len(ents), len(rels), len(triples)))
    if as_graph:
      literal_predicates = {
        'rdfs:label': RDFS.label,
        'rdfs:comment': RDFS.comment,
      }
      for (h, r, t) in triples:
        h_content = h.split(':')[1]
        predicate = literal_predicates.get(r)
        if predicate is not None:
          g.add((URIRef(namespace + h_content), predicate, Literal(t, datatype=XSD.string)))

  if triple_type == 'all' or triple_type == 'owl':
    # load triples of OWL semantics
    owl_triples = list()
    _, _, triples = readTriples(os.path.join(data_path, 'owl1.txt'))
    results.extend(triples)
    owl_triples.extend(triples)
    if as_graph:
      # Property characteristics recognised in the tail of an owl1.txt triple.
      owl_characteristics = (
        'SymmetricProperty',
        'AsymmetricProperty',
        'ReflexiveProperty',
        'IrreflexiveProperty',
        'FunctionalProperty',
        'InverseFunctionalProperty',
      )
      for (h, r, t) in triples:
        h_content = h.split(':')[1]
        t_content = t.split(':')[1]
        if t_content in owl_characteristics:
          g.add((URIRef(namespace + h_content), RDF.type, getattr(OWL, t_content)))

    def save_composition(r1, r2, r3, y1, y2):
      """Build the five triples that encode one (r1, r2, r3) composition row."""
      triples = list()
      triples.append((r2, 'rdf:first', y2))
      triples.append(('rdf:nil', 'rdf:rest', y2))
      triples.append((y2, 'rdf:rest', y1))
      triples.append((r1, 'rdf:first', y1))
      triples.append((y1, 'owl:propertyChainAxiom', r3))
      if as_graph:
        r1 = r1.split(':')[1]
        r2 = r2.split(':')[1]
        r3 = r3.split(':')[1]
        # NOTE(review): y1/y2 are '_:y<N>' labels and are turned into URIs under
        # the dataset namespace rather than blank nodes; confirm this is intended.
        g.add((URIRef(namespace + r2), RDF.first, URIRef(namespace + y2)))
        g.add((RDF.nil, RDF.rest, URIRef(namespace + y2)))
        g.add((URIRef(namespace + y2), RDF.rest, URIRef(namespace + y1)))
        g.add((URIRef(namespace + r1), RDF.first, URIRef(namespace + y1)))
        g.add((URIRef(namespace + y1), OWL.propertyChainAxiom, URIRef(namespace + r3)))
      return triples

    _, _, triples = readTriples(os.path.join(data_path, 'owl2_composition.txt'))
    all_composition_triples = list()
    for row_index, (r1, r2, r3) in enumerate(triples):
      # Every composition row gets two fresh labels: _:y1/_:y2, _:y3/_:y4, ...
      y1 = '_:y' + str(2 * row_index + 1)
      y2 = '_:y' + str(2 * row_index + 2)
      all_composition_triples.extend(save_composition(r1, r2, r3, y1, y2))

    results.extend(all_composition_triples)
    owl_triples.extend(all_composition_triples)

    print("OWL #triples: %d"%(len(owl_triples)))

  if as_graph:
    g.bind("owl", OWL)
    g.bind(dataname.lower(), namespace)
    if dataname == 'Wiki':
      g.bind('wikidata', namespace)
    return g.serialize(format=read_format)
  else:
    return results

In [17]:
# Load only the OWL-semantics triples of NELL as a Python list (the default read_format).
results = load_onto_triples("NELL", triple_type="owl")

OWL #triples: 214


In [18]:
# Load the textual (label/comment) triples of Wiki and serialize them as Turtle.
results = load_onto_triples("Wiki", triple_type="literal", read_format="turtle")

LITERAL #concepts: 5591, #meta-relations: 2, #triples:3808


In [19]:
# Show the complete serialization held in `results`; the output can be very long.
print(results)

@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix wikidata: <http://www.semanticweb.org/ontologies/Wikidata#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

wikidata:P1000 rdfs:label "record held"^^xsd:string ;
    rdfs:comment "notable record achieved by a person or entity, include qualifiers for dates held"^^xsd:string .

wikidata:P1001 rdfs:label "applies to jurisdiction"^^xsd:string ;
    rdfs:comment "the item (institution, law, public office, public register...) or statement belongs to or has power over or applies to the value (a territorial jurisdiction: a country, state, municipality, ...)"^^xsd:string .

wikidata:P1002 rdfs:label "engine configuration"^^xsd:string ;
    rdfs:comment "configuration of an engine's cylinders"^^xsd:string .

wikidata:P101 rdfs:label "field of work"^^xsd:string ;
    rdfs:comment "specialization of a person or organization; see P106 for the occupation"^^xsd:string .

wikidata:P1018 rdfs:label "language regulatory body"^^xsd:str