In [186]:
# Construction of dataset

import os, itertools
from xml.dom import minidom
from collections import Counter


# Directory holding the reference alignment files read by load_alignments().
# The trailing "/" matters wherever this string is concatenated with a file name.
alignment_folder = "reference-alignment/"

# Load reference alignments 
def load_alignments(folder):
    """Collect every (entity1, entity2) correspondence found in ``folder``.

    Each regular file in ``folder`` is parsed as XML. The ``entity1`` and
    ``entity2`` elements of a file are paired positionally and the value of
    their ``rdf:resource`` attribute is extracted.

    Args:
        folder: Path of the directory containing the alignment files. A
            trailing path separator is optional.

    Returns:
        A list of ``(entity1_uri, entity2_uri)`` string tuples, with files
        visited in sorted name order.

    Raises:
        OSError: If ``folder`` cannot be listed or a file cannot be read.
        xml.parsers.expat.ExpatError: If a file is not well-formed XML.
    """
    alignments = []
    # Sort so the result does not depend on the filesystem's listing order.
    for name in sorted(os.listdir(folder)):
        # os.path.join works whether or not `folder` ends with a separator.
        path = os.path.join(folder, name)
        # Skip sub-directories (e.g. Jupyter's ".ipynb_checkpoints"), which
        # minidom cannot parse.
        if not os.path.isfile(path):
            continue
        doc = minidom.parse(path)
        pairs = zip(doc.getElementsByTagName('entity1'),
                    doc.getElementsByTagName('entity2'))
        alignments.extend(
            (a.getAttribute('rdf:resource'), b.getAttribute('rdf:resource'))
            for a, b in pairs
        )
    return alignments
        
# All (entity1, entity2) URI pairs from every file in the alignment folder.
reference_alignments = load_alignments(alignment_folder)

In [182]:

class Ontology():
    """Thin wrapper around an RDF/XML OWL file parsed with ``xml.dom.minidom``.

    Elements are matched by their qualified tag names exactly as written in
    the file (``owl:Class``, ``owl:ObjectProperty``, ``rdfs:domain``, ...),
    so the file is expected to use the ``owl:``, ``rdf:`` and ``rdfs:``
    prefixes.
    """

    def __init__(self, ontology):
        """Parse the ontology.

        Args:
            ontology: File name (or file object) handed to ``minidom.parse``.
        """
        self.ontology = ontology
        self.ontology_obj = minidom.parse(ontology)
        self.root = self.ontology_obj.documentElement
        # Full URIs of the two OWL property kinds; not read by the methods below.
        self.object_prop = "http://www.w3.org/2002/07/owl#ObjectProperty"
        self.data_prop = "http://www.w3.org/2002/07/owl#DatatypeProperty"

    def get_child_node(self, element, tag):
        """Return the direct child *elements* of ``element`` named ``tag``.

        Text, comment and other non-element nodes are ignored.
        """
        return [child for child in element.childNodes
                if isinstance(child, minidom.Element) and child.tagName == tag]

    def has_attribute_value(self, element, attribute, value):
        """Tell whether the fragment of ``attribute`` (text after the last "#") equals ``value``."""
        return element.getAttribute(attribute).split("#")[-1] == value

    def get_triples(self):
        """Build ``(domain, range, property)`` ID triples for the object properties.

        A property is skipped when it lacks an ``rdfs:domain`` or an
        ``rdfs:range`` child. When several domain/range classes are found,
        one triple is produced per (domain, range) combination.

        Returns:
            A sorted list of unique ``(domain_id, range_id, property_id)``
            string tuples.
        """
        all_triples = set()
        for prop in self.parse_object_properties():
            domain_children = self.get_child_node(prop, "rdfs:domain")
            range_children = self.get_child_node(prop, "rdfs:range")
            if not domain_children or not range_children:
                continue
            domain_prop = self._endpoint_ids(domain_children)
            range_prop = self._endpoint_ids(range_children)
            prop_id = self.extract_ID(prop)
            # An empty side yields an empty product, i.e. no triple.
            all_triples.update(
                (dom, rng, prop_id)
                for dom, rng in itertools.product(domain_prop, range_prop)
            )
        # Sorted so the result is reproducible across interpreter runs.
        return sorted(all_triples)

    def _endpoint_ids(self, children):
        """Resolve the class IDs named by a non-empty list of domain/range elements."""
        ids = self.filter_null([self.extract_ID(el) for el in children])
        if not ids:
            # No direct reference: fall back to the owl:Class elements nested
            # under the first child (e.g. an anonymous class expression).
            nested = children[0].getElementsByTagName("owl:Class")
            ids = self.filter_null([self.extract_ID(el) for el in nested])
        return ids

    def filter_null(self, data):
        """Drop falsy entries (such as empty IDs) from ``data``."""
        return [el for el in data if el]

    def extract_ID(self, element):
        """Return the local ID of ``element``.

        The first non-empty value among ``rdf:ID``, ``rdf:resource`` and
        ``rdf:about`` is used, reduced to the text after the last "#".
        An empty string is returned when none of them is set.
        """
        element_id = (element.getAttribute("rdf:ID")
                      or element.getAttribute("rdf:resource")
                      or element.getAttribute("rdf:about"))
        return element_id.split("#")[-1]

    def parse_classes(self):
        """Return every ``owl:Class`` element below the root, at any depth."""
        return self.root.getElementsByTagName("owl:Class")

    def classes(self):
        """Return the sorted, de-duplicated, non-empty class IDs of the ontology."""
        classes = [self.extract_ID(el) for el in self.parse_classes()]
        return sorted(set(self.filter_null(classes)))

    def _is_object_property(self, element):
        """Tell whether ``element`` has an ``rdf:type`` child naming ``ObjectProperty``.

        Elements without any ``rdf:type`` child are simply not object
        properties; indexing the first child unconditionally raised
        ``IndexError`` for them.
        """
        return any(
            self.has_attribute_value(type_el, "rdf:resource", "ObjectProperty")
            for type_el in self.get_child_node(element, "rdf:type")
        )

    def parse_object_properties(self):
        """Return the top-level elements that declare object properties.

        These are the ``owl:ObjectProperty`` children of the root, followed by
        the ``owl:FunctionalProperty`` and ``owl:InverseFunctionalProperty``
        children that are typed as ``ObjectProperty`` through ``rdf:type``.
        """
        obj_properties = self.get_child_node(self.root, 'owl:ObjectProperty')
        fn_obj_properties = [
            el for el in self.get_child_node(self.root, 'owl:FunctionalProperty')
            if self._is_object_property(el)
        ]
        inv_fn_obj_properties = [
            el for el in self.get_child_node(self.root, 'owl:InverseFunctionalProperty')
            if self._is_object_property(el)
        ]
        return obj_properties + fn_obj_properties + inv_fn_obj_properties

    def object_properties(self):
        """Return the sorted, de-duplicated, non-empty object property IDs."""
        obj_props = [self.extract_ID(el) for el in self.parse_object_properties()]
        return sorted(set(self.filter_null(obj_props)))

    def load_ontology(self):
        """Load the ontology through ``get_ontology`` and return the result.

        NOTE(review): ``get_ontology`` is not imported anywhere in the visible
        code (presumably ``owlready2.get_ontology`` -- confirm); calling this
        method raises ``NameError`` until that import is added.
        """
        onto = get_ontology(self.ontology).load()
        return onto



In [194]:

def flatten(l):
    """Concatenate the items of every sub-iterable of ``l`` into one flat list."""
    return [item for sublist in l for item in sublist]


# One [first, second] name pair per alignment file: the text before the first
# "." of the file name, split on "-".
ontologies_in_alignment = [
    file_name.partition(".")[0].split("-")
    for file_name in os.listdir("reference-alignment/")
]

def generate(ontology, concept_prefix, id_prefix=(0, 0)):
    """Number the classes, object properties and triples of one ontology.

    Names are sorted before being numbered, so the same ontology always gets
    the same IDs (enumerating a ``set`` directly can give a different
    numbering on every interpreter run).

    Args:
        ontology: Path of the OWL file, forwarded to ``Ontology``.
        concept_prefix: Prefix prepended as ``"<prefix>#<name>"`` to every
            class and property name.
        id_prefix: Two-item sequence ``(first_class_id, first_property_id)``
            giving the offsets at which numbering starts.

    Returns:
        A ``(classes_dict, prop_dict, triplets)`` tuple where the two dicts map
        prefixed names to their ID as a string, and ``triplets`` is a list of
        tab-separated ``"<domain id>\\t<range id>\\t<property id>"`` lines.

    Raises:
        KeyError: If a triple refers to a class or property name that is not
            among the ontology's classes / object properties.
    """
    ont = Ontology(ontology)
    class_offset, prop_offset = id_prefix[0], id_prefix[1]

    def qualify(name):
        # Prefixing keeps equal names from different ontologies apart.
        return concept_prefix + "#" + name

    classes_dict = {qualify(elem): str(class_offset + i)
                    for i, elem in enumerate(sorted(set(ont.classes())))}
    prop_dict = {qualify(elem): str(prop_offset + i)
                 for i, elem in enumerate(sorted(set(ont.object_properties())))}
    triplets = ["\t".join((classes_dict[qualify(dom)],
                           classes_dict[qualify(rng)],
                           prop_dict[qualify(prop)]))
                for dom, rng, prop in sorted(set(ont.get_triples()))]
    return classes_dict, prop_dict, triplets

    
# For every aligned ontology pair, write the merged entity, relation and
# triple files into a per-pair benchmark directory.
for l in ontologies_in_alignment:
    ont1 = "conference_ontologies/" + l[0] + ".owl"
    ont2 = "conference_ontologies/" + l[1] + ".owl"

    benchmark_dir = "OpenKE/benchmarks/" + l[0] + "-" + l[1]
    # Creates missing parent directories too and is a no-op when the
    # directory already exists.
    os.makedirs(benchmark_dir, exist_ok=True)

    c1, p1, t1 = generate(ont1, l[0])
    # Start the second ontology's IDs right after the first one's so the
    # merged dictionaries have no ID collisions.
    c2, p2, t2 = generate(ont2, l[1], [len(c1), len(p1)])

    classes_dict = {**c1, **c2}
    prop_dict = {**p1, **p2}
    triplets = t1 + t2

    # Each file starts with its number of records, followed by one
    # tab-separated record per line.
    entity_str = str(len(classes_dict)) + "\n" + "\n".join(["\t".join(elem) for elem in classes_dict.items()])
    prop_str = str(len(prop_dict)) + "\n" + "\n".join(["\t".join(elem) for elem in prop_dict.items()])
    triplets_str = str(len(triplets)) + "\n" + "\n".join(triplets)

    # Context managers close (and flush) each file even if a write fails.
    with open(benchmark_dir + "/entity2id.txt", "w") as entity_file:
        entity_file.write(entity_str)
    with open(benchmark_dir + "/relation2id.txt", "w") as relation_file:
        relation_file.write(prop_str)
    with open(benchmark_dir + "/train2id.txt", "w") as train_file:
        train_file.write(triplets_str)

In [154]:
import torch
# Load the saved checkpoint from the OpenKE directory.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code -- only load checkpoints from a trusted source.
# NOTE(review): presumably this holds the trained embedding weights rather
# than a ready-to-use model object -- confirm how the checkpoint was saved.
model = torch.load("OpenKE/conference.ckpt")

In [190]:
# Show the ontology name pairs derived from the alignment file names.
ontologies_in_alignment


[['conference', 'ekaw'],
 ['confOf', 'ekaw'],
 ['ekaw', 'sigkdd'],
 ['edas', 'sigkdd'],
 ['confOf', 'sigkdd'],
 ['iasted', 'sigkdd'],
 ['confOf', 'iasted'],
 ['conference', 'iasted'],
 ['confOf', 'edas'],
 ['edas', 'iasted'],
 ['conference', 'edas'],
 ['cmt', 'ekaw'],
 ['cmt', 'confOf'],
 ['cmt', 'edas'],
 ['conference', 'sigkdd'],
 ['cmt', 'sigkdd'],
 ['conference', 'confOf'],
 ['edas', 'ekaw'],
 ['cmt', 'conference'],
 ['cmt', 'iasted'],
 ['ekaw', 'iasted']]

In [184]:
# Sanity check: count the unique (domain, range, property) triples extracted
# from the "conference" ontology.
ont = Ontology("conference_ontologies/conference.owl")
len(ont.get_triples())

54