In [2]:
# Construction of dataset

import os, itertools
from xml.dom import minidom


# Directory holding the reference alignment files. The path is relative, so it
# is resolved against the current working directory of the kernel.
alignment_folder = "reference-alignment/"

# Load reference alignments 
def load_alignments(folder):
    """Collect every entity pair from the alignment files in ``folder``.

    Each regular file directly inside ``folder`` is parsed as XML. Within a
    document the ``entity1`` and ``entity2`` elements are paired up in
    document order and the ``rdf:resource`` attribute of each side is read.

    Args:
        folder: Path of the directory holding the alignment files. A trailing
            path separator is optional.

    Returns:
        A list of ``(entity1_resource, entity2_resource)`` string tuples,
        concatenated over all files in sorted filename order.

    Raises:
        OSError: If ``folder`` cannot be listed or a file cannot be read.
        xml.parsers.expat.ExpatError: If a file is not well-formed XML.
    """
    alignments = []
    # os.listdir returns names in arbitrary order; sorting keeps the result
    # reproducible across runs and machines.
    for filename in sorted(os.listdir(folder)):
        # os.path.join works whether or not `folder` ends with a separator.
        path = os.path.join(folder, filename)
        if not os.path.isfile(path):
            # Skip sub-directories (e.g. Jupyter's .ipynb_checkpoints), which
            # cannot be parsed as XML documents.
            continue
        doc = minidom.parse(path)
        pairs = zip(doc.getElementsByTagName('entity1'), doc.getElementsByTagName('entity2'))
        alignments.extend(
            (first.getAttribute('rdf:resource'), second.getAttribute('rdf:resource'))
            for first, second in pairs
        )
    return alignments
        
# Pairs of rdf:resource values (entity1, entity2) gathered from every file in
# the alignment folder.
reference_alignments = load_alignments(alignment_folder)

In [134]:

class Ontology():
    """DOM-based reader for an RDF/XML ontology file.

    The file is parsed once with ``xml.dom.minidom``; the methods then walk
    the direct children of the document element to list classes, object
    properties and (domain, property, range) triples. Identifiers are the
    part after the last ``#`` of an element's ``rdf:ID``, ``rdf:resource`` or
    ``rdf:about`` attribute.
    """

    def __init__(self, ontology):
        """Parse the ontology document.

        Args:
            ontology: Path of the ontology file; passed to ``minidom.parse``
                and also kept for ``load_ontology``.
        """
        self.ontology = ontology
        self.ontology_obj = minidom.parse(ontology)
        self.root = self.ontology_obj.documentElement
        self.object_prop = "http://www.w3.org/2002/07/owl#ObjectProperty"
        self.data_prop = "http://www.w3.org/2002/07/owl#DatatypeProperty"

    def get_child_node(self, element, tag):
        """Return the direct element children of ``element`` named ``tag``.

        Text, comment and other non-element nodes are ignored, and nested
        descendants are not searched.
        """
        return [
            child for child in element.childNodes
            if isinstance(child, minidom.Element) and child.tagName == tag
        ]

    def has_attribute_value(self, element, attribute, value):
        """Tell whether the fragment after the last ``#`` of an attribute equals ``value``."""
        return element.getAttribute(attribute).split("#")[-1] == value

    def _referenced_classes(self, nodes):
        """Return the class identifiers named by ``rdfs:domain``/``rdfs:range`` nodes.

        Identifiers found directly on the nodes win. If there are none, the
        ``owl:Class`` descendants of the first node are used instead. An empty
        ``nodes`` list yields an empty result.
        """
        direct = self.filter_null([self.extract_ID(node) for node in nodes])
        if direct or not nodes:
            # Guarding the empty case avoids indexing nodes[0] on a property
            # that declares no domain (or no range) at all.
            return direct
        nested = nodes[0].getElementsByTagName("owl:Class")
        return self.filter_null([self.extract_ID(node) for node in nested])

    def get_triples(self):
        """Build ``(domain, property, range)`` identifier triples.

        For every object property the domain and range identifiers are
        resolved and their cartesian product is emitted. Properties missing a
        resolvable domain or range contribute nothing.

        Returns:
            A list of 3-tuples of strings, in property order.
        """
        all_triples = []
        for prop in self.parse_object_properties():
            domain_ids = self._referenced_classes(self.get_child_node(prop, "rdfs:domain"))
            range_ids = self._referenced_classes(self.get_child_node(prop, "rdfs:range"))
            if domain_ids and range_ids:
                prop_id = self.extract_ID(prop)
                all_triples.extend(
                    (domain_id, prop_id, range_id)
                    for domain_id, range_id in itertools.product(domain_ids, range_ids)
                )
        return all_triples

    def filter_null(self, data):
        """Return the truthy items of ``data`` as a list."""
        return [el for el in data if el]

    def extract_ID(self, element):
        """Return the identifier of ``element``, or ``""`` when it has none.

        The first non-empty attribute among ``rdf:ID``, ``rdf:resource`` and
        ``rdf:about`` is used, reduced to the text after its last ``#``.
        """
        element_id = (
            element.getAttribute("rdf:ID")
            or element.getAttribute("rdf:resource")
            or element.getAttribute("rdf:about")
        )
        return element_id.split("#")[-1]

    def parse_classes(self):
        """Return the ``owl:Class`` elements that are direct children of the root."""
        return self.get_child_node(self.root, 'owl:Class')

    def classes(self):
        """Return the non-empty identifiers of the top-level ``owl:Class`` elements."""
        return self.filter_null([self.extract_ID(el) for el in self.parse_classes()])

    def _is_object_property(self, element):
        """Tell whether any ``rdf:type`` child of ``element`` points at ``ObjectProperty``.

        An element without ``rdf:type`` children is not an object property.
        """
        return any(
            self.has_attribute_value(type_node, "rdf:resource", "ObjectProperty")
            for type_node in self.get_child_node(element, "rdf:type")
        )

    def parse_object_properties(self):
        """Return the top-level elements that declare an object property.

        These are every ``owl:ObjectProperty`` element, followed by the
        ``owl:FunctionalProperty`` and ``owl:InverseFunctionalProperty``
        elements typed as ``ObjectProperty`` through an ``rdf:type`` child.
        """
        obj_properties = self.get_child_node(self.root, 'owl:ObjectProperty')
        fn_obj_properties = [
            el for el in self.get_child_node(self.root, 'owl:FunctionalProperty')
            if self._is_object_property(el)
        ]
        inv_fn_obj_properties = [
            el for el in self.get_child_node(self.root, 'owl:InverseFunctionalProperty')
            if self._is_object_property(el)
        ]
        return obj_properties + fn_obj_properties + inv_fn_obj_properties

    def object_properties(self):
        """Return the distinct, non-empty object property identifiers.

        Duplicates are dropped and the first-seen order is kept, so the result
        is the same on every run.
        """
        obj_props = [self.extract_ID(el) for el in self.parse_object_properties()]
        return list(dict.fromkeys(self.filter_null(obj_props)))

    def load_ontology(self):
        """Load the ontology file through ``get_ontology`` and return the result."""
        # NOTE(review): `get_ontology` is neither defined nor imported in the
        # visible cells -- presumably owlready2.get_ontology; confirm it is
        # imported before this method is called.
        onto = get_ontology(self.ontology).load()
        return onto

# Relative path of the ontology file to inspect.
ontology = "conference_ontologies/conference.owl"

# Constructing the object parses the file right away (Ontology.__init__ calls minidom.parse).
ont = Ontology(ontology)


In [135]:
# Show every (domain, property, range) triple extracted from the ontology.
ont.get_triples()

[('Organizing_committee',
  'was_an_organizing_committee_of',
  'Conference_volume'),
 ('Program_committee', 'was_a_program_committee_of', 'Conference_volume'),
 ('Person', 'contributes', 'Conference_document'),
 ('Conference_volume', 'has_parts', 'Conference_part'),
 ('Reviewer', 'invites_co-reviewers', 'Reviewer'),
 ('Conference_document', 'has_authors', 'Person'),
 ('Conference_volume', 'has_workshops', 'Workshop'),
 ('Review_expertise',
  'has_a_submitted_contribution',
  'Submitted_contribution'),
 ('Committee', 'has_members', 'Committee_member'),
 ('Conference_contribution', 'is_submitted_at', 'Conference_volume'),
 ('Review_preference', 'belongs_to_reviewers', 'Reviewer'),
 ('Review_expertise', 'belongs_to_reviewers', 'Reviewer'),
 ('Topic', 'belongs_to_a_review_reference', 'Review_preference'),
 ('Conference_volume', 'has_tracks', 'Track'),
 ('Active_conference_participant', 'gives_presentations', 'Presentation'),
 ('Reviewed_contribution', 'has_a_review', 'Review'),
 ('Confere