In [57]:
from rdflib import Graph, RDFS, RDF, OWL, URIRef, Literal
import json
from collections import defaultdict
from urllib.parse import urlparse

In [58]:
g = Graph()
g.parse("clariah-tools.ttl", format="turtle")
g.serialize(destination='clariah-tools.rdf', format='xml')


g = Graph()
g.parse("code-lib.ttl", format="turtle")
g.serialize(destination='code-lib.rdf', format='xml')

<Graph identifier=N16c4582d6d1b4458bf56feb7a07c00c7 (<class 'rdflib.graph.Graph'>)>

In [None]:
from ontoaligner.base import BaseOntologyParser

class SchemaOntologyParser(BaseOntologyParser):

    def get_comments(self, owl_property):

        comments = []
        for p, o in owl_property.graph.predicate_objects(subject=owl_property):
            if p == RDFS.comment:
                comments.append(str(o))
        return comments

    def extract_properties(self, graph):
        props = set()

        for s, _, _ in graph.triples((None, RDF.type, OWL.ObjectProperty)):
            if isinstance(s, URIRef):
                props.add(s)

        for s, _, _ in graph.triples((None, RDF.type, OWL.DatatypeProperty)):
            if isinstance(s, URIRef):
                props.add(s)

        for s, _, _ in graph.triples((None, RDF.type, OWL.AnnotationProperty)):
            if isinstance(s, URIRef):
                props.add(s)
        return props

    def parse(self, input_file_path: str = ""):
        g = Graph()
        g.parse(input_file_path, format="xml")
        entries = []

        for s in self.extract_properties(g):
            iri = str(s)
            name = None
            label = None
            parents = []

            for p, o in g.predicate_objects(subject=s):
                pred = str(p)

                if pred.endswith("name"):
                    name = str(o)
                    label = str(o)

                if p == RDF.type:
                    parent_iri = str(o)
                    parent_name = parent_iri.split("#")[-1] if "#" in parent_iri else parent_iri.split("/")[-1]
                    parents.append({
                        "iri": parent_iri,
                        "name": parent_name,
                        "label": parent_name
                    })

            if name and iri:
                entries.append({
                    "name": name,
                    "iri": iri,
                    "label": label,
                    "childrens": [],
                    "parents": list({p['iri']: p for p in parents}.values()),
                    "synonyms": [],
                    "comment": []
                })

        return entries

In [64]:
from ontoaligner.ontology import OMDataset

class SchemaOMDataset(OMDataset):

    track = "Schema-Alignment"
    ontology_name = "ClariahTools-CodeLib"
    source_ontology = SchemaOntologyParser()
    target_ontology = SchemaOntologyParser()


task = SchemaOMDataset()

dataset = task.collect(source_ontology_path = "clariah-tools.rdf", target_ontology_path = "code-lib.rdf")

In [65]:
dataset

{'dataset-info': {'track': 'Schema-Alignment',
  'ontology-name': 'ClariahTools-CodeLib'},
 'source': [],
 'target': [],
 'reference': []}

In [62]:
from ontoaligner import ontology, encoder
from ontoaligner.aligner import SimpleFuzzySMLightweight
from ontoaligner.utils import metrics, xmlify

encoder_model = encoder.ConceptParentLightweightEncoder()
encoder_output = encoder_model(
        source=dataset['source'],
        target=dataset['target']
)

model = SimpleFuzzySMLightweight(fuzzy_sm_threshold=0.5)
matchings = model.generate(input_data=encoder_output)

0it [00:00, ?it/s]


In [63]:
matchings

[]