Skip to content

Commit

Permalink
feat: add ontology import
Browse files Browse the repository at this point in the history
  • Loading branch information
simontaurus committed Apr 21, 2023
1 parent ec27c2f commit 385e451
Show file tree
Hide file tree
Showing 4 changed files with 295 additions and 6 deletions.
210 changes: 210 additions & 0 deletions examples/ontology_import.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
import json
import os
import re
from uuid import UUID

from pyld import jsonld
from rdflib import Graph

import osw.model.entity as model
from osw.core import OSW
from osw.wtsite import WtSite

# The credentials come from a password file located next to this script;
# create/update it under examples/accounts.pwd.yaml before running.
_script_dir = os.path.dirname(os.path.abspath(__file__))
pwd_file_path = os.path.join(_script_dir, "accounts.pwd.yaml")

# Connect to the wiki and wrap the site in an OSW instance.
wtsite = WtSite.from_domain("onto-wiki.eu", pwd_file_path)
osw = OSW(site=wtsite)

# load the EmmoTerm schema => run this code only once
# osw.fetch_schema(
#     osw.FetchSchemaParam(
#         schema_title="Category:OSW57beed5e1294434ba77bb6516e461456", mode="replace"  # EmmoTerm
#     )
# )

# Load the ontology from the local Turtle file. Alternative sources:
# ontology_graph.parse("http://www.w3.org/People/Berners-Lee/card")
# ontology_graph.parse("https://raw.githubusercontent.com/emmo-repo/domain-battery/master/battery.ttl", format="n3")
ontology_graph = Graph()
ontology_graph.parse(r"BVCO_inferred.ttl")

# Serialize the graph as JSON-LD text and read it back as a plain dict;
# `g` is the JSON-LD dict used by the following steps.
jsonld_text = ontology_graph.serialize(format="json-ld", auto_compact=True)
g = json.loads(jsonld_text)

# The JSON-LD context is assembled from three groups of term definitions.
# Insertion order of the merged dict is the same as the order written here.

# 1) namespace prefixes
_namespace_prefixes = {
    "owl": "http://www.w3.org/2002/07/owl#",
    "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
    "xsd": "http://www.w3.org/2001/XMLSchema#",
    "skos": "http://www.w3.org/2004/02/skos/core#",
    "dc": "http://purl.org/dc/terms/",
    # "emmo": "http://emmo.info/emmo#",  # keep values with full uri
}

# 2) plain names for JSON-LD keywords and RDFS/OWL/SKOS/DC properties
_term_aliases = {
    "uri": {"@id": "@id"},
    "rdf_type": {"@id": "@type"},
    # "label": "rdfs:label",
    "label": {"@id": "skos:prefLabel"},
    "altLabel": {"@id": "skos:altLabel"},
    "text": {"@id": "@value"},
    "lang": {"@id": "@language"},
    "subClassOf": {"@id": "rdfs:subClassOf", "@type": "@id"},
    "source": "dc:source",
    "disjointUnionOf": "owl:disjointUnionOf",
    "disjointWith": "owl:disjointWith",
    "equivalentClass": "owl:equivalentClass",
    "unionOf": {"@id": "owl:unionOf", "@container": "@list", "@type": "@id"},
    "comment": "rdfs:comment",
    "isDefinedBy": "rdfs:isDefinedBy",
    "seeAlso": "rdfs:seeAlso",
}

# 3) shorten properties: readable names for properties identified by EMMO IRIs
_emmo_property_aliases = {
    "qudtReference": "http://emmo.info/emmo#EMMO_1f1b164d_ec6a_4faa_8d5e_88bda62316cc",
    "omReference": "http://emmo.info/emmo#EMMO_209ba1b3_149f_4ff0_b672_941610eafd72",
    "wikidataReference": "http://emmo.info/emmo#EMMO_26bf1bef_d192_4da6_b0eb_d2209698fb54",
    "ISO9000Reference": "http://emmo.info/emmo#EMMO_3aa37f92_8dc5_4ee4_8438_e41e6ae20c62",
    "IEVReference": "http://emmo.info/emmo#EMMO_50c298c2_55a2_4068_b3ac_4e948c33181f",
    "dbpediaReference": "http://emmo.info/emmo#EMMO_6dd685dd_1895_46e4_b227_be9f7d643c25",
    "etymology": "http://emmo.info/emmo#EMMO_705f27ae_954c_4f13_98aa_18473fc52b25",
    "definition": "http://emmo.info/emmo#EMMO_70fe84ff_99b6_4206_a9fc_9a8931836d84",
    "ISO80000Reference": "http://emmo.info/emmo#EMMO_8de5d5bf_db1c_40ac_b698_095ba3b18578",
    "ISO14040Reference": "http://emmo.info/emmo#EMMO_964568dd_64d2_454b_a12f_ac389f1c5e7f",
    "description": "http://emmo.info/emmo#EMMO_967080e5_2f42_4eb2_a3a9_c58143e835f9",  # elucidation
    "example": "http://emmo.info/emmo#EMMO_b432d2d5_25f4_4165_99c5_5935a7763c1a",
    "VIMTerm": "http://emmo.info/emmo#EMMO_bb49844b_45d7_4f0d_8cae_8e552cbc20d6",
    "emmo_comment": "http://emmo.info/emmo#EMMO_c7b62dd7_063a_4c2a_8504_42f7264ba83f",
    "wikipediaReference": "http://emmo.info/emmo#EMMO_c84c6752_6d64_48cc_9500_e54a3c34898d",
    "iupacReference": "http://emmo.info/emmo#EMMO_fe015383_afb3_44a6_ae86_043628697aa2",
}

# define the context
context = {**_namespace_prefixes, **_term_aliases, **_emmo_property_aliases}

# compact the json-ld (replace IRIs defined in the context with plain properties)
compacted = jsonld.compact(g, context)

# Properties handled by the postprocessing passes (see postprocess_node):
# - ensure_multilang: plain strings become {"text": ..., "lang": "en"} literals
# - ensure_array: single values are wrapped in a list
# - map_uuid_uri: IRIs are replaced by "Category:OSW<uuid>" titles
# - remove_unnamed: references to blank nodes ("_:...") are dropped
ensure_multilang = ["label", "prefLabel", "altLabel", "comment", "description"]
ensure_array = [
    "label",
    "prefLabel",
    "altLabel",
    "comment",
    "description",
    "subClassOf",
]
map_uuid_uri = []
remove_unnamed = ["subClassOf"]  # , 'equivalentClass']

# compiled once; strips every character that is not a hex digit
_NON_HEX = re.compile(r"[^A-Fa-f0-9]")


def _uuid_from_iri(iri: str) -> str:
    """Return the UUID string formed by the last 32 hex digits contained in ``iri``."""
    return str(UUID(_NON_HEX.sub("", iri)[-32:]))


def _with_default_lang(value):
    """Return ``value`` as a text/lang literal, using "en" when no language is given.

    Strings are wrapped into a new dict; dicts that have "text" but no "lang"
    get "lang" added in place; anything else is returned unchanged.
    """
    if isinstance(value, str):
        return {"text": value, "lang": "en"}
    if isinstance(value, dict) and "text" in value and "lang" not in value:
        value["lang"] = "en"
    return value


def _is_blank_node_ref(value) -> bool:
    """Return True if ``value`` is a string referencing a blank node ("_:...")."""
    return isinstance(value, str) and value.startswith("_:")


def postprocess_node(node: dict) -> None:
    """Normalize one node of the compacted JSON-LD graph in place.

    The passes run in this order: multi-language literals, array wrapping,
    IRI -> category title mapping, blank-node removal. Nodes whose
    ``rdf_type`` is "owl:Class" additionally get a ``uuid`` derived from
    their ``uri`` and a ``name`` taken from their first label.
    """
    for key in ensure_multilang:
        if key not in node:
            continue
        if isinstance(node[key], list):
            node[key] = [_with_default_lang(item) for item in node[key]]
        else:
            node[key] = _with_default_lang(node[key])

    for key in ensure_array:
        if key in node and not isinstance(node[key], list):
            node[key] = [node[key]]

    for key in map_uuid_uri:
        if key not in node:
            continue
        if isinstance(node[key], list):
            node[key] = ["Category:OSW" + _uuid_from_iri(item) for item in node[key]]
        elif isinstance(node[key], str):
            # a single IRI is mapped directly (no list index involved)
            node[key] = "Category:OSW" + _uuid_from_iri(node[key])

    for key in remove_unnamed:
        if key not in node:
            continue
        if isinstance(node[key], list):
            node[key] = [item for item in node[key] if not _is_blank_node_ref(item)]
        elif _is_blank_node_ref(node[key]):
            del node[key]

    if node.get("rdf_type") == "owl:Class":
        node["uuid"] = _uuid_from_iri(node["uri"])

        # NOTE(review): the name derivation is assumed to apply to owl:Class
        # nodes only - confirm against the intended nesting.
        if "prefLabel" in node:
            node["name"] = node["prefLabel"][0]["text"]
        elif "label" in node:
            node["name"] = node["label"][0]["text"]
        else:
            print("No label: ", node["uri"])


# postprocess json-ld
for node in compacted["@graph"]:
    postprocess_node(node)

# Write the postprocessed JSON-LD document to disk (UTF-8, non-ASCII kept as is).
with open("BVCO.compacted.jsonld", "w", encoding="utf-8") as jsonld_file:
    jsonld_file.write(json.dumps(compacted, indent=4, ensure_ascii=False))

# optional: also serialize as ttl, by parsing the file written above
g2 = Graph()
g2.parse("BVCO.compacted.jsonld")
g2.serialize(format="ttl", destination="BVCO.jsonld.ttl")

# create OSW entities: one EmmoTerm per labelled owl:Class node, at most `limit`
limit = 3000  # choose a smaller number for tests
entities = []
for node in compacted["@graph"]:
    if len(entities) >= limit:
        # limit reached, no need to scan the remaining nodes
        break
    if node.get("rdf_type") == "owl:Class" and "label" in node:
        entities.append(model.EmmoTerm(**node))

# define ontology metadata: one keyword set per ontology, in import order
_ontology_specs = [
    {
        "name": "EMMO",
        "iri": "http://emmo.info/emmo",
        "prefix": "http://emmo.info/emmo#",
        "prefix_name": "emmo",
        "link": "https://github.com/emmo-repo/EMMO",
    },
    {
        "name": "EMMO BattINFO",
        "iri": "http://emmo.info/battery",
        "prefix": "http://emmo.info/battery#",
        "prefix_name": "battinfo",
        "link": "https://github.com/BIG-MAP/BattINFO",
    },
    {
        "name": "EMMO Electrochemistry",
        "iri": "http://emmo.info/electrochemistry",
        "prefix": "http://emmo.info/electrochemistry#",
        "prefix_name": "electrochemistry",
        "link": "https://github.com/emmo-repo/EMMO",
    },
    {
        "name": "EMMO Periodic Table",
        "iri": "http://emmo.info/emmo/domain/periodic-table",
        "prefix": "http://emmo.info/emmo/domain/periodic-table#",
        "prefix_name": "periodictable",
        "link": "https://github.com/emmo-repo/EMMO",
    },
    {
        "name": "GPO",
        "iri": "https://gpo.ontology.link",
        "prefix": "https://gpo.ontology.link/",
        "prefix_name": "gpo",
        "link": "https://github.com/General-Process-Ontology/ontology",
    },
    {
        "name": "BVCO",
        "iri": "https://bvco.ontology.link",
        "prefix": "https://bvco.ontology.link/",
        "prefix_name": "bvco",
        "link": "https://github.com/Battery-Value-Chain-Ontology/ontology",
    },
]

ontologies = [model.Ontology(**spec) for spec in _ontology_specs]
# keep the individual ontology objects available under their own names
emmo, battinfo, electrochemistry, periodictable, gpo, bvco = ontologies

# import ontologies
import_param = OSW.ImportOntologyParam(ontologies=ontologies, entities=entities)
osw.import_ontology(import_param)
81 changes: 76 additions & 5 deletions src/osw/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,7 @@ def fetch_schema(self, fetchSchemaParam: FetchSchemaParam = None) -> None:
header = (
"from uuid import uuid4\n"
"from typing import TYPE_CHECKING, Type, TypeVar\n"
"from osw.model.static import OswBaseModel\n"
"from osw.model.static import OswBaseModel, Ontology\n"
"\n"
"if TYPE_CHECKING:\n"
" from dataclasses import dataclass as _basemodel_decorator\n"
Expand Down Expand Up @@ -466,20 +466,35 @@ def load_entity(self, entity_title) -> model.Entity:

return entity

def store_entity(self, entity: Union[model.Entity, List[model.Entity]]) -> None:
class StoreEntityParam(model.OswBaseModel):
    """Parameter object accepted by ``store_entity``."""

    # a single entity or a list of entities to store
    entities: Union[model.Entity, List[model.Entity]]
    # namespace used as page title prefix ("<namespace>:<osw id>"); when left
    # as None, store_entity falls back to "Item" for model.Item instances
    namespace: Optional[str] = None

def store_entity(
self, param: Union[StoreEntityParam, model.Entity, List[model.Entity]]
) -> None:
"""stores the given datasclass instance as OSW page by calling BaseModel.json()
Parameters
----------
entity
the datasclass instance or a list of instances
StoreParam, the datasclass instance or a list of instances
"""

namespace = None
entity = param
if isinstance(param, OSW.StoreEntityParam):
entity = param.entities
namespace = param.namespace

if not isinstance(entity, list):
entity = [entity]
max_index = len(entity)
for index, e in enumerate(entity):
if isinstance(e, model.Item):
entity_title = "Item:" + OSW.get_osw_id(e.uuid)
if namespace is None and isinstance(e, model.Item):
namespace = "Item"
if namespace is not None:
entity_title = namespace + ":" + OSW.get_osw_id(e.uuid)
page = self.site.get_WtPage(entity_title)
jsondata = json.loads(
e.json(exclude_none=True)
Expand Down Expand Up @@ -519,3 +534,59 @@ def delete_entity(self, entity, comment: str = None):
print("Entity deleted: " + page.get_url())
else:
print(f"Entity '{entity_title}' does not exist!")

class _ImportOntologyParam(model.OswBaseModel):
    """Parameter object for ``_import_ontology``: one ontology and its terms."""

    # the ontology whose import page is written
    ontology: model.Ontology
    # entities listed on the import page and stored in the "Category" namespace
    entities: List[model.Entity]
    # optional; not read by _import_ontology as shown here
    properties: Optional[List[model.Entity]] = None

def _import_ontology(self, param: _ImportOntologyParam):
    """Write the import page of a single ontology and store its entities.

    The page "MediaWiki:Smw_import_<prefix_name>" gets a header line
    "<prefix>|[<link> <name>]" followed by one " <local name>|Category"
    line per entity. Afterwards all entities are stored in the "Category"
    namespace via ``store_entity``.

    Parameters
    ----------
    param
        the ontology and the entities that belong to it
    """
    ontology = param.ontology
    import_page = self.site.get_WtPage(
        "MediaWiki:Smw_import_" + ontology.prefix_name
    )

    lines = [f"{ontology.prefix}|[{ontology.link} {ontology.name}]"]
    for e in param.entities:
        # `uri` takes precedence over `iri` when an entity has both attributes
        iri = getattr(e, "iri", None)
        if hasattr(e, "uri"):
            iri = e.uri
        if iri is None:
            print("Error: Entity has not iri/uri property")
            continue
        # strip only a leading ontology prefix; later occurrences of the
        # same text inside the IRI must stay untouched
        if iri.startswith(ontology.prefix):
            local_name = iri[len(ontology.prefix):]
        else:
            local_name = iri
        lines.append(f" {local_name}|Category")

    import_page.set_slot_content("main", "\n".join(lines))
    import_page.edit("import ontology")

    self.store_entity(
        OSW.StoreEntityParam(namespace="Category", entities=param.entities)
    )

class ImportOntologyParam(model.OswBaseModel):
    """Parameter object for ``import_ontology``."""

    # entities to import; import_ontology groups them by the prefix of their `uri`
    entities: List[model.Entity]
    # ontology definitions; matched to the entity groups via their `prefix`
    ontologies: List[model.Ontology]

def import_ontology(self, param: ImportOntologyParam):
    """Import entities grouped by the ontology their URI belongs to.

    Every entity is assigned to a prefix derived from its ``uri``: the part
    up to and including "#" if the URI contains one, otherwise the part up
    to and including the last "/". For each prefix the ontology with the
    same ``prefix`` is looked up and imported together with its entities;
    prefixes without a matching ontology are reported and skipped.

    Parameters
    ----------
    param
        the entities to import and the known ontologies
    """
    entities_by_prefix = {}
    for e in param.entities:
        if "#" in e.uri:
            prefix = e.uri.split("#")[0] + "#"
        else:
            # cut after the last "/" instead of removing the last segment by
            # text replacement, which would also hit earlier occurrences
            prefix = e.uri[: e.uri.rfind("/") + 1]
        entities_by_prefix.setdefault(prefix, []).append(e)

    # if several ontologies share a prefix, the last one listed is used
    ontology_by_prefix = {o.prefix: o for o in param.ontologies}

    for prefix, entities in entities_by_prefix.items():
        onto = ontology_by_prefix.get(prefix)
        if onto is None:
            print(f"Error: No ontology defined for prefix {prefix}")
            continue
        self._import_ontology(
            OSW._ImportOntologyParam(ontology=onto, entities=entities)
        )
2 changes: 1 addition & 1 deletion src/osw/model/entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class LangCode(Enum):
from typing import TYPE_CHECKING, Type, TypeVar
from uuid import uuid4

from osw.model.static import OswBaseModel
from osw.model.static import Ontology, OswBaseModel

if TYPE_CHECKING:
from dataclasses import dataclass as _basemodel_decorator
Expand Down
8 changes: 8 additions & 0 deletions src/osw/model/static.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,11 @@ def test_if_empty_list_or_none(obj) -> bool:
if not test_if_empty_list_or_none(v)
}
)


class Ontology(OswBaseModel):
    """Metadata describing an ontology whose terms can be imported."""

    # IRI of the ontology itself, e.g. "http://emmo.info/emmo"
    iri: str
    # leading part shared by the IRIs of the ontology's terms,
    # e.g. "http://emmo.info/emmo#"
    prefix: str
    # display name of the ontology
    name: str
    # short name of the prefix, e.g. "emmo"
    prefix_name: str
    # URL of a page with more information, e.g. the source repository
    link: str

0 comments on commit 385e451

Please sign in to comment.