Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Augment as many SWEET owl:Class resources as possible with a skos:definition from Wikidata #5

Merged
merged 8 commits into from
Jul 17, 2020
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,9 @@ sweet-alignment-manager/AML-Jar
sweet-alignment-manager/sweet
sweet-alignment-manager/target/*
sweet-alignment-manager/alignments/*
sc/target*
sc/augment_wikidata_definitions/project/target*
sc/augment_wikidata_definitions/target*
.DS_Store
target
sc/augment_wikidata_definitions/.*
11 changes: 11 additions & 0 deletions .project
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>sweet-tools</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
</buildSpec>
<natures>
</natures>
</projectDescription>
11 changes: 11 additions & 0 deletions sc/augment_wikidata_definitions/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@

Install sbt - https://www.scala-sbt.org/

Launch sbt:

$ sbt

Run the program with one argument: the directory containing the Turtle (`.ttl`) files to augment. Each file is rewritten in place:

> run path/to/directory/with/turtle/files

25 changes: 25 additions & 0 deletions sc/augment_wikidata_definitions/build.sbt
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// sbt build definition for the Wikidata-definition augmentation tool.

// Both Jena artifacts are kept on the same release.
val jenaVersion = "3.15.0"

organization := "org.esipfed"
organizationName := "sweet"
version := "0.1.0-SNAPSHOT"
scalaVersion := "2.13.1"

lazy val root = project
  .in(file("."))
  .settings(
    name := "augment_wikidata_definitions",
    libraryDependencies ++= Seq(
      // The log4j binding is excluded here; slf4j-nop below is the binding supplied instead.
      ("org.apache.jena" % "jena-core" % jenaVersion).exclude("org.slf4j", "slf4j-log4j12"),

      // Per https://jena.apache.org/download/maven.html:
      // "...use of <type>pom</type> ... does not work in all tools.
      // An alternative is to depend on jena-tdb, which will pull in the other artifacts."
      "org.apache.jena" % "jena-tdb" % jenaVersion,

      "org.slf4j" % "slf4j-nop" % "1.7.25",

      //"com.github.owlcs" % "ontapi" % "2.0.0",

      "net.sourceforge.owlapi" % "owlapi-distribution" % "5.1.15"
    )
  )

1 change: 1 addition & 0 deletions sc/augment_wikidata_definitions/project/build.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
sbt.version=1.3.8
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
package sweet.tools

import java.io.{File, FileOutputStream}
import java.time.LocalDateTime
import java.util
import java.util.Optional

import org.apache.jena.query.QueryExecutionFactory
import org.apache.jena.rdf.model._
import org.semanticweb.owlapi.formats
import org.semanticweb.owlapi.model._
import org.semanticweb.owlapi.search.EntitySearcher
import org.semanticweb.owlapi.vocab.OWL2Datatype

/**
 * Command-line tool that augments the `owl:Class` resources of Turtle ontologies with a
 * `skos:definition` built from the matching Wikidata description.
 *
 * For every `.ttl` file directly under the given directory the tool:
 *  - replaces the `owl:versionInfo` annotation "3.5.0" with "3.6.0" and sets the ontology
 *    version IRI to `<ontology IRI>/3.6.0`;
 *  - takes the first `rdfs:label` of every class in the ontology signature and looks it up
 *    on the Wikidata SPARQL endpoint;
 *  - when a description is found, links the class through `skos:definition` to a blank node
 *    carrying `rdfs:comment`, `prov:wasDerivedFrom`, `dcterms:source`, `dcterms:created`
 *    and `dcterms:creator`;
 *  - writes the ontology back to the same file in Turtle format.
 *
 * NOTE: the file-level wildcard import of `org.apache.jena.rdf.model._` brings Jena's `Seq`
 * into scope, so this object deliberately uses `List`/`Vector` instead of a bare `Seq`.
 */
object augment_wikidata_definitions {

  import org.semanticweb.owlapi.apibinding.OWLManager
  import scala.jdk.CollectionConverters._
  import scala.jdk.OptionConverters._

  private val OldVersion = "3.5.0"
  private val NewVersion = "3.6.0"
  private val WikidataEndpoint = "https://query.wikidata.org/sparql"
  private val CreatorIri = "https://orcid.org/0000-0003-2185-928X"

  /**
   * Program entry point.
   *
   * @param args command-line arguments; the first one is the directory holding the `.ttl` files
   */
  def main(args: Array[String]): Unit =
    args.headOption match {
      case Some(inDir) => main(inDir)
      case None        => println("Missing sweet directory argument")
    }

  /**
   * Augments every `.ttl` file directly under `inDir`, in file-name order.
   *
   * @param inDir path of the directory to scan (not recursive)
   */
  def main(inDir: String): Unit = {
    val dirFile = new File(inDir)

    println(s"listing .ttl files under ${dirFile.getCanonicalPath}")

    // File.listFiles() returns null when the path is not a readable directory.
    Option(dirFile.listFiles()) match {
      case None =>
        println(s"ERROR: ${dirFile.getCanonicalPath} is not a readable directory")
      case Some(entries) =>
        entries
          .filter(_.getName.endsWith(".ttl"))
          .sortBy(_.getName)
          .foreach(augmentFile)
    }
  }

  /** Loads one Turtle file, applies the version bump and the definitions, and saves it in place. */
  private def augmentFile(file: File): Unit = {
    println(s" loading ${file.getCanonicalPath}")
    // A new manager is created for every file.
    val manager = OWLManager.createOWLOntologyManager()
    loadModel(file, manager).foreach { ontology =>
      ontology.getOntologyID.getOntologyIRI.toScala match {
        case None =>
          // The version IRI and the default prefix are both derived from the ontology IRI.
          println(s"ERROR: $file: ontology has no IRI, skipping")
        case Some(ontologyIri) =>
          manager.applyChanges(collectChanges(ontology, ontologyIri, manager.getOWLDataFactory))
          saveAsTurtle(file, manager, ontology, ontologyIri)
      }
      manager.clearOntologies()
    }
  }

  /**
   * Builds the full change list for one ontology: the version bump first, then one group of
   * annotation axioms per class for which Wikidata returned a description.
   */
  private def collectChanges(
      ontology: OWLOntology,
      ontologyIri: IRI,
      df: OWLDataFactory): util.List[OWLOntologyChange] = {
    val versionInfo = df.getOWLVersionInfo
    val versionIri = IRI.create(s"${ontologyIri.getIRIString}/$NewVersion")
    val versionChanges: List[OWLOntologyChange] = List(
      new RemoveOntologyAnnotation(
        ontology, df.getOWLAnnotation(versionInfo, df.getOWLLiteral(OldVersion))),
      new AddOntologyAnnotation(
        ontology, df.getOWLAnnotation(versionInfo, df.getOWLLiteral(NewVersion))),
      new SetOntologyID(ontology, new OWLOntologyID(ontologyIri, versionIri))
    )

    println(s" getting class resources")
    val definitionChanges: Vector[OWLOntologyChange] =
      ontology.classesInSignature().iterator().asScala.toVector
        .flatMap(owlClass => definitionAxioms(df, ontology, owlClass))
        .map(axiom => new AddAxiom(ontology, axiom))

    new util.ArrayList[OWLOntologyChange]((versionChanges ++ definitionChanges).asJava)
  }

  /**
   * Returns the annotation axioms describing `owlClass`, or an empty list when the class has
   * no literal `rdfs:label` or Wikidata has no matching description.
   */
  private def definitionAxioms(
      df: OWLDataFactory,
      ontology: OWLOntology,
      owlClass: OWLClass): List[OWLAnnotationAssertionAxiom] = {
    println(s" class resource: ${owlClass.toString}")

    // Read the lexical form from the literal itself instead of slicing its toString, which
    // only worked for literals rendered exactly as "..."@en.
    val label: Option[String] = for {
      annotation <- EntitySearcher.getAnnotations(owlClass, ontology, df.getRDFSLabel()).findFirst().toScala
      literal <- annotation.literalValue().toScala
    } yield literal.getLiteral

    label.toList.flatMap { trimmedLabel =>
      println(s" label statement: ${trimmedLabel}")
      executeWikidataDescriptionQuery(trimmedLabel).toList.flatMap {
        case (entityIri, description) =>
          // Blank node that carries the definition text and its provenance.
          val definitionNode = df.getOWLAnonymousIndividual()
          val wikidataEntity = IRI.create(entityIri)
          // ISO_LOCAL_DATE_TIME always prints the seconds field, which xsd:dateTime requires;
          // LocalDateTime.toString drops it when seconds and nanos are both zero.
          val createdAt = java.time.format.DateTimeFormatter.ISO_LOCAL_DATE_TIME
            .format(LocalDateTime.now())

          def property(iri: String): OWLAnnotationProperty = df.getOWLAnnotationProperty(iri)

          List(
            // skos:definition
            df.getOWLAnnotationAssertionAxiom(
              property("http://www.w3.org/2004/02/skos/core#definition"),
              owlClass.getIRI, definitionNode),
            // rdfs:comment
            df.getOWLAnnotationAssertionAxiom(
              df.getRDFSComment(), definitionNode, df.getOWLLiteral(description, "en")),
            // prov:wasDerivedFrom
            df.getOWLAnnotationAssertionAxiom(
              property("http://www.w3.org/ns/prov#wasDerivedFrom"), definitionNode, wikidataEntity),
            // dcterms:source
            df.getOWLAnnotationAssertionAxiom(
              property("http://purl.org/dc/terms/source"), definitionNode, wikidataEntity),
            // dcterms:created
            df.getOWLAnnotationAssertionAxiom(
              property("http://purl.org/dc/terms/created"), definitionNode,
              df.getOWLLiteral(createdAt, OWL2Datatype.XSD_DATE_TIME)),
            // dcterms:creator
            df.getOWLAnnotationAssertionAxiom(
              property("http://purl.org/dc/terms/creator"), definitionNode, IRI.create(CreatorIri))
          )
      }
    }
  }

  /** Serialises `ontology` as Turtle over `file`, always closing the output stream. */
  private def saveAsTurtle(
      file: File,
      manager: OWLOntologyManager,
      ontology: OWLOntology,
      ontologyIri: IRI): Unit = {
    val format = new formats.TurtleDocumentFormat()
    format.setDefaultPrefix(ontologyIri.getIRIString + "/")
    format.setPrefix("dcterms", "http://purl.org/dc/terms/")
    format.setPrefix("prov", "http://www.w3.org/ns/prov#")
    format.setPrefix("skos", "http://www.w3.org/2004/02/skos/core#")
    tryWith(new FileOutputStream(file)) { out =>
      manager.saveOntology(ontology, format, out)
    }
  }

  /**
   * Loads `file` through `manager`.
   *
   * @return the ontology, or `None` (after printing the error) when loading throws
   */
  private def loadModel(file: File, manager: OWLOntologyManager): Option[OWLOntology] =
    try Some(manager.loadOntologyFromOntologyDocument(file.getAbsoluteFile))
    catch {
      case e: Exception =>
        println(s"ERROR: $file: ${e.getMessage}")
        None
    }

  /**
   * Asks the Wikidata SPARQL endpoint for an entity whose English label equals `trimmedLabel`.
   *
   * @return `(entity IRI, English description)` taken from the first solution, or `None` when
   *         the query has no results; exceptions raised by the query are not caught here
   */
  private def executeWikidataDescriptionQuery(trimmedLabel: String): Option[(String, String)] = {
    val query = getWikidataDescriptionQuery(trimmedLabel)
    tryWith(QueryExecutionFactory.sparqlService(WikidataEndpoint, query)) { qexec =>
      val results = qexec.execSelect()
      if (results.hasNext) {
        val soln = results.next()
        Some((soln.getResource("s").toString, soln.getLiteral("o").getLexicalForm))
      } else None
    }
  }

  /**
   * Builds the SPARQL query selecting entities labelled `trimmedLabel`@en together with their
   * English `schema:description` values longer than 15 characters.
   */
  private def getWikidataDescriptionQuery(trimmedLabel: String): String = {
    val escapedLabel = escapeSparqlString(trimmedLabel)
    s"""PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
       |PREFIX schema: <http://schema.org/>
       |SELECT * WHERE {
       | ?s rdfs:label "$escapedLabel"@en .
       | ?s schema:description ?o .
       | FILTER(LANGMATCHES(LANG(?o), "en"))
       | FILTER(STRLEN(?o) > 15)
       |}
       |""".stripMargin
  }

  /** Escapes the characters that cannot appear raw inside a double-quoted SPARQL string. */
  private def escapeSparqlString(raw: String): String =
    raw.flatMap {
      case '\\' => "\\\\"
      case '"'  => "\\\""
      case '\n' => "\\n"
      case '\r' => "\\r"
      case '\t' => "\\t"
      case c    => c.toString
    }

  /** Lexical form of a literal or URI of a resource. Not currently called in this object. */
  private def getValueAsString(node: RDFNode): String = node match {
    case lit: Literal  => lit.getLexicalForm
    case res: Resource => res.getURI
  }

  /**
   * Runs `doWork` on `resource` and closes the resource afterwards, whether or not
   * `doWork` throws.
   */
  private def tryWith[R, T <: AutoCloseable](resource: T)(doWork: T => R): R =
    try doWork(resource)
    finally {
      if (resource != null) resource.close()
    }
}