Skip to content
This repository has been archived by the owner on Oct 8, 2020. It is now read-only.

Commit

Permalink
Added RDF/JSON syntax support for writing OWL data
Browse files Browse the repository at this point in the history
  • Loading branch information
patrickwestphal committed Mar 6, 2020
1 parent 4802ee7 commit fa97f40
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import org.apache.spark.sql.SparkSession
import org.semanticweb.owlapi.formats._
import org.semanticweb.owlapi.model.OWLDocumentFormat
import net.sansa_stack.owl.spark.rdd._
import net.sansa_stack.owl.spark.writers.{DLSyntaxWriter, KRSS2SyntaxWriter, KRSSSyntaxWriter, OBOWriter, OWLFunctionalSyntaxWriter}
import net.sansa_stack.owl.spark.writers.{DLSyntaxWriter, KRSS2SyntaxWriter, KRSSSyntaxWriter, OBOWriter, OWLFunctionalSyntaxWriter, RDFJSonWriter}


/**
  * Exception for reporting an OWL format that is not recognised.
  *
  * @param msg human-readable description of the problem; forwarded to
  *            [[Exception]] so that `getMessage` and stack traces include it
  */
class UnknownOWLFormatException(msg: String) extends Exception(msg)
Expand Down Expand Up @@ -181,8 +181,7 @@ package object owl {
case _: KRSSDocumentFormat => KRSSSyntaxWriter.save(path, axioms)
case _: DLSyntaxDocumentFormat => DLSyntaxWriter.save(path, axioms)
case _: OBODocumentFormat => OBOWriter.save(path, axioms)
case format: RDFJsonDocumentFormat =>
throw new NotImplementedError(s"Support for ${format.getClass.getName} not implemented, yet")
case _: RDFJsonDocumentFormat => RDFJSonWriter.save(path, axioms)
case format: RDFJsonLDDocumentFormat =>
throw new NotImplementedError(s"Support for ${format.getClass.getName} not implemented, yet")
case format: NQuadsDocumentFormat =>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package net.sansa_stack.owl.spark.writers

import java.io.{BufferedWriter, ByteArrayOutputStream, OutputStreamWriter, PrintWriter}
import java.util.Collections

import scala.collection.JavaConverters._

import org.eclipse.rdf4j.rio.RDFFormat
import org.eclipse.rdf4j.rio.rdfjson.RDFJSONWriter
import org.semanticweb.owlapi.apibinding.OWLManager
import org.semanticweb.owlapi.formats.RDFJsonDocumentFormat
import org.semanticweb.owlapi.model.{OWLAxiom, OWLOntology}
import org.semanticweb.owlapi.rio.RioRenderer

import net.sansa_stack.owl.spark.rdd.OWLAxiomsRDD


/**
  * A [[RioRenderer]] for RDF/JSON output whose ontology header rendering is
  * disabled.
  *
  * NOTE(review): presumably the header is suppressed because the renderer is
  * run on many small ontologies whose output is concatenated, so a header per
  * snippet would be unwanted -- confirm against the base class behaviour.
  *
  * @param ont           the ontology to render
  * @param rdfJsonWriter the RDF/JSON writer the rendered output is sent to
  * @param docFormat     the document format handed to the base renderer
  */
protected class SANSARioRenderer(ont: OWLOntology, rdfJsonWriter: RDFJSONWriter, docFormat: RDFJsonDocumentFormat)
  extends RioRenderer(ont, rdfJsonWriter, docFormat) {

  /** Intentionally a no-op: no ontology header is written. */
  override def renderOntologyHeader(): Unit = ()
}

/**
  * Writer that serializes an [[OWLAxiomsRDD]] as RDF/JSON text.
  */
object RDFJSonWriter extends OWLWriterBase {
  /**
    * Renders every axiom to an RDF/JSON snippet and saves the result as text.
    *
    * Each axiom is wrapped in its own single-axiom ontology and rendered into
    * an in-memory buffer. The outermost curly braces of every rendered
    * snippet are removed, blank node IDs of the form `_:genidN` get the
    * partition index appended, and trailing whitespace is trimmed. All
    * snippets of one partition are joined with commas into a single output
    * record; empty partitions produce no record.
    *
    * @param filePath  target path handed to `saveAsTextFile`
    * @param owlAxioms the axioms to serialize
    */
  override def save(filePath: String, owlAxioms: OWLAxiomsRDD): Unit = {
    import java.nio.charset.StandardCharsets

    owlAxioms.mapPartitionsWithIndex((idx: Int, partition: Iterator[OWLAxiom]) =>
      if (partition.hasNext) {
        val snippets = partition.map(axiom => {
          val os = new ByteArrayOutputStream()
          // Encode explicitly as UTF-8: the buffer is decoded as UTF-8 below,
          // so relying on the platform default charset would corrupt
          // non-ASCII characters on systems with a different default.
          val osWriter = new OutputStreamWriter(os, StandardCharsets.UTF_8)

          // The renderer works on ontologies, so wrap the single axiom in one.
          val man = OWLManager.createOWLOntologyManager()
          val ont = man.createOntology(Seq(axiom).asJava)

          val renderer = new SANSARioRenderer(
            ont,
            new RDFJSONWriter(osWriter, RDFFormat.RDFJSON),
            new RDFJsonDocumentFormat)

          renderer.render()
          // Push everything still buffered in the writer into the byte buffer.
          osWriter.flush()

          os.toString(StandardCharsets.UTF_8.name())
            .replaceFirst("\\{", "") // remove this snippet's opening curly brace
            .reverse.replaceFirst("\\}", "").reverse // remove this snippet's closing curly brace
            // make blank node IDs unique across partitions (by appending the partition ID)
            .replaceAll("_:genid([0-9]+)", "_:genid$1" + s"_$idx")
            .replaceAll("\\s+$", "") // trim end
        })

        // One output record per non-empty partition.
        Iterator.single(snippets.mkString(","))
      } else {
        Iterator.empty
      }).saveAsTextFile(filePath)
  }
}

0 comments on commit fa97f40

Please sign in to comment.