diff --git a/build.sbt b/build.sbt index 8a16e78..664927e 100644 --- a/build.sbt +++ b/build.sbt @@ -6,7 +6,7 @@ resolvers += "Sonatype OSS Snapshots" at "https://s01.oss.sonatype.org/content/repositories/snapshots" lazy val jenaV = "5.3.0" -lazy val jellyV = "2.8.0" +lazy val jellyV = "2.9.1" addCommandAlias("fixAll", "scalafixAll; scalafmtAll") diff --git a/src/main/scala/eu/neverblink/jelly/cli/ErrorHandler.scala b/src/main/scala/eu/neverblink/jelly/cli/ErrorHandler.scala index 676d08b..f3f0591 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/ErrorHandler.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/ErrorHandler.scala @@ -12,7 +12,7 @@ object ErrorHandler: case e: Throwable => command.printLine("Unknown error", toStderr = true) printStackTrace(command, t) - command.exit(1) + command.exit(1, t) /** Print out stack trace or debugging information * @param command diff --git a/src/main/scala/eu/neverblink/jelly/cli/Exceptions.scala b/src/main/scala/eu/neverblink/jelly/cli/Exceptions.scala index 9ba65cf..c741178 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/Exceptions.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/Exceptions.scala @@ -28,6 +28,15 @@ case class InvalidFormatSpecified(format: String, validFormats: String) extends CriticalException( s"Invalid format option: \"$format\", needs to be one of ${validFormats}.", ) -case class ExitException(code: Int) extends CriticalException(s"Exiting with code $code.") +case class InvalidArgument(argument: String, argumentValue: String, message: Option[String] = None) + extends CriticalException( + s"Invalid value for argument $argument: \"$argumentValue\". " + message.getOrElse(""), + ) +case class ExitException( + code: Int, + cause: Option[Throwable] = None, +) extends CriticalException( + s"Exiting with code $code." 
+ cause.map(e => s" Cause: ${e.getMessage}").getOrElse(""), + ) class CriticalException(message: String) extends Exception(message) diff --git a/src/main/scala/eu/neverblink/jelly/cli/JellyCommand.scala b/src/main/scala/eu/neverblink/jelly/cli/JellyCommand.scala index 89657c7..7f648a7 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/JellyCommand.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/JellyCommand.scala @@ -6,18 +6,18 @@ import eu.neverblink.jelly.cli.util.IoUtil import java.io.* import scala.compiletime.uninitialized -case class JellyOptions( +case class JellyCommandOptions( @HelpMessage("Add to run command in debug mode") debug: Boolean = false, ) -trait HasJellyOptions: +trait HasJellyCommandOptions: @Recurse - val common: JellyOptions + val common: JellyCommandOptions -abstract class JellyCommand[T <: HasJellyOptions: {Parser, Help}] extends Command[T]: +abstract class JellyCommand[T <: HasJellyCommandOptions: {Parser, Help}] extends Command[T]: private var isTest = false - private var isDebug = false + private var options: Option[T] = None final protected[cli] var out = System.out final protected[cli] var err = System.err final protected[cli] var in = System.in @@ -44,21 +44,29 @@ abstract class JellyCommand[T <: HasJellyOptions: {Parser, Help}] extends Comman /** Check and set the values of all the general options repeating for every JellyCommand */ - private def setUpGeneralArgs(options: T, remainingArgs: RemainingArgs): Unit = - if options.common.debug then this.isDebug = true + private def setUpGeneralArgs(options: T): Unit = + this.options = Some(options) + + /** Returns the options set up for this command + */ + protected final def getOptions: T = options match { + case Some(value) => value + case None => + throw new CriticalException("Command tried to access options before they were set up") + } /** Makes sure that the repetitive options needed for every JellyCommand are set up before calling * the doRun method, which contains 
Command-specific logic */ final override def run(options: T, remainingArgs: RemainingArgs): Unit = - setUpGeneralArgs(options, remainingArgs) + setUpGeneralArgs(options) doRun(options, remainingArgs) /** This abstract method is the main entry point for every JellyCommand. It should be overridden * by Command-specific implementation, including logic needed for this specific object extendind * JellyCommand. */ - def doRun(options: T, remainingArgs: RemainingArgs): Unit + protected def doRun(options: T, remainingArgs: RemainingArgs): Unit /** Override to have custom error handling for Jelly commands */ @@ -71,7 +79,7 @@ abstract class JellyCommand[T <: HasJellyOptions: {Parser, Help}] extends Comman /** Returns information about whether the command is in debug mode (which returns stack traces of * every error) or not */ - final def isDebugMode: Boolean = this.isDebug + final def isDebugMode: Boolean = this.options.exists(_.common.debug) /** Runs the command in test mode from the outside app parsing level * @param args @@ -154,6 +162,11 @@ abstract class JellyCommand[T <: HasJellyOptions: {Parser, Help}] extends Comman } (inputStream, outputStream) + @throws[ExitException] + final def exit(code: Int, cause: Throwable): Nothing = + if isTest then throw ExitException(code, Some(cause)) + else exit(code) + @throws[ExitException] final override def exit(code: Int): Nothing = if isTest then throw ExitException(code) diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/Version.scala b/src/main/scala/eu/neverblink/jelly/cli/command/Version.scala index d92f61d..bd1a287 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/command/Version.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/command/Version.scala @@ -5,8 +5,8 @@ import eu.neverblink.jelly.cli.* case class VersionOptions( @Recurse - common: JellyOptions = JellyOptions(), -) extends HasJellyOptions + common: JellyCommandOptions = JellyCommandOptions(), +) extends HasJellyCommandOptions object Version extends 
JellyCommand[VersionOptions]: override def names: List[List[String]] = List( diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfCommand.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfCommand.scala index 26c8946..2696a0f 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfCommand.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfCommand.scala @@ -12,7 +12,7 @@ import java.io.{InputStream, OutputStream} /** This abstract class is responsible for the common logic in both RDF parsing commands */ -abstract class RdfCommand[T <: HasJellyOptions: {Parser, Help}, F <: RdfFormat](using +abstract class RdfCommand[T <: HasJellyCommandOptions: {Parser, Help}, F <: RdfFormat](using tt: TypeTest[RdfFormat, F], ) extends JellyCommand[T]: @@ -25,7 +25,7 @@ abstract class RdfCommand[T <: HasJellyOptions: {Parser, Help}, F <: RdfFormat]( lazy val printUtil: RdfCommandPrintUtil[F] /** The method responsible for matching the format to a given action */ - def matchToAction(option: F): Option[(InputStream, OutputStream) => Unit] + def matchFormatToAction(option: F): Option[(InputStream, OutputStream) => Unit] /** This method takes care of proper error handling and takes care of the parameter priorities in * matching the input to a given format conversion @@ -54,13 +54,13 @@ abstract class RdfCommand[T <: HasJellyOptions: {Parser, Help}, F <: RdfFormat]( if (fileName.isDefined) RdfFormat.inferFormat(fileName.get) else None (explicitFormat, implicitFormat) match { case (Some(f: F), _) => - matchToAction(f).get(inputStream, outputStream) - // If format explicitely defined but does not match any available actions or formats, we throw an error + matchFormatToAction(f).get(inputStream, outputStream) + // If format explicitly defined but does not match any available actions or formats, we throw an error case (_, _) if format.isDefined => throw InvalidFormatSpecified(format.get, printUtil.validFormatsString) case (_, Some(f: F)) => - 
matchToAction(f).get(inputStream, outputStream) - // If format not explicitely defined but implicitely not understandable we default to this + matchFormatToAction(f).get(inputStream, outputStream) + // If format not explicitly defined but implicitly not understandable we default to this case (_, _) => defaultAction(inputStream, outputStream) } } catch diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJelly.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJelly.scala index 6189769..b111488 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJelly.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJelly.scala @@ -16,14 +16,14 @@ object RdfFromJellyPrint extends RdfCommandPrintUtil[RdfFormat.Writeable]: case class RdfFromJellyOptions( @Recurse - common: JellyOptions = JellyOptions(), + common: JellyCommandOptions = JellyCommandOptions(), @ExtraName("to") outputFile: Option[String] = None, @ValueDescription("Output format.") @HelpMessage( RdfFromJellyPrint.helpMsg, ) @ExtraName("out-format") outputFormat: Option[String] = None, -) extends HasJellyOptions +) extends HasJellyCommandOptions object RdfFromJelly extends RdfCommand[RdfFromJellyOptions, RdfFormat.Writeable]: @@ -41,7 +41,7 @@ object RdfFromJelly extends RdfCommand[RdfFromJellyOptions, RdfFormat.Writeable] this.getIoStreamsFromOptions(remainingArgs.remaining.headOption, options.outputFile) parseFormatArgs(inputStream, outputStream, options.outputFormat, options.outputFile) - override def matchToAction( + override def matchFormatToAction( option: RdfFormat.Writeable, ): Option[(InputStream, OutputStream) => Unit] = option match diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfJellySerializationOptions.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfJellySerializationOptions.scala new file mode 100644 index 0000000..0160b54 --- /dev/null +++ 
b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfJellySerializationOptions.scala @@ -0,0 +1,63 @@ +package eu.neverblink.jelly.cli.command.rdf + +import caseapp.* +import eu.neverblink.jelly.cli.InvalidArgument +import eu.ostrzyciel.jelly.core.{JellyOptions, LogicalStreamTypeFactory} +import eu.ostrzyciel.jelly.core.proto.v1.{LogicalStreamType, RdfStreamOptions} + +/** Options for serializing in Jelly-RDF */ +case class RdfJellySerializationOptions( + @HelpMessage("Name of the output stream (in metadata). Default: (empty)") + `opt.streamName`: String = "", + @HelpMessage( + "Whether the stream may contain generalized triples, quads, or datasets. Default: true", + ) + `opt.generalizedStatements`: Boolean = true, + @HelpMessage("Whether the stream may contain RDF-star statements. Default: true") + `opt.rdfStar`: Boolean = true, + @HelpMessage( + "Maximum size of the name lookup table. Default: " + JellyOptions.bigStrict.maxNameTableSize, + ) + `opt.maxNameTableSize`: Int = JellyOptions.bigStrict.maxNameTableSize, + @HelpMessage( + "Maximum size of the prefix lookup table. Default: " + JellyOptions.bigStrict.maxPrefixTableSize, + ) + `opt.maxPrefixTableSize`: Int = JellyOptions.bigStrict.maxPrefixTableSize, + @HelpMessage( + "Maximum size of the datatype lookup table. Default: " + JellyOptions.bigStrict.maxDatatypeTableSize, + ) + `opt.maxDatatypeTableSize`: Int = JellyOptions.bigStrict.maxDatatypeTableSize, + @HelpMessage( + "Logical (RDF-STaX-based) stream type. This can be either a name like " + + "`FLAT_QUADS` or a full IRI like `https://w3id.org/stax/ontology#flatQuadStream`. 
" + + "Default: (unspecified)", + ) + `opt.logicalType`: Option[String] = None, +): + lazy val asRdfStreamOptions: RdfStreamOptions = + val logicalIri = `opt.logicalType` + .map(_.trim).filter(_.nonEmpty) + .map { + case x if x.startsWith("http") => x + case x if x.toUpperCase.endsWith("S") => + val words = x.substring(0, x.length - 1).split("_").map(_.toLowerCase) + val wordSeq = words.take(1) ++ words.drop(1).map(_.capitalize) // total: "_S" splits to no words + "https://w3id.org/stax/ontology#" + wordSeq.mkString + "Stream" + case _ => "" // invalid IRI, we'll catch it in the next step + } + val logicalType = logicalIri.flatMap(LogicalStreamTypeFactory.fromOntologyIri) + if logicalIri.isDefined && logicalType.isEmpty then + throw InvalidArgument( + "--opt.logical-type", + `opt.logicalType`.get, + Some("Logical type must be either a full RDF-STaX IRI or a name like `FLAT_QUADS`"), + ) + RdfStreamOptions( + streamName = `opt.streamName`, + generalizedStatements = `opt.generalizedStatements`, + rdfStar = `opt.rdfStar`, + maxNameTableSize = `opt.maxNameTableSize`, + maxPrefixTableSize = `opt.maxPrefixTableSize`, + maxDatatypeTableSize = `opt.maxDatatypeTableSize`, + logicalType = logicalType.getOrElse(LogicalStreamType.UNSPECIFIED), + ) diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala index 9915c50..cfde32b 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala @@ -4,7 +4,7 @@ import eu.neverblink.jelly.cli.* import eu.neverblink.jelly.cli.command.rdf.RdfFormat.* import eu.ostrzyciel.jelly.convert.jena.riot.JellyLanguage import org.apache.jena.riot.system.StreamRDFWriter -import org.apache.jena.riot.{Lang, RDFParser} +import org.apache.jena.riot.{Lang, RDFParser, RIOT} import java.io.{InputStream, OutputStream} @@ -13,14 +13,25 @@ object RdfToJellyPrint extends RdfCommandPrintUtil[RdfFormat.Jena.Readable]: 
case class RdfToJellyOptions( @Recurse - common: JellyOptions = JellyOptions(), + common: JellyCommandOptions = JellyCommandOptions(), @ExtraName("to") outputFile: Option[String] = None, @ValueDescription("Input format.") @HelpMessage( RdfToJellyPrint.helpMsg, ) @ExtraName("in-format") inputFormat: Option[String] = None, -) extends HasJellyOptions + @Recurse + jellySerializationOptions: RdfJellySerializationOptions = RdfJellySerializationOptions(), + @HelpMessage( + "Target number of rows per frame – the writer may slightly exceed that. Default: 256", + ) + rowsPerFrame: Int = 256, + @HelpMessage( + "Whether to preserve explicit namespace declarations in the output (PREFIX: in Turtle). " + + "Default: false", + ) + enableNamespaceDeclarations: Boolean = false, +) extends HasJellyCommandOptions object RdfToJelly extends RdfCommand[RdfToJellyOptions, RdfFormat.Jena.Readable]: @@ -34,6 +45,8 @@ object RdfToJelly extends RdfCommand[RdfToJellyOptions, RdfFormat.Jena.Readable] langToJelly(RdfFormat.NQuads.jenaLang, _, _) override def doRun(options: RdfToJellyOptions, remainingArgs: RemainingArgs): Unit = + // Touch the options to make sure they are valid + options.jellySerializationOptions.asRdfStreamOptions val (inputStream, outputStream) = getIoStreamsFromOptions(remainingArgs.remaining.headOption, options.outputFile) parseFormatArgs( @@ -43,10 +56,10 @@ object RdfToJelly extends RdfCommand[RdfToJellyOptions, RdfFormat.Jena.Readable] remainingArgs.remaining.headOption, ) - override def matchToAction( - option: RdfFormat.Jena.Readable, + override def matchFormatToAction( + format: RdfFormat.Jena.Readable, ): Option[(InputStream, OutputStream) => Unit] = - Some(langToJelly(option.jenaLang, _, _)) + Some(langToJelly(format.jenaLang, _, _)) /** This method reads the file, rewrites it to Jelly and writes it to some output stream * @param jenaLang @@ -61,5 +74,20 @@ object RdfToJelly extends RdfCommand[RdfToJellyOptions, RdfFormat.Jena.Readable] inputStream: InputStream, 
outputStream: OutputStream, ): Unit = - val jellyWriter = StreamRDFWriter.getWriterStream(outputStream, JellyLanguage.JELLY) + // Configure the writer + val writerContext = RIOT.getContext.copy() + .set( + JellyLanguage.SYMBOL_STREAM_OPTIONS, + getOptions.jellySerializationOptions.asRdfStreamOptions, + ) + .set(JellyLanguage.SYMBOL_FRAME_SIZE, getOptions.rowsPerFrame) + .set( + JellyLanguage.SYMBOL_ENABLE_NAMESPACE_DECLARATIONS, + getOptions.enableNamespaceDeclarations, + ) + val jellyWriter = StreamRDFWriter.getWriterStream( + outputStream, + JellyLanguage.JELLY, + writerContext, + ) RDFParser.source(inputStream).lang(jenaLang).parse(jellyWriter) diff --git a/src/test/scala/eu/neverblink/jelly/cli/command/RdfToJellySpec.scala b/src/test/scala/eu/neverblink/jelly/cli/command/RdfToJellySpec.scala deleted file mode 100644 index 3409902..0000000 --- a/src/test/scala/eu/neverblink/jelly/cli/command/RdfToJellySpec.scala +++ /dev/null @@ -1,88 +0,0 @@ -package eu.neverblink.jelly.cli.command - -import eu.neverblink.jelly.cli.{ExitException, InvalidFormatSpecified} -import eu.neverblink.jelly.cli.command.helpers.{DataGenHelper, TestFixtureHelper} -import eu.neverblink.jelly.cli.command.rdf.{RdfFormat, RdfToJelly, RdfToJellyPrint} -import eu.ostrzyciel.jelly.convert.jena.riot.JellyLanguage -import org.apache.jena.rdf.model.{Model, ModelFactory} -import org.scalatest.matchers.should.Matchers -import org.scalatest.wordspec.AnyWordSpec -import org.apache.jena.riot.RDFParser - -import java.io.{ByteArrayInputStream, FileInputStream, InputStream} -import scala.util.Using - -class RdfToJellySpec extends AnyWordSpec with TestFixtureHelper with Matchers: - - protected val testCardinality: Integer = 33 - - def translateJellyBack(inputStream: InputStream): Model = - Using(inputStream) { content => - val newModel = ModelFactory.createDefaultModel() - RDFParser.source(content).lang(JellyLanguage.JELLY).parse(newModel) - newModel - } match { - case scala.util.Success(value) => value - 
case scala.util.Failure(exception) => throw exception - } - - "rdf to-jelly command" should { - "handle conversion of NTriples to Jelly" when { - "a file to output stream" in withFullQuadFile { f => - val (out, err) = - RdfToJelly.runTestCommand(List("rdf", "to-jelly", f)) - val newIn = new ByteArrayInputStream(RdfToJelly.getOutBytes) - val content = translateJellyBack(newIn) - content.containsAll(DataGenHelper.generateTripleModel(testCardinality).listStatements()) - } - - "a file to file" in withFullQuadFile { f => - withEmptyJellyFile { j => - val (out, err) = - RdfToJelly.runTestCommand(List("rdf", "to-jelly", f, "--to", j)) - val content = translateJellyBack(new FileInputStream(j)) - content.containsAll(DataGenHelper.generateTripleModel(testCardinality).listStatements()) - } - } - "input stream to output stream" in { - val input = DataGenHelper.generateNQuadInputStream(testCardinality) - RdfToJelly.setStdIn(input) - val tripleModel = DataGenHelper.generateTripleModel(testCardinality) - val (out, err) = RdfToJelly.runTestCommand( - List("rdf", "to-jelly", "--in-format", RdfFormat.NQuads.cliOptions.head), - ) - val newIn = new ByteArrayInputStream(RdfToJelly.getOutBytes) - val content = translateJellyBack(newIn) - content.containsAll(tripleModel.listStatements()) - } - "an input stream to file" in withEmptyJellyFile { j => - val input = DataGenHelper.generateNQuadInputStream(testCardinality) - RdfToJelly.setStdIn(input) - val tripleModel = DataGenHelper.generateTripleModel(testCardinality) - val (out, err) = RdfToJelly.runTestCommand(List("rdf", "to-jelly", "--to", j)) - val content = translateJellyBack(new FileInputStream(j)) - content.containsAll(tripleModel.listStatements()) - } - } - "throw proper exception" when { - "invalid format is specified" in withFullQuadFile { f => - val exception = - intercept[ExitException] { - RdfToJelly.runTestCommand(List("rdf", "to-jelly", f, "--in-format", "invalid")) - } - val msg = InvalidFormatSpecified("invalid", 
RdfToJellyPrint.validFormatsString) - RdfToJelly.getErrString should include(msg.getMessage) - exception.code should be(1) - } - "invalid format out of existing is specified" in withFullQuadFile { f => - val exception = - intercept[ExitException] { - RdfToJelly.runTestCommand(List("rdf", "to-jelly", f, "--in-format", "jelly-text")) - } - val msg = InvalidFormatSpecified("jelly-text", RdfToJellyPrint.validFormatsString) - RdfToJelly.getErrString should include(msg.getMessage) - exception.code should be(1) - } - - } - } diff --git a/src/test/scala/eu/neverblink/jelly/cli/command/RdfFromJellySpec.scala b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJellySpec.scala similarity index 99% rename from src/test/scala/eu/neverblink/jelly/cli/command/RdfFromJellySpec.scala rename to src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJellySpec.scala index aa58018..6212d2c 100644 --- a/src/test/scala/eu/neverblink/jelly/cli/command/RdfFromJellySpec.scala +++ b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJellySpec.scala @@ -1,14 +1,13 @@ -package eu.neverblink.jelly.cli.command +package eu.neverblink.jelly.cli.command.rdf import com.google.protobuf.InvalidProtocolBufferException import eu.neverblink.jelly.cli.* import eu.neverblink.jelly.cli.command.helpers.* -import eu.neverblink.jelly.cli.command.rdf.* import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec -import java.nio.file.{Files, Paths} import java.nio.file.attribute.PosixFilePermissions +import java.nio.file.{Files, Paths} import scala.io.Source import scala.util.Using diff --git a/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala new file mode 100644 index 0000000..3b02271 --- /dev/null +++ b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala @@ -0,0 +1,227 @@ +package eu.neverblink.jelly.cli.command.rdf + +import 
eu.neverblink.jelly.cli.command.helpers.{DataGenHelper, TestFixtureHelper} +import eu.neverblink.jelly.cli.{ExitException, InvalidArgument, InvalidFormatSpecified} +import eu.ostrzyciel.jelly.convert.jena.riot.JellyLanguage +import eu.ostrzyciel.jelly.core.JellyOptions +import eu.ostrzyciel.jelly.core.proto.v1.{LogicalStreamType, RdfStreamFrame} +import org.apache.jena.rdf.model.{Model, ModelFactory} +import org.apache.jena.riot.RDFParser +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +import java.io.{ByteArrayInputStream, FileInputStream, InputStream} +import scala.util.Using + +class RdfToJellySpec extends AnyWordSpec with TestFixtureHelper with Matchers: + + protected val testCardinality: Integer = 33 + + def translateJellyBack(inputStream: InputStream): Model = + Using(inputStream) { content => + val newModel = ModelFactory.createDefaultModel() + RDFParser.source(content).lang(JellyLanguage.JELLY).parse(newModel) + newModel + } match { + case scala.util.Success(value) => value + case scala.util.Failure(exception) => throw exception + } + + def readJellyFile(inputStream: InputStream): Seq[RdfStreamFrame] = + Using(inputStream) { content => + Iterator.continually(RdfStreamFrame.parseDelimitedFrom(content)) + .takeWhile(_.nonEmpty) + .map(_.get) + .toSeq + } match { + case scala.util.Success(value) => value + case scala.util.Failure(exception) => throw exception + } + + "rdf to-jelly command" should { + "handle conversion of NTriples to Jelly" when { + "a file to output stream" in withFullQuadFile { f => + val (out, err) = + RdfToJelly.runTestCommand(List("rdf", "to-jelly", f)) + val newIn = new ByteArrayInputStream(RdfToJelly.getOutBytes) + val content = translateJellyBack(newIn) + content.containsAll(DataGenHelper.generateTripleModel(testCardinality).listStatements()) + } + + "a file to file" in withFullQuadFile { f => + withEmptyJellyFile { j => + val (out, err) = + RdfToJelly.runTestCommand(List("rdf", "to-jelly", 
"--to", j, f)) + val content = translateJellyBack(new FileInputStream(j)) + content.containsAll(DataGenHelper.generateTripleModel(testCardinality).listStatements()) + } + } + + "input stream to output stream" in { + val input = DataGenHelper.generateNQuadInputStream(testCardinality) + RdfToJelly.setStdIn(input) + val tripleModel = DataGenHelper.generateTripleModel(testCardinality) + val (out, err) = RdfToJelly.runTestCommand( + List("rdf", "to-jelly", "--in-format", RdfFormat.NQuads.cliOptions.head), + ) + val newIn = new ByteArrayInputStream(RdfToJelly.getOutBytes) + val content = translateJellyBack(newIn) + content.containsAll(tripleModel.listStatements()) + } + + "an input stream to file" in withEmptyJellyFile { j => + val input = DataGenHelper.generateNQuadInputStream(testCardinality) + RdfToJelly.setStdIn(input) + val tripleModel = DataGenHelper.generateTripleModel(testCardinality) + val (out, err) = RdfToJelly.runTestCommand(List("rdf", "to-jelly", "--to", j)) + val content = translateJellyBack(new FileInputStream(j)) + content.containsAll(tripleModel.listStatements()) + } + + "a file to file, modified stream options" in withFullQuadFile { f => + withEmptyJellyFile { j => + val (out, err) = + RdfToJelly.runTestCommand( + List( + "rdf", + "to-jelly", + f, + "--opt.stream-name=testName", + "--opt.generalized-statements=false", + "--opt.rdf-star=false", + "--opt.max-name-table-size=100", + "--opt.max-prefix-table-size=100", + "--opt.max-datatype-table-size=100", + "--opt.logical-type=FLAT_QUADS", + "--to", + j, + ), + ) + val content = translateJellyBack(new FileInputStream(j)) + content.containsAll(DataGenHelper.generateTripleModel(testCardinality).listStatements()) + val frames = readJellyFile(new FileInputStream(j)) + val opts = frames.head.rows.head.row.options + opts.streamName should be("testName") + opts.generalizedStatements should be(false) + opts.rdfStar should be(false) + opts.maxNameTableSize should be(100) + opts.maxPrefixTableSize should be(100) + 
opts.maxDatatypeTableSize should be(100) + opts.logicalType should be(LogicalStreamType.FLAT_QUADS) + opts.version should be(1) + } + } + + "a file to file, modified logical type with full IRI" in withFullQuadFile { f => + withEmptyJellyFile { j => + val (out, err) = + RdfToJelly.runTestCommand( + List( + "rdf", + "to-jelly", + f, + "--opt.logical-type=https://w3id.org/stax/ontology#flatQuadStream", + "--to", + j, + ), + ) + val content = translateJellyBack(new FileInputStream(j)) + content.containsAll(DataGenHelper.generateTripleModel(testCardinality).listStatements()) + val frames = readJellyFile(new FileInputStream(j)) + val opts = frames.head.rows.head.row.options + opts.streamName should be("") + opts.generalizedStatements should be(true) + opts.rdfStar should be(true) + opts.maxNameTableSize should be(JellyOptions.bigStrict.maxNameTableSize) + opts.maxPrefixTableSize should be(JellyOptions.bigStrict.maxPrefixTableSize) + opts.maxDatatypeTableSize should be(JellyOptions.bigStrict.maxDatatypeTableSize) + opts.logicalType should be(LogicalStreamType.FLAT_QUADS) + opts.version should be(1) + } + } + + "a file to file, lowered number of rows per frame" in withFullQuadFile { f => + withEmptyJellyFile { j => + val (out, err) = + RdfToJelly.runTestCommand( + List( + "rdf", + "to-jelly", + f, + "--rows-per-frame=10", + "--to", + j, + ), + ) + val content = translateJellyBack(new FileInputStream(j)) + content.containsAll(DataGenHelper.generateTripleModel(testCardinality).listStatements()) + val frames = readJellyFile(new FileInputStream(j)) + frames.size should be > 3 + for frame <- frames do + // The encoder may slightly overshoot the target if it needs to pack the lookup entries + // together with the triple. 
+ frame.rows.size should be <= 15 + } + } + + "a file to file, enabled namespace declarations" in withFullQuadFile { f => + withEmptyJellyFile { j => + val (out, err) = + RdfToJelly.runTestCommand( + List( + "rdf", + "to-jelly", + f, + "--enable-namespace-declarations", + "--to", + j, + ), + ) + val content = translateJellyBack(new FileInputStream(j)) + content.containsAll(DataGenHelper.generateTripleModel(testCardinality).listStatements()) + // Note: no actual namespace declarations are present in the test data, because it's + // N-Quads. + // TODO: test if the namespace declarations are preserved with Turtle or RDF/XML input. + val frames = readJellyFile(new FileInputStream(j)) + val opts = frames.head.rows.head.row.options + opts.version should be(2) + } + } + } + + "throw proper exception" when { + "invalid format is specified" in withFullQuadFile { f => + val e = + intercept[ExitException] { + RdfToJelly.runTestCommand(List("rdf", "to-jelly", f, "--in-format", "invalid")) + } + e.code should be(1) + e.cause.get shouldBe a[InvalidFormatSpecified] + val cause = e.cause.get.asInstanceOf[InvalidFormatSpecified] + cause.validFormats should be(RdfToJellyPrint.validFormatsString) + cause.format should be("invalid") + } + "invalid format out of existing is specified" in withFullQuadFile { f => + val e = + intercept[ExitException] { + RdfToJelly.runTestCommand(List("rdf", "to-jelly", f, "--in-format", "jelly-text")) + } + e.code should be(1) + e.cause.get shouldBe a[InvalidFormatSpecified] + val cause = e.cause.get.asInstanceOf[InvalidFormatSpecified] + cause.validFormats should be(RdfToJellyPrint.validFormatsString) + cause.format should be("jelly-text") + } + "invalid logical stream type is specified" in withFullQuadFile { f => + val e = + intercept[ExitException] { + RdfToJelly.runTestCommand(List("rdf", "to-jelly", f, "--opt.logical-type", "test")) + } + e.cause.get shouldBe a[InvalidArgument] + val cause = e.cause.get.asInstanceOf[InvalidArgument] + cause.argument 
should be("--opt.logical-type") + cause.argumentValue should be("test") + e.code should be(1) + } + } + }