Skip to content
This repository has been archived by the owner. It is now read-only.
Permalink
 
 
Cannot retrieve contributors at this time
59 lines (42 sloc) 1.51 KB
package net.sansa_stack.examples.flink.rdf
import java.io.File
import scala.collection.mutable
import net.sansa_stack.rdf.flink.io._
import net.sansa_stack.rdf.flink.stats._
import org.apache.flink.api.scala.ExecutionEnvironment
import org.apache.jena.riot.Lang
/**
 * Command-line example that loads an RDF file through the Flink
 * `ExecutionEnvironment` and hands the statistics of its triples to `voidify`.
 *
 * Options (both required): `-i/--input <path>` and `-o/--out <directory>`.
 */
object RDFStats {

  /**
   * Entry point. Parses `args` into a [[Config]]; on success the example is run
   * with the parsed input and output values, otherwise the parser's usage text
   * is printed.
   *
   * @param args raw command-line arguments
   */
  def main(args: Array[String]): Unit = {
    parser.parse(args, Config()) match {
      case Some(config) =>
        run(config.in, config.out)
      case None =>
        println(parser.usage)
    }
  }

  /**
   * Reads the triples found at `input` and passes their statistics to `voidify`.
   *
   * @param input  path of the data to read; it is loaded with `Lang.NTRIPLES`
   * @param output second argument given to `voidify` (the CLI help describes it
   *               as "the output directory")
   */
  def run(input: String, output: String): Unit = {
    // Only the last path component of `input` is forwarded to `voidify`.
    val inputFileName = new File(input).getName

    println("======================================")
    println("| RDF Statistic example |")
    println("======================================")

    val env = ExecutionEnvironment.getExecutionEnvironment
    val triples = env.rdf(Lang.NTRIPLES)(input)

    // compute stats
    // The result of `voidify` is not bound: nothing in this method reads it.
    triples.stats.voidify(inputFileName, output)
  }

  /**
   * Parsed command-line settings.
   *
   * @param in  value of the `-i/--input` option
   * @param out value of the `-o/--out` option
   */
  case class Config(
    in: String = "",
    out: String = "")

  // the CLI parser
  val parser: scopt.OptionParser[Config] =
    new scopt.OptionParser[Config]("RDF Dataset Statistics Example") {

      head("RDF Dataset Statistics Example")

      opt[String]('i', "input")
        .required()
        .valueName("<path>")
        .action((x, c) => c.copy(in = x))
        .text("path to file that contains the data (in N-Triples format)")

      opt[String]('o', "out")
        .required()
        .valueName("<directory>")
        .action((x, c) => c.copy(out = x))
        .text("the output directory")

      help("help").text("prints this usage text")
    }
}
You can’t perform that action at this time.