This repository has been archived by the owner on Oct 8, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 13
/
RDFQualityAssessment.scala
92 lines (69 loc) · 3.24 KB
/
RDFQualityAssessment.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
package net.sansa_stack.examples.spark.rdf
import java.io.File
import scala.collection.mutable
import net.sansa_stack.rdf.spark.io._
import net.sansa_stack.rdf.spark.qualityassessment._
import org.apache.jena.riot.Lang
import org.apache.spark.sql.SparkSession
/**
 * Example application that loads an N-Triples file with Spark and prints a set of
 * RDF quality-assessment metrics for it.
 *
 * Command-line arguments are parsed with the scopt [[parser]] defined in this object.
 */
object RDFQualityAssessment {

  /**
   * Parses the command line and starts the assessment.
   *
   * When parsing fails, the usage text is printed and the method returns normally.
   *
   * @param args raw command-line arguments
   */
  def main(args: Array[String]): Unit = {
    parser.parse(args, Config()) match {
      case Some(config) =>
        run(config.in, config.out)
      case None =>
        println(parser.usage)
    }
  }

  /**
   * Reads the triples at `input`, computes the quality metrics and prints them to stdout.
   *
   * The Spark session obtained here is stopped before the method returns, whether the
   * assessment succeeded or threw.
   *
   * @param input  path to the file containing the data; it is read as N-Triples
   * @param output output directory given on the command line.
   *               NOTE(review): currently unused -- the report is only printed.
   */
  def run(input: String, output: String): Unit = {
    // Only the file name (not the full path) is shown in the app name and the report header.
    val inputFileName = new File(input).getName

    val spark = SparkSession.builder
      .appName(s"RDF Quality Assessment Example $inputFileName")
      .master("local[*]")
      .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .getOrCreate()

    try {
      println("======================================")
      println("| RDF Quality Assessment Example     |")
      println("======================================")

      val lang = Lang.NTRIPLES
      val triples = spark.rdf(lang)(input)

      // compute quality assessment
      val schemaCompleteness = triples.assessSchemaCompleteness()
      val interlinkingCompleteness = triples.assessInterlinkingCompleteness()
      val propertyCompleteness = triples.assessPropertyCompleteness()
      val literalNumericRange = triples.assessLiteralNumericRangeChecker()
      val xsdDatatypeCompatibleLiterals = triples.assessXSDDatatypeCompatibleLiterals()
      val dereferenceableUris = triples.assessDereferenceableUris()
      val coverageDetail = triples.assessCoverageDetail()
      val coverageScope = triples.assessCoverageScope()
      val amountOfTriples = triples.assessAmountOfTriples()
      val noHashUris = triples.assessNoHashUris()
      val labeledResources = triples.assessLabeledResources()

      // The report starts and ends with a newline; stripMargin only removes the
      // source-code indentation, so the printed text has every label at column 0.
      val report =
        s"""
           |completeness_schema:$schemaCompleteness
           |completeness_interlinking:$interlinkingCompleteness
           |completeness_property:$propertyCompleteness
           |syntacticvalidity_literalnumeric:$literalNumericRange
           |syntacticvalidity_XSDDatatypeCompatibleLiterals:$xsdDatatypeCompatibleLiterals
           |availability_DereferenceableUris:$dereferenceableUris
           |relevancy_CoverageDetail:$coverageDetail
           |relevancy_CoverageScope:$coverageScope
           |relevancy_AmountOfTriples:$amountOfTriples
           |performance_NoHashURIs:$noHashUris
           |understandability_LabeledResources:$labeledResources
           |""".stripMargin

      println(s"\n AssessQuality for $inputFileName :\n $report")
    } finally {
      // Release the Spark context and its resources even if an assessment step failed.
      spark.stop()
    }
  }

  /**
   * Command-line configuration.
   *
   * @param in  path to the input file
   * @param out output directory
   */
  case class Config(
    in: String = "",
    out: String = "")

  /** scopt parser for the `--input` / `--out` options (both required) and `--help`. */
  val parser: scopt.OptionParser[Config] =
    new scopt.OptionParser[Config]("RDF Quality Assessment Example") {

      head("RDF Quality Assessment Example")

      opt[String]('i', "input").required().valueName("<path>").
        action((x, c) => c.copy(in = x)).
        text("path to file that contains the data (in N-Triples format)")

      opt[String]('o', "out").required().valueName("<directory>").
        action((x, c) => c.copy(out = x)).
        text("the output directory")

      help("help").text("prints this usage text")
    }
}