From bfd52b2fa46fcd30e813b301ecababc538ed1bb2 Mon Sep 17 00:00:00 2001 From: Claus Stadler Date: Fri, 2 Jul 2021 21:55:11 +0200 Subject: [PATCH] Added support for Lang attribute to RdfSourceFactory. Upgraded source maven plugin version. --- pom.xml | 2 +- .../spark/io/input/api/RdfSourceFactory.java | 21 +++++++++--- .../io/input/impl/RdfSourceFactoryImpl.java | 34 +++++++------------ 3 files changed, 31 insertions(+), 26 deletions(-) diff --git a/pom.xml b/pom.xml index e8bd16707..4deac90a4 100644 --- a/pom.xml +++ b/pom.xml @@ -1722,7 +1722,7 @@ org.apache.maven.plugins maven-source-plugin - 3.0.1 + 3.2.1 attach-sources diff --git a/sansa-rdf/sansa-rdf-spark/src/main/java/net/sansa_stack/rdf/spark/io/input/api/RdfSourceFactory.java b/sansa-rdf/sansa-rdf-spark/src/main/java/net/sansa_stack/rdf/spark/io/input/api/RdfSourceFactory.java index b43dfcd37..adb3c6518 100644 --- a/sansa-rdf/sansa-rdf-spark/src/main/java/net/sansa_stack/rdf/spark/io/input/api/RdfSourceFactory.java +++ b/sansa-rdf/sansa-rdf-spark/src/main/java/net/sansa_stack/rdf/spark/io/input/api/RdfSourceFactory.java @@ -2,6 +2,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.jena.riot.Lang; /** @@ -16,14 +17,26 @@ public interface RdfSourceFactory { default RdfSource get(String sourceStr) { + return get(sourceStr, (Lang) null); + } + + default RdfSource get(String sourceStr, Lang lang) { + Path path = new Path(sourceStr); + return get(path, null, lang); + } + + default RdfSource get(String sourceStr, FileSystem fileSystem) { + Path path = new Path(sourceStr); + return get(path, fileSystem, null); + } + + default RdfSource get(Path path, FileSystem fileSystem, Lang lang) { try { - return create(sourceStr); + return create(path, fileSystem, lang); } catch (Exception e) { throw new RuntimeException(e); } } - RdfSource create(String sourceStr) throws Exception; - RdfSource create(String sourceStr, FileSystem fileSystem) throws Exception; - RdfSource create(Path path, FileSystem fileSystem) throws Exception; + RdfSource create(Path path, FileSystem fileSystem, Lang lang) throws Exception; } diff --git a/sansa-rdf/sansa-rdf-spark/src/main/java/net/sansa_stack/rdf/spark/io/input/impl/RdfSourceFactoryImpl.java b/sansa-rdf/sansa-rdf-spark/src/main/java/net/sansa_stack/rdf/spark/io/input/impl/RdfSourceFactoryImpl.java index 581ca1545..4583f5b40 100644 --- a/sansa-rdf/sansa-rdf-spark/src/main/java/net/sansa_stack/rdf/spark/io/input/impl/RdfSourceFactoryImpl.java +++ b/sansa-rdf/sansa-rdf-spark/src/main/java/net/sansa_stack/rdf/spark/io/input/impl/RdfSourceFactoryImpl.java @@ -36,33 +36,25 @@ public static RdfSourceFactory from(SparkSession sparkSession) { return new RdfSourceFactoryImpl(sparkSession); } - - public RdfSource create(String sourceStr) throws Exception { - Configuration hadoopConf = sparkSession.sparkContext().hadoopConfiguration(); - FileSystem fileSystem = FileSystem.get(hadoopConf); - return create(sourceStr, fileSystem); - } - - @Override - public RdfSource create(String sourceStr, FileSystem fileSystem) throws Exception { - Path path = new Path(sourceStr); + public RdfSource create(Path path, FileSystem fileSystem, Lang lang) throws Exception { - return create(path, fileSystem); - } + if (fileSystem == null) { + Configuration hadoopConf = sparkSession.sparkContext().hadoopConfiguration(); + fileSystem = FileSystem.get(hadoopConf); + } - @Override - public RdfSource create(Path path, FileSystem fileSystem) throws Exception { Path resolvedPath = fileSystem.resolvePath(path); - EntityInfo entityInfo; - try (InputStream in = fileSystem.open(resolvedPath)) { - entityInfo = RDFDataMgrEx.probeEntityInfo(in, RDFDataMgrEx.DEFAULT_PROBE_LANGS); - } + if (lang == null) { + EntityInfo entityInfo; + try (InputStream in = fileSystem.open(resolvedPath)) { + entityInfo = RDFDataMgrEx.probeEntityInfo(in, RDFDataMgrEx.DEFAULT_PROBE_LANGS); + } + lang = RDFLanguages.contentTypeToLang(entityInfo.getContentType()); - Lang lang = RDFLanguages.contentTypeToLang(entityInfo.getContentType()); - - Objects.requireNonNull(lang, "Could not obtain lang for " + entityInfo.getContentType() + " from " + path); + Objects.requireNonNull(lang, "Could not obtain lang for " + entityInfo.getContentType() + " from " + path); + } return new RdfSourceImpl(sparkSession, resolvedPath, lang); }