In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Feb 12 14:20:27 2021

@author: erce
"""
import pathlib
import sys

# findspark has to be initialised BEFORE the first pyspark import: init()
# locates the Spark installation and puts its Python bindings on sys.path.
# Calling it after `from pyspark import ...` is too late to have any effect.
import findspark
findspark.init()

from pyspark import SparkContext, SparkConf

# Absolute path of the directory the notebook is run from
currentPath = pathlib.Path().absolute()
# Put the parent directory on the import path so the local `pysansa`
# package can be imported below
sys.path.insert(0, str(currentPath) + '/..')
from pysansa.rdf.rdf import Rdf

In [2]:
"""
Creating SparkConfig, SparkContext and SparkSession
SparkContext uses our SANSA-Stack jar with dependencies included
"""
# Location of the SANSA-Stack jar, next to the `pysansa` package in the parent
# directory. Built with pathlib: plain concatenation of str(currentPath) and
# "../../pysansa/..." has no separator between the two parts and produces
# "<cwd>../../pysansa/...", which is only usable if the consumer happens to
# normalise the path lexically.
sansaJar = currentPath.parent / "pysansa" / "myjars" / "SANSA_all_dep_NO_spark.jar"
# Spark Session and Config
conf = SparkConf().set("spark.jars", str(sansaJar))
sc = SparkContext.getOrCreate(conf=conf)
# Spark object: a SparkSession built on the JVM side through the py4j gateway
spark = (
    sc._jvm.org.apache.spark.sql.SparkSession.builder()
    .master("local")
    .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    .config("spark.sql.legacy.allowUntypedScalaUDF", "true")
    .appName("SansaRDF")
    .getOrCreate()
)


In [3]:
"""
Creating Rdf Object from SANSA-Stack RDF Python Wrapper
"""
# Rdf object
rdf = Rdf(sc)
# Initialize Rdf Reader
rdfReader = rdf.initializeRdfReader(spark)
# Input file in the `data` directory of the parent directory. Built with
# pathlib: concatenating str(currentPath) and '../../data/rdf.nt' has no
# separator between the two parts, and prefixing an absolute path with
# 'file:///' yields a URI with four slashes. as_uri() produces a well-formed
# file:// URI on every platform.
rdfFile = currentPath.parent / "data" / "rdf.nt"
# Read triples from the given path
triples = rdf.readTriples(rdfReader, path = rdfFile.as_uri())

In [4]:
"""
Running some examples with RDF Python Wrapper
"""
# Number of triples to fetch for the demonstration
sampleSize = 5
# Fetch that many triples as an array, then print them
triples = rdf.getTriples(sampleSize)
rdf.printTriples(triples)


1. Element: http://commons.dbpedia.org/resource/Template:Cc-by-1.0 @http://commons.dbpedia.org/property/version "1"^^http://www.w3.org/2001/XMLSchema#integer

2. Element: http://commons.dbpedia.org/resource/Category:Events @http://commons.dbpedia.org/property/de "Ereignis, Veranstaltung."@en

3. Element: http://commons.dbpedia.org/resource/Category:Events @http://commons.dbpedia.org/property/en "Events"@en

4. Element: http://commons.dbpedia.org/resource/Category:Events @http://commons.dbpedia.org/property/fr "Événements."@en

5. Element: http://commons.dbpedia.org/resource/Template:Cc-by-sa-1.0 @http://commons.dbpedia.org/property/version "1"^^http://www.w3.org/2001/XMLSchema#integer


In [5]:
# Number of triples held by the Rdf object
size = rdf.count()
print(f"Size of triples: {size!s}")
# Run the wrapper's print helpers one after another:
# RDF/IO attributes, triple object attributes, Rdf class packages
for reportName in ("printRdfIOAttributes",
                   "printTripleObjectAttributes",
                   "printRdfClassPackageList"):
    getattr(rdf, reportName)()

Size of triples: 106
RDF IO Package methods: 
['RDFDataFrameReader', 'RDFDataFrameWriter', 'RDFQuadsWriter', 'RDFReader', 'RDFWriter', 'SaveMode$', 'fromRow', 'toRow']
RDF Triple methods: 
dict_keys(['io', 'mappings', 'model', 'ops', 'partition', 'qualityassesment', 'stats'])


In [6]:
"""
Example usage of different packages from Rdf class
io package RDFReader
"""
# Look up the io package, then instantiate its RDFReader with the Spark session
ioPackage = rdf.packagesDict["io"]
reader = ioPackage.RDFReader(spark)
# Heading and attribute list on separate lines
print("RDFReader class methods: ", dir(reader), sep="\n")

RDFReader class methods: 
['$anonfun$nquads$1', '$anonfun$ntriples$4', '$anonfun$rdf$3', '$anonfun$rdfxml$2', '$anonfun$rdfxml$3', '$anonfun$trig$1', '$anonfun$trig$2', '$anonfun$trix$1', '$anonfun$trix$2', '$anonfun$turtle$2', '$anonfun$turtle$3', 'datasets', 'equals', 'getClass', 'hashCode', 'notify', 'notifyAll', 'nquads', 'nquads$default$1', 'ntriples', 'ntriples$default$1', 'rdf', 'rdfxml', 'toString', 'trig', 'trix', 'turtle', 'wait']


In [7]:
"""
Example usage of different packages from Rdf class
io package RDFWriter
"""
# Look up the io package, then wrap the loaded triples in its RDFWriter
ioPackage = rdf.packagesDict["io"]
writer = ioPackage.RDFWriter(rdf.triples)
# Heading and attribute list on separate lines
print("RDFWriter class methods: ", dir(writer), sep="\n")

RDFWriter class methods: 
['$anonfun$saveAsNTriplesFile$1', 'equals', 'getClass', 'hashCode', 'notify', 'notifyAll', 'saveAsNTriplesFile', 'saveAsNTriplesFile$default$2', 'saveAsNTriplesFile$default$3', 'toString', 'wait']


In [8]:
"""
Example usage of different packages from Rdf class
qualityassesment package QualityAssessmentOperations
"""
# Quality assessment operations over the loaded triples
qualityassesment = rdf.packagesDict["qualityassesment"].QualityAssessmentOperations(rdf.triples)
print("QualityAssessmentOperations class methods: ")
print(dir(qualityassesment))
# Metric: amount of triples
assesmentTriples = qualityassesment.assessAmountOfTriples()
print("Triples assesment: ", assesmentTriples)
# Metric: coverage detail. The label names the method that is actually called;
# assessCoverageScope is a separate operation on the same object.
assesmentCoverage = qualityassesment.assessCoverageDetail()
print("Coverage detail assesment: ", assesmentCoverage)
# Stop SparkContext to prevent overloading
sc.stop()

QualityAssessmentOperations class methods: 
['assessAmountOfTriples', 'assessCoverageDetail', 'assessCoverageScope', 'assessDereferenceableBackLinks', 'assessDereferenceableForwardLinks', 'assessDereferenceableUris', 'assessExtensionalConciseness', 'assessExternalSameAsLinks', 'assessHumanReadableLicense', 'assessInterlinkingCompleteness', 'assessLabeledResources', 'assessLiteralNumericRangeChecker', 'assessMachineReadableLicense', 'assessNoHashUris', 'assessPropertyCompleteness', 'assessQueryParamFreeURIs', 'assessSchemaCompleteness', 'assessShortURIs', 'assessXSDDatatypeCompatibleLiterals', 'equals', 'getClass', 'hashCode', 'notify', 'notifyAll', 'toString', 'wait']
Triples assesment:  0.0
Coverage scope assesment:  0.22641509433962265
