In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Feb 11 11:20:27 2021

@author: erce
"""
import pathlib
import sys

import findspark

# findspark has to run before the first `import pyspark` (pysansa is imported
# afterwards as well, in case it pulls in pyspark itself), so that the Spark
# installation it locates is the one that actually gets imported.
findspark.init()

# Get the current path (the directory the notebook kernel was started in)
currentPath = pathlib.Path().absolute()
# Make the parent directory importable so that `import pysansa` resolves
sys.path.insert(0, str(currentPath) + '/..')

import pysansa
import pyspark as ps
from pyspark import SparkContext, SparkConf
from pysansa.rdf.rdf import Rdf
from pysansa.query.query import Query

In [2]:
"""
Creating SparkConfig, SparkContext and SparkSession
SparkContext uses our SANSA-Stack jar with dependencies included
"""
# Location of the SANSA-Stack jar: it sits in the `pysansa/myjars` folder of the
# parent directory (the same parent directory that is put on sys.path for
# `import pysansa`). The path is built from components rather than by string
# concatenation: gluing "../../pysansa/..." directly onto the working directory
# produces "<cwd>../../pysansa/...", which only reaches the jar when the consumer
# happens to collapse "<cwd>../.." lexically.
sansaJarPath = currentPath.parent / "pysansa" / "myjars" / "SANSA_all_dep_NO_spark.jar"

# Spark config and context
conf = SparkConf().set("spark.jars", str(sansaJarPath))
sc = SparkContext.getOrCreate(conf=conf)

# Spark object: a SparkSession built directly on the JVM side through sc._jvm
# (not a Python pyspark.sql.SparkSession)
spark = (
    sc._jvm.org.apache.spark.sql.SparkSession.builder()
    .master("local")
    .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    .config("spark.sql.legacy.allowUntypedScalaUDF", "true")
    .appName("SansaRDF")
    .getOrCreate()
)


In [3]:
"""
Creating Rdf Object from SANSA-Stack RDF Python Wrapper
"""
# Rdf object
rdf = Rdf(sc)
# Initialize Rdf Reader
rdfReader = rdf.initializeRdfReader(spark)
# The example data lives in the `data` folder of the parent directory. Build the
# path from components: concatenating '../../data/rdf.nt' onto the working
# directory without a separator yields "<cwd>../../data/rdf.nt", which resolves
# to this file only if the reader collapses "<cwd>../.." lexically.
rdfDataPath = currentPath.parent / "data" / "rdf.nt"
# Read triples from the given path
triples = rdf.readTriples(rdfReader, path = 'file:///' + str(rdfDataPath))

In [4]:
"""
Running some examples with RDF Python Wrapper
"""
# Count triples from the object
size = rdf.count()
# Print size of triples
print("Size of triples: " + str(size))
# Get triples as array (presumably at most 30 of them -- confirm against
# Rdf.getTriples). Stored under its own name so the result is not immediately
# overwritten by the assignment below.
firstTriples = rdf.getTriples(30)
# Get triples from Rdf object; this is the value handed to the query wrapper later
triples = rdf.triples

Size of triples: 106


In [5]:
"""
Running some examples with Query Python Wrapper
"""
# Create Query object on top of the existing SparkContext
query = Query(sc)
# Set and create Sparqlify executor object from the triples read earlier
query.setTriplesToSparqlifyExecutor(triples)
# An example query: select every triple. The LIMIT is hard-coded to 106, the
# triple count printed for the example data set; adjust it for other inputs.
query1 = "SELECT * WHERE {?s ?p ?o} LIMIT 106"
# Run the query and return the result object (reused by the following cells)
result = query.runQueryOnSparqlify(query1)
# Print the result to the console as a table
query.show(result)

In [6]:
# Print the attributes/methods available on the result object.
# The list is long (see the output below) and includes compiler-generated
# '$anonfun$...' entries.
query.printAttributes(result)

['$anonfun$checkpoint$1', '$anonfun$checkpoint$2', '$anonfun$collect$1', '$anonfun$collectAsArrowToPython$1', '$anonfun$collectAsArrowToPython$1$adapted', '$anonfun$collectAsArrowToPython$2', '$anonfun$collectAsArrowToPython$2$adapted', '$anonfun$collectAsArrowToPython$3', '$anonfun$collectAsArrowToPython$3$adapted', '$anonfun$collectAsArrowToPython$4', '$anonfun$collectAsArrowToPython$5', '$anonfun$collectAsArrowToPython$6', '$anonfun$collectAsArrowToPython$7', '$anonfun$collectAsArrowToPython$8', '$anonfun$collectAsArrowToPython$9', '$anonfun$collectAsArrowToPython$9$adapted', '$anonfun$collectAsArrowToR$1', '$anonfun$collectAsArrowToR$1$adapted', '$anonfun$collectAsArrowToR$2', '$anonfun$collectAsArrowToR$2$adapted', '$anonfun$collectAsArrowToR$3', '$anonfun$collectAsArrowToR$4', '$anonfun$collectAsArrowToR$4$adapted', '$anonfun$collectAsList$1', '$anonfun$collectToPython$1', '$anonfun$collectToPython$2', '$anonfun$columns$1', '$anonfun$count$1', '$anonfun$count$1$adapted', '$anonfu

In [7]:
# Convert the query result to a DataFrame
dfResult = query.convertToDataFrame(result)
# Take 10 rows from the DataFrame (kept in dfResultArray for the row/column
# access examples in the next cell)
dfResultArray = query.takeFromDataFrame(dfResult, 10)
# Print the row array that was taken from the DataFrame, one numbered element per row
query.printDF(dfResultArray)


1. Element: [http://commons.dbpedia.org/property/width,null,null,http://commons.dbpedia.org/resource/Category:People,null,100.0,null,null]

2. Element: [http://commons.dbpedia.org/property/date,null,null,http://commons.dbpedia.org/resource/File:Buswachten.jpg,null,null,null,2004-07-22]

3. Element: [http://commons.dbpedia.org/property/date,null,null,http://commons.dbpedia.org/resource/File:Groninger-museum.jpg,null,null,null,2004-08-26]

4. Element: [http://commons.dbpedia.org/property/date,null,null,http://commons.dbpedia.org/resource/File:StationAssen3.jpg,null,null,null,2004-07-22]

5. Element: [http://commons.dbpedia.org/property/date,null,null,http://commons.dbpedia.org/resource/File:De_Slegte,_Groningen.jpg,null,null,null,2004-08-26]

6. Element: [http://commons.dbpedia.org/property/date,null,null,http://commons.dbpedia.org/resource/File:Paddestoel_003.jpg,null,null,null,2004-08-20]

7. Element: [http://commons.dbpedia.org/property/date,null,null,http://commons.dbpedia.org/resou

In [8]:

# Convert the query result to JSON. Named `jsonResult` rather than `json` so it
# cannot be confused with (or later shadow) the standard-library `json` module.
jsonResult = result.toJSON()
# Print JSON
jsonResult.show()
# Get the first JSON element
first = jsonResult.first()
# Get the first 5 elements from the JSON
taken = jsonResult.take(5)
# Get the first element from the JSON array (bound to a name: a bare expression
# in the middle of a cell is evaluated but never displayed)
firstTaken = taken[0]

# Get the first row of the DataFrame array
firstRow = query.getRow(dfResultArray, 0)
# Get the first column of the first row
firstColumnOfFirstRow = query.getColumn(firstRow, 0)
# Get the third column of the first row
thirdColumnOfFirstRow = query.getColumn(firstRow, 2)
# Get the fourth column of the first row
fourthColumnOfFirstRow = query.getColumn(firstRow, 3)

# Get the second row of the DataFrame array
# NOTE(review): the recorded output below matches the row printed as
# "1. Element" in the previous cell -- confirm whether getRow is 0- or 1-based,
# or whether row order simply differed between runs.
secondRow = query.getRow(dfResultArray, 1)
# JSON of the second row
jsonOfSecondRow = secondRow.json()
# Print the JSON of the second row
print(jsonOfSecondRow)
# Print JSON Value
print(secondRow.jsonValue())
# Print Schema
print(secondRow.schema())
# Print pretty JSON
print(secondRow.prettyJson())

# Stop the SparkContext to release its resources. The cells above need a fresh
# SparkContext before they can be run again.
sc.stop()

{"C_3":"http://commons.dbpedia.org/property/width","C_4":null,"C_5":null,"C_10":"http://commons.dbpedia.org/resource/Category:People","C_6":null,"C_7":"100.0","C_8":null,"C_9":null}
JObject(List((C_3,JString(http://commons.dbpedia.org/property/width)), (C_4,JNull), (C_5,JNull), (C_10,JString(http://commons.dbpedia.org/resource/Category:People)), (C_6,JNull), (C_7,JString(100.0)), (C_8,JNull), (C_9,JNull)))
StructType(StructField(C_3,StringType,false), StructField(C_4,StringType,true), StructField(C_5,StringType,true), StructField(C_10,StringType,true), StructField(C_6,StringType,true), StructField(C_7,StringType,true), StructField(C_8,StringType,true), StructField(C_9,DateType,true))
{
  "C_3" : "http://commons.dbpedia.org/property/width",
  "C_4" : null,
  "C_5" : null,
  "C_10" : "http://commons.dbpedia.org/resource/Category:People",
  "C_6" : null,
  "C_7" : "100.0",
  "C_8" : null,
  "C_9" : null
}
