In [1]:
# Importing CSS
from IPython.core.display import HTML
def css_styling():
    """Load the notebook stylesheet and wrap it for rich display.

    Reads ``./styles.css`` (relative to the current working directory) and
    returns its contents wrapped in an ``HTML`` display object.

    Raises:
        OSError: if the stylesheet cannot be opened or read.
    """
    # The context manager closes the file handle even if read() raises;
    # the bare open(...).read() used before left the handle open.
    with open("./styles.css", "r") as stylesheet:
        styles = stylesheet.read()
    return HTML(styles)
css_styling()

__Omics Technologien - Tutorial__

***

# Exercise 6: Building and manipulating an RDF graph

<div class="logos">
    <div>
        <img src="./figures/Universität_Bielefeld.png"/>
    </div>
    <div>
        <img src="./figures/isaslogooffizielleform100k.jpg"/>
    </div>
</div>

***

Robert Heyer, Kay Schallert, Maximilian Wolf

__Content__

- Retrieving and manipulating RDF subgraphs
 
__Aim__

- Application of SPARQL to build local RDFs

# 1) Types of SPARQL queries:
***
- read-oriented query types (different ways to present results):
    - `SELECT` (Returns all, or a subset of, the variables bound in a query pattern match.)
    - `CONSTRUCT` (Returns an RDF graph constructed by substituting variables in a set of triple templates.)
    - `DESCRIBE` (Returns an RDF graph that describes the resources found.)
    - `ASK` (Returns a boolean indicating whether a query pattern matches or not.)
- write-oriented query types:
    - `INSERT`
    - `DELETE`
    
    https://medium.com/virtuoso-blog/what-is-a-sparql-endpoint-and-why-is-it-important-b3c9e6a20a8b

Updating or manipulating existing online RDF structures of course requires the permission to do so. <br>
However, RDF graphs can also be updated and saved locally.<br>
__Motivation of retrieving smaller RDF graphs:__
- reduce time for querying
- restrict the graph to the important information and improve clarity
- promote exploration
- updating vocabulary (persistent uri vs uri?)

In [2]:
import sparql_dataframe
from rdflib import Graph
import SPARQLWrapper 

# 1.1) RDFLib and SPARQLWrapper

- RDFLib is a Python package for working with RDF, including, e.g., parsers and serializers for different formats.<br>
 [https://rdflib.readthedocs.io/en/stable/gettingstarted.html]
- SPARQLWrapper is a simple Python wrapper around a SPARQL service to remotely execute your queries <br>
 [https://sparqlwrapper.readthedocs.io/en/latest/main.html]


In [3]:
# Create an empty in-memory graph
g = Graph()

# Parse in an RDF file hosted on the Internet
g.parse("http://dbpedia.org/resource/Aragorn")

# Print the number of "triples" in the Graph
print(len(g))

# Save the entire Graph in the RDF Turtle format.
# The format is passed explicitly: serialize() does not derive it from the
# ".ttl" extension, and its default has differed between rdflib releases.
g.serialize(destination="Aragorn.ttl", format="turtle")

# But how can we retrieve graphs through SPARQL queries?

332


<Graph identifier=N6e93ab3d6a1341dcb78d977823765e2e (<class 'rdflib.graph.Graph'>)>

# 2) `DESCRIBE`
***
- SPARQL `DESCRIBE` queries return a set of triples (in contrast to, for instance, `SELECT` queries, which return variable bindings)
- A `DESCRIBE` query returns an RDF graph that describes one or more of the given resources.
- prior knowledge about data structure not needed

In [4]:
# NOTE: this rebinds the name `SPARQLWrapper` -- imported as a module in an
# earlier cell -- to the class. Re-running that earlier import cell afterwards
# would make the `SPARQLWrapper(...)` calls fail (a module is not callable).
from SPARQLWrapper import SPARQLWrapper

sparql = SPARQLWrapper("http://dbpedia.org/sparql")

# DESCRIBE asks the endpoint for the triples describing the named resource.
# The prefix is declared explicitly so the query is valid stand-alone SPARQL
# and does not depend on namespaces predefined by the endpoint.
sparql.setQuery("""
    PREFIX dbr: <http://dbpedia.org/resource/>

    DESCRIBE dbr:Aragorn

""")

# The converted result supports len(); here it counts the returned triples.
results = sparql.queryAndConvert()
print(len(results))

596


In [5]:
sparql = SPARQLWrapper("http://dbpedia.org/sparql")

# Same DESCRIBE, but the resource is located through its English label
# instead of being named by IRI.
# rdfs: is declared explicitly rather than relying on endpoint-predefined
# namespaces, so the query is portable to other SPARQL processors.
sparql.setQuery("""
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

    DESCRIBE ?s 
    WHERE {
    ?s rdfs:label "Aragorn"@en.
    }

""")

results = sparql.queryAndConvert()
# Explicit Turtle: the file is later re-read by its ".ttl" extension, and
# serialize() does not infer the format from the destination name.
results.serialize(destination="AragornDescribe.ttl", format="turtle")
print(len(results))

596


# 3) `CONSTRUCT`
***
- `CONSTRUCT` queries take each solution and substitute it for the variables in the graph or triple template
- The `WHERE` clause matches a pattern and returns variable bindings, which are used to build the graph described in the `CONSTRUCT` clause


In [6]:
# Match all triples in which the resource labelled "Aragorn" appears either
# as subject or as object, and copy them unchanged into the result graph.
sparql = SPARQLWrapper("http://dbpedia.org/sparql")

# rdfs: is declared explicitly so the query does not depend on namespaces
# predefined by the endpoint.
sparql.setQuery("""
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

    CONSTRUCT {
       ?s ?p ?o .
       }
    WHERE {
    ?s ?p ?o.
    {?s rdfs:label "Aragorn"@en.} UNION
    {?o rdfs:label "Aragorn"@en.}}
""")

results = sparql.queryAndConvert()
# Explicit Turtle so the content always matches the ".ttl" file name.
results.serialize(destination="AragornConstruct.ttl", format="turtle")
print(len(results))

574


## 3.1) `CONSTRUCT` using nested `SELECT` clause

In [7]:
sparql = SPARQLWrapper("http://dbpedia.org/sparql")

# The inner SELECT finds (Aragorn, spouse) pairs; the CONSTRUCT template then
# re-expresses each pair with a new predicate instead of dbo:spouse.
# Prefixes are declared explicitly so the query does not depend on namespaces
# predefined by the endpoint.
# NOTE(review): <knows> is a relative IRI; how it is resolved depends on the
# base IRI in effect -- consider an absolute IRI.
sparql.setQuery("""
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX dbo: <http://dbpedia.org/ontology/>
    CONSTRUCT {
       ?s <knows> ?o .
       }
    WHERE {
        SELECT ?s ?o
        WHERE
        {?s rdfs:label "Aragorn"@en.
        ?s dbo:spouse ?o .}
        }
""")

results = sparql.queryAndConvert()
# Overwrites any existing AragornConstruct.ttl. Explicit Turtle so the content
# always matches the ".ttl" file name.
results.serialize(destination="AragornConstruct.ttl", format="turtle")
print(len(results))

1


## 3.2) `CONSTRUCT` using more than one pattern

In [8]:
sparql = SPARQLWrapper("http://dbpedia.org/sparql")

# Two triple templates: for every solution of the inner SELECT, the result
# graph receives one <knows> edge to the spouse (?o) and one to the resource
# labelled "Gandalf" (?g).
# Prefixes are declared explicitly so the query does not depend on namespaces
# predefined by the endpoint.
# NOTE(review): <knows> is a relative IRI; how it is resolved depends on the
# base IRI in effect -- consider an absolute IRI.
sparql.setQuery("""
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX dbo: <http://dbpedia.org/ontology/>
    CONSTRUCT {
       ?s <knows> ?o .
       ?s <knows> ?g
       }
    WHERE {
        SELECT ?s ?o ?g
        WHERE
        {?s rdfs:label "Aragorn"@en.
        ?s dbo:spouse ?o .
        ?g rdfs:label "Gandalf"@en. }
        }
""")

results = sparql.queryAndConvert()
# Overwrites any existing AragornConstruct.ttl. Explicit Turtle so the content
# always matches the ".ttl" file name.
results.serialize(destination="AragornConstruct.ttl", format="turtle")
print(len(results))

2


## 3.3) `CONSTRUCT` using nested `SELECT` and `FILTER` clause

In [9]:
sparql = SPARQLWrapper("http://dbpedia.org/sparql")

# Collect every page linked from the resource labelled "Aragorn" and record
# each link as a <knows> edge.
# NOTE(review): the query below contains no FILTER clause, although the
# section heading announces one -- confirm which of the two is intended.
# Prefixes are declared explicitly so the query does not depend on namespaces
# predefined by the endpoint.
sparql.setQuery("""
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX dbo: <http://dbpedia.org/ontology/>
    CONSTRUCT {
       ?s <knows> ?o .
       }
    WHERE {
        SELECT ?s ?o
        WHERE
        {?s rdfs:label "Aragorn"@en.
        ?s dbo:wikiPageWikiLink ?o .}
        }
""")

results = sparql.queryAndConvert()
# Explicit Turtle: this file is re-read later by its ".ttl" extension, and
# serialize() does not infer the format from the destination name.
results.serialize(destination="AragornLinks.ttl", format="turtle")
print(len(results))

168


# 4) `ASK` query
***
- determines whether a particular triple pattern exists in the specified data set
- returns true or false, depending on whether the solution or match exists

In [10]:
# ASK yields a single boolean: does the pattern have at least one match?
# (Hint kept from the original cell -- try: dbr:MaximilianWolf)
ask_query = """
    prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    prefix dbr: <http://dbpedia.org/resource/>
    prefix dbo: <http://dbpedia.org/ontology/>
    ASK {dbr:Aragorn dbo:wikiPageWikiLink dbr:Gandalf}
    """

# Run the query against the locally stored DESCRIBE result, not the endpoint.
g = Graph()
g.parse("./AragornDescribe.ttl")
qres = g.query(ask_query)

# Truth value of the result object is the ASK answer.
pattern_found = bool(qres)
print(pattern_found)

True


# 5) Deleting and Inserting Triples:
***
- `DELETE DATA` / `INSERT DATA`
    - insertion or deletion of ground triples (must not contain variables; `DELETE DATA` must not contain blank nodes either)

- `DELETE` / `INSERT`
    - insertion or deletion of data specified with a WHERE clause

***
## 5.1) `INSERT DATA`

In [11]:
# Load the local link graph, add one ground triple, and write it back.
g = Graph()
g.parse("./AragornLinks.ttl", format="turtle")
print(len(g))  # triple count before the update

# INSERT DATA takes concrete triples only -- no variables, no WHERE clause.
g.update("""
    prefix dbr: <http://dbpedia.org/resource/>
    prefix dbo: <http://dbpedia.org/ontology/>
    prefix smth: <http://example.org/>
    INSERT DATA { dbr:Aragorn  dbo:wikiPageWikiLink smth:MaximilianWolf }""")

print(len(g))  # triple count after the update
# Overwrites the input file in place. Turtle is requested explicitly so the
# content always matches the ".ttl" name that later cells parse.
g.serialize(destination="AragornLinks.ttl", format="turtle")

168
169


<Graph identifier=Nf715eece7ffc4db0b2f0e6b6cd850b22 (<class 'rdflib.graph.Graph'>)>

In [12]:
# Load the local link graph, add a type statement, and write it back.
g = Graph()
g.parse("./AragornLinks.ttl", format="turtle")
print(len(g))  # triple count before the update

# `a` is SPARQL shorthand for rdf:type, so this types dbr:Aragorn as dbr:King.
g.update("""
    prefix dbr: <http://dbpedia.org/resource/>
    prefix dbo: <http://dbpedia.org/ontology/>
    prefix smth: <http://example.org/>
    INSERT DATA { dbr:Aragorn  a dbr:King }""")

print(len(g))  # triple count after the update
# Overwrites the input file in place. Turtle is requested explicitly so the
# content always matches the ".ttl" name that later cells parse.
g.serialize(destination="AragornLinks.ttl", format="turtle")

169
170


<Graph identifier=Ne90feb4e52a54d5d977a2d24e7857ece (<class 'rdflib.graph.Graph'>)>

## 5.2) `DELETE DATA`

In [13]:
# Load the local link graph, remove one ground triple, and write it back.
g = Graph()
g.parse("./AragornLinks.ttl", format="turtle")
print(len(g))  # triple count before the update

# DELETE DATA removes exactly the listed triple.
g.update("""
    prefix dbr: <http://dbpedia.org/resource/>
    prefix dbo: <http://dbpedia.org/ontology/>
    prefix smth: <http://example.org/>
    DELETE DATA { dbr:Aragorn  dbo:wikiPageWikiLink smth:MaximilianWolf }""")

print(len(g))  # triple count after the update
# Overwrites the input file in place. Turtle is requested explicitly so the
# content always matches the ".ttl" name that later cells parse.
g.serialize(destination="AragornLinks.ttl", format="turtle")

170
169


<Graph identifier=N677127993b514b8c98562d375e4fe296 (<class 'rdflib.graph.Graph'>)>

In [14]:
# Pattern-based INSERT: the template is instantiated once per WHERE solution.
g = Graph()
g.parse("./AragornLinks.ttl", format="turtle")
print(len(g))  # triple count before the update

# The template is added only if the WHERE pattern (<z:> typed as <c:>) matches.
# In the recorded run it did not match, so the triple count stayed the same.
g.update("""
    INSERT { <z:> <q:> <p> }
    WHERE {
    <z:> a <c:>
    }""")

print(len(g))  # triple count after the update
# Overwrites the input file in place. Turtle is requested explicitly so the
# content always matches the ".ttl" name that later cells parse.
g.serialize(destination="AragornLinks.ttl", format="turtle")

169
169


<Graph identifier=N591b675a9cdf4b1fa35091f275d5dc00 (<class 'rdflib.graph.Graph'>)>

In [15]:
# DELETE ... WHERE (pattern-based deletion)
g = Graph()
g.parse("./AragornLinks.ttl", format="turtle")
print(len(g))  # triple count before the update

# Removes EVERY triple whose subject is typed dbr:King, not only the type
# statement itself. In the recorded run this emptied the graph (169 -> 0).
g.update("""
    prefix dbr: <http://dbpedia.org/resource/>
    prefix dbo: <http://dbpedia.org/ontology/>
    DELETE { ?person  ?property ?value }
    WHERE { ?person ?property ?value;
        a dbr:King.
        }""")

print(len(g))  # triple count after the update
# Overwrites the input file in place -- with whatever is left after the
# deletion. Turtle is requested explicitly so the content matches ".ttl".
g.serialize(destination="AragornLinks.ttl", format="turtle")

169
0


<Graph identifier=N40bb64d8e7ad428caab63be6525032dd (<class 'rdflib.graph.Graph'>)>