Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Re-make of nicholascar's “Concise Bounded Description” PR #968 ... #1502

Merged
merged 1 commit into from Dec 13, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
53 changes: 53 additions & 0 deletions rdflib/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -1528,6 +1528,59 @@ def do_de_skolemize2(t):

return retval

def cbd(self, resource):
    """Retrieve the Concise Bounded Description of a resource from this graph.

    Concise Bounded Description (CBD) is defined in [1] as:

    Given a particular node (the starting node) in a particular RDF graph (the source graph), a subgraph of that
    particular graph, taken to comprise a concise bounded description of the resource denoted by the starting node,
    can be identified as follows:

    1. Include in the subgraph all statements in the source graph where the subject of the statement is the
       starting node;

    2. Recursively, for all statements identified in the subgraph thus far having a blank node object, include
       in the subgraph all statements in the source graph where the subject of the statement is the blank node
       in question and which are not already included in the subgraph.

    3. Recursively, for all statements included in the subgraph thus far, for all reifications of each statement
       in the source graph, include the concise bounded description beginning from the rdf:Statement node of
       each reification.

    This results in a subgraph where the object nodes are either URI references, literals, or blank nodes not
    serving as the subject of any statement in the graph.

    [1] https://www.w3.org/Submission/CBD/

    Implementation notes:

    * Rule 3 is approximated: for every expanded node, each subject that points at it via ``rdf:subject`` is
      treated as a reification and all triples having that subject are copied. The copied triples are not
      themselves expanded further by this step.
    * Nodes are expanded from an explicit work list rather than by recursion, so long blank node chains do not
      exhaust the interpreter's recursion limit.
    * A set of already expanded nodes guards against cycles. A blank node is expanded even when some of its
      triples were previously copied by the reification step, so the result does not depend on the order in
      which the store yields triples.

    :param resource: a URIRef object, the resource to describe
    :return: a Graph, subgraph of self

    """
    subgraph = Graph()

    pending = [resource]
    expanded = set()

    while pending:
        node = pending.pop()
        if node in expanded:
            continue
        expanded.add(node)

        # Rules 1 and 2: copy every triple about this node and queue the
        # blank nodes it points at so they are described as well.
        for s, p, o in self.triples((node, None, None)):
            subgraph.add((s, p, o))
            if isinstance(o, BNode) and o not in expanded:
                pending.append(o)

        # Rule 3 (reification): any subject pointing at this node through
        # rdf:subject is of type rdf:Statement (given the domain of
        # rdf:subject); copy all triples that have it as their subject.
        for statement, _, _ in self.triples((None, RDF.subject, node)):
            for s2, p2, o2 in self.triples((statement, None, None)):
                subgraph.add((s2, p2, o2))

    return subgraph


class ConjunctiveGraph(Graph):
"""A ConjunctiveGraph is an (unnamed) aggregation of all the named
Expand Down
111 changes: 111 additions & 0 deletions test/test_graph_cbd.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
import pytest
from rdflib import Graph, Namespace


"""Tests the Graph class' cbd() function"""

EX = Namespace("http://ex/")


@pytest.fixture
def get_graph():
    """Yield a Graph loaded with the example Turtle data; close it on teardown."""
    turtle_data = """
PREFIX ex: <http://ex/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

ex:R1
a rdf:Resource ;
ex:hasChild ex:R2 , ex:R3 .

ex:R2
ex:propOne ex:P1 ;
ex:propTwo ex:P2 .

ex:R3
ex:propOne ex:P3 ;
ex:propTwo ex:P4 ;
ex:propThree [
a rdf:Resource ;
ex:propFour "Some Literal" ;
ex:propFive ex:P5 ;
ex:propSix [
ex:propSeven ex:P7 ;
] ;
] .
"""

    # example data for testing
    graph = Graph()
    graph.parse(data=turtle_data, format="turtle")
    graph.bind("ex", EX)

    # everything after the yield runs as the fixture's teardown
    yield graph
    graph.close()


def testCbd(get_graph):
    """Check the size of the CBD for each resource in the fixture graph."""
    g = get_graph

    # R1: its rdf:type plus two ex:hasChild statements
    assert len(g.cbd(EX.R1)) == 3, "cbd() for R1 should return 3 triples"

    # R2: ex:propOne and ex:propTwo
    assert len(g.cbd(EX.R2)) == 2, "cbd() for R2 should return 2 triples"

    # R3: 3 direct statements, 4 on the nested blank node, 1 on the innermost
    assert len(g.cbd(EX.R3)) == 8, "cbd() for R3 should return 8 triples"

    # R4 never appears in the fixture data
    assert len(g.cbd(EX.R4)) == 0, "cbd() for R4 should return 0 triples"


def testCbdReified(get_graph):
    """Check that cbd() also returns the triples of reifying statement nodes."""
    g = get_graph
    # add some reified triples to the testing graph
    g.parse(
        data="""
PREFIX ex: <http://ex/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

ex:R5
ex:propOne ex:P1 ;
ex:propTwo ex:P2 ;
ex:propRei ex:Pre1 .

ex:S
a rdf:Statement ;
rdf:subject ex:R5 ;
rdf:predicate ex:propRei ;
rdf:object ex:Pre1 ;
ex:otherReiProp ex:Pre2 .
""",
        format="turtle",
    )

    # this cbd() call should get the 3 basic triples with ex:R5 as subject as well as 5 more from the reified
    # statement
    assert len(g.cbd(EX.R5)) == (3 + 5), "cbd() for R5 should return 8 triples"

    # add crazy reified triples to the testing graph: two statement nodes point at ex:R6 via rdf:subject,
    # and ex:S2 is not explicitly typed as rdf:Statement
    g.parse(
        data="""
PREFIX ex: <http://ex/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
ex:R6
ex:propOne ex:P1 ;
ex:propTwo ex:P2 ;
ex:propRei ex:Pre1 .
ex:S1
a rdf:Statement ;
rdf:subject ex:R6 ;
rdf:predicate ex:propRei ;
rdf:object ex:Pre1 ;
ex:otherReiProp ex:Pre3 .

ex:S2
rdf:subject ex:R6 ;
rdf:predicate ex:propRei2 ;
rdf:object ex:Pre2 ;
ex:otherReiProp ex:Pre4 ;
ex:otherReiProp ex:Pre5 .
""",
        format="turtle",
    )

    # 3 triples about ex:R6 itself, 5 with ex:S1 as subject and 5 with ex:S2 as subject
    assert len(g.cbd(EX.R6)) == (3 + 5 + 5), "cbd() for R6 should return 13 triples"