Skip to content

Commit

Permalink
fix: bug with SELECT * inside SELECT * (#2190)
Browse files Browse the repository at this point in the history
Fixing use of `SELECT *` in sub-select within `SELECT *` parent query as discovered in #1722.

When an instance of `SELECT *` is encountered, the query tree/plan builder now correctly considers the projected variables of any sub-select statements when deciding which variables should be projected out.

Fixes <#1722>.
  • Loading branch information
robons committed Jan 29, 2023
1 parent fbb8279 commit c5c16df
Show file tree
Hide file tree
Showing 3 changed files with 167 additions and 1 deletion.
23 changes: 23 additions & 0 deletions rdflib/plugins/sparql/algebra.py
Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,7 @@ def _findVars(x, res: Set[Variable]) -> Optional[CompValue]: # type: ignore[ret
elif x.name == "SubSelect":
if x.projection:
res.update(v.var or v.evar for v in x.projection)

return x


Expand Down Expand Up @@ -637,6 +638,7 @@ def translate(q: CompValue) -> Tuple[CompValue, List[Variable]]:
traverse(q.where, functools.partial(_findVars, res=VS))

# all query types have a where part
# depth-first recursive generation of mapped query tree
M = translateGroupGraphPattern(q.where)

aggregate = False
Expand Down Expand Up @@ -679,6 +681,12 @@ def translate(q: CompValue) -> Tuple[CompValue, List[Variable]]:

if not q.projection:
# select *

# Find the first child projection in each branch of the mapped query tree,
# then include the variables it projects out in our projected variables.
for child_projection in _find_first_child_projections(M):
VS |= set(child_projection.PV)

PV = list(VS)
else:
PV = list()
Expand Down Expand Up @@ -731,6 +739,21 @@ def translate(q: CompValue) -> Tuple[CompValue, List[Variable]]:
return M, PV


def _find_first_child_projections(M: CompValue) -> Iterable[CompValue]:
    """
    Yield the nearest ``Project`` operation found beneath ``M`` on each branch.

    Every value held by ``M`` that is itself a ``CompValue`` is inspected. A
    value named ``"Project"`` is yielded as-is and is not searched any further;
    any other ``CompValue`` is searched recursively. Values that are not
    ``CompValue`` instances are ignored. ``M`` itself is never yielded.
    """
    for node in M.values():
        if not isinstance(node, CompValue):
            continue

        if node.name == "Project":
            # Stop at the first projection on this branch.
            yield node
            continue

        yield from _find_first_child_projections(node)


# type error: Missing return statement
def simplify(n: Any) -> Optional[CompValue]: # type: ignore[return]
"""Remove joins to empty BGPs"""
Expand Down
1 change: 0 additions & 1 deletion rdflib/plugins/sparql/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -543,7 +543,6 @@ def evalDistinct(

def evalProject(ctx: QueryContext, project: CompValue):
    """
    Evaluate ``project.p`` and restrict each resulting row via ``row.project``.

    ``evalPart(ctx, project.p)`` is called immediately; the returned generator
    then lazily applies ``row.project(project.PV)`` to each row it produces.
    """
    return (
        solution.project(project.PV) for solution in evalPart(ctx, project.p)
    )


Expand Down
144 changes: 144 additions & 0 deletions test/test_sparql/test_subselect.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
from rdflib import RDFS, Graph, Literal, URIRef

# Module-level graph referenced by the tests below: it holds a single triple
# giving <http://example.com/something> the rdfs:label "Some label".
_graph_with_label = Graph()
_graph_with_label.add(
(URIRef("http://example.com/something"), RDFS.label, Literal("Some label"))
)


def test_select_star_sub_select():
    """
    Regression test for a bug where a ``SELECT *`` parent query wrapping a
    ``SELECT *`` sub-select returned no results.
    """
    # The query text is kept flush-left so the literal's content is unchanged.
    sparql = """
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT *
WHERE {
{
SELECT *
WHERE {
[] rdfs:label ?label.
}
}
}
"""
    rows = list(_graph_with_label.query(sparql))

    assert len(rows) == 1
    assert rows[0].asdict() == {"label": Literal("Some label")}


def test_select_star_multiple_sub_select_star():
    """
    A ``SELECT *`` parent must project out the variables of every sub-select
    when each of several sub-selects also uses ``SELECT *``.
    """
    # The query text is kept flush-left so the literal's content is unchanged.
    sparql = """
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT *
WHERE {
{
SELECT *
WHERE {
[] rdfs:label ?label.
}
}
{
SELECT *
WHERE {
[] rdfs:label ?label2.
}
}
}
"""
    rows = list(_graph_with_label.query(sparql))

    assert len(rows) == 1
    assert rows[0].asdict() == {
        "label": Literal("Some label"),
        "label2": Literal("Some label"),
    }


def test_select_star_multiple_sub_select_mixed_projections():
    """
    A ``SELECT *`` parent must project out all variables when one sub-select
    uses ``SELECT *`` and another names its projected variable explicitly.
    """
    # The query text is kept flush-left so the literal's content is unchanged.
    sparql = """
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT *
WHERE {
{
SELECT *
WHERE {
[] rdfs:label ?label.
}
}
{
SELECT ?label2
WHERE {
[] rdfs:label ?label2.
}
}
}
"""
    rows = list(_graph_with_label.query(sparql))

    assert len(rows) == 1
    assert rows[0].asdict() == {
        "label": Literal("Some label"),
        "label2": Literal("Some label"),
    }


def test_select_star_multiple_sub_select_defined_projections():
    """
    A ``SELECT *`` parent must project out all variables when every sub-select
    names its projected variable explicitly.
    """
    # The query text is kept flush-left so the literal's content is unchanged.
    sparql = """
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT *
WHERE {
{
SELECT ?label
WHERE {
[] rdfs:label ?label.
}
}
{
SELECT ?label2
WHERE {
[] rdfs:label ?label2.
}
}
}
"""
    rows = list(_graph_with_label.query(sparql))

    assert len(rows) == 1
    assert rows[0].asdict() == {
        "label": Literal("Some label"),
        "label2": Literal("Some label"),
    }

0 comments on commit c5c16df

Please sign in to comment.