Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parquet: fix crash when calling SetIgnoredFields() after SetAttributeFilter() (fixes qgis/QGIS#53301) #7882

Merged
merged 1 commit into from
Jun 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions autotest/ogr/ogr_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -1339,6 +1339,49 @@ def test_ogr_parquet_attribute_filter(filter):
assert lyr.GetFeatureCount() == ref_fc


def test_ogr_parquet_attribute_filter_and_then_ignored_fields():

ds = ogr.Open("data/parquet/test.parquet")
lyr = ds.GetLayer(0)

assert lyr.SetAttributeFilter("string = 'd'") == ogr.OGRERR_NONE

ignored_fields = []
lyr_defn = lyr.GetLayerDefn()
for i in range(lyr_defn.GetFieldCount()):
name = lyr_defn.GetFieldDefn(i).GetName()
if name != "string":
ignored_fields.append(name)
assert lyr.SetIgnoredFields(ignored_fields) == ogr.OGRERR_NONE

assert lyr.GetFeatureCount() == 1

with pytest.raises(
Exception,
match=r"Constraint on field string cannot be applied due to it being ignored",
):
lyr.SetIgnoredFields(["string"])
assert lyr.GetFeatureCount() == 0


def test_ogr_parquet_ignored_fields_and_then_attribute_filter():

ds = ogr.Open("data/parquet/test.parquet")
lyr = ds.GetLayer(0)

ignored_fields = []
lyr_defn = lyr.GetLayerDefn()
for i in range(lyr_defn.GetFieldCount()):
name = lyr_defn.GetFieldDefn(i).GetName()
if name != "string":
ignored_fields.append(name)
assert lyr.SetIgnoredFields(ignored_fields) == ogr.OGRERR_NONE

assert lyr.SetAttributeFilter("string = 'd'") == ogr.OGRERR_NONE

assert lyr.GetFeatureCount() == 1


def test_ogr_parquet_attribute_filter_and_spatial_filter():

filter = "int8 != 0"
Expand Down
9 changes: 6 additions & 3 deletions ogr/ogrsf_frmts/arrow_common/ogr_arrow.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,9 @@ class OGRArrowLayer CPL_NON_FINAL
Real,
String,
};
int iField{};
int iArrayIdx{};
int nOperation{};
int iField = -1; // index to a OGRFeatureDefn OGRField
int iArrayIdx = -1; // index to m_poBatchColumns
int nOperation = -1; // SWQ_xxxx
Type eType{};
OGRField sValue{};
std::string osValue{};
Expand Down Expand Up @@ -179,6 +179,9 @@ class OGRArrowLayer CPL_NON_FINAL
m_poBatchColumns = m_poBatch->columns();
}

// Refreshes Constraint.iArrayIdx from iField. To be called by SetIgnoredFields()
void ComputeConstraintsArrayIdx();

virtual bool GetFastExtent(int iGeomField, OGREnvelope *psExtent) const;
static OGRErr GetExtentFromMetadata(const CPLJSONObject &oJSONDef,
OGREnvelope *psExtent);
Expand Down
35 changes: 29 additions & 6 deletions ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2156,28 +2156,42 @@ static bool FillTargetValueFromSrcExpr(const OGRFieldDefn *poFieldDefn,
}

/***********************************************************************/
/* ExploreExprNode() */
/* ComputeConstraintsArrayIdx() */
/***********************************************************************/

inline void OGRArrowLayer::ExploreExprNode(const swq_expr_node *poNode)
inline void OGRArrowLayer::ComputeConstraintsArrayIdx()
{
const auto AddConstraint = [this](Constraint &constraint)
for (auto &constraint : m_asAttributeFilterConstraints)
{
if (m_bIgnoredFields)
{
constraint.iArrayIdx =
m_anMapFieldIndexToArrayIndex[constraint.iField];
if (constraint.iArrayIdx < 0)
return;
{
CPLError(CE_Failure, CPLE_AppDefined,
"Constraint on field %s cannot be applied due to "
"it being ignored",
m_poFeatureDefn->GetFieldDefn(constraint.iField)
->GetNameRef());
}
}
else
{
constraint.iArrayIdx =
m_anMapFieldIndexToArrowColumn[constraint.iField][0];
}
}
}

m_asAttributeFilterConstraints.emplace_back(constraint);
};
/***********************************************************************/
/* ExploreExprNode() */
/***********************************************************************/

inline void OGRArrowLayer::ExploreExprNode(const swq_expr_node *poNode)
{
const auto AddConstraint = [this](Constraint &constraint)
{ m_asAttributeFilterConstraints.emplace_back(constraint); };

if (poNode->eNodeType == SNT_OPERATION && poNode->nOperation == SWQ_AND &&
poNode->nSubExprCount == 2)
Expand Down Expand Up @@ -2299,6 +2313,7 @@ inline OGRErr OGRArrowLayer::SetAttributeFilter(const char *pszFilter)
static_cast<swq_expr_node *>(m_poAttrQuery->GetSWQExpr());
poNode->ReplaceBetweenByGEAndLERecurse();
ExploreExprNode(poNode);
ComputeConstraintsArrayIdx();
}
}

Expand Down Expand Up @@ -2445,6 +2460,14 @@ inline bool OGRArrowLayer::SkipToNextFeatureDueToAttributeFilter() const
{
for (const auto &constraint : m_asAttributeFilterConstraints)
{
if (constraint.iArrayIdx < 0)
{
// can happen if ignoring a field that is needed by the
// attribute filter. ComputeConstraintsArrayIdx() will have
// warned about that
continue;
}

const arrow::Array *array =
m_poBatchColumns[constraint.iArrayIdx].get();

Expand Down
2 changes: 2 additions & 0 deletions ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1209,6 +1209,8 @@ OGRErr OGRParquetLayer::SetIgnoredFields(const char **papszFields)
}
}

ComputeConstraintsArrayIdx();

// Full invalidation
m_iRecordBatch = -1;
m_bSingleBatch = false;
Expand Down