Skip to content

Commit

Permalink
opt: don't double count OR selectivity for joins
Browse files Browse the repository at this point in the history
Previously, an `OR` expression with tight constraints would have its
selectivity applied in `applyFilters` as expected, without incrementing
`numUnappliedConjuncts`. However, joins call into
`selectivityFromOredEquivalencies`, which would then increment
`numUnappliedConjuncts` for that `OR` if the disjuncts weren't all
conjunctions of equalities. This caused an additional factor of `1/3`
(`memo.unknownFilterSelectivity`) to be applied to the join row count
estimate.

This commit modifies `selectivityFromOredEquivalencies` to avoid
incrementing `numUnappliedConjuncts` for `OR` conditions with tight
constraints. This prevents the double-counting behavior. This commit
also removes a few `FiltersItem` copies from loops.

Fixes cockroachdb#88455

Release note: None
  • Loading branch information
DrewKimball committed Sep 23, 2022
1 parent d390148 commit 7d5b64c
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 3 deletions.
8 changes: 6 additions & 2 deletions pkg/sql/opt/memo/statistics_builder.go
Expand Up @@ -4181,7 +4181,11 @@ func (sb *statisticsBuilder) selectivityFromOredEquivalencies(
var conjunctSelectivity props.Selectivity

for f := 0; f < len(h.filters); f++ {
disjunction := h.filters[f]
disjunction := &h.filters[f]
if disjunction.ScalarProps().TightConstraints {
// applyFilters will have already handled this filter.
continue
}
var disjuncts []opt.ScalarExpr
if orExpr, ok := disjunction.Condition.(*OrExpr); !ok {
continue
Expand Down Expand Up @@ -4638,7 +4642,7 @@ func (sb *statisticsBuilder) buildStatsFromCheckConstraints(
filters := *constraints.(*FiltersExpr)
// For each ANDed check constraint...
for i := 0; i < len(filters); i++ {
filter := filters[i]
filter := &filters[i]
// This must be some type of comparison operation, or an OR or AND
// expression. These operations have at least 2 children.
if filter.Condition.ChildCount() < 2 {
Expand Down
55 changes: 55 additions & 0 deletions pkg/sql/opt/memo/testdata/stats/join
Expand Up @@ -2040,3 +2040,58 @@ project
│ └── 82 [type=int]
└── projections
└── '1971-10-24' [as=date:19, type=date]

# Regression test for #88455 - don't double-count selectivity for OR expressions
# with tight constraints in join ON conditions.
exec-ddl
CREATE TABLE t0_88455 (c0 INT);
----

exec-ddl
CREATE TABLE t1_88455 (c0 INT);
----

exec-ddl
ALTER TABLE t0_88455 INJECT STATISTICS '[
{
"columns": [
"c0"
],
"created_at": "2022-08-09 09:00:00.00000",
"distinct_count": 13,
"name": "__auto__",
"null_count": 0,
"row_count": 13
}
]'
----

exec-ddl
ALTER TABLE t1_88455 INJECT STATISTICS '[
{
"columns": [
"c0"
],
"created_at": "2022-08-09 09:00:00.00000",
"distinct_count": 5,
"name": "__auto__",
"null_count": 0,
"row_count": 5
}
]'
----

opt format=show-stats
SELECT * FROM t0_88455 LEFT OUTER JOIN t1_88455 ON t0_88455.c0<1 OR t0_88455.c0>1;
----
left-join (cross)
├── columns: c0:1 c0:5
├── stats: [rows=21.66667]
├── scan t0_88455
│ ├── columns: t0_88455.c0:1
│ └── stats: [rows=13, distinct(1)=13, null(1)=0]
├── scan t1_88455
│ ├── columns: t1_88455.c0:5
│ └── stats: [rows=5]
└── filters
└── (t0_88455.c0:1 < 1) OR (t0_88455.c0:1 > 1) [outer=(1), constraints=(/1: (/NULL - /0] [/2 - ]; tight)]
2 changes: 1 addition & 1 deletion pkg/sql/opt/xform/general_funcs.go
Expand Up @@ -682,7 +682,7 @@ func (c *CustomFuncs) numAllowedValues(
filters := *constraints.(*memo.FiltersExpr)
// For each ANDed check constraint...
for i := 0; i < len(filters); i++ {
filter := filters[i]
filter := &filters[i]
// This must be some type of comparison operation, or an OR or AND
// expression. These operations have at least 2 children.
if filter.Condition.ChildCount() < 2 {
Expand Down

0 comments on commit 7d5b64c

Please sign in to comment.