Skip to content

Commit

Permalink
MDEV-30605: Wrong result while using index for group-by
Browse files Browse the repository at this point in the history
A GROUP BY query which uses "MIN(pk)" and has "pk<>const" in the
WHERE clause would produce wrong result when handled with "Using index
for group-by".  Here "pk" column is the table's primary key.

The problem was introduced by fix for MDEV-23634. It made the range
optimizer to not produce ranges for conditions in form "pk != const".

However, LooseScan code requires that the optimizer is able to
convert the condition on the MIN/MAX column into an equivalent range.
The range is used to locate the row that has the MIN/MAX value.

LooseScan checks this in check_group_min_max_predicates(). This fix
makes the code in that function to take into account that "pk != const"
does not produce a range.
  • Loading branch information
spetrunia committed Apr 18, 2023
1 parent 6c19609 commit be7ef65
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 18 deletions.
12 changes: 12 additions & 0 deletions mysql-test/main/group_min_max.result
Expand Up @@ -4083,5 +4083,17 @@ MIN(pk)
1
DROP TABLE t1, t2;
#
# MDEV-30605 Wrong result while using index for group-by
#
CREATE TABLE t1 (pk INT primary key, a int, key(a)) engine=innodb;
INSERT INTO t1 VALUES (1,-1),(2,8),(3,5),(4,-1),(5,10), (6,-1);
SELECT MIN(pk), a FROM t1 WHERE pk <> 1 GROUP BY a;
MIN(pk) a
4 -1
3 5
2 8
5 10
DROP TABLE t1;
#
# End of 10.5 tests
#
11 changes: 11 additions & 0 deletions mysql-test/main/group_min_max.test
Expand Up @@ -1737,6 +1737,17 @@ SELECT SQL_BUFFER_RESULT MIN(pk) FROM t1, t2;
SELECT MIN(pk) FROM t1, t2;
DROP TABLE t1, t2;

--echo #
--echo # MDEV-30605 Wrong result while using index for group-by
--echo #

CREATE TABLE t1 (pk INT primary key, a int, key(a)) engine=innodb;
INSERT INTO t1 VALUES (1,-1),(2,8),(3,5),(4,-1),(5,10), (6,-1);

SELECT MIN(pk), a FROM t1 WHERE pk <> 1 GROUP BY a;

DROP TABLE t1;

--echo #
--echo # End of 10.5 tests
--echo #
46 changes: 28 additions & 18 deletions sql/opt_range.cc
Expand Up @@ -461,7 +461,7 @@ void print_range_for_non_indexed_field(String *out, Field *field,
static void print_min_range_operator(String *out, const ha_rkey_function flag);
static void print_max_range_operator(String *out, const ha_rkey_function flag);

static bool is_field_an_unique_index(RANGE_OPT_PARAM *param, Field *field);
static bool is_field_an_unique_index(Field *field);

/*
SEL_IMERGE is a list of possible ways to do index merge, i.e. it is
Expand Down Expand Up @@ -7752,8 +7752,13 @@ SEL_TREE *Item_func_ne::get_func_mm_tree(RANGE_OPT_PARAM *param,
If this condition is a "col1<>...", where there is a UNIQUE KEY(col1),
do not construct a SEL_TREE from it. A condition that excludes just one
row in the table is not selective (unless there are only a few rows)

Note: this logic must be in sync with code in
check_group_min_max_predicates(). That function walks an Item* condition
and checks if the range optimizer would produce an equivalent range for
it.
*/
if (is_field_an_unique_index(param, field))
if (param->using_real_indexes && is_field_an_unique_index(field))
DBUG_RETURN(NULL);
DBUG_RETURN(get_ne_mm_tree(param, field, value, value));
}
Expand Down Expand Up @@ -7865,7 +7870,7 @@ SEL_TREE *Item_func_in::get_func_mm_tree(RANGE_OPT_PARAM *param,
- if there are a lot of constants, the overhead of building and
processing enormous range list is not worth it.
*/
if (is_field_an_unique_index(param, field))
if (param->using_real_indexes && is_field_an_unique_index(field))
DBUG_RETURN(0);

/* Get a SEL_TREE for "(-inf|NULL) < X < c_0" interval. */
Expand Down Expand Up @@ -8574,24 +8579,18 @@ SEL_TREE *Item_equal::get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr)
In the future we could also add "almost unique" indexes where any value is
present only in a few rows (but necessarily exactly one row)
*/
static bool is_field_an_unique_index(RANGE_OPT_PARAM *param, Field *field)
static bool is_field_an_unique_index(Field *field)
{
DBUG_ENTER("is_field_an_unique_index");

// The check for using_real_indexes is there because of the heuristics
// this function is used for.
if (param->using_real_indexes)
key_map::Iterator it(field->key_start);
uint key_no;
while ((key_no= it++) != key_map::Iterator::BITMAP_END)
{
key_map::Iterator it(field->key_start);
uint key_no;
while ((key_no= it++) != key_map::Iterator::BITMAP_END)
KEY *key_info= &field->table->key_info[key_no];
if (key_info->user_defined_key_parts == 1 &&
(key_info->flags & HA_NOSAME))
{
KEY *key_info= &field->table->key_info[key_no];
if (key_info->user_defined_key_parts == 1 &&
(key_info->flags & HA_NOSAME))
{
DBUG_RETURN(true);
}
DBUG_RETURN(true);
}
}
DBUG_RETURN(false);
Expand Down Expand Up @@ -13475,7 +13474,7 @@ cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
- (C between const_i and const_j)
- C IS NULL
- C IS NOT NULL
- C != const
- C != const (unless C is the primary key)
SA4. If Q has a GROUP BY clause, there are no other aggregate functions
except MIN and MAX. For queries with DISTINCT, aggregate functions
are allowed.
Expand Down Expand Up @@ -14358,6 +14357,17 @@ check_group_min_max_predicates(Item *cond, Item_field *min_max_arg_item,
if (!simple_pred(pred, args, &inv))
DBUG_RETURN(FALSE);

/*
Follow the logic in Item_func_ne::get_func_mm_tree(): condition
in form "tbl.primary_key <> const" is not used to produce intervals.

If the condition doesn't have an equivalent interval, this means we
fail LooseScan's condition SA3. Return FALSE to indicate this.
*/
if (pred_type == Item_func::NE_FUNC &&
is_field_an_unique_index(min_max_arg_item->field))
DBUG_RETURN(FALSE);

if (args[0] && args[1]) // this is a binary function or BETWEEN
{
DBUG_ASSERT(pred->fixed_type_handler());
Expand Down

0 comments on commit be7ef65

Please sign in to comment.