Skip to content

Commit

Permalink
MDEV-20057 Distinct SUM on CROSS JOIN and grouped returns wrong result
Browse files Browse the repository at this point in the history
SELECT DISTINCT did not work with expressions with sum functions.
Distinct was only done on the values stored in the intermediate temporary
tables, which only stored the value of each sum function.

In other words:
SELECT DISTINCT sum(a),sum(b),avg(c) ... worked.
SELECT DISTINCT sum(a),sum(b) > 2,sum(c)+sum(d) would not work.

The later query would do ONLY apply distinct on the sum(a) part.

Reviewer: Sergei Petrunia <sergey@mariadb.com>


This was fixed by extending remove_dup_with_hash_index() and
remove_dup_with_compare() to take into account the columns in the result
list that where not stored in the temporary table.

Note that in many cases the above dup removal functions are not used as
the optimizer may be able to either remove duplicates early or it will
discover that duplicate remove is not needed. The later happens for
example if the group by fields is part of the result.

Other things:
- Backported from 11.0 the change of Sort_param.tmp_buffer from char* to
  String.
- Changed Type_handler::make_sort_key() to take String as a parameter
  instead of Sort_param. This was done to allow make_sort_key() functions
  to be reused by distinct elimination functions.
  This makes Type_handler_string_result::make_sort_key() similar to code
  in 11.0
- Simplied error handling in remove_dup_with_compare() to remove code
  duplication.
  • Loading branch information
montywi committed Feb 17, 2023
1 parent bd0d7ea commit 476b24d
Show file tree
Hide file tree
Showing 6 changed files with 242 additions and 71 deletions.
37 changes: 37 additions & 0 deletions mysql-test/main/distinct.result
Expand Up @@ -1070,3 +1070,40 @@ UNION
1
drop table t1;
End of 5.5 tests
#
# MDEV-20057 Distinct SUM on CROSS JOIN and grouped returns wrong result
#
create table t1 (c int, d int);
insert into t1 values (5, 1), (0, 3);
select distinct sum(distinct 1), sum(t1.d) > 2 from (t1 e join t1) group by t1.c;
sum(distinct 1) sum(t1.d) > 2
1 1
1 0
select distinct sum(distinct 1), sum(t1.d) > 2, t1.c from (t1 e join t1) group by t1.c;
sum(distinct 1) sum(t1.d) > 2 c
1 1 0
1 0 5
insert into t1 values (6,6);
select distinct sum(distinct 1), sum(t1.d) > 5 from (t1 e join t1) group by t1.c;
sum(distinct 1) sum(t1.d) > 5
1 1
1 0
select distinct sum(distinct 1), sum(t1.d) > 5, t1.c from (t1 e join t1) group by t1.c;
sum(distinct 1) sum(t1.d) > 5 c
1 1 0
1 0 5
1 1 6
set @@sort_buffer_size=1024;
insert into t1 select -seq,-seq from seq_1_to_100;
select distinct sum(distinct 1), sum(t1.d) > 2, length(group_concat(t1.d)) > 1000 from (t1 e join t1) group by t1.c having t1.c > -2 ;
sum(distinct 1) sum(t1.d) > 2 length(group_concat(t1.d)) > 1000
1 0 0
1 1 0
select distinct sum(distinct 1), sum(t1.d) > 2, length(group_concat(t1.d)) > 1000,t1.c from (t1 e join t1) group by t1.c having t1.c > -2;
sum(distinct 1) sum(t1.d) > 2 length(group_concat(t1.d)) > 1000 c
1 0 0 -1
1 1 0 0
1 1 0 5
1 1 0 6
drop table t1;
# End of 10.4 tests
23 changes: 23 additions & 0 deletions mysql-test/main/distinct.test
Expand Up @@ -4,6 +4,7 @@
#

--source include/default_optimizer_switch.inc
--source include/have_sequence.inc
--disable_warnings
drop table if exists t1,t2,t3;
--enable_warnings
Expand Down Expand Up @@ -818,3 +819,25 @@ UNION
drop table t1;

--echo End of 5.5 tests

--echo #
--echo # MDEV-20057 Distinct SUM on CROSS JOIN and grouped returns wrong result
--echo #

create table t1 (c int, d int);
insert into t1 values (5, 1), (0, 3);
select distinct sum(distinct 1), sum(t1.d) > 2 from (t1 e join t1) group by t1.c;
select distinct sum(distinct 1), sum(t1.d) > 2, t1.c from (t1 e join t1) group by t1.c;

insert into t1 values (6,6);
select distinct sum(distinct 1), sum(t1.d) > 5 from (t1 e join t1) group by t1.c;
select distinct sum(distinct 1), sum(t1.d) > 5, t1.c from (t1 e join t1) group by t1.c;

# Force usage of remove_dup_with_compare() algorithm
set @@sort_buffer_size=1024;
insert into t1 select -seq,-seq from seq_1_to_100;
select distinct sum(distinct 1), sum(t1.d) > 2, length(group_concat(t1.d)) > 1000 from (t1 e join t1) group by t1.c having t1.c > -2 ;
select distinct sum(distinct 1), sum(t1.d) > 2, length(group_concat(t1.d)) > 1000,t1.c from (t1 e join t1) group by t1.c having t1.c > -2;
drop table t1;

--echo # End of 10.4 tests
42 changes: 20 additions & 22 deletions sql/filesort.cc
Expand Up @@ -159,7 +159,7 @@ SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort,
MYSQL_FILESORT_START(table->s->db.str, table->s->table_name.str);
DEBUG_SYNC(thd, "filesort_start");

if (!(sort= new SORT_INFO))
if (!(sort= new SORT_INFO)) // Note that this is not automatically freed!
return 0;

if (subselect && subselect->filesort_buffer.is_allocated())
Expand All @@ -186,10 +186,6 @@ SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort,
sort->addon_buf= param.addon_buf;
sort->addon_field= param.addon_field;
sort->unpack= unpack_addon_fields;
if (multi_byte_charset &&
!(param.tmp_buffer= (char*) my_malloc(param.sort_length,
MYF(MY_WME | MY_THREAD_SPECIFIC))))
goto err;

if (select && select->quick)
thd->inc_status_sort_range();
Expand Down Expand Up @@ -254,6 +250,9 @@ SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort,
tracker->report_sort_buffer_size(sort->sort_buffer_size());
}

if (param.tmp_buffer.alloc(param.sort_length))
goto err;

if (open_cached_file(&buffpek_pointers,mysql_tmpdir,TEMP_PREFIX,
DISK_BUFFER_SIZE, MYF(MY_WME)))
goto err;
Expand Down Expand Up @@ -337,7 +336,7 @@ SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort,
error= 0;

err:
my_free(param.tmp_buffer);
param.tmp_buffer.free();
if (!subselect || !subselect->is_uncacheable())
{
sort->free_sort_buffer();
Expand Down Expand Up @@ -977,17 +976,15 @@ static inline void store_length(uchar *to, uint length, uint pack_length)
void
Type_handler_string_result::make_sort_key(uchar *to, Item *item,
const SORT_FIELD_ATTR *sort_field,
Sort_param *param) const
String *tmp_buffer) const
{
CHARSET_INFO *cs= item->collation.collation;
bool maybe_null= item->maybe_null;

if (maybe_null)
*to++= 1;
char *tmp_buffer= param->tmp_buffer ? param->tmp_buffer : (char*) to;
String tmp(tmp_buffer, param->tmp_buffer ? param->sort_length :
sort_field->length, cs);
String *res= item->str_result(&tmp);

Binary_string *res= item->str_result(tmp_buffer);
if (!res)
{
if (maybe_null)
Expand Down Expand Up @@ -1015,11 +1012,11 @@ Type_handler_string_result::make_sort_key(uchar *to, Item *item,
size_t tmp_length=
#endif
cs->coll->strnxfrm(cs, to, sort_field->length,
item->max_char_length() *
cs->strxfrm_multiply,
(uchar*) res->ptr(), res->length(),
MY_STRXFRM_PAD_WITH_SPACE |
MY_STRXFRM_PAD_TO_MAXLEN);
item->max_char_length() *
cs->strxfrm_multiply,
(uchar*) res->ptr(), res->length(),
MY_STRXFRM_PAD_WITH_SPACE |
MY_STRXFRM_PAD_TO_MAXLEN);
DBUG_ASSERT(tmp_length == sort_field->length);
}
else
Expand Down Expand Up @@ -1050,7 +1047,7 @@ Type_handler_string_result::make_sort_key(uchar *to, Item *item,
void
Type_handler_int_result::make_sort_key(uchar *to, Item *item,
const SORT_FIELD_ATTR *sort_field,
Sort_param *param) const
String *tmp_buffer) const
{
longlong value= item->val_int_result();
make_sort_key_longlong(to, item->maybe_null, item->null_value,
Expand All @@ -1061,7 +1058,7 @@ Type_handler_int_result::make_sort_key(uchar *to, Item *item,
void
Type_handler_temporal_result::make_sort_key(uchar *to, Item *item,
const SORT_FIELD_ATTR *sort_field,
Sort_param *param) const
String *tmp_buffer) const
{
MYSQL_TIME buf;
// This is a temporal type. No nanoseconds. Rounding mode is not important.
Expand All @@ -1083,7 +1080,7 @@ Type_handler_temporal_result::make_sort_key(uchar *to, Item *item,
void
Type_handler_timestamp_common::make_sort_key(uchar *to, Item *item,
const SORT_FIELD_ATTR *sort_field,
Sort_param *param) const
String *tmp_buffer) const
{
THD *thd= current_thd;
uint binlen= my_timestamp_binary_length(item->decimals);
Expand Down Expand Up @@ -1147,7 +1144,7 @@ Type_handler::make_sort_key_longlong(uchar *to,
void
Type_handler_decimal_result::make_sort_key(uchar *to, Item *item,
const SORT_FIELD_ATTR *sort_field,
Sort_param *param) const
String *tmp_buffer) const
{
my_decimal dec_buf, *dec_val= item->val_decimal_result(&dec_buf);
if (item->maybe_null)
Expand All @@ -1167,7 +1164,7 @@ Type_handler_decimal_result::make_sort_key(uchar *to, Item *item,
void
Type_handler_real_result::make_sort_key(uchar *to, Item *item,
const SORT_FIELD_ATTR *sort_field,
Sort_param *param) const
String *tmp_buffer) const
{
double value= item->val_result();
if (item->maybe_null)
Expand Down Expand Up @@ -1205,7 +1202,8 @@ static void make_sortkey(Sort_param *param, uchar *to, uchar *ref_pos)
else
{ // Item
sort_field->item->type_handler()->make_sort_key(to, sort_field->item,
sort_field, param);
sort_field,
&param->tmp_buffer);
if ((maybe_null= sort_field->item->maybe_null))
to++;
}
Expand Down

0 comments on commit 476b24d

Please sign in to comment.