Skip to content

Commit 86377d0

Browse files
committed
Fixes done while working on MDEV-4119:
Fixed several optimizer issues relatied to GROUP BY: a) Refering to a SELECT column in HAVING sometimes calculated it twice, which caused problems with non determinstic functions b) Removing duplicate fields and constants from GROUP BY was done too late for "using index for group by" optimization to work c) EXPLAIN SELECT ... GROUP BY did wrongly show 'Using filesort' in some cases involving "Using index for group-by" a) was fixed by: - Changed last argument to Item::split_sum_func2() from bool to int to allow more flags - Added flag argument to Item::split_sum_func() to allow on to specify if the item was in the SELECT part - Mark all split_sum_func() calls from SELECT with SPLIT_SUM_SELECT - Changed split_sum_func2() to do nothing if called with an argument that is not a sum function and doesn't include sum functions, if we are not an argument to SELECT. This ensures that in a case like select a*sum(b) as f1 from t1 where a=1 group by c having f1 <= 10; That 'a' in the SELECT part is stored as a reference in the temporary table togeher with sum(b) while the 'a' in having isn't (not needed as 'a' is already a reference to a column in the result) b) was fixed by: - Added an extra remove_const() pass for GROUP BY arguments before make_join_statistics() in case of one table SELECT. This allowes get_best_group_min_max() to optimize things better. c) was fixed by: - Added test for group by optimization in JOIN::exec_inner for select->quick->get_type() == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX item.cc: - Simplifed Item::split_sum_func2() - Split test to make them faster and easier to read - Changed last argument to Item::split_sum_func2() from bool to int to allow more flags - Added flag argument to Item::split_sum_func() to allow on to specify if the item was in the SELECT part - Changed split_sum_func2() to do nothing if called with an argument that is not a sum function and doesn't include sum functions, if we are not an argument to SELECT. opt_range.cc: - Simplified get_best_group_min_max() by calcuating first how many group_by elements. - Use join->group instead of join->group_list to test if group by, as join->group_list may be NULL if everything was optimized away. sql_select.cc: - Added an extra remove_const() pass for GROUP BY arguments before make_join_statistics() in case of one table SELECT. - Use group instead of group_list to test if group by, as group_list may be NULL if everything was optimized away. - Moved printing of "Error in remove_const" to remove_const() instead of having it in caller. - Simplified some if tests by re-ordering code. - update_depend_map_for_order() and remove_const() fixed to handle the case where make_join_statistics() has not yet been called (join->join_tab is 0 in this case)
1 parent 79af0b3 commit 86377d0

File tree

14 files changed

+329
-136
lines changed

14 files changed

+329
-136
lines changed

mysql-test/r/group_by.result

Lines changed: 85 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -522,6 +522,7 @@ NULL 9
522522
3
523523
b 1
524524
drop table t1;
525+
set big_tables=0;
525526
create table t1 (a int not null, b int not null);
526527
insert into t1 values (1,1),(1,2),(3,1),(3,2),(2,2),(2,1);
527528
create table t2 (a int not null, b int not null, key(a));
@@ -659,7 +660,10 @@ insert into t1 (a,b) values (1,2),(1,3),(2,5);
659660
select a, 0.1*0+1 r2, sum(1) r1 from t1 where a = 1 group by a having r1>1 and r2=1;
660661
a r2 r1
661662
1 1.0 2
662-
select a, round(rand(100)*10) r2, sum(1) r1 from t1 where a = 1 group by a having r1>1 and r2<=2;
663+
select a, round(rand(100)*10) r2, sum(1) r1 from t1 where a = 1 group by a having r1>1 and r2<=2;
664+
a r2 r1
665+
1 2 2
666+
select a, round(rand(100)*10) r2, sum(1) r1 from t1 group by a having r1>1 and r2<=2;
663667
a r2 r1
664668
1 2 2
665669
select a,sum(b) from t1 where a=1 group by c;
@@ -668,6 +672,12 @@ a sum(b)
668672
select a*sum(b) from t1 where a=1 group by c;
669673
a*sum(b)
670674
5
675+
select a*sum(b) as f1 from t1 where a=1 group by c having f1 <= 10;
676+
f1
677+
5
678+
select a,a*sum(b) as f1 from t1 where a=1 group by c having a*sum(b)+0 <= 10;
679+
a f1
680+
1 5
671681
select sum(a)*sum(b) from t1 where a=1 group by c;
672682
sum(a)*sum(b)
673683
10
@@ -1988,12 +1998,12 @@ SHOW SESSION STATUS LIKE 'Sort_scan%';
19881998
Variable_name Value
19891999
Sort_scan 0
19902000
EXPLAIN SELECT SQL_BIG_RESULT col1 AS field1, col1 AS field2
1991-
FROM t1 GROUP BY field1, field2;;
2001+
FROM t1 GROUP BY field1, field2;
19922002
id select_type table type possible_keys key key_len ref rows Extra
19932003
1 SIMPLE t1 index NULL idx 5 NULL 20 Using index; Using filesort
19942004
FLUSH STATUS;
19952005
SELECT SQL_BIG_RESULT col1 AS field1, col1 AS field2
1996-
FROM t1 GROUP BY field1, field2;;
2006+
FROM t1 GROUP BY field1, field2;
19972007
field1 field2
19982008
1 1
19992009
2 2
@@ -2082,6 +2092,58 @@ f1 f2
20822092
19 19
20832093
20 20
20842094
explain
2095+
select col1 f1, col1 f2 from t1 group by f1;
2096+
id select_type table type possible_keys key key_len ref rows Extra
2097+
1 SIMPLE t1 index NULL idx 5 NULL 20 Using index
2098+
select col1 f1, col1 f2 from t1 group by f1;
2099+
f1 f2
2100+
1 1
2101+
2 2
2102+
3 3
2103+
4 4
2104+
5 5
2105+
6 6
2106+
7 7
2107+
8 8
2108+
9 9
2109+
10 10
2110+
11 11
2111+
12 12
2112+
13 13
2113+
14 14
2114+
15 15
2115+
16 16
2116+
17 17
2117+
18 18
2118+
19 19
2119+
20 20
2120+
explain
2121+
select col1 f1, col1 f2 from t1 group by f1, f2;
2122+
id select_type table type possible_keys key key_len ref rows Extra
2123+
1 SIMPLE t1 index NULL idx 5 NULL 20 Using index
2124+
select col1 f1, col1 f2 from t1 group by f1, f2;
2125+
f1 f2
2126+
1 1
2127+
2 2
2128+
3 3
2129+
4 4
2130+
5 5
2131+
6 6
2132+
7 7
2133+
8 8
2134+
9 9
2135+
10 10
2136+
11 11
2137+
12 12
2138+
13 13
2139+
14 14
2140+
15 15
2141+
16 16
2142+
17 17
2143+
18 18
2144+
19 19
2145+
20 20
2146+
explain
20852147
select col1 f1, col1 f2 from t1 group by f2 order by f2, f1;
20862148
id select_type table type possible_keys key key_len ref rows Extra
20872149
1 SIMPLE t1 index NULL idx 5 NULL 20 Using index
@@ -2141,6 +2203,22 @@ INSERT INTO t2(col1, col2) VALUES
21412203
(1,20),(2,19),(3,18),(4,17),(5,16),(6,15),(7,14),(8,13),(9,12),(10,11),
21422204
(11,10),(12,9),(13,8),(14,7),(15,6),(16,5),(17,4),(18,3),(19,2),(20,1);
21432205
explain
2206+
select col1 f1, col2 f2, col1 f3 from t2 group by f1;
2207+
id select_type table type possible_keys key key_len ref rows Extra
2208+
1 SIMPLE t2 range NULL idx 5 NULL 7 Using index for group-by
2209+
explain
2210+
select SQL_BIG_RESULT col1 f1, col2 f2, col1 f3 from t2 group by f1;
2211+
id select_type table type possible_keys key key_len ref rows Extra
2212+
1 SIMPLE t2 range NULL idx 5 NULL 7 Using index for group-by
2213+
explain
2214+
select col1 f1, col2 f2, col1 f3 from t2 group by f1, f2;
2215+
id select_type table type possible_keys key key_len ref rows Extra
2216+
1 SIMPLE t2 index NULL idx 10 NULL 20 Using index
2217+
explain
2218+
select col1 f1, col1 f2 from t2 group by f1, 1+1;
2219+
id select_type table type possible_keys key key_len ref rows Extra
2220+
1 SIMPLE t2 range NULL idx 5 NULL 7 Using index for group-by
2221+
explain
21442222
select col1 f1, col2 f2, col1 f3 from t2 group by f1, f2, f3+0;
21452223
id select_type table type possible_keys key key_len ref rows Extra
21462224
1 SIMPLE t2 index NULL idx 10 NULL 20 Using index; Using temporary; Using filesort
@@ -2167,6 +2245,10 @@ f1 f2 f3
21672245
19 2 19
21682246
20 1 20
21692247
explain
2248+
select col1 f1, col2 f2, col1 f3 from t2 order by f1,f2;
2249+
id select_type table type possible_keys key key_len ref rows Extra
2250+
1 SIMPLE t2 index NULL idx 10 NULL 20 Using index
2251+
explain
21702252
select col1 f1, col2 f2, col1 f3 from t2 order by f1, f2, f3+0;
21712253
id select_type table type possible_keys key key_len ref rows Extra
21722254
1 SIMPLE t2 index NULL idx 10 NULL 20 Using index; Using filesort

mysql-test/t/group_by.test

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -394,6 +394,7 @@ select a,count(*) from t1 group by a;
394394
set big_tables=1;
395395
select a,count(*) from t1 group by a;
396396
drop table t1;
397+
set big_tables=0;
397398

398399
#
399400
# Test of GROUP BY ... ORDER BY NULL optimization
@@ -485,9 +486,12 @@ create table t1 (a integer, b integer, c integer);
485486
insert into t1 (a,b) values (1,2),(1,3),(2,5);
486487
select a, 0.1*0+1 r2, sum(1) r1 from t1 where a = 1 group by a having r1>1 and r2=1;
487488
# rand(100)*10 will be < 2 only for the first row (of 6)
488-
select a, round(rand(100)*10) r2, sum(1) r1 from t1 where a = 1 group by a having r1>1 and r2<=2;
489+
select a, round(rand(100)*10) r2, sum(1) r1 from t1 where a = 1 group by a having r1>1 and r2<=2;
490+
select a, round(rand(100)*10) r2, sum(1) r1 from t1 group by a having r1>1 and r2<=2;
489491
select a,sum(b) from t1 where a=1 group by c;
490492
select a*sum(b) from t1 where a=1 group by c;
493+
select a*sum(b) as f1 from t1 where a=1 group by c having f1 <= 10;
494+
select a,a*sum(b) as f1 from t1 where a=1 group by c having a*sum(b)+0 <= 10;
491495
select sum(a)*sum(b) from t1 where a=1 group by c;
492496
select a,sum(b) from t1 where a=1 group by c having a=1;
493497
select a as d,sum(b) from t1 where a=1 group by c having d=1;
@@ -1346,9 +1350,9 @@ let $query=SELECT SQL_BIG_RESULT col1 AS field1, col1 AS field2
13461350
FROM t1 GROUP BY field1, field2;
13471351

13481352
# Needs to be range to exercise bug
1349-
--eval EXPLAIN $query;
1353+
--eval EXPLAIN $query
13501354
FLUSH STATUS;
1351-
--eval $query;
1355+
--eval $query
13521356
SHOW SESSION STATUS LIKE 'Sort_scan%';
13531357

13541358
CREATE VIEW v1 AS SELECT * FROM t1;
@@ -1368,6 +1372,14 @@ explain
13681372
select col1 f1, col1 f2 from t1 order by f2, f1+0;
13691373
select col1 f1, col1 f2 from t1 order by f2, f1+0;
13701374

1375+
explain
1376+
select col1 f1, col1 f2 from t1 group by f1;
1377+
select col1 f1, col1 f2 from t1 group by f1;
1378+
1379+
explain
1380+
select col1 f1, col1 f2 from t1 group by f1, f2;
1381+
select col1 f1, col1 f2 from t1 group by f1, f2;
1382+
13711383
explain
13721384
select col1 f1, col1 f2 from t1 group by f2 order by f2, f1;
13731385
select col1 f1, col1 f2 from t1 group by f2 order by f2, f1;
@@ -1376,6 +1388,7 @@ explain
13761388
select col1 f1, col1 f2 from t1 group by f1, f2 order by f2, f1;
13771389
select col1 f1, col1 f2 from t1 group by f1, f2 order by f2, f1;
13781390

1391+
13791392
CREATE TABLE t2(
13801393
col1 int,
13811394
col2 int,
@@ -1385,10 +1398,21 @@ INSERT INTO t2(col1, col2) VALUES
13851398
(1,20),(2,19),(3,18),(4,17),(5,16),(6,15),(7,14),(8,13),(9,12),(10,11),
13861399
(11,10),(12,9),(13,8),(14,7),(15,6),(16,5),(17,4),(18,3),(19,2),(20,1);
13871400

1401+
explain
1402+
select col1 f1, col2 f2, col1 f3 from t2 group by f1;
1403+
explain
1404+
select SQL_BIG_RESULT col1 f1, col2 f2, col1 f3 from t2 group by f1;
1405+
explain
1406+
select col1 f1, col2 f2, col1 f3 from t2 group by f1, f2;
1407+
explain
1408+
select col1 f1, col1 f2 from t2 group by f1, 1+1;
1409+
13881410
explain
13891411
select col1 f1, col2 f2, col1 f3 from t2 group by f1, f2, f3+0;
13901412
select col1 f1, col2 f2, col1 f3 from t2 group by f1, f2, f3+0;
13911413

1414+
explain
1415+
select col1 f1, col2 f2, col1 f3 from t2 order by f1,f2;
13921416
explain
13931417
select col1 f1, col2 f2, col1 f3 from t2 order by f1, f2, f3+0;
13941418
select col1 f1, col2 f2, col1 f3 from t2 order by f1, f2, f3+0;

sql/item.cc

Lines changed: 75 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1705,69 +1705,98 @@ class Item_aggregate_ref : public Item_ref
17051705
17061706
@param thd Thread handler
17071707
@param ref_pointer_array Pointer to array of reference fields
1708-
@param fields All fields in select
1708+
@param fields All fields in select
17091709
@param ref Pointer to item
1710-
@param skip_registered <=> function be must skipped for registered
1711-
SUM items
1710+
@param split_flags Zero or more of the following flags
1711+
SPLIT_FUNC_SKIP_REGISTERED:
1712+
Function be must skipped for registered SUM
1713+
SUM items
1714+
SPLIT_FUNC_SELECT
1715+
We are called on the select level and have to
1716+
register items operated on sum function
17121717
17131718
@note
1714-
This is from split_sum_func2() for items that should be split
1715-
17161719
All found SUM items are added FIRST in the fields list and
17171720
we replace the item with a reference.
17181721
1722+
If this is an item in the SELECT list then we also have to split out
1723+
all arguments to functions used together with the sum function.
1724+
For example in case of SELECT A*sum(B) we have to split out both
1725+
A and sum(B).
1726+
This is not needed for ORDER BY, GROUP BY or HAVING as all references
1727+
to items in the select list are already of type REF
1728+
17191729
thd->fatal_error() may be called if we are out of memory
17201730
*/
17211731

17221732
void Item::split_sum_func2(THD *thd, Item **ref_pointer_array,
17231733
List<Item> &fields, Item **ref,
1724-
bool skip_registered)
1734+
uint split_flags)
17251735
{
1726-
/* An item of type Item_sum is registered <=> ref_by != 0 */
1727-
if (type() == SUM_FUNC_ITEM && skip_registered &&
1728-
((Item_sum *) this)->ref_by)
1729-
return;
1730-
if ((type() != SUM_FUNC_ITEM && with_sum_func) ||
1731-
(type() == FUNC_ITEM &&
1732-
(((Item_func *) this)->functype() == Item_func::ISNOTNULLTEST_FUNC ||
1733-
((Item_func *) this)->functype() == Item_func::TRIG_COND_FUNC)))
1736+
if (unlikely(type() == SUM_FUNC_ITEM))
17341737
{
1735-
/* Will split complicated items and ignore simple ones */
1736-
split_sum_func(thd, ref_pointer_array, fields);
1738+
/* An item of type Item_sum is registered if ref_by != 0 */
1739+
if ((split_flags & SPLIT_SUM_SKIP_REGISTERED) &&
1740+
((Item_sum *) this)->ref_by)
1741+
return;
17371742
}
1738-
else if ((type() == SUM_FUNC_ITEM || (used_tables() & ~PARAM_TABLE_BIT)) &&
1739-
type() != SUBSELECT_ITEM &&
1740-
(type() != REF_ITEM ||
1741-
((Item_ref*)this)->ref_type() == Item_ref::VIEW_REF))
1743+
else
17421744
{
1743-
/*
1744-
Replace item with a reference so that we can easily calculate
1745-
it (in case of sum functions) or copy it (in case of fields)
1746-
1747-
The test above is to ensure we don't do a reference for things
1748-
that are constants (PARAM_TABLE_BIT is in effect a constant)
1749-
or already referenced (for example an item in HAVING)
1750-
Exception is Item_direct_view_ref which we need to convert to
1751-
Item_ref to allow fields from view being stored in tmp table.
1752-
*/
1753-
Item_aggregate_ref *item_ref;
1754-
uint el= fields.elements;
1755-
/*
1756-
If this is an item_ref, get the original item
1757-
This is a safety measure if this is called for things that is
1758-
already a reference.
1759-
*/
1760-
Item *real_itm= real_item();
1745+
/* Not a SUM() function */
1746+
if (unlikely((!with_sum_func && !(split_flags & SPLIT_SUM_SELECT))))
1747+
{
1748+
/*
1749+
This is not a SUM function and there are no SUM functions inside.
1750+
Nothing more to do.
1751+
*/
1752+
return;
1753+
}
1754+
if (likely(with_sum_func ||
1755+
(type() == FUNC_ITEM &&
1756+
(((Item_func *) this)->functype() ==
1757+
Item_func::ISNOTNULLTEST_FUNC ||
1758+
((Item_func *) this)->functype() ==
1759+
Item_func::TRIG_COND_FUNC))))
1760+
{
1761+
/* Will call split_sum_func2() for all items */
1762+
split_sum_func(thd, ref_pointer_array, fields, split_flags);
1763+
return;
1764+
}
17611765

1762-
ref_pointer_array[el]= real_itm;
1763-
if (!(item_ref= new Item_aggregate_ref(&thd->lex->current_select->context,
1764-
ref_pointer_array + el, 0, name)))
1765-
return; // fatal_error is set
1766-
if (type() == SUM_FUNC_ITEM)
1767-
item_ref->depended_from= ((Item_sum *) this)->depended_from();
1768-
fields.push_front(real_itm);
1769-
thd->change_item_tree(ref, item_ref);
1766+
if (unlikely((!(used_tables() & ~PARAM_TABLE_BIT) ||
1767+
type() == SUBSELECT_ITEM ||
1768+
(type() == REF_ITEM &&
1769+
((Item_ref*)this)->ref_type() != Item_ref::VIEW_REF))))
1770+
return;
17701771
}
1772+
1773+
/*
1774+
Replace item with a reference so that we can easily calculate
1775+
it (in case of sum functions) or copy it (in case of fields)
1776+
1777+
The test above is to ensure we don't do a reference for things
1778+
that are constants (PARAM_TABLE_BIT is in effect a constant)
1779+
or already referenced (for example an item in HAVING)
1780+
Exception is Item_direct_view_ref which we need to convert to
1781+
Item_ref to allow fields from view being stored in tmp table.
1782+
*/
1783+
Item_aggregate_ref *item_ref;
1784+
uint el= fields.elements;
1785+
/*
1786+
If this is an item_ref, get the original item
1787+
This is a safety measure if this is called for things that is
1788+
already a reference.
1789+
*/
1790+
Item *real_itm= real_item();
1791+
1792+
ref_pointer_array[el]= real_itm;
1793+
if (!(item_ref= new Item_aggregate_ref(&thd->lex->current_select->context,
1794+
ref_pointer_array + el, 0, name)))
1795+
return; // fatal_error is set
1796+
if (type() == SUM_FUNC_ITEM)
1797+
item_ref->depended_from= ((Item_sum *) this)->depended_from();
1798+
fields.push_front(real_itm);
1799+
thd->change_item_tree(ref, item_ref);
17711800
}
17721801

17731802

sql/item.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,10 @@ char_to_byte_length_safe(uint32 char_length_arg, uint32 mbmaxlen_arg)
7474
}
7575

7676

77+
/* Bits for the split_sum_func() function */
78+
#define SPLIT_SUM_SKIP_REGISTERED 1 /* Skip registered funcs */
79+
#define SPLIT_SUM_SELECT 2 /* SELECT item; Split all parts */
80+
7781
/*
7882
"Declared Type Collation"
7983
A combination of collation and its derivation.
@@ -1169,10 +1173,10 @@ class Item: public Type_std_attributes
11691173
return false;
11701174
}
11711175
virtual void split_sum_func(THD *thd, Item **ref_pointer_array,
1172-
List<Item> &fields) {}
1176+
List<Item> &fields, uint flags) {}
11731177
/* Called for items that really have to be split */
11741178
void split_sum_func2(THD *thd, Item **ref_pointer_array, List<Item> &fields,
1175-
Item **ref, bool skip_registered);
1179+
Item **ref, uint flags);
11761180
virtual bool get_date(MYSQL_TIME *ltime, ulonglong fuzzydate);
11771181
bool get_time(MYSQL_TIME *ltime)
11781182
{ return get_date(ltime, TIME_TIME_ONLY | TIME_INVALID_DATES); }

0 commit comments

Comments
 (0)