-
Notifications
You must be signed in to change notification settings - Fork 1.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
MDEV-6080: Allowing storage engine to shortcut group by queries
This task is to allow storage engines that can execute GROUP BY or summary queries efficiently to intercept a full query or sub query from MariaDB and deliver the result either to the client or to a temporary table for further processing. - Added code in sql_select.cc to intercept GROUP BY queries. Creation of group_by_handler is done after all optimizations to allow storage engine to benefit of an optimized WHERE clause and suggested indexes to use. - Added group by handler to sequence engine and a group_by test suite as a way to test the new interface. - Intercept EXPLAIN with a message "Storage engine handles GROUP BY" libmysqld/CMakeLists.txt: Added new group_by_handler files sql/CMakeLists.txt: Added new group_by_handler files sql/group_by_handler.cc: Implementation of group_by_handler functions sql/group_by_handler.h: Definition of group_by_handler class sql/handler.h: Added handlerton function to create a group_by_handler, if the storage engine can intercept the query. sql/item_cmpfunc.cc: Allow one to evaluate item_equal any time. sql/sql_select.cc: Added code to intercept GROUP BY queries - If all tables are from the same storage engine and the query is using sum functions, call create_group_by() to check if the storage engine can intercept the query. - If yes: - create a temporary table to hold a GROUP_BY row or result - In do_select() intercept normal query execution by instead calling the group_by_handler to get the result - Intercept EXPLAIN sql/sql_select.h: Added handling of group_by_handler Added caching of the original join tab (needed for cleanup after group_by handler) storage/sequence/mysql-test/sequence/group_by.result: Test group_by_handler interface storage/sequence/mysql-test/sequence/group_by.test: Test group_by_handler interface storage/sequence/sequence.cc: Added simple group_by_engine for handling COUNT(*) and SUM(primary_key). This was done as a test of the group_by_handler interface
- Loading branch information
Showing
10 changed files
with
761 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,144 @@ | ||
/* | ||
Copyright (c) 2014, SkySQL Ab & MariaDB Foundation | ||
This program is free software; you can redistribute it and/or modify | ||
it under the terms of the GNU General Public License as published by | ||
the Free Software Foundation; version 2 of the License. | ||
This program is distributed in the hope that it will be useful, | ||
but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
GNU General Public License for more details. | ||
You should have received a copy of the GNU General Public License | ||
along with this program; if not, write to the Free Software | ||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ | ||
|
||
/* | ||
This file implements the group_by_handler code. This interface | ||
can be used by storage handlers that can intercept summary or GROUP | ||
BY queries from MariaDB and itself return the result to the user or | ||
upper level. | ||
*/ | ||
|
||
#ifdef USE_PRAGMA_IMPLEMENTATION | ||
#pragma implementation // gcc: Class implementation | ||
#endif | ||
|
||
#include "sql_priv.h" | ||
#include "sql_select.h" | ||
|
||
/* | ||
Same return values as do_select(); | ||
@retval | ||
0 if ok | ||
@retval | ||
1 if error is sent | ||
@retval | ||
-1 if error should be sent | ||
*/ | ||
|
||
int group_by_handler::execute(JOIN *join) | ||
{ | ||
int err; | ||
ha_rows max_limit; | ||
ha_rows *reset_limit= 0; | ||
Item **reset_item= 0; | ||
DBUG_ENTER("group_by_handler"); | ||
|
||
if ((err= init_scan())) | ||
goto error; | ||
|
||
if (store_data_in_temp_table) | ||
{ | ||
max_limit= join->tmp_table_param.end_write_records; | ||
reset_limit= &join->unit->select_limit_cnt; | ||
} | ||
else | ||
{ | ||
max_limit= join->unit->select_limit_cnt; | ||
if (join->unit->fake_select_lex) | ||
reset_item= &join->unit->fake_select_lex->select_limit; | ||
} | ||
|
||
while (!(err= next_row())) | ||
{ | ||
if (thd->check_killed()) | ||
{ | ||
thd->send_kill_message(); | ||
(void) end_scan(); | ||
DBUG_RETURN(-1); | ||
} | ||
|
||
/* Check if we can accept the row */ | ||
if (!having || having->val_bool()) | ||
{ | ||
if (store_data_in_temp_table) | ||
{ | ||
if ((err= table->file->ha_write_tmp_row(table->record[0]))) | ||
{ | ||
bool is_duplicate; | ||
if (!table->file->is_fatal_error(err, HA_CHECK_DUP)) | ||
continue; // Distinct elimination | ||
|
||
if (create_internal_tmp_table_from_heap(thd, table, | ||
join->tmp_table_param. | ||
start_recinfo, | ||
&join->tmp_table_param. | ||
recinfo, | ||
err, 1, &is_duplicate)) | ||
DBUG_RETURN(1); | ||
if (is_duplicate) | ||
continue; | ||
} | ||
} | ||
else | ||
{ | ||
if (join->do_send_rows) | ||
{ | ||
int error; | ||
/* result < 0 if row was not accepted and should not be counted */ | ||
if ((error= join->result->send_data(*join->fields))) | ||
{ | ||
(void) end_scan(); | ||
DBUG_RETURN(error < 0 ? 0 : -1); | ||
} | ||
} | ||
} | ||
|
||
/* limit handling */ | ||
if (++join->send_records >= max_limit && join->do_send_rows) | ||
{ | ||
if (!(join->select_options & OPTION_FOUND_ROWS)) | ||
break; // LIMIT reached | ||
join->do_send_rows= 0; // Calculate FOUND_ROWS() | ||
if (reset_limit) | ||
*reset_limit= HA_POS_ERROR; | ||
if (reset_item) | ||
*reset_item= 0; | ||
} | ||
} | ||
} | ||
if (err != 0 && err != HA_ERR_END_OF_FILE) | ||
goto error; | ||
|
||
if ((err= end_scan())) | ||
goto error_2; | ||
if (!store_data_in_temp_table && join->result->send_eof()) | ||
DBUG_RETURN(1); // Don't send error to client | ||
|
||
DBUG_RETURN(0); | ||
|
||
error: | ||
(void) end_scan(); | ||
error_2: | ||
print_error(err, MYF(0)); | ||
DBUG_RETURN(-1); // Error not sent to client | ||
} | ||
|
||
|
||
void group_by_handler::print_error(int error, myf errflag) | ||
{ | ||
my_error(ER_GET_ERRNO, MYF(0), error, hton_name(ht)->str); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
/* | ||
Copyright (c) 2014, SkySQL Ab & MariaDB Foundation | ||
This program is free software; you can redistribute it and/or modify | ||
it under the terms of the GNU General Public License as published by | ||
the Free Software Foundation; version 2 of the License. | ||
This program is distributed in the hope that it will be useful, | ||
but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
GNU General Public License for more details. | ||
You should have received a copy of the GNU General Public License | ||
along with this program; if not, write to the Free Software | ||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ | ||
|
||
/* | ||
This file implements the group_by_handler interface. This interface | ||
can be used by storage handlers that can intercept summary or GROUP | ||
BY queries from MariaDB and itself return the result to the user or | ||
upper level. | ||
Both main and sub queries are supported. Here are some examples of what the | ||
storage engine could intercept: | ||
SELECT count(*) FROM t1; | ||
SELECT a,count(*) FROM t1 group by a; | ||
SELECT a,count(*) as sum FROM t1 where b > 10 group by a order by sum; | ||
SELECT a,count(*) FROM t1,t2; | ||
SELECT a, (select sum(*) from t2 where t1.a=t2.a) from t2; | ||
See https://mariadb.atlassian.net/browse/MDEV-6080 for more information. | ||
*/ | ||
|
||
class JOIN; | ||
|
||
class group_by_handler | ||
{ | ||
public: | ||
/* Arguments for group_by_handler, for usage later */ | ||
THD *thd; | ||
SELECT_LEX *select_lex; | ||
List<Item> *fields; | ||
TABLE_LIST *table_list; | ||
ORDER *group_by, *order_by; | ||
Item *where, *having; | ||
handlerton *ht; /* storage engine of this handler */ | ||
|
||
/* | ||
Bit's of things the storage engine can do for this query. | ||
Should be initialized on object creation. | ||
*/ | ||
/* Temporary table where all results should be stored in record[0] */ | ||
TABLE *table; | ||
|
||
bool store_data_in_temp_table; /* Set by mariadb */ | ||
|
||
group_by_handler(THD *thd_arg, SELECT_LEX *select_lex_arg, | ||
List<Item> *fields_arg, | ||
TABLE_LIST *table_list_arg, ORDER *group_by_arg, | ||
ORDER *order_by_arg, Item *where_arg, | ||
Item *having_arg, handlerton *ht_arg) | ||
: thd(thd_arg), select_lex(select_lex_arg), fields(fields_arg), | ||
table_list(table_list_arg), group_by(group_by_arg), | ||
order_by(order_by_arg), where(where_arg), having(having_arg), | ||
ht(ht_arg), table(0), store_data_in_temp_table(0) | ||
{} | ||
virtual ~group_by_handler() {} | ||
|
||
/* | ||
Store pointer to temporary table and objects modified to point to | ||
the temporary table. This will happen during the optimize phase. | ||
We provide new 'having' and 'order_by' elements here. The differ from the | ||
original ones in that these are modified to point to fields in the | ||
temporary table 'table'. | ||
Return 1 if the storage handler cannot handle the GROUP BY after all, | ||
in which case we have to give an error to the end user for the query. | ||
This is becasue we can't revert back the old having and order_by elements. | ||
*/ | ||
|
||
virtual bool init(TABLE *temporary_table, Item *having_arg, | ||
ORDER *order_by_arg) | ||
{ | ||
table= temporary_table; | ||
having= having_arg; | ||
order_by= order_by_arg; | ||
return 0; | ||
} | ||
|
||
/* | ||
Result data is sorted by the storage engine according to order_by (if it | ||
exists) else according to the group_by. If this is not specified, | ||
MariaDB will store the result set into the temporary table and sort the | ||
result. | ||
*/ | ||
#define GROUP_BY_ORDER_BY 1 | ||
/* The storage engine can handle DISTINCT */ | ||
#define GROUP_BY_DISTINCT 2 | ||
virtual uint flags() { return 0; } | ||
|
||
/* | ||
Functions to scan data. All these returns 0 if ok, error code in case | ||
of error | ||
*/ | ||
|
||
/* | ||
Initialize group_by scan, prepare for next_row(). | ||
If this is a sub query with group by, this can be called many times for | ||
a query. | ||
*/ | ||
virtual int init_scan()= 0; | ||
|
||
/* | ||
Return next group by result in table->record[0]. | ||
Return 0 if row found, HA_ERR_END_OF_FILE if last row and other error | ||
number in case of fatal error. | ||
*/ | ||
virtual int next_row()= 0; | ||
|
||
/* End scanning */ | ||
virtual int end_scan()=0; | ||
|
||
/* Information for optimizer (used by EXPLAIN) */ | ||
virtual int info(uint flag, ha_statistics *stats)= 0; | ||
|
||
/* Function that calls the above scan functions */ | ||
int execute(JOIN *join); | ||
|
||
/* Report errors */ | ||
virtual void print_error(int error, myf errflag); | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.