MDEV-18073: get_range_limit_read_cost() doesn't adjust LIMIT for the range access

The computation of which "fraction" of the range/ref access cost we would need to pay was incorrect.

Adjusted the computation.
spetrunia committed Jan 23, 2019
1 parent b7a784a commit 3238f2a
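
For illustration, here is a minimal standalone C++ sketch of the before/after scaling logic. The helper names and all numbers are hypothetical, not part of the patch; the real logic is in get_range_limit_read_cost() in sql/sql_select.cc, shown in the diff below.

#include <cstdio>

// Standalone model of the fix, under assumed inputs:
// - rows_limit: rows a full index scan would read to produce LIMIT N
//   matches, i.e. N / selectivity(whole WHERE condition)
// - best_rows:  the range (quick select) row estimate
static double scaled_cost_old(double best_cost, double best_rows,
                              double rows_limit)
{
  // Old, incorrect: compares best_rows against the full-scan-based limit,
  // so the range access often gets no LIMIT discount at all.
  if (best_rows > rows_limit)
    best_cost *= rows_limit / best_rows;
  return best_cost;
}

static double scaled_cost_new(double best_cost, double best_rows,
                              double rows_limit, double table_records)
{
  // Fixed: rows_limit assumes full-WHERE selectivity, but the range access
  // already filters with its own selectivity (best_rows / table_records),
  // so scale the limit down before taking the fraction of the quick select.
  double rows_limit_for_quick= rows_limit * (best_rows / table_records);
  if (best_rows > rows_limit_for_quick)
    best_cost *= rows_limit_for_quick / best_rows;
  return best_cost;
}

int main()
{
  // Made-up example: 10000-row table, range matches 500 rows, whole WHERE
  // has 1% selectivity, so LIMIT 10 gives rows_limit = 10 / 0.01 = 1000.
  double best_cost= 500.0, best_rows= 500.0;
  double rows_limit= 1000.0, table_records= 10000.0;
  printf("old: %.1f\n", scaled_cost_old(best_cost, best_rows, rows_limit));
  printf("new: %.1f\n", scaled_cost_new(best_cost, best_rows, rows_limit,
                                        table_records));
  // Prints old: 500.0 (no discount) vs new: 50.0 (scaled by 50/500), so
  // the old code made range access look 10x more expensive in this case.
  return 0;
}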
Showing 3 changed files with 73 additions and 10 deletions.
8 changes: 4 additions & 4 deletions mysql-test/main/range_vs_index_merge.result
@@ -1320,7 +1320,7 @@ WHERE ((Name > 'Ca' AND Name < 'Cf') OR (Country > 'E' AND Country < 'H'))
AND (Population >= 100000 AND Population < 120000)
ORDER BY Population LIMIT 5;
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE City index_merge Country,Name,Population Name,Country 35,3 NULL # Using sort_union(Name,Country); Using where; Using filesort
+1 SIMPLE City range Country,Name,Population Population 4 NULL # Using where
FLUSH STATUS;
SELECT * FROM City
WHERE ((Name > 'Ca' AND Name < 'Cf') OR (Country > 'E' AND Country < 'H'))
@@ -1335,12 +1335,12 @@ ID Name Country Population
SHOW STATUS LIKE 'Handler_read_%';
Variable_name Value
Handler_read_first 0
-Handler_read_key 2
+Handler_read_key 1
Handler_read_last 0
-Handler_read_next 385
+Handler_read_next 59
Handler_read_prev 0
Handler_read_retry 0
-Handler_read_rnd 377
+Handler_read_rnd 0
Handler_read_rnd_deleted 0
Handler_read_rnd_next 0
set @tmp_mdev585=@@optimizer_use_condition_selectivity;
32 changes: 32 additions & 0 deletions mysql-test/main/range_vs_index_merge_innodb.result
@@ -1344,6 +1344,38 @@ Handler_read_retry 0
Handler_read_rnd 0
Handler_read_rnd_deleted 0
Handler_read_rnd_next 0
+set @tmp_mdev585=@@optimizer_use_condition_selectivity;
+set optimizer_use_condition_selectivity=1;
+EXPLAIN
+SELECT * FROM City
+WHERE ((Name > 'Ca' AND Name < 'Cf') OR (Country > 'E' AND Country < 'H'))
+AND (Population >= 100000 AND Population < 120000)
+ORDER BY Population LIMIT 5;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE City range Country,Name,Population Population 4 NULL # Using where
+FLUSH STATUS;
+SELECT * FROM City
+WHERE ((Name > 'Ca' AND Name < 'Cf') OR (Country > 'E' AND Country < 'H'))
+AND (Population >= 100000 AND Population < 120000)
+ORDER BY Population LIMIT 5;
+ID Name Country Population
+519 Worthing GBR 100000
+638 al-Arish EGY 100447
+518 Basildon GBR 100924
+707 Marbella ESP 101144
+3792 Tartu EST 101246
+SHOW STATUS LIKE 'Handler_read_%';
+Variable_name Value
+Handler_read_first 0
+Handler_read_key 1
+Handler_read_last 0
+Handler_read_next 59
+Handler_read_prev 0
+Handler_read_retry 0
+Handler_read_rnd 0
+Handler_read_rnd_deleted 0
+Handler_read_rnd_next 0
+set optimizer_use_condition_selectivity=@tmp_mdev585;
set optimizer_switch='index_merge=off';
EXPLAIN
SELECT * FROM City
43 changes: 37 additions & 6 deletions sql/sql_select.cc
@@ -26685,16 +26685,22 @@ void JOIN::cache_const_exprs()


/*
-  Get a cost of reading rows_limit rows through index keynr.
+  Get the cost of using index keynr to read #LIMIT matching rows

  @detail
  - If there is a quick select, we try to use it.
  - if there is a ref(const) access, we try to use it, too.
  - quick and ref(const) use different cost formulas, so if both are possible
    we should make a cost-based choice.

+
+  rows_limit is the number of rows we would need to read when using a full
+  index scan. This is generally higher than the N from "LIMIT N" clause,
+  because there's a WHERE condition (a part of which is used to construct a
+  range access we are considering using here)

  @param tab              JOIN_TAB with table access (is NULL for single-table
                          UPDATE/DELETE)
+  @param rows_limit       See explanation above
  @param read_time  OUT   Cost of reading using quick or ref(const) access.


@@ -26707,6 +26713,7 @@ void JOIN::cache_const_exprs()

static bool get_range_limit_read_cost(const JOIN_TAB *tab,
                                      const TABLE *table,
+                                      ha_rows table_records,
                                      uint keynr,
                                      ha_rows rows_limit,
                                      double *read_time)
@@ -26773,8 +26780,32 @@ static bool get_range_limit_read_cost(const JOIN_TAB *tab,
      }
    }
  }

+  /*
+    Consider an example:
+
+      SELECT *
+      FROM t1
+      WHERE key1 BETWEEN 10 AND 20 AND col2='foo'
+      ORDER BY key1 LIMIT 10
+
+    If we were using a full index scan on key1, we would need to read this
+    many rows to get 10 matches:
+
+      10 / selectivity(key1 BETWEEN 10 AND 20 AND col2='foo')
+
+    This is the number we get in rows_limit.
+    But we intend to use range access on key1. The rows returned by quick
+    select will satisfy the range part of the condition,
+    "key1 BETWEEN 10 and 20". We will still need to filter them with
+    the remainder condition, (col2='foo').
+
+    The selectivity of the range access is (best_rows/table_records). We need
+    to discount it from the rows_limit:
+  */
+  double rows_limit_for_quick= rows_limit * (best_rows / table_records);
+
-  if (best_rows > rows_limit)
+  if (best_rows > rows_limit_for_quick)
  {
    /*
      LIMIT clause specifies that we will need to read fewer records than
@@ -26783,7 +26814,7 @@ static bool get_range_limit_read_cost(const JOIN_TAB *tab,
      only need 1/3rd of records, it will cost us 1/3rd of quick select's
      read time)
    */
-    best_cost *= rows_limit / best_rows;
+    best_cost *= rows_limit_for_quick / best_rows;
  }
  *read_time= best_cost;
  res= true;
@@ -27076,8 +27107,8 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
        index_scan_time= select_limit/rec_per_key *
                         MY_MIN(rec_per_key, table->file->scan_time());
        double range_scan_time;
-        if (get_range_limit_read_cost(tab, table, nr, select_limit,
-                                      &range_scan_time))
+        if (get_range_limit_read_cost(tab, table, table_records, nr,
+                                      select_limit, &range_scan_time))
        {
          if (range_scan_time < index_scan_time)
            index_scan_time= range_scan_time;
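To put hypothetical numbers on the example in the new comment above: suppose t1 has table_records = 10000, the range "key1 BETWEEN 10 AND 20" matches best_rows = 2000 rows (selectivity 0.2), and the whole WHERE has selectivity 0.05, so a full index scan would read rows_limit = 10 / 0.05 = 200 rows to find the 10 matches. The quick select then only needs

  rows_limit_for_quick = rows_limit * (best_rows / table_records)
                       = 200 * (2000 / 10000) = 40

rows, so its cost is scaled by rows_limit_for_quick / best_rows = 40 / 2000 = 0.02. The old code compared best_rows against rows_limit and scaled by 200 / 2000 = 0.1, overcharging the range access five-fold.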
