Skip to content

Commit

Permalink
Fix calculation of selectivity
Browse files Browse the repository at this point in the history
calculate_cond_selectivity_for_table() is largely rewritten:
- Process keys in the order of rows found, smaller ranges first. If two
  ranges have an equal number of rows, use the one with more key parts.
  This lets us mark more fields as used, so that they are not used in
  further selectivity calculations. See cmp_quick_ranges().
- Ignore keys with fields that were used by previous keys
- Don't use rec_per_key[] to calculate selectivity for smaller
  secondary key parts.  This does not work as rec_per_key[] value
  is calculated in the context of the previous key parts, not for the
  key part itself. The one exception is if the previous key parts
  are all constants.

Other things:
- Ensure that select->cond_selectivity is always between 0 and 1.
- Ensure that select->opt_range_condition_rows is never updated to
  a higher value. It is initially set to the number of rows in table.
- We now store in table->opt_range_condition_rows the lowest number of
  rows that any row-read-method has found so far. Before it was only done
  for QUICK_SELECT_I::QS_TYPE_ROR_UNION and
  QUICK_SELECT_I::QS_TYPE_INDEX_MERGE.
  Now it is done for a lot more methods. See
  calculate_cond_selectivity_for_table() for details.
- Calculate and use selectivity for the first key part of a multi-part
  key if the first key part is a constant. Example:
  WHERE key1_part1=5 and key2_part1=5.  If key1 is used, then we can still
  use selectivity for key2.

Changes in test results:
- 'filtered' is slightly changed, usually to something slightly smaller.
- In a few cases the table order changed for group by queries. This was
  because the number of resulting rows from a group by query with MIN/MAX
  is now set to be smaller.
- A few indexes were changed as we now prefer the index with more key parts
  if the number of resulting rows is the same.
  • Loading branch information
montywi authored and spetrunia committed Feb 2, 2023
1 parent 7d0bef6 commit dc2f0d1
Show file tree
Hide file tree
Showing 9 changed files with 620 additions and 103 deletions.
4 changes: 2 additions & 2 deletions mysql-test/main/group_min_max.result
Original file line number Diff line number Diff line change
Expand Up @@ -2460,8 +2460,8 @@ id select_type table type possible_keys key key_len ref rows Extra
EXPLAIN SELECT 1 FROM t1 AS t1_outer WHERE
a IN (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1_outer index a a 10 NULL 15 Using where; Using index
1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 4 test.t1_outer.a 1
1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 2
1 PRIMARY t1_outer ref a a 5 <subquery2>.max(b) 3 Using index
2 MATERIALIZED t1 range a a 5 NULL 5 Using where; Using index
EXPLAIN SELECT 1 FROM t1 AS t1_outer GROUP BY a HAVING
a > (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2);
Expand Down
2 changes: 1 addition & 1 deletion mysql-test/main/mdev-25830.result
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ WHERE task2.`sys_id` LIKE '8e7792a7dbfffb00fff8a345ca961934%'
ORDER BY sysapproval_approver0.`order`
LIMIT 0, 50 ;
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE task2 range PRIMARY,sys_class_name_2,sys_domain_path PRIMARY 96 NULL 1 0.00 98.00 100.00 Using where; Using temporary; Using filesort
1 SIMPLE task2 range PRIMARY,sys_class_name_2,sys_domain_path PRIMARY 96 NULL 1 0.00 100.00 100.00 Using where; Using temporary; Using filesort
1 SIMPLE task1 ref PRIMARY,task_parent,sys_class_name_2,sys_domain_path task_parent 99 test.task2.sys_id 1 NULL 100.00 NULL Using index condition; Using where
1 SIMPLE sysapproval_approver0 ref sysapproval_approver_ref5,sys_domain_path,sysapproval_approver_CHG1975376 sysapproval_approver_ref5 99 test.task1.sys_id 1 NULL 100.00 NULL Using index condition; Using where
drop table sysapproval_approver,task;
Expand Down
6 changes: 5 additions & 1 deletion mysql-test/main/opt_trace_index_merge.result
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,11 @@ explain select * from t1 where a=1 or b=1 {
}
},
{
"selectivity_for_indexes": [],
"selectivity_for_indexes": [
{
"use_opt_range_condition_rows_selectivity": 0.002
}
],
"selectivity_for_columns": [],
"cond_selectivity": 0.002
}
Expand Down
341 changes: 341 additions & 0 deletions mysql-test/main/opt_trace_selectivity.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,341 @@
create or replace table t1 (a int, b int, c int, key(a,c), key(b,c), key (c,b)) engine=aria;
insert into t1 select seq/100+1, mod(seq,10), mod(seq,15) from seq_1_to_10000;
insert into t1 select seq/100+1, mod(seq,10), 10 from seq_1_to_1000;
optimize table t1;
Table Op Msg_type Msg_text
test.t1 optimize status OK
select count(*) from t1 where a=2;
count(*)
200
select count(*) from t1 where b=5;
count(*)
1100
select count(*) from t1 where c=5;
count(*)
667
select count(*) from t1 where c=10;
count(*)
1667
select count(*) from t1 where a=2 and b=5;
count(*)
20
select count(*) from t1 where c=10 and b=5;
count(*)
433
select count(*) from t1 where c=5 and b=5;
count(*)
334
set optimizer_trace="enabled=on";
select count(*) from t1 where a=2 and b=5 and c=10;
count(*)
14
select JSON_DETAILED(JSON_EXTRACT(trace, '$**.considered_execution_plans')) from INFORMATION_SCHEMA.OPTIMIZER_TRACE;
JSON_DETAILED(JSON_EXTRACT(trace, '$**.considered_execution_plans'))
[
[
{
"plan_prefix":
[],
"get_costs_for_tables":
[
{
"best_access_path":
{
"table": "t1",
"considered_access_paths":
[
{
"access_type": "ref",
"index": "a",
"used_range_estimates": true,
"rows": 104,
"cost": 104.16562,
"chosen": true
},
{
"access_type": "ref",
"index": "b",
"used_range_estimates": true,
"rows": 340,
"cost": 340.2577963,
"chosen": false,
"cause": "cost"
},
{
"access_type": "ref",
"index": "c",
"used_range_estimates": true,
"rows": 632,
"cost": 632.3718449,
"chosen": false,
"cause": "cost"
},
{
"access_type": "index_merge",
"resulting_rows": 7,
"cost": 2.173416331,
"chosen": true
}
],
"chosen_access_method":
{
"type": "index_merge",
"records": 7,
"cost": 2.173416331,
"uses_join_buffering": false
}
}
}
]
},
{
"plan_prefix":
[],
"table": "t1",
"rows_for_plan": 7,
"cost_for_plan": 3.573416331
}
]
]
select JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes')) from INFORMATION_SCHEMA.OPTIMIZER_TRACE;
JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes'))
[
[
{
"index_name": "a",
"selectivity_from_index": 0.009454545
},
{
"index_name": "b",
"selectivity_from_index": 0.1
},
{
"use_opt_range_condition_rows_selectivity": 6.363636e-4
}
]
]
select count(*) from t1 where a=2 and b=5 and c=5;
count(*)
3
select JSON_DETAILED(JSON_EXTRACT(trace, '$**.considered_execution_plans')) from INFORMATION_SCHEMA.OPTIMIZER_TRACE;
JSON_DETAILED(JSON_EXTRACT(trace, '$**.considered_execution_plans'))
[
[
{
"plan_prefix":
[],
"get_costs_for_tables":
[
{
"best_access_path":
{
"table": "t1",
"considered_access_paths":
[
{
"access_type": "ref",
"index": "a",
"used_range_estimates": true,
"rows": 6,
"cost": 6.127343464,
"chosen": true
},
{
"access_type": "ref",
"index": "b",
"used_range_estimates": true,
"rows": 232,
"cost": 232.2156139,
"chosen": false,
"cause": "cost"
},
{
"access_type": "ref",
"index": "c",
"used_range_estimates": true,
"rows": 293,
"cost": 293.2394392,
"chosen": false,
"cause": "cost"
},
{
"access_type": "index_merge",
"resulting_rows": 0.6,
"cost": 2.172957403,
"chosen": true
}
],
"chosen_access_method":
{
"type": "index_merge",
"records": 0.6,
"cost": 2.172957403,
"uses_join_buffering": false
}
}
}
]
},
{
"plan_prefix":
[],
"table": "t1",
"rows_for_plan": 0.6,
"cost_for_plan": 2.292957403
}
]
]
select JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes')) from INFORMATION_SCHEMA.OPTIMIZER_TRACE;
JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes'))
[
[
{
"index_name": "a",
"selectivity_from_index": 5.454545e-4
},
{
"index_name": "b",
"selectivity_from_index": 0.1
}
]
]
# Ensure that we only use selectivity from non used index for simple cases
select count(*) from t1 where (a=2 and b= 5);
count(*)
20
select JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes')) from INFORMATION_SCHEMA.OPTIMIZER_TRACE;
JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes'))
[
[
{
"index_name": "a",
"selectivity_from_index": 0.017545455
},
{
"index_name": "b",
"selectivity_from_index": 0.073181818
}
]
]
# All of the following should have selectivity=1 for index 'b'
select count(*) from t1 where (a=2 and b between 0 and 100);
count(*)
200
select JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes')) from INFORMATION_SCHEMA.OPTIMIZER_TRACE;
JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes'))
[
[
{
"index_name": "a",
"selectivity_from_index": 0.017545455
},
{
"index_name": "b",
"selectivity_from_index": 1
}
]
]
select count(*) from t1 where (a in (2,3) and b between 0 and 100);
count(*)
400
select JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes')) from INFORMATION_SCHEMA.OPTIMIZER_TRACE;
JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes'))
[
[
{
"index_name": "a",
"selectivity_from_index": 0.035090909
},
{
"index_name": "b",
"selectivity_from_index": 1
}
]
]
select count(*) from t1 where (a>2 and b between 0 and 100);
count(*)
10702
select JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes')) from INFORMATION_SCHEMA.OPTIMIZER_TRACE;
JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes'))
[
[
{
"index_name": "a",
"selectivity_from_index": 0.973909091
},
{
"index_name": "b",
"selectivity_from_index": 1
}
]
]
select count(*) from t1 where (a>=2 and b between 0 and 100);
count(*)
10902
select JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes')) from INFORMATION_SCHEMA.OPTIMIZER_TRACE;
JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes'))
[
[
{
"index_name": "a",
"selectivity_from_index": 0.991454545
},
{
"index_name": "b",
"selectivity_from_index": 1
}
]
]
select count(*) from t1 where (a<=2 and b between 0 and 100);
count(*)
298
select JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes')) from INFORMATION_SCHEMA.OPTIMIZER_TRACE;
JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes'))
[
[
{
"index_name": "a",
"selectivity_from_index": 0.026181818
},
{
"index_name": "b",
"selectivity_from_index": 1
}
]
]
select count(*) from t1 where (a<2 and b between 0 and 100);
count(*)
98
select JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes')) from INFORMATION_SCHEMA.OPTIMIZER_TRACE;
JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes'))
[
[
{
"index_name": "a",
"selectivity_from_index": 0.008636364
},
{
"index_name": "b",
"selectivity_from_index": 1
}
]
]
select count(*) from t1 where (a between 2 and 3 and b between 0 and 100);
count(*)
400
select JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes')) from INFORMATION_SCHEMA.OPTIMIZER_TRACE;
JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes'))
[
[
{
"index_name": "a",
"selectivity_from_index": 0.035090909
},
{
"index_name": "b",
"selectivity_from_index": 1
}
]
]
drop table t1;
set optimizer_trace='enabled=off';
Loading

0 comments on commit dc2f0d1

Please sign in to comment.