Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner,stats: don't re-calculate the index's ranges #12856

Merged
merged 9 commits into from Dec 4, 2019
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions cmd/explaintest/r/explain_easy.result
Expand Up @@ -79,9 +79,9 @@ TopN_7 1.00 root Column#2:asc, offset:0, count:1
└─TableScan_13 10000.00 cop[tikv] table:t2, range:[-inf,+inf], keep order:false, stats:pseudo
explain select * from t1 where c1 > 1 and c2 = 1 and c3 < 1;
id count task operator info
IndexLookUp_11 1.11 root
IndexLookUp_11 11.08 root
├─IndexScan_8 33.33 cop[tikv] table:t1, index:c2, range:(1 1,1 +inf], keep order:false, stats:pseudo
└─Selection_10 1.11 cop[tikv] lt(Column#3, 1)
└─Selection_10 11.08 cop[tikv] lt(Column#3, 1)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change is a bug related to statistics.
We take the RowID column into consideration when all the columns of the index are used.
But when calculating the selectivity (https://github.com/pingcap/tidb/blob/master/statistics/selectivity.go#L215), we don't consider the RowID column.
So the rowid column is calculated twice, which makes the final row count smaller.

I've tried, but it's not very easy to fix. So we can merge this PR first, and then I'll try to fix it in another PR.

└─TableScan_9 33.33 cop[tikv] table:t1, keep order:false, stats:pseudo
explain select * from t1 where c1 = 1 and c2 > 1;
id count task operator info
Expand Down
69 changes: 35 additions & 34 deletions planner/core/exhaust_physical_plans.go
Expand Up @@ -25,6 +25,7 @@ import (
"github.com/pingcap/tidb/expression/aggregation"
"github.com/pingcap/tidb/kv"
"github.com/pingcap/tidb/planner/property"
"github.com/pingcap/tidb/planner/util"
"github.com/pingcap/tidb/sessionctx"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/chunk"
Expand Down Expand Up @@ -309,7 +310,7 @@ func (p *LogicalJoin) constructIndexJoin(
innerTask task,
ranges []*ranger.Range,
keyOff2IdxOff []int,
path *accessPath,
path *util.AccessPath,
compareFilters *ColWithCmpFuncManager,
) []PhysicalPlan {
joinType := p.JoinType
Expand Down Expand Up @@ -362,7 +363,7 @@ func (p *LogicalJoin) constructIndexJoin(
CompareFilters: compareFilters,
}.Init(p.ctx, p.stats.ScaleByExpectCnt(prop.ExpectedCnt), p.blockOffset, chReqProps...)
if path != nil {
join.IdxColLens = path.idxColLens
join.IdxColLens = path.IdxColLens
}
join.SetSchema(p.schema)
return []PhysicalPlan{join}
Expand All @@ -374,7 +375,7 @@ func (p *LogicalJoin) constructIndexMergeJoin(
innerTask task,
ranges []*ranger.Range,
keyOff2IdxOff []int,
path *accessPath,
path *util.AccessPath,
compareFilters *ColWithCmpFuncManager,
) []PhysicalPlan {
indexJoins := p.constructIndexJoin(prop, outerIdx, innerTask, ranges, keyOff2IdxOff, path, compareFilters)
Expand Down Expand Up @@ -435,7 +436,7 @@ func (p *LogicalJoin) constructIndexHashJoin(
innerTask task,
ranges []*ranger.Range,
keyOff2IdxOff []int,
path *accessPath,
path *util.AccessPath,
compareFilters *ColWithCmpFuncManager,
) []PhysicalPlan {
indexJoins := p.constructIndexJoin(prop, outerIdx, innerTask, ranges, keyOff2IdxOff, path, compareFilters)
Expand Down Expand Up @@ -512,9 +513,9 @@ func (p *LogicalJoin) getIndexJoinByOuterIdx(prop *property.PhysicalProperty, ou
func (p *LogicalJoin) buildIndexJoinInner2TableScan(
prop *property.PhysicalProperty, ds *DataSource, innerJoinKeys, outerJoinKeys []*expression.Column,
outerIdx int, us *LogicalUnionScan, avgInnerRowCnt float64) (joins []PhysicalPlan) {
var tblPath *accessPath
var tblPath *util.AccessPath
for _, path := range ds.possibleAccessPaths {
if path.isTablePath && path.storeType == kv.TiKV {
if path.IsTablePath && path.StoreType == kv.TiKV {
tblPath = path
break
}
Expand Down Expand Up @@ -561,7 +562,7 @@ func (p *LogicalJoin) buildIndexJoinInner2IndexScan(
outerIdx int, us *LogicalUnionScan, avgInnerRowCnt float64) (joins []PhysicalPlan) {
helper := &indexJoinBuildHelper{join: p}
for _, path := range ds.possibleAccessPaths {
if path.isTablePath {
if path.IsTablePath {
continue
}
emptyRange, err := helper.analyzeLookUpFilters(path, ds, innerJoinKeys)
Expand All @@ -585,7 +586,7 @@ func (p *LogicalJoin) buildIndexJoinInner2IndexScan(
}
}
joins = make([]PhysicalPlan, 0, 3)
rangeInfo := helper.buildRangeDecidedByInformation(helper.chosenPath.idxCols, outerJoinKeys)
rangeInfo := helper.buildRangeDecidedByInformation(helper.chosenPath.IdxCols, outerJoinKeys)
innerTask := p.constructInnerIndexScanTask(ds, helper.chosenPath, helper.chosenRemained, outerJoinKeys, us, rangeInfo, false, false, avgInnerRowCnt)

joins = append(joins, p.constructIndexJoin(prop, outerIdx, innerTask, helper.chosenRanges, keyOff2IdxOff, helper.chosenPath, helper.lastColManager)...)
Expand Down Expand Up @@ -613,7 +614,7 @@ type indexJoinBuildHelper struct {
idxOff2KeyOff []int
lastColManager *ColWithCmpFuncManager
chosenRanges []*ranger.Range
chosenPath *accessPath
chosenPath *util.AccessPath

curPossibleUsedKeys []*expression.Column
curNotUsedIndexCols []*expression.Column
Expand Down Expand Up @@ -713,7 +714,7 @@ func (p *LogicalJoin) constructInnerUnionScan(us *LogicalUnionScan, reader Physi
// constructInnerIndexScanTask is specially used to construct the inner plan for PhysicalIndexJoin.
func (p *LogicalJoin) constructInnerIndexScanTask(
ds *DataSource,
path *accessPath,
path *util.AccessPath,
filterConds []expression.Expression,
outerJoinKeys []*expression.Column,
us *LogicalUnionScan,
Expand All @@ -727,9 +728,9 @@ func (p *LogicalJoin) constructInnerIndexScanTask(
TableAsName: ds.TableAsName,
DBName: ds.DBName,
Columns: ds.Columns,
Index: path.index,
IdxCols: path.idxCols,
IdxColLens: path.idxColLens,
Index: path.Index,
IdxCols: path.IdxCols,
IdxColLens: path.IdxColLens,
dataSourceSchema: ds.schema,
KeepOrder: keepOrder,
Ranges: ranger.FullRange(),
Expand All @@ -745,7 +746,7 @@ func (p *LogicalJoin) constructInnerIndexScanTask(
tblCols: ds.TblCols,
keepOrder: is.KeepOrder,
}
if !isCoveringIndex(ds.schema.Columns, path.fullIdxCols, path.fullIdxColLens, is.Table.PKIsHandle) {
if !isCoveringIndex(ds.schema.Columns, path.FullIdxCols, path.FullIdxColLens, is.Table.PKIsHandle) {
// On this way, it's double read case.
ts := PhysicalTableScan{
Columns: ds.Columns,
Expand All @@ -761,24 +762,24 @@ func (p *LogicalJoin) constructInnerIndexScanTask(
}
cop.tablePlan = ts
}
is.initSchema(path.index, path.fullIdxCols, cop.tablePlan != nil)
rowSize := is.indexScanRowSize(path.index, ds, true)
is.initSchema(path.Index, path.FullIdxCols, cop.tablePlan != nil)
rowSize := is.indexScanRowSize(path.Index, ds, true)
sessVars := ds.ctx.GetSessionVars()
cop.cst = rowCount * rowSize * sessVars.ScanFactor
indexConds, tblConds := splitIndexFilterConditions(filterConds, path.fullIdxCols, path.fullIdxColLens, ds.tableInfo)
tmpPath := &accessPath{
indexFilters: indexConds,
tableFilters: tblConds,
countAfterAccess: rowCount,
indexConds, tblConds := splitIndexFilterConditions(filterConds, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo)
tmpPath := &util.AccessPath{
IndexFilters: indexConds,
TableFilters: tblConds,
CountAfterAccess: rowCount,
}
// Assume equal conditions used by index join and other conditions are independent.
if len(indexConds) > 0 {
selectivity, _, err := ds.tableStats.HistColl.Selectivity(ds.ctx, indexConds)
selectivity, _, err := ds.tableStats.HistColl.Selectivity(ds.ctx, indexConds, nil)
if err != nil {
logutil.BgLogger().Debug("calculate selectivity failed, use selection factor", zap.Error(err))
selectivity = selectionFactor
}
tmpPath.countAfterIndex = rowCount * selectivity
tmpPath.CountAfterIndex = rowCount * selectivity
}
selectivity := ds.stats.RowCount / ds.tableStats.RowCount
finalStats := ds.stats.ScaleByExpectCnt(selectivity * rowCount)
Expand Down Expand Up @@ -980,15 +981,15 @@ func (ijHelper *indexJoinBuildHelper) removeUselessEqAndInFunc(
return notKeyEqAndIn, nil
}

func (ijHelper *indexJoinBuildHelper) analyzeLookUpFilters(path *accessPath, innerPlan *DataSource, innerJoinKeys []*expression.Column) (emptyRange bool, err error) {
if len(path.idxCols) == 0 {
func (ijHelper *indexJoinBuildHelper) analyzeLookUpFilters(path *util.AccessPath, innerPlan *DataSource, innerJoinKeys []*expression.Column) (emptyRange bool, err error) {
if len(path.IdxCols) == 0 {
return false, nil
}
accesses := make([]expression.Expression, 0, len(path.idxCols))
ijHelper.resetContextForIndex(innerJoinKeys, path.idxCols, path.idxColLens)
accesses := make([]expression.Expression, 0, len(path.IdxCols))
ijHelper.resetContextForIndex(innerJoinKeys, path.IdxCols, path.IdxColLens)
notKeyEqAndIn, remained, rangeFilterCandidates := ijHelper.findUsefulEqAndInFilters(innerPlan)
var remainedEqAndIn []expression.Expression
notKeyEqAndIn, remainedEqAndIn = ijHelper.removeUselessEqAndInFunc(path.idxCols, notKeyEqAndIn)
notKeyEqAndIn, remainedEqAndIn = ijHelper.removeUselessEqAndInFunc(path.IdxCols, notKeyEqAndIn)
matchedKeyCnt := len(ijHelper.curPossibleUsedKeys)
// If no join key is matched while join keys actually are not empty. We don't choose index join for now.
if matchedKeyCnt <= 0 && len(innerJoinKeys) > 0 {
Expand All @@ -1003,7 +1004,7 @@ func (ijHelper *indexJoinBuildHelper) analyzeLookUpFilters(path *accessPath, inn
return false, nil
}
// If all the index columns are covered by eq/in conditions, we don't need to consider other conditions anymore.
if lastColPos == len(path.idxCols) {
if lastColPos == len(path.IdxCols) {
// If there's join key matched index column. Then choose hash join is always a better idea.
// e.g. select * from t1, t2 where t2.a=1 and t2.b=1. And t2 has index(a, b).
// If we don't have the following check, TiDB will build index join for this case.
Expand All @@ -1021,10 +1022,10 @@ func (ijHelper *indexJoinBuildHelper) analyzeLookUpFilters(path *accessPath, inn
ijHelper.updateBestChoice(ranges, path, accesses, remained, nil)
return false, nil
}
lastPossibleCol := path.idxCols[lastColPos]
lastPossibleCol := path.IdxCols[lastColPos]
lastColManager := &ColWithCmpFuncManager{
targetCol: lastPossibleCol,
colLength: path.idxColLens[lastColPos],
colLength: path.IdxColLens[lastColPos],
affectedColSchema: expression.NewSchema(),
}
lastColAccess := ijHelper.buildLastColManager(lastPossibleCol, innerPlan, lastColManager)
Expand All @@ -1040,7 +1041,7 @@ func (ijHelper *indexJoinBuildHelper) analyzeLookUpFilters(path *accessPath, inn
var ranges, nextColRange []*ranger.Range
var err error
if len(colAccesses) > 0 {
nextColRange, err = ranger.BuildColumnRange(colAccesses, ijHelper.join.ctx.GetSessionVars().StmtCtx, lastPossibleCol.RetType, path.idxColLens[lastColPos])
nextColRange, err = ranger.BuildColumnRange(colAccesses, ijHelper.join.ctx.GetSessionVars().StmtCtx, lastPossibleCol.RetType, path.IdxColLens[lastColPos])
if err != nil {
return false, err
}
Expand All @@ -1053,7 +1054,7 @@ func (ijHelper *indexJoinBuildHelper) analyzeLookUpFilters(path *accessPath, inn
return true, nil
}
remained = append(remained, colRemained...)
if path.idxColLens[lastColPos] != types.UnspecifiedLength {
if path.IdxColLens[lastColPos] != types.UnspecifiedLength {
remained = append(remained, colAccesses...)
}
accesses = append(accesses, colAccesses...)
Expand All @@ -1073,7 +1074,7 @@ func (ijHelper *indexJoinBuildHelper) analyzeLookUpFilters(path *accessPath, inn
return false, nil
}

func (ijHelper *indexJoinBuildHelper) updateBestChoice(ranges []*ranger.Range, path *accessPath, accesses,
func (ijHelper *indexJoinBuildHelper) updateBestChoice(ranges []*ranger.Range, path *util.AccessPath, accesses,
remained []expression.Expression, lastColManager *ColWithCmpFuncManager) {
// We choose the index by the number of used columns of the range, the much the better.
// Notice that there may be the cases like `t1.a=t2.a and b > 2 and b < 1`. So ranges can be nil though the conditions are valid.
Expand Down
7 changes: 4 additions & 3 deletions planner/core/exhaust_physical_plans_test.go
Expand Up @@ -21,6 +21,7 @@ import (
"github.com/pingcap/parser/model"
"github.com/pingcap/parser/mysql"
"github.com/pingcap/tidb/expression"
"github.com/pingcap/tidb/planner/util"
"github.com/pingcap/tidb/types"
)

Expand Down Expand Up @@ -120,9 +121,9 @@ func (s *testUnitTestSuit) TestIndexJoinAnalyzeLookUpFilters(c *C) {
DBName: model.NewCIStr("test"),
})
joinNode.SetSchema(expression.MergeSchema(dsSchema, outerChildSchema))
path := &accessPath{
idxCols: append(make([]*expression.Column, 0, 4), dsSchema.Columns...),
idxColLens: []int{types.UnspecifiedLength, types.UnspecifiedLength, 2, types.UnspecifiedLength},
path := &util.AccessPath{
IdxCols: append(make([]*expression.Column, 0, 4), dsSchema.Columns...),
IdxColLens: []int{types.UnspecifiedLength, types.UnspecifiedLength, 2, types.UnspecifiedLength},
}
joinColNames := append(dsNames.Shallow(), outerChildNames...)

Expand Down