Skip to content

Commit

Permalink
stats: specially handle unique key when estimate (#13354) (#13382)
Browse files Browse the repository at this point in the history
  • Loading branch information
alivxxx authored and sre-bot committed Nov 12, 2019
1 parent 2b08825 commit 64de8d5
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 6 deletions.
2 changes: 1 addition & 1 deletion statistics/handle/update.go
Expand Up @@ -941,7 +941,7 @@ func (h *Handle) RecalculateExpectCount(q *statistics.QueryFeedback) error {
expected *= idx.GetIncreaseFactor(t.Count)
} else {
c := t.Columns[id]
expected, err = c.GetColumnRowCount(sc, ranges, t.ModifyCount)
expected, err = c.GetColumnRowCount(sc, ranges, t.ModifyCount, true)
expected *= c.GetIncreaseFactor(t.Count)
}
if err != nil {
Expand Down
12 changes: 11 additions & 1 deletion statistics/histogram.go
Expand Up @@ -727,7 +727,7 @@ func (c *Column) equalRowCount(sc *stmtctx.StatementContext, val types.Datum, mo
}

// GetColumnRowCount estimates the row count by a slice of Range.
func (c *Column) GetColumnRowCount(sc *stmtctx.StatementContext, ranges []*ranger.Range, modifyCount int64) (float64, error) {
func (c *Column) GetColumnRowCount(sc *stmtctx.StatementContext, ranges []*ranger.Range, modifyCount int64, pkIsHandle bool) (float64, error) {
var rowCount float64
for _, rg := range ranges {
cmp, err := rg.LowVal[0].CompareDatum(sc, &rg.HighVal[0])
Expand All @@ -737,6 +737,11 @@ func (c *Column) GetColumnRowCount(sc *stmtctx.StatementContext, ranges []*range
if cmp == 0 {
// the point case.
if !rg.LowExclude && !rg.HighExclude {
// In this case, the row count is at most 1.
if pkIsHandle {
rowCount += 1
continue
}
var cnt float64
cnt, err = c.equalRowCount(sc, rg.LowVal[0], modifyCount)
if err != nil {
Expand Down Expand Up @@ -839,6 +844,11 @@ func (idx *Index) GetRowCount(sc *stmtctx.StatementContext, indexRanges []*range
continue
}
if fullLen {
// At most 1 in this case.
if idx.Info.Unique {
totalCount += 1
continue
}
count, err := idx.equalRowCount(sc, lb, modifyCount)
if err != nil {
return 0, err
Expand Down
4 changes: 2 additions & 2 deletions statistics/histogram_test.go
Expand Up @@ -50,9 +50,9 @@ func (s *testStatisticsSuite) TestNewHistogramBySelectivity(c *C) {
node.Ranges = append(node.Ranges, &ranger.Range{LowVal: types.MakeDatums(25), HighVal: []types.Datum{types.MaxValueDatum()}})
intColResult := `column:1 ndv:16 totColSize:0
num: 30 lower_bound: 0 upper_bound: 2 repeats: 10
num: 20 lower_bound: 6 upper_bound: 8 repeats: 0
num: 11 lower_bound: 6 upper_bound: 8 repeats: 0
num: 30 lower_bound: 9 upper_bound: 11 repeats: 0
num: 10 lower_bound: 12 upper_bound: 14 repeats: 0
num: 1 lower_bound: 12 upper_bound: 14 repeats: 0
num: 30 lower_bound: 27 upper_bound: 29 repeats: 0`

stringCol := &Column{}
Expand Down
32 changes: 32 additions & 0 deletions statistics/selectivity_test.go
Expand Up @@ -416,6 +416,38 @@ func (s *testStatsSuite) TestEstimationForUnknownValues(c *C) {
c.Assert(count, Equals, 0.0)
}

// TestEstimationUniqueKeyEqualConds verifies that, after ANALYZE, row-count
// estimation returns exactly 1.0 for the getRange(7, 7) and getRange(6, 6)
// ranges, both through the table's first index (the unique key on b) and
// through the int-column path on the table's first column.
// NOTE(review): getRange(v, v) is presumably the closed point range [v, v];
// confirm against its definition elsewhere in this package.
func (s *testStatsSuite) TestEstimationUniqueKeyEqualConds(c *C) {
	defer cleanEnv(c, s.store, s.do)

	// Build and analyze a small table whose only index is a unique key on b.
	tk := testkit.NewTestKit(c, s.store)
	for _, stmt := range []string{
		"use test",
		"drop table if exists t",
		"create table t(a int, b int, c int, unique key(b))",
		"insert into t values (1,1,1),(2,2,2),(3,3,3),(4,4,4),(5,5,5),(6,6,6),(7,7,7)",
		"analyze table t",
	} {
		tk.MustExec(stmt)
	}

	tbl, lookupErr := s.do.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
	c.Assert(lookupErr, IsNil)
	tblStats := s.do.StatsHandle().GetTableStats(tbl.Meta())
	stmtCtx := &stmtctx.StatementContext{}

	// Every estimate below must succeed and come out as exactly one row.
	expectSingleRow := func(estimate float64, estErr error) {
		c.Assert(estErr, IsNil)
		c.Assert(estimate, Equals, 1.0)
	}

	// Estimation through the unique index.
	uniqueIdxID := tbl.Meta().Indices[0].ID
	expectSingleRow(tblStats.GetRowCountByIndexRanges(stmtCtx, uniqueIdxID, getRange(7, 7)))
	expectSingleRow(tblStats.GetRowCountByIndexRanges(stmtCtx, uniqueIdxID, getRange(6, 6)))

	// Estimation through the int-column path on the first column.
	firstColID := tbl.Meta().Columns[0].ID
	expectSingleRow(tblStats.GetRowCountByIntColumnRanges(stmtCtx, firstColID, getRange(7, 7)))
	expectSingleRow(tblStats.GetRowCountByIntColumnRanges(stmtCtx, firstColID, getRange(6, 6)))
}

func (s *testStatsSuite) TestPrimaryKeySelectivity(c *C) {
defer cleanEnv(c, s.store, s.do)
testKit := testkit.NewTestKit(c, s.store)
Expand Down
10 changes: 8 additions & 2 deletions statistics/table.go
Expand Up @@ -236,7 +236,7 @@ func (coll *HistColl) GetRowCountByIntColumnRanges(sc *stmtctx.StatementContext,
}
return getPseudoRowCountByUnsignedIntRanges(intRanges, float64(coll.Count)), nil
}
result, err := c.GetColumnRowCount(sc, intRanges, coll.ModifyCount)
result, err := c.GetColumnRowCount(sc, intRanges, coll.ModifyCount, true)
result *= c.GetIncreaseFactor(coll.Count)
return result, errors.Trace(err)
}
Expand All @@ -247,7 +247,7 @@ func (coll *HistColl) GetRowCountByColumnRanges(sc *stmtctx.StatementContext, co
if !ok || c.IsInvalid(sc, coll.Pseudo) {
return GetPseudoRowCountByColumnRanges(sc, float64(coll.Count), colRanges, 0)
}
result, err := c.GetColumnRowCount(sc, colRanges, coll.ModifyCount)
result, err := c.GetColumnRowCount(sc, colRanges, coll.ModifyCount, false)
result *= c.GetIncreaseFactor(coll.Count)
return result, errors.Trace(err)
}
Expand Down Expand Up @@ -366,6 +366,12 @@ func (coll *HistColl) getIndexRowCount(sc *stmtctx.StatementContext, idxID int64
totalCount := float64(0)
for _, ran := range indexRanges {
rangePosition := GetOrdinalOfRangeCond(sc, ran)
coverAll := len(ran.LowVal) == len(idx.Info.Columns) && rangePosition == len(ran.LowVal)
// In this case, the row count is at most 1.
if coverAll && idx.Info.Unique {
totalCount += 1.0
continue
}
// If first one is range, just use the previous way to estimate; if it is [NULL, NULL] range
// on single-column index, use previous way as well, because CMSketch does not contain null
// values in this case.
Expand Down

0 comments on commit 64de8d5

Please sign in to comment.