Skip to content

Commit 28ad128

Browse files
committed
Fix off-by-one error in Histogram_json_hb::find_bucket
1 parent b179640 commit 28ad128

File tree

4 files changed

+70
-13
lines changed

4 files changed

+70
-13
lines changed

mysql-test/main/statistics_json.result

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4093,12 +4093,12 @@ test.t2 analyze status Engine-independent statistics collected
40934093
test.t2 analyze status OK
40944094
explain extended select * from t2 where city = 'Moscow';
40954095
id select_type table type possible_keys key key_len ref rows filtered Extra
4096-
1 SIMPLE t2 ALL NULL NULL NULL NULL 101 50.00 Using where
4096+
1 SIMPLE t2 ALL NULL NULL NULL NULL 101 98.02 Using where
40974097
Warnings:
40984098
Note 1003 select `test`.`t2`.`city` AS `city` from `test`.`t2` where `test`.`t2`.`city` = 'Moscow'
40994099
analyze select * from t2 where city = 'Moscow';
41004100
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
4101-
1 SIMPLE t2 ALL NULL NULL NULL NULL 101 101.00 50.00 98.02 Using where
4101+
1 SIMPLE t2 ALL NULL NULL NULL NULL 101 101.00 98.02 98.02 Using where
41024102
explain extended select * from t2 where city = 'Helsinki';
41034103
id select_type table type possible_keys key key_len ref rows filtered Extra
41044104
1 SIMPLE t2 ALL NULL NULL NULL NULL 101 1.98 Using where

mysql-test/main/statistics_json.test

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,4 +182,3 @@ SET histogram_type= JSON_HB;
182182
ANALYZE TABLE t1 PERSISTENT FOR ALL;
183183
SELECT * FROM t1;
184184
drop table t1;
185-

sql/opt_histogram_json.cc

Lines changed: 67 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -483,12 +483,12 @@ double Histogram_json_hb::point_selectivity(Field *field, key_range *endpoint,
483483

484484
// If the value is outside of the histogram's range, this will "clip" it to
485485
// first or last bucket.
486-
int idx= find_bucket(field, key, false);
486+
bool equal;
487+
int idx= find_bucket(field, key, &equal);
487488

488489
double sel;
489490

490-
if (buckets[idx].ndv == 1 &&
491-
field->key_cmp((uchar*)buckets[idx].start_value.data(), key))
491+
if (buckets[idx].ndv == 1 && !equal)
492492
{
493493
// The bucket has a single value and it doesn't match! Use the global
494494
// average.
@@ -550,7 +550,18 @@ double Histogram_json_hb::range_selectivity(Field *field, key_range *min_endp,
550550

551551
// Find the leftmost bucket that contains the lookup value.
552552
// (If the lookup value is to the left of all buckets, find bucket #0)
553-
int idx= find_bucket(field, min_key, exclusive_endp);
553+
bool equal;
554+
int idx= find_bucket(field, min_key, &equal);
555+
if (equal && exclusive_endp && buckets[idx].ndv==1 &&
556+
idx < (int)buckets.size()-1)
557+
{
558+
/*
559+
The range is "col > $CONST" and we've found a bucket that contains
560+
only the value $CONST. Move to the next bucket.
561+
TODO: what if the last value in the histogram is a popular one?
562+
*/
563+
idx++;
564+
}
554565
double left_fract= get_left_fract(idx);
555566
double sel= position_in_interval(field, min_key, min_key_len,
556567
buckets[idx].start_value,
@@ -573,8 +584,18 @@ double Histogram_json_hb::range_selectivity(Field *field, key_range *min_endp,
573584
max_key++;
574585
max_key_len--;
575586
}
587+
bool equal;
588+
int idx= find_bucket(field, max_key, &equal);
576589

577-
int idx= find_bucket(field, max_key, inclusive_endp);
590+
if (equal && !inclusive_endp && idx > 0)
591+
{
592+
/*
593+
The range is "col < $CONST" and we've found a bucket starting with
594+
$CONST. Move to the previous bucket.
595+
TODO: what if the first value is the popular one?
596+
*/
597+
idx--;
598+
}
578599
double left_fract= get_left_fract(idx);
579600
double sel= position_in_interval(field, max_key, max_key_len,
580601
buckets[idx].start_value,
@@ -616,22 +637,59 @@ void Histogram_json_hb::serialize(Field *field)
616637
*/
617638

618639
int Histogram_json_hb::find_bucket(Field *field, const uchar *lookup_val,
619-
bool equal_is_less)
640+
bool *equal)
620641
{
642+
int res;
621643
int low= 0;
622644
int high= (int)buckets.size() - 1;
645+
*equal= false;
623646

624647
while (low + 1 < high)
625648
{
626649
int middle= (low + high) / 2;
627-
int res= field->key_cmp((uchar*)buckets[middle].start_value.data(), lookup_val);
650+
res= field->key_cmp((uchar*)buckets[middle].start_value.data(), lookup_val);
628651
if (!res)
629-
res= equal_is_less? -1: 1;
630-
if (res < 0)
652+
{
653+
*equal= true;
654+
return middle;
655+
}
656+
else if (res < 0)
631657
low= middle;
632658
else //res > 0
633659
high= middle;
634660
}
635661

662+
/*
663+
If low and high were assigned a value in the above loop, then they are not
664+
equal to the lookup value:
665+
666+
bucket[low] < lookup_val < bucket[high]
667+
668+
But there are two special cases: low=0 and high=last_bucket. Handle them
669+
below.
670+
*/
671+
if (low == 0)
672+
{
673+
res= field->key_cmp((uchar*)buckets[0].start_value.data(), lookup_val);
674+
if (!res)
675+
*equal= true;
676+
else if (res < 0)
677+
{
678+
res= field->key_cmp((uchar*)buckets[high].start_value.data(), lookup_val);
679+
if (!res)
680+
*equal= true;
681+
if (res >= 0)
682+
low= high;
683+
}
684+
}
685+
else if (high == (int)buckets.size() - 1)
686+
{
687+
res= field->key_cmp((uchar*)buckets[high].start_value.data(), lookup_val);
688+
if (!res)
689+
*equal= true;
690+
if (res >= 0)
691+
low= high;
692+
}
693+
636694
return low;
637695
}

sql/opt_histogram_json.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,6 @@ class Histogram_json_hb : public Histogram_base
123123
private:
124124
double get_left_fract(int idx);
125125
std::string& get_end_value(int idx);
126-
int find_bucket(Field *field, const uchar *lookup_val, bool equal_is_less);
126+
int find_bucket(Field *field, const uchar *lookup_val, bool *equal);
127127
};
128128

0 commit comments

Comments
 (0)