@@ -483,12 +483,12 @@ double Histogram_json_hb::point_selectivity(Field *field, key_range *endpoint,
483
483
484
484
// If the value is outside of the histogram's range, this will "clip" it to
485
485
// first or last bucket.
486
- int idx= find_bucket (field, key, false );
486
+ bool equal;
487
+ int idx= find_bucket (field, key, &equal);
487
488
488
489
double sel;
489
490
490
- if (buckets[idx].ndv == 1 &&
491
- field->key_cmp ((uchar*)buckets[idx].start_value .data (), key))
491
+ if (buckets[idx].ndv == 1 && !equal)
492
492
{
493
493
// The bucket has a single value and it doesn't match! Use the global
494
494
// average.
@@ -550,7 +550,18 @@ double Histogram_json_hb::range_selectivity(Field *field, key_range *min_endp,
550
550
551
551
// Find the leftmost bucket that contains the lookup value.
552
552
// (If the lookup value is to the left of all buckets, find bucket #0)
553
- int idx= find_bucket (field, min_key, exclusive_endp);
553
+ bool equal;
554
+ int idx= find_bucket (field, min_key, &equal);
555
+ if (equal && exclusive_endp && buckets[idx].ndv ==1 &&
556
+ idx < (int )buckets.size ()-1 )
557
+ {
558
+ /*
559
+ The range is "col > $CONST" and we've found a bucket that contains
560
+ only the value $CONST. Move to the next bucket.
561
+ TODO: what if the last value in the histogram is a popular one?
562
+ */
563
+ idx++;
564
+ }
554
565
double left_fract= get_left_fract (idx);
555
566
double sel= position_in_interval (field, min_key, min_key_len,
556
567
buckets[idx].start_value ,
@@ -573,8 +584,18 @@ double Histogram_json_hb::range_selectivity(Field *field, key_range *min_endp,
573
584
max_key++;
574
585
max_key_len--;
575
586
}
587
+ bool equal;
588
+ int idx= find_bucket (field, max_key, &equal);
576
589
577
- int idx= find_bucket (field, max_key, inclusive_endp);
590
+ if (equal && !inclusive_endp && idx > 0 )
591
+ {
592
+ /*
593
+ The range is "col < $CONST" and we've found a bucket starting with
594
+ $CONST. Move to the previous bucket.
595
+ TODO: what if the first value is the popular one?
596
+ */
597
+ idx--;
598
+ }
578
599
double left_fract= get_left_fract (idx);
579
600
double sel= position_in_interval (field, max_key, max_key_len,
580
601
buckets[idx].start_value ,
@@ -616,22 +637,59 @@ void Histogram_json_hb::serialize(Field *field)
616
637
*/
617
638
618
639
int Histogram_json_hb::find_bucket(Field *field, const uchar *lookup_val,
                                   bool *equal)
{
  /*
    Binary-search the buckets (ordered by start_value) for the one that
    contains lookup_val, i.e. the last bucket whose start_value is not
    greater than lookup_val. If lookup_val is to the left of all buckets,
    bucket #0 is returned.

    @param field       Used only for its key_cmp() comparison function
    @param lookup_val  The value to look up, in the format key_cmp() accepts
    @param equal  OUT  Set to true iff the returned bucket's start_value
                       compares equal to lookup_val, false otherwise

    @return Index of the bucket found

    Convention used throughout: for
      res= key_cmp(bucket_start, lookup_val)
    res < 0 means bucket_start < lookup_val, res > 0 means
    bucket_start > lookup_val.
  */
  int res;
  int low= 0;
  int high= (int)buckets.size() - 1;
  *equal= false;

  // No buckets at all: there is nothing to compare against. Return 0 without
  // touching buckets[0].
  if (high < 0)
    return low;

  while (low + 1 < high)
  {
    int middle= (low + high) / 2;
    res= field->key_cmp((uchar*)buckets[middle].start_value.data(), lookup_val);
    if (!res)
    {
      // Exact hit on a bucket boundary, no need to search further.
      *equal= true;
      return middle;
    }
    else if (res < 0)
      low= middle;
    else //res > 0
      high= middle;
  }

  /*
    If low and high were assigned a value in the above loop, then they are not
    equal to the lookup value:

      bucket[low] < lookup_val < bucket[high]

    But there are two special cases: low=0 and high=last_bucket. These were
    possibly never compared against lookup_val. Handle them below.
  */
  if (low == 0)
  {
    res= field->key_cmp((uchar*)buckets[0].start_value.data(), lookup_val);
    if (!res)
      *equal= true;
    else if (res < 0)
    {
      // bucket[0] < lookup_val. The value is in bucket[high] only if that
      // bucket starts at or before lookup_val; otherwise it stays in bucket 0.
      res= field->key_cmp((uchar*)buckets[high].start_value.data(), lookup_val);
      if (!res)
        *equal= true;
      if (res <= 0)
        low= high;
    }
    // else: lookup_val is to the left of all buckets, "clip" it to bucket #0.
  }
  else if (high == (int)buckets.size() - 1)
  {
    // Here bucket[low] < lookup_val is known from the loop. Move to the last
    // bucket only if it starts at or before lookup_val.
    res= field->key_cmp((uchar*)buckets[high].start_value.data(), lookup_val);
    if (!res)
      *equal= true;
    if (res <= 0)
      low= high;
  }

  return low;
}
0 commit comments