Skip to content

Commit 6375873

Browse files
committed
Fixes in opt_histogram_json.cc in the last commits
Also add more test coverage
1 parent 49a7bbb commit 6375873

File tree

3 files changed

+167
-27
lines changed

3 files changed

+167
-27
lines changed

mysql-test/main/statistics_json.result

Lines changed: 51 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4028,10 +4028,58 @@ analyze select * from t1_json where a > 'zzzzzzzzz';
40284028
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
40294029
1 SIMPLE t1_json ALL NULL NULL NULL NULL 10 10.00 10.00 0.00 Using where
40304030
drop table ten;
4031-
UPDATE mysql.column_stats SET histogram='["a-1", "a-2", {"a": "b"}, "a-3"]' WHERE table_name='t1_json';
4031+
UPDATE mysql.column_stats
4032+
SET histogram='["not-what-you-expect"]' WHERE table_name='t1_json';
40324033
FLUSH TABLES;
4033-
explain extended select * from t1_json where a between 'a-3a' and 'zzzzzzzzz';
4034-
ERROR HY000: Failed to parse histogram: Root JSON element must be a JSON object at offset 12345.
4034+
explain select * from t1_json limit 1;
4035+
ERROR HY000: Failed to parse histogram: Root JSON element must be a JSON object at offset 0.
4036+
UPDATE mysql.column_stats
4037+
SET histogram='{"histogram_hb_v2":"not-histogram"}' WHERE table_name='t1_json';
4038+
FLUSH TABLES;
4039+
explain select * from t1_json limit 1;
4040+
ERROR HY000: Failed to parse histogram: A JSON array expected at offset 0.
4041+
UPDATE mysql.column_stats
4042+
SET histogram='{"histogram_hb_v2":["not-a-bucket"]}'
4043+
WHERE table_name='t1_json';
4044+
FLUSH TABLES;
4045+
explain select * from t1_json limit 1;
4046+
ERROR HY000: Failed to parse histogram: Object expected at offset 19.
4047+
UPDATE mysql.column_stats
4048+
SET histogram='{"histogram_hb_v2":[{"no-expected-members":1}]}'
4049+
WHERE table_name='t1_json';
4050+
FLUSH TABLES;
4051+
explain select * from t1_json limit 1;
4052+
ERROR HY000: Failed to parse histogram: .start member must be present and be a scalar at offset 20.
4053+
UPDATE mysql.column_stats
4054+
SET histogram='{"histogram_hb_v2":[{"start":{}}]}'
4055+
WHERE table_name='t1_json';
4056+
FLUSH TABLES;
4057+
explain select * from t1_json limit 1;
4058+
ERROR HY000: Failed to parse histogram: .start member must be present and be a scalar at offset 20.
4059+
UPDATE mysql.column_stats
4060+
SET histogram='{"histogram_hb_v2":[{"start":"aaa", "size":"not-an-integer"}]}'
4061+
WHERE table_name='t1_json';
4062+
FLUSH TABLES;
4063+
explain select * from t1_json limit 1;
4064+
ERROR HY000: Failed to parse histogram: .size member must be present and be a scalar at offset 20.
4065+
UPDATE mysql.column_stats
4066+
SET histogram='{"histogram_hb_v2":[{"start":"aaa", "size":0.25}]}'
4067+
WHERE table_name='t1_json';
4068+
FLUSH TABLES;
4069+
explain select * from t1_json limit 1;
4070+
ERROR HY000: Failed to parse histogram: .ndv member must be present and be a scalar at offset 20.
4071+
UPDATE mysql.column_stats
4072+
SET histogram='{"histogram_hb_v2":[{"start":"aaa", "size":0.25, "ndv":1}]}'
4073+
WHERE table_name='t1_json';
4074+
FLUSH TABLES;
4075+
explain select * from t1_json limit 1;
4076+
ERROR HY000: Failed to parse histogram: .end must be present in the last bucket and only there at offset 0.
4077+
UPDATE mysql.column_stats
4078+
SET histogram='{"histogram_hb_v2":[]}'
4079+
WHERE table_name='t1_json';
4080+
FLUSH TABLES;
4081+
explain select * from t1_json limit 1;
4082+
ERROR HY000: Failed to parse histogram: .end must be present in the last bucket and only there at offset 0.
40354083
create table t2 (
40364084
city varchar(100)
40374085
);

mysql-test/main/statistics_json.test

Lines changed: 60 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,12 +39,69 @@ analyze select * from t1_json where a > 'zzzzzzzzz';
3939

4040
drop table ten;
4141

42-
# test different valid JSON strings that are invalid histograms.
43-
UPDATE mysql.column_stats SET histogram='["a-1", "a-2", {"a": "b"}, "a-3"]' WHERE table_name='t1_json';
42+
#
43+
# Test different valid JSON strings that are invalid histograms.
44+
#
45+
UPDATE mysql.column_stats
46+
SET histogram='["not-what-you-expect"]' WHERE table_name='t1_json';
4447
FLUSH TABLES;
4548
--error ER_JSON_HISTOGRAM_PARSE_FAILED
46-
explain extended select * from t1_json where a between 'a-3a' and 'zzzzzzzzz';
49+
explain select * from t1_json limit 1;
50+
51+
UPDATE mysql.column_stats
52+
SET histogram='{"histogram_hb_v2":"not-histogram"}' WHERE table_name='t1_json';
53+
FLUSH TABLES;
54+
--error ER_JSON_HISTOGRAM_PARSE_FAILED
55+
explain select * from t1_json limit 1;
56+
57+
UPDATE mysql.column_stats
58+
SET histogram='{"histogram_hb_v2":["not-a-bucket"]}'
59+
WHERE table_name='t1_json';
60+
FLUSH TABLES;
61+
--error ER_JSON_HISTOGRAM_PARSE_FAILED
62+
explain select * from t1_json limit 1;
63+
64+
UPDATE mysql.column_stats
65+
SET histogram='{"histogram_hb_v2":[{"no-expected-members":1}]}'
66+
WHERE table_name='t1_json';
67+
FLUSH TABLES;
68+
--error ER_JSON_HISTOGRAM_PARSE_FAILED
69+
explain select * from t1_json limit 1;
70+
71+
UPDATE mysql.column_stats
72+
SET histogram='{"histogram_hb_v2":[{"start":{}}]}'
73+
WHERE table_name='t1_json';
74+
FLUSH TABLES;
75+
--error ER_JSON_HISTOGRAM_PARSE_FAILED
76+
explain select * from t1_json limit 1;
77+
78+
UPDATE mysql.column_stats
79+
SET histogram='{"histogram_hb_v2":[{"start":"aaa", "size":"not-an-integer"}]}'
80+
WHERE table_name='t1_json';
81+
FLUSH TABLES;
82+
--error ER_JSON_HISTOGRAM_PARSE_FAILED
83+
explain select * from t1_json limit 1;
84+
85+
UPDATE mysql.column_stats
86+
SET histogram='{"histogram_hb_v2":[{"start":"aaa", "size":0.25}]}'
87+
WHERE table_name='t1_json';
88+
FLUSH TABLES;
89+
--error ER_JSON_HISTOGRAM_PARSE_FAILED
90+
explain select * from t1_json limit 1;
91+
92+
UPDATE mysql.column_stats
93+
SET histogram='{"histogram_hb_v2":[{"start":"aaa", "size":0.25, "ndv":1}]}'
94+
WHERE table_name='t1_json';
95+
FLUSH TABLES;
96+
--error ER_JSON_HISTOGRAM_PARSE_FAILED
97+
explain select * from t1_json limit 1;
4798

99+
UPDATE mysql.column_stats
100+
SET histogram='{"histogram_hb_v2":[]}'
101+
WHERE table_name='t1_json';
102+
FLUSH TABLES;
103+
--error ER_JSON_HISTOGRAM_PARSE_FAILED
104+
explain select * from t1_json limit 1;
48105

49106
--source include/have_sequence.inc
50107
create table t2 (

sql/opt_histogram_json.cc

Lines changed: 56 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,8 @@ class Histogram_json_builder : public Histogram_builder
9494
{
9595
column->store_field_value((uchar*) elem, col_length);
9696
StringBuffer<MAX_FIELD_WIDTH> val;
97-
column->val_str(&val);
98-
writer.add_member("end").add_str(val.c_ptr());
97+
String *str= column->val_str(&val);
98+
writer.add_member("end").add_str(str->c_ptr_safe());
9999
finalize_bucket();
100100
}
101101

@@ -109,10 +109,10 @@ class Histogram_json_builder : public Histogram_builder
109109
DBUG_ASSERT(bucket.size == 0);
110110
column->store_field_value((uchar*) elem, col_length);
111111
StringBuffer<MAX_FIELD_WIDTH> val;
112-
column->val_str(&val);
112+
String *str= column->val_str(&val);
113113

114114
writer.start_object();
115-
writer.add_member("start").add_str(val.c_ptr());
115+
writer.add_member("start").add_str(str->c_ptr_safe());
116116

117117
bucket.ndv= 1;
118118
bucket.size= cnt;
@@ -264,14 +264,17 @@ bool Histogram_json_hb::parse(MEM_ROOT *mem_root, Field *field,
264264
const char *err;
265265
DBUG_ENTER("Histogram_json_hb::parse");
266266
DBUG_ASSERT(type_arg == JSON_HB);
267+
const char *err_pos= hist_data;
267268
const char *obj1;
268269
int obj1_len;
269270
double cumulative_size= 0.0;
271+
size_t end_member_index= (size_t)-1;
270272

271273
if (JSV_OBJECT != json_type(hist_data, hist_data + hist_data_len,
272274
&obj1, &obj1_len))
273275
{
274276
err= "Root JSON element must be a JSON object";
277+
err_pos= hist_data;
275278
goto error;
276279
}
277280

@@ -281,6 +284,7 @@ bool Histogram_json_hb::parse(MEM_ROOT *mem_root, Field *field,
281284
"histogram_hb_v2", &hist_array,
282285
&hist_array_len))
283286
{
287+
err_pos= obj1;
284288
err= "A JSON array expected";
285289
goto error;
286290
}
@@ -296,11 +300,13 @@ bool Histogram_json_hb::parse(MEM_ROOT *mem_root, Field *field,
296300
break;
297301
if (ret == JSV_BAD_JSON)
298302
{
303+
err_pos= hist_array;
299304
err= "JSON parse error";
300305
goto error;
301306
}
302307
if (ret != JSV_OBJECT)
303308
{
309+
err_pos= hist_array;
304310
err= "Object expected";
305311
goto error;
306312
}
@@ -313,6 +319,7 @@ bool Histogram_json_hb::parse(MEM_ROOT *mem_root, Field *field,
313319
"start", &val, &val_len);
314320
if (ret != JSV_STRING && ret != JSV_NUMBER)
315321
{
322+
err_pos= bucket_info;
316323
err= ".start member must be present and be a scalar";
317324
goto error;
318325
}
@@ -324,6 +331,7 @@ bool Histogram_json_hb::parse(MEM_ROOT *mem_root, Field *field,
324331
"size", &size, &size_len);
325332
if (ret != JSV_NUMBER)
326333
{
334+
err_pos= bucket_info;
327335
err= ".size member must be present and be a scalar";
328336
goto error;
329337
}
@@ -333,6 +341,7 @@ bool Histogram_json_hb::parse(MEM_ROOT *mem_root, Field *field,
333341
double size_d= my_strtod(size, &size_end, &conv_err);
334342
if (conv_err)
335343
{
344+
err_pos= size;
336345
err= ".size member must be a floating-point value";
337346
goto error;
338347
}
@@ -345,48 +354,66 @@ bool Histogram_json_hb::parse(MEM_ROOT *mem_root, Field *field,
345354
"ndv", &ndv, &ndv_len);
346355
if (ret != JSV_NUMBER)
347356
{
357+
err_pos= bucket_info;
348358
err= ".ndv member must be present and be a scalar";
349359
goto error;
350360
}
351361
char *ndv_end= (char*)ndv + ndv_len;
352362
longlong ndv_ll= my_strtoll10(ndv, &ndv_end, &conv_err);
353363
if (conv_err)
354364
{
365+
err_pos= ndv;
355366
err= ".ndv member must be an integer value";
356367
goto error;
357368
}
358369

370+
371+
uchar buf[MAX_KEY_LENGTH];
372+
uint len_to_copy= field->key_length();
373+
field->store_text(val, val_len, &my_charset_bin);
374+
uint bytes= field->get_key_image(buf, len_to_copy, Field::itRAW);
375+
376+
buckets.push_back({std::string((char*)buf, bytes), cumulative_size,
377+
ndv_ll});
378+
379+
// Read the "end" field
359380
const char *end_val;
360381
int end_val_len;
361382
ret= json_get_object_key(bucket_info, bucket_info+bucket_info_len,
362383
"end", &end_val, &end_val_len);
363384
if (ret != JSV_NOTHING && ret != JSV_STRING && ret !=JSV_NUMBER)
364385
{
386+
err_pos= bucket_info;
365387
err= ".end member must be a scalar";
366388
goto error;
367389
}
368390
if (ret != JSV_NOTHING)
369-
last_bucket_end_endp.assign(end_val, end_val_len);
370-
371-
buckets.push_back({std::string(val, val_len), NULL, cumulative_size,
372-
ndv_ll});
373-
374-
if (buckets.size())
375391
{
376-
auto& prev_bucket= buckets[buckets.size()-1];
377-
if (prev_bucket.ndv == 1)
378-
prev_bucket.end_value= &prev_bucket.start_value;
379-
else
380-
prev_bucket.end_value= &buckets.back().start_value;
392+
field->store_text(end_val, end_val_len, &my_charset_bin);
393+
uint bytes= field->get_key_image(buf, len_to_copy, Field::itRAW);
394+
last_bucket_end_endp.assign((char*)buf, bytes);
395+
if (end_member_index == (size_t)-1)
396+
end_member_index= buckets.size();
381397
}
382398
}
383-
buckets.back().end_value= &last_bucket_end_endp;
384399
size= buckets.size();
385400

401+
if (end_member_index != buckets.size())
402+
{
403+
err= ".end must be present in the last bucket and only there";
404+
err_pos= hist_data;
405+
goto error;
406+
}
407+
if (!buckets.size())
408+
{
409+
err= ".end member is allowed only in last bucket";
410+
err_pos= hist_data;
411+
goto error;
412+
}
413+
386414
DBUG_RETURN(false);
387415
error:
388-
my_error(ER_JSON_HISTOGRAM_PARSE_FAILED, MYF(0), err,
389-
12345);
416+
my_error(ER_JSON_HISTOGRAM_PARSE_FAILED, MYF(0), err, err_pos - hist_data);
390417
DBUG_RETURN(true);
391418
}
392419

@@ -469,7 +496,7 @@ double Histogram_json_hb::point_selectivity(Field *field, key_range *endpoint,
469496
* The bucket has one value and this is the value we are looking for.
470497
* The bucket has multiple values. Then, assume
471498
*/
472-
sel= (get_left_fract(idx) - buckets[idx].cum_fract) / buckets[idx].ndv;
499+
sel= (buckets[idx].cum_fract - get_left_fract(idx)) / buckets[idx].ndv;
473500
}
474501
return sel;
475502
}
@@ -483,6 +510,14 @@ double Histogram_json_hb::get_left_fract(int idx)
483510
return buckets[idx-1].cum_fract;
484511
}
485512

513+
std::string& Histogram_json_hb::get_end_value(int idx)
514+
{
515+
if (idx == (int)buckets.size()-1)
516+
return last_bucket_end_endp;
517+
else
518+
return buckets[idx+1].start_value;
519+
}
520+
486521
/*
487522
@param field The table field histogram is for. We don't care about the
488523
field's current value, we only need its virtual functions to
@@ -514,7 +549,7 @@ double Histogram_json_hb::range_selectivity(Field *field, key_range *min_endp,
514549
double left_fract= get_left_fract(idx);
515550
double sel= position_in_interval(field, min_key, min_key_len,
516551
buckets[idx].start_value,
517-
*buckets[idx].end_value);
552+
get_end_value(idx));
518553

519554
min= left_fract + sel * (buckets[idx].cum_fract - left_fract);
520555
}
@@ -538,7 +573,7 @@ double Histogram_json_hb::range_selectivity(Field *field, key_range *min_endp,
538573
double left_fract= get_left_fract(idx);
539574
double sel= position_in_interval(field, max_key, max_key_len,
540575
buckets[idx].start_value,
541-
*buckets[idx].end_value);
576+
get_end_value(idx));
542577
max= left_fract + sel * (buckets[idx].cum_fract - left_fract);
543578
}
544579
else

0 commit comments

Comments
 (0)