Skip to content

Commit cce76e7

Browse files
committed
MDEV-36765: followup 4: Fixups to previous fixes
- Add a testcase showing JSON_HB histograms handle multi-byte characters correctly. - Make Item_func_json_unquote::val_str() handle the situation where it is reading non-UTF8 "JSON" and transcoding it into UTF-8 (the JSON spec only allows UTF8, but MariaDB's implementation supports non-UTF8 as well). - Make Item_func_json_search::compare_json_value_wild() handle json_unescape()'s return values in the same way it's done in other places. - Coding style fixes.
1 parent 12c1071 commit cce76e7

File tree

3 files changed

+284
-5
lines changed

3 files changed

+284
-5
lines changed

mysql-test/main/statistics_json.result

Lines changed: 214 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7812,5 +7812,218 @@ min_value max_value hist_type
78127812
部門 部門
78137813
部門 JSON_HB
78147814
DROP TABLE t1;
7815-
DELETE FROM mysql.column_stats;
7815+
create table t1 (
7816+
col1 varchar(10) charset utf8
7817+
);
7818+
set names utf8;
7819+
select hex('б'), collation('б');
7820+
hex('б') collation('б')
7821+
D0B1 utf8mb3_general_ci
7822+
insert into t1 values
7823+
('а'),('б'),('в'),('г'),('д'),('е'),('ж'),('з'),('и'),('й');
7824+
analyze table t1 persistent for all;
7825+
Table Op Msg_type Msg_text
7826+
test.t1 analyze status Engine-independent statistics collected
7827+
test.t1 analyze status OK
7828+
select hex(col1) from t1;
7829+
hex(col1)
7830+
D0B0
7831+
D0B1
7832+
D0B2
7833+
D0B3
7834+
D0B4
7835+
D0B5
7836+
D0B6
7837+
D0B7
7838+
D0B8
7839+
D0B9
7840+
select json_detailed(json_extract(histogram, '$**.histogram_hb'))
7841+
from mysql.column_stats where db_name=database() and table_name='t1';
7842+
json_detailed(json_extract(histogram, '$**.histogram_hb'))
7843+
[
7844+
[
7845+
{
7846+
"start": "а",
7847+
"size": 0.1,
7848+
"ndv": 1
7849+
},
7850+
{
7851+
"start": "б",
7852+
"size": 0.1,
7853+
"ndv": 1
7854+
},
7855+
{
7856+
"start": "в",
7857+
"size": 0.1,
7858+
"ndv": 1
7859+
},
7860+
{
7861+
"start": "г",
7862+
"size": 0.1,
7863+
"ndv": 1
7864+
},
7865+
{
7866+
"start": "д",
7867+
"size": 0.1,
7868+
"ndv": 1
7869+
},
7870+
{
7871+
"start": "е",
7872+
"size": 0.1,
7873+
"ndv": 1
7874+
},
7875+
{
7876+
"start": "ж",
7877+
"size": 0.1,
7878+
"ndv": 1
7879+
},
7880+
{
7881+
"start": "з",
7882+
"size": 0.1,
7883+
"ndv": 1
7884+
},
7885+
{
7886+
"start": "и",
7887+
"size": 0.1,
7888+
"ndv": 1
7889+
},
7890+
{
7891+
"start": "й",
7892+
"end": "й",
7893+
"size": 0.1,
7894+
"ndv": 1
7895+
}
7896+
]
7897+
]
7898+
explain extended select * from t1 where col1 < 'а';
7899+
id select_type table type possible_keys key key_len ref rows filtered Extra
7900+
1 SIMPLE t1 ALL NULL NULL NULL NULL 10 10.00 Using where
7901+
Warnings:
7902+
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where `test`.`t1`.`col1` < 'а'
7903+
explain extended select * from t1 where col1 < 'в';
7904+
id select_type table type possible_keys key key_len ref rows filtered Extra
7905+
1 SIMPLE t1 ALL NULL NULL NULL NULL 10 20.00 Using where
7906+
Warnings:
7907+
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where `test`.`t1`.`col1` < 'в'
7908+
explain extended select * from t1 where col1 < 'д';
7909+
id select_type table type possible_keys key key_len ref rows filtered Extra
7910+
1 SIMPLE t1 ALL NULL NULL NULL NULL 10 40.00 Using where
7911+
Warnings:
7912+
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where `test`.`t1`.`col1` < 'д'
7913+
explain extended select * from t1 where col1 < 'ж';
7914+
id select_type table type possible_keys key key_len ref rows filtered Extra
7915+
1 SIMPLE t1 ALL NULL NULL NULL NULL 10 60.00 Using where
7916+
Warnings:
7917+
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where `test`.`t1`.`col1` < 'ж'
7918+
explain extended select * from t1 where col1 < 'й';
7919+
id select_type table type possible_keys key key_len ref rows filtered Extra
7920+
1 SIMPLE t1 ALL NULL NULL NULL NULL 10 90.00 Using where
7921+
Warnings:
7922+
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where `test`.`t1`.`col1` < 'й'
7923+
delete from t1;
7924+
insert into t1 values
7925+
('"а'),('"б'),('"в'),('"г'),('"д'),('"е'),('"ж'),('"з'),('"и'),('"й');
7926+
analyze table t1 persistent for all;
7927+
Table Op Msg_type Msg_text
7928+
test.t1 analyze status Engine-independent statistics collected
7929+
test.t1 analyze status OK
7930+
select json_detailed(json_extract(histogram, '$**.histogram_hb'))
7931+
from mysql.column_stats where db_name=database() and table_name='t1';
7932+
json_detailed(json_extract(histogram, '$**.histogram_hb'))
7933+
[
7934+
[
7935+
{
7936+
"start": "\"а",
7937+
"size": 0.1,
7938+
"ndv": 1
7939+
},
7940+
{
7941+
"start": "\"б",
7942+
"size": 0.1,
7943+
"ndv": 1
7944+
},
7945+
{
7946+
"start": "\"в",
7947+
"size": 0.1,
7948+
"ndv": 1
7949+
},
7950+
{
7951+
"start": "\"г",
7952+
"size": 0.1,
7953+
"ndv": 1
7954+
},
7955+
{
7956+
"start": "\"д",
7957+
"size": 0.1,
7958+
"ndv": 1
7959+
},
7960+
{
7961+
"start": "\"е",
7962+
"size": 0.1,
7963+
"ndv": 1
7964+
},
7965+
{
7966+
"start": "\"ж",
7967+
"size": 0.1,
7968+
"ndv": 1
7969+
},
7970+
{
7971+
"start": "\"з",
7972+
"size": 0.1,
7973+
"ndv": 1
7974+
},
7975+
{
7976+
"start": "\"и",
7977+
"size": 0.1,
7978+
"ndv": 1
7979+
},
7980+
{
7981+
"start": "\"й",
7982+
"end": "\"й",
7983+
"size": 0.1,
7984+
"ndv": 1
7985+
}
7986+
]
7987+
]
7988+
select hex(col1) from t1;
7989+
hex(col1)
7990+
22D0B9
7991+
22D0B8
7992+
22D0B7
7993+
22D0B6
7994+
22D0B5
7995+
22D0B4
7996+
22D0B3
7997+
22D0B2
7998+
22D0B1
7999+
22D0B0
8000+
explain extended select * from t1 where col1 < '"а';
8001+
id select_type table type possible_keys key key_len ref rows filtered Extra
8002+
1 SIMPLE t1 ALL NULL NULL NULL NULL 10 10.00 Using where
8003+
Warnings:
8004+
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where `test`.`t1`.`col1` < '"а'
8005+
explain extended select * from t1 where col1 < '"в';
8006+
id select_type table type possible_keys key key_len ref rows filtered Extra
8007+
1 SIMPLE t1 ALL NULL NULL NULL NULL 10 20.00 Using where
8008+
Warnings:
8009+
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where `test`.`t1`.`col1` < '"в'
8010+
explain extended select * from t1 where col1 < '"д';
8011+
id select_type table type possible_keys key key_len ref rows filtered Extra
8012+
1 SIMPLE t1 ALL NULL NULL NULL NULL 10 40.00 Using where
8013+
Warnings:
8014+
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where `test`.`t1`.`col1` < '"д'
8015+
explain extended select * from t1 where col1 < '"ж';
8016+
id select_type table type possible_keys key key_len ref rows filtered Extra
8017+
1 SIMPLE t1 ALL NULL NULL NULL NULL 10 60.00 Using where
8018+
Warnings:
8019+
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where `test`.`t1`.`col1` < '"ж'
8020+
explain extended select * from t1 where col1 < '"й';
8021+
id select_type table type possible_keys key key_len ref rows filtered Extra
8022+
1 SIMPLE t1 ALL NULL NULL NULL NULL 10 90.00 Using where
8023+
Warnings:
8024+
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where `test`.`t1`.`col1` < '"й'
8025+
drop table t1;
8026+
select JSON_UNQUOTE(CONVERT('"ФФ"' using cp1251));
8027+
JSON_UNQUOTE(CONVERT('"ФФ"' using cp1251))
8028+
ФФ
78168029
# End of 10.11 tests

mysql-test/main/statistics_json.test

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -492,6 +492,43 @@ SELECT min_value, max_value, hist_type
492492
FROM mysql.column_stats WHERE db_name = 'test' AND table_name = 't1';
493493

494494
DROP TABLE t1;
495-
DELETE FROM mysql.column_stats;
495+
496+
create table t1 (
497+
col1 varchar(10) charset utf8
498+
);
499+
set names utf8;
500+
select hex('б'), collation('б');
501+
insert into t1 values
502+
('а'),('б'),('в'),('г'),('д'),('е'),('ж'),('з'),('и'),('й');
503+
504+
analyze table t1 persistent for all;
505+
select hex(col1) from t1;
506+
select json_detailed(json_extract(histogram, '$**.histogram_hb'))
507+
from mysql.column_stats where db_name=database() and table_name='t1';
508+
509+
explain extended select * from t1 where col1 < 'а';
510+
explain extended select * from t1 where col1 < 'в';
511+
explain extended select * from t1 where col1 < 'д';
512+
explain extended select * from t1 where col1 < 'ж';
513+
explain extended select * from t1 where col1 < 'й';
514+
515+
delete from t1;
516+
insert into t1 values
517+
('"а'),('"б'),('"в'),('"г'),('"д'),('"е'),('"ж'),('"з'),('"и'),('"й');
518+
519+
analyze table t1 persistent for all;
520+
select json_detailed(json_extract(histogram, '$**.histogram_hb'))
521+
from mysql.column_stats where db_name=database() and table_name='t1';
522+
select hex(col1) from t1;
523+
explain extended select * from t1 where col1 < '"а';
524+
explain extended select * from t1 where col1 < '"в';
525+
explain extended select * from t1 where col1 < '"д';
526+
explain extended select * from t1 where col1 < '"ж';
527+
explain extended select * from t1 where col1 < '"й';
528+
529+
drop table t1;
530+
531+
# JSON_UNQUOTE was touched by this patch also
532+
select JSON_UNQUOTE(CONVERT('"ФФ"' using cp1251));
496533

497534
--echo # End of 10.11 tests

sql/item_jsonfunc.cc

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -916,14 +916,25 @@ String *Item_func_json_unquote::val_str(String *str)
916916
if (unlikely(je.s.error) || je.value_type != JSON_VALUE_STRING)
917917
return js;
918918

919+
int buf_len= je.value_len;
920+
if (js->charset()->cset != my_charset_utf8mb4_bin.cset)
921+
{
922+
/*
923+
json_unquote() will be transcoding between charsets. We don't know
924+
how much buffer space we'll need. Assume that each byte in the source
925+
will require mbmaxlen bytes in the output.
926+
*/
927+
buf_len *= my_charset_utf8mb4_bin.mbmaxlen;
928+
}
929+
919930
str->length(0);
920931
str->set_charset(&my_charset_utf8mb4_bin);
921932

922-
if (str->realloc_with_extra_if_needed(je.value_len) ||
933+
if (str->realloc_with_extra_if_needed(buf_len) ||
923934
(c_len= json_unescape(js->charset(),
924935
je.value, je.value + je.value_len,
925936
&my_charset_utf8mb4_bin,
926-
(uchar *) str->ptr(), (uchar *) (str->ptr() + je.value_len))) < 0)
937+
(uchar *) str->ptr(), (uchar *) (str->ptr() + buf_len))) < 0)
927938
goto error;
928939

929940
str->length(c_len);
@@ -933,7 +944,7 @@ String *Item_func_json_unquote::val_str(String *str)
933944
if (current_thd)
934945
{
935946
if (c_len == JSON_ERROR_OUT_OF_SPACE)
936-
my_error(ER_OUTOFMEMORY, MYF(0), je.value_len);
947+
my_error(ER_OUTOFMEMORY, MYF(0), buf_len);
937948
else if (c_len == JSON_ERROR_ILLEGAL_SYMBOL)
938949
push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
939950
ER_JSON_BAD_CHR, ER_THD(current_thd, ER_JSON_BAD_CHR),
@@ -3937,7 +3948,21 @@ int Item_func_json_search::compare_json_value_wild(json_engine_t *je,
39373948
(uchar *) (esc_value.ptr() +
39383949
esc_value.alloced_length()));
39393950
if (esc_len <= 0)
3951+
{
3952+
if (current_thd)
3953+
{
3954+
if (esc_len == JSON_ERROR_OUT_OF_SPACE)
3955+
my_error(ER_OUTOFMEMORY, MYF(0), je->value_len);
3956+
else if (esc_len == JSON_ERROR_ILLEGAL_SYMBOL)
3957+
{
3958+
push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
3959+
ER_JSON_BAD_CHR, ER_THD(current_thd, ER_JSON_BAD_CHR),
3960+
0, "comparison",
3961+
(int)(je->s.c_str - je->value));
3962+
}
3963+
}
39403964
return 0;
3965+
}
39413966

39423967
return collation.collation->wildcmp(
39433968
esc_value.ptr(), esc_value.ptr() + esc_len,
@@ -4207,9 +4232,11 @@ int Arg_comparator::compare_json_str_basic(Item *j, Item *s)
42074232
if (c_len == JSON_ERROR_OUT_OF_SPACE)
42084233
my_error(ER_OUTOFMEMORY, MYF(0), je.value_len);
42094234
else if (c_len == JSON_ERROR_ILLEGAL_SYMBOL)
4235+
{
42104236
push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
42114237
ER_JSON_BAD_CHR, ER_THD(current_thd, ER_JSON_BAD_CHR),
42124238
0, "comparison", (int)((const char *) je.s.c_str - js->ptr()));
4239+
}
42134240
}
42144241
goto error;
42154242
}
@@ -4271,9 +4298,11 @@ int Arg_comparator::compare_e_json_str_basic(Item *j, Item *s)
42714298
if (c_len == JSON_ERROR_OUT_OF_SPACE)
42724299
my_error(ER_OUTOFMEMORY, MYF(0), value_len);
42734300
else if (c_len == JSON_ERROR_ILLEGAL_SYMBOL)
4301+
{
42744302
push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
42754303
ER_JSON_BAD_CHR, ER_THD(current_thd, ER_JSON_BAD_CHR),
42764304
0, "equality comparison", 0);
4305+
}
42774306
}
42784307
return 1;
42794308
}

0 commit comments

Comments
 (0)