Skip to content

Commit ccbcafc

Browse files
committed
MDEV-35614: JSON_UNQUOTE doesn't work with emojis
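Emojis are encoded as 4-byte UTF-8 sequences, which the utf8mb3 character set cannot represent. Fix JSON_UNQUOTE to produce its result in utf8mb4 (collation utf8mb4_bin) by default instead of utf8mb3_general_ci.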
1 parent 5a536ad commit ccbcafc

File tree

4 files changed

+63
-4
lines changed

4 files changed

+63
-4
lines changed

mysql-test/main/func_json.result

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1766,6 +1766,43 @@ FROM JSON_TABLE (@data, '$[*]' COLUMNS (data text PATH '$.Data')) AS t;
17661766
data
17671767
<root language="de"></root>
17681768
#
1769+
# MDEV-35614 JSON_UNQUOTE doesn't work with emojis
1770+
#
1771+
SELECT HEX(JSON_UNQUOTE('"\\ud83d\\ude0a"')) as hex_smiley;
1772+
hex_smiley
1773+
F09F988A
1774+
set names utf8mb4;
1775+
SELECT JSON_UNQUOTE('"\\ud83d\\ude0a"') as smiley;
1776+
smiley
1777+
😊
1778+
SELECT JSON_UNQUOTE('"\\ud83d\\ude0a"') = JSON_UNQUOTE('"\\ud83d\\ude0a"') as equal_smileys;
1779+
equal_smileys
1780+
1
1781+
SELECT JSON_UNQUOTE('"\\ud83d\\ude0a"') <= JSON_UNQUOTE('"\\ud83d\\ude0a"') as less_or_equal_smileys;
1782+
less_or_equal_smileys
1783+
1
1784+
set @v='{ "color":"😊" }';
1785+
select @v as v, collation(@v) as collation_v;
1786+
v collation_v
1787+
{ "color":"😊" } utf8mb4_general_ci
1788+
select json_valid(@v) as valid;
1789+
valid
1790+
1
1791+
select json_extract(@v,'$.color') as color_extraction, collation(json_extract(@v,'$.color')) as color_extraction_collation;
1792+
color_extraction color_extraction_collation
1793+
"😊" utf8mb4_general_ci
1794+
select json_unquote(json_extract(@v,'$.color')) as unquoted, collation(json_unquote(json_extract(@v,'$.color'))) as unquoted_collation;
1795+
unquoted unquoted_collation
1796+
😊 utf8mb4_bin
1797+
SELECT JSON_UNQUOTE('"\\uc080\\ude0a"') as invalid_utf8mb4;
1798+
invalid_utf8mb4
1799+
"\uc080\ude0a"
1800+
Warnings:
1801+
Warning 4035 Broken JSON string in argument 1 to function 'json_unquote' at position 13
1802+
show warnings;
1803+
Level Code Message
1804+
Warning 4035 Broken JSON string in argument 1 to function 'json_unquote' at position 13
1805+
#
17691806
# End of 10.6 tests
17701807
#
17711808
#

mysql-test/main/func_json.test

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1194,6 +1194,7 @@ SELECT JSON_EXTRACT('{"a": 1,"b": 2}','$.a');
11941194

11951195
SET @@collation_connection= @save_collation_connection;
11961196

1197+
11971198
--echo #
11981199
--echo # End of 10.5 tests
11991200
--echo #
@@ -1231,6 +1232,27 @@ SELECT
12311232
data
12321233
FROM JSON_TABLE (@data, '$[*]' COLUMNS (data text PATH '$.Data')) AS t;
12331234

1235+
1236+
--echo #
1237+
--echo # MDEV-35614 JSON_UNQUOTE doesn't work with emojis
1238+
--echo #
1239+
1240+
SELECT HEX(JSON_UNQUOTE('"\\ud83d\\ude0a"')) as hex_smiley;
1241+
set names utf8mb4;
1242+
SELECT JSON_UNQUOTE('"\\ud83d\\ude0a"') as smiley;
1243+
1244+
SELECT JSON_UNQUOTE('"\\ud83d\\ude0a"') = JSON_UNQUOTE('"\\ud83d\\ude0a"') as equal_smileys;
1245+
SELECT JSON_UNQUOTE('"\\ud83d\\ude0a"') <= JSON_UNQUOTE('"\\ud83d\\ude0a"') as less_or_equal_smileys;
1246+
1247+
set @v='{ "color":"😊" }';
1248+
select @v as v, collation(@v) as collation_v;
1249+
select json_valid(@v) as valid;
1250+
select json_extract(@v,'$.color') as color_extraction, collation(json_extract(@v,'$.color')) as color_extraction_collation;
1251+
select json_unquote(json_extract(@v,'$.color')) as unquoted, collation(json_unquote(json_extract(@v,'$.color'))) as unquoted_collation;
1252+
1253+
SELECT JSON_UNQUOTE('"\\uc080\\ude0a"') as invalid_utf8mb4;
1254+
show warnings;
1255+
12341256
--echo #
12351257
--echo # End of 10.6 tests
12361258
--echo #

mysql-test/suite/json/r/json_no_table.result

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2886,7 +2886,7 @@ json_unquote(json_compact('["a", "b", "c"]'))
28862886
["a", "b", "c"]
28872887
select charset(json_unquote('"abc"'));
28882888
charset(json_unquote('"abc"'))
2889-
utf8mb3
2889+
utf8mb4
28902890
select json_quote(convert(X'e68891' using utf8));
28912891
json_quote(convert(X'e68891' using utf8))
28922892
"我"

sql/item_jsonfunc.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -851,7 +851,7 @@ String *Item_func_json_quote::val_str(String *str)
851851

852852
bool Item_func_json_unquote::fix_length_and_dec(THD *thd)
853853
{
854-
collation.set(&my_charset_utf8mb3_general_ci,
854+
collation.set(&my_charset_utf8mb4_bin,
855855
DERIVATION_COERCIBLE, MY_REPERTOIRE_ASCII);
856856
max_length= args[0]->max_char_length() * collation.collation->mbmaxlen;
857857
set_maybe_null();
@@ -894,12 +894,12 @@ String *Item_func_json_unquote::val_str(String *str)
894894
return js;
895895

896896
str->length(0);
897-
str->set_charset(&my_charset_utf8mb3_general_ci);
897+
str->set_charset(&my_charset_utf8mb4_bin);
898898

899899
if (str->realloc_with_extra_if_needed(je.value_len) ||
900900
(c_len= json_unescape(js->charset(),
901901
je.value, je.value + je.value_len,
902-
&my_charset_utf8mb3_general_ci,
902+
&my_charset_utf8mb4_bin,
903903
(uchar *) str->ptr(), (uchar *) (str->ptr() + je.value_len))) < 0)
904904
goto error;
905905

0 commit comments

Comments
 (0)