Skip to content

Commit

Permalink
Fix for mariadb_convert_string - charset names for utf16 and utf32 ar…
Browse files Browse the repository at this point in the history
…e changed so iconv understands it. Also if endianness is not specified, BE charsets used by default, to avoid BOMs

Names mapped for both source and destination charsets.
Also the regression test for this change is added to charset.c
  • Loading branch information
lawrinn authored and 9EOR9 committed Oct 28, 2015
1 parent 3431674 commit 8bf167b
Show file tree
Hide file tree
Showing 2 changed files with 120 additions and 9 deletions.
59 changes: 50 additions & 9 deletions libmariadb/my_charset.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
#endif
#include <my_global.h>
#include <m_ctype.h>
#include <m_string.h>

#include <iconv.h>

Expand Down Expand Up @@ -1121,13 +1122,55 @@ int madb_get_windows_cp(const char *charset)
#endif
/* }}} */


/* {{{ map_charset_name
   Changes a charset name into something iconv understands, if necessary.
   E.g. "UTF16" does not work for iconv, while "UTF-16" does.
   Another purpose is to avoid BOMs in the result string: if the name does not
   specify endianness, "BE" is appended.

   @param[in]  cs_name    charset (encoding) name to map; must not be NULL
   @param[in]  target_cs  non-zero if the name is for the conversion target;
                          "//TRANSLIT" is then appended to the mapped name
   @param[out] buffer     receives the mapped name, always NUL-terminated
                          (the name is truncated if it does not fit)
   @param[in]  buff_len   size of buffer in bytes
*/
static void map_charset_name(const char *cs_name, my_bool target_cs, char *buffer, size_t buff_len)
{
  /* digits is initialized so it is never read indeterminate;
     endianness keeps its "BE" default unless sscanf overwrites it */
  char digits[3]= "", endianness[3]= "BE";
  const char *suffix= target_cs ? "//TRANSLIT" : "";

  if (buffer == NULL || buff_len == 0)
    return;

  /* sscanf returns EOF (a non-zero value) if the input ends before the first
     conversion, e.g. for "" or "U", so require at least one successful
     conversion (the digits) instead of testing for non-zero. */
  if (sscanf(cs_name, "UTF%2[0-9]%2[LBE]", digits, endianness) >= 1)
  {
    /* We have at least digits. Endianness is either the default (BE),
       or what we found in the string */
    snprintf(buffer, buff_len, "UTF-%s%s%s", digits, endianness, suffix);
  }
  else
  {
    /* Not our client - copy as is */
    snprintf(buffer, buff_len, "%s%s", cs_name, suffix);
  }

  /* Guarantee termination even where snprintf is mapped to a variant
     that does not terminate on truncation */
  buffer[buff_len - 1]= '\0';
}
/* }}} */

/* {{{ mariadb_convert_string
Converts string from one charset to another, and writes converted string to given buffer
@param[in] from
@param[in/out] from_len
@param[in] from_cs
@param[out] to
@param[in/out] to_len
@param[in] to_cs
@param[out] errorcode
@return -1 in case of error, bytes used in the "to" buffer, otherwise
*/
size_t STDCALL mariadb_convert_string(const char *from, size_t *from_len, CHARSET_INFO *from_cs,
char *to, size_t *to_len, CHARSET_INFO *to_cs, int *errorcode)
{
iconv_t conv= 0;
size_t rc= -1;
size_t save_len= *to_len;
char to_encoding[128];
char to_encoding[128], from_encoding[128];

*errorcode= 0;

Expand All @@ -1138,14 +1181,11 @@ size_t STDCALL mariadb_convert_string(const char *from, size_t *from_len, CHARSE
*errorcode= EINVAL;
return rc;
}
/* UTF16 does not work form iconv, while UTF-16 does.
Besides we don't want iconv to generate BOM, thus we used either UTF-16LE or BE by default
TODO: Need to do the same for UTF-32(at leased re BOM) */
snprintf(to_encoding, 128, "%s//TRANSLIT", strncmp(to_cs->encoding, "UTF16", 5) == 0
? (strcmp(to_cs->encoding + 5, "LE") == 0 ? "UTF-16LE" : "UTF-16BE")
: to_cs->encoding);

if ((conv= iconv_open(to_encoding, from_cs->encoding)) == (iconv_t)-1)

map_charset_name(to_cs->encoding, 1, to_encoding, sizeof(to_encoding));
map_charset_name(from_cs->encoding, 0, from_encoding, sizeof(from_encoding));

if ((conv= iconv_open(to_encoding, from_encoding)) == (iconv_t)-1)
{
*errorcode= errno;
goto error;
Expand All @@ -1161,4 +1201,5 @@ size_t STDCALL mariadb_convert_string(const char *from, size_t *from_len, CHARSE
iconv_close(conv);
return rc;
}
/* }}} */

70 changes: 70 additions & 0 deletions unittest/libmariadb/charset.c
Original file line number Diff line number Diff line change
Expand Up @@ -656,6 +656,75 @@ static int test_bug_54100(MYSQL *mysql)
}


/* We need this internal function for the test */
CHARSET_INFO * mysql_find_charset_name(const char *name);

/*
  Regression test for mariadb_convert_string with utf16/utf32 charsets.

  Converts one supplementary-plane character (plus its terminating NUL) from
  utf16, utf16le and utf32 to utf8 and back, and compares the result byte by
  byte with the expected encoding. A BOM emitted by the conversion would
  change both the number of written bytes and the content, so either check
  would fail.

  @param mysql  connection handle supplied by the test framework (not used)
  @return OK on success, FAIL otherwise
*/
static int test_utf16_utf32_noboms(MYSQL *mysql)
{
  const char *csname[]= {"utf16", "utf16le", "utf32", "utf8"};
  CHARSET_INFO *csinfo[sizeof(csname)/sizeof(csname[0])];

  const size_t cs_count= sizeof(csname)/sizeof(csname[0]);
  /* utf8 is the last entry; every other charset is converted to and from it */
  const size_t UTF8= sizeof(csname)/sizeof(csname[0]) - 1;

  /* The same character in each encoding, followed by a NUL terminator */
  unsigned char in_string[][8]= {"\xd8\x02\xdc\x60\0",      /* utf16(be) */
                                 "\x02\xd8\x60\xdc\0",      /* utf16le */
                                 "\x00\x01\x08\x60\0\0\0",  /* utf32(be) */
                                 "\xF0\x90\xA1\xA0" };      /* utf8 */
  /* Lengths in bytes, including the terminator */
  size_t in_oct_len[]= {6, 6, 8, 5};

  char buffer[8];
  /* mysql_hex_string writes 2 characters per input byte plus a terminating
     NUL, so the hex buffer has to hold 2 * sizeof(buffer) + 1 bytes */
  char as_hex[2 * sizeof(buffer) + 1];
  int error;
  size_t i, rc, in_len, out_len;

  (void)mysql;

  for (i= 0; i < cs_count; ++i)
  {
    csinfo[i]= mysql_find_charset_name(csname[i]);

    if (csinfo[i] == NULL)
    {
      diag("Could not get cs info for %s", csname[i]);
      return FAIL;
    }
  }

  for (i= 0; i < UTF8; ++i)
  {
    /* csname[i] -> utf8 */
    in_len= in_oct_len[i];
    out_len= sizeof(buffer);

    diag("Converting %s->%s", csname[i], csname[UTF8]);
    rc= mariadb_convert_string((const char *)in_string[i], &in_len, csinfo[i],
                               buffer, &out_len, csinfo[UTF8], &error);

    FAIL_IF(rc == (size_t)-1, "Conversion failed");
    FAIL_IF(rc != in_oct_len[UTF8], "Incorrect number of written bytes");

    if (memcmp(buffer, in_string[UTF8], rc) != 0)
    {
      mysql_hex_string(as_hex, buffer, (unsigned long)rc);
      diag("Converted string(%s) does not match the expected one", as_hex);
      return FAIL;
    }

    /* utf8 -> csname[i] */
    in_len= in_oct_len[UTF8];
    out_len= sizeof(buffer);

    diag("Converting %s->%s", csname[UTF8], csname[i]);
    rc= mariadb_convert_string((const char *)in_string[UTF8], &in_len, csinfo[UTF8],
                               buffer, &out_len, csinfo[i], &error);

    FAIL_IF(rc == (size_t)-1, "Conversion failed");
    FAIL_IF(rc != in_oct_len[i], "Incorrect number of written bytes");

    if (memcmp(buffer, in_string[i], rc) != 0)
    {
      mysql_hex_string(as_hex, buffer, (unsigned long)rc);
      diag("Converted string(%s) does not match the expected one", as_hex);
      return FAIL;
    }
  }

  return OK;
}

struct my_tests_st my_tests[] = {
{"bug_8378: mysql_real_escape with gbk", bug_8378, TEST_CONNECTION_NEW, 0, opt_bug8378, NULL},
{"test_client_character_set", test_client_character_set, TEST_CONNECTION_DEFAULT, 0, NULL, NULL},
Expand All @@ -667,6 +736,7 @@ struct my_tests_st my_tests[] = {
{"test_bug30472", test_bug30472, TEST_CONNECTION_NEW, 0, NULL, NULL},
{"test_ps_i18n", test_ps_i18n, TEST_CONNECTION_DEFAULT, 0, NULL, NULL},
{"test_bug_54100", test_bug_54100, TEST_CONNECTION_NEW, 0, NULL, NULL},
{"test_utf16_utf32_noboms", test_utf16_utf32_noboms, TEST_CONNECTION_DEFAULT, 0, NULL, NULL},
{NULL, NULL, 0, 0, NULL, 0}
};

Expand Down

0 comments on commit 8bf167b

Please sign in to comment.