Skip to content
Permalink
Browse files

MDEV-19750 mysql command wrong encoding

Restore the detection of default charset in command line utilities.
It worked up to 10.1, but was broken by Connector/C.

Moved the code for detecting the default charset from sql-common/client.c
to mysys, and made the command line utilities use this code if a charset
was not specified on the command line.
  • Loading branch information...
vaintroub committed Jun 17, 2019
1 parent 81f60e8 commit 5804bb4ef0acd7ac42e628c2d8c404957dc86cf9
@@ -40,6 +40,7 @@
#include "my_readline.h"
#include <signal.h>
#include <violite.h>
#include <my_sys.h>

#if defined(USE_LIBEDIT_INTERFACE) && defined(HAVE_LOCALE_H)
#include <locale.h>
@@ -4701,7 +4702,8 @@ sql_real_connect(char *host,char *database,char *user,char *password,
select_limit,max_join_size);
mysql_options(&mysql, MYSQL_INIT_COMMAND, init_command);
}

if (!strcmp(default_charset,MYSQL_AUTODETECT_CHARSET_NAME))
default_charset= (char *)my_default_csname();
mysql_options(&mysql, MYSQL_SET_CHARSET_NAME, default_charset);

if (!do_connect(&mysql, host, user, password, database,
@@ -26,6 +26,7 @@
#include <welcome_copyright_notice.h>
#include <my_rnd.h>
#include <password.h>
#include <my_sys.h>

#define ADMIN_VERSION "9.1"
#define MAX_MYSQL_VAR 512
@@ -371,6 +372,8 @@ int main(int argc,char *argv[])
if (shared_memory_base_name)
mysql_options(&mysql,MYSQL_SHARED_MEMORY_BASE_NAME,shared_memory_base_name);
#endif
if (!strcmp(default_charset,MYSQL_AUTODETECT_CHARSET_NAME))
default_charset= (char *)my_default_csname();
mysql_options(&mysql, MYSQL_SET_CHARSET_NAME, default_charset);
error_flags= (myf)(opt_nobeep ? 0 : ME_BELL);

@@ -440,8 +440,10 @@ static int get_options(int *argc, char ***argv)
else
default_charset= (char*) MYSQL_AUTODETECT_CHARSET_NAME;
}
if (strcmp(default_charset, MYSQL_AUTODETECT_CHARSET_NAME) &&
!get_charset_by_csname(default_charset, MY_CS_PRIMARY, MYF(MY_WME)))
if (!strcmp(default_charset, MYSQL_AUTODETECT_CHARSET_NAME))
default_charset= (char *)my_default_csname();

if (!get_charset_by_csname(default_charset, MY_CS_PRIMARY, MYF(MY_WME)))
{
printf("Unsupported character set: %s\n", default_charset);
DBUG_RETURN(1);
@@ -30,6 +30,8 @@
#define IMPORT_VERSION "3.7"

#include "client_priv.h"
#include <my_sys.h>

#include "mysql_version.h"

#include <welcome_copyright_notice.h> /* ORACLE_WELCOME_COPYRIGHT_NOTICE */
@@ -472,8 +474,9 @@ static MYSQL *db_connect(char *host, char *database,

if (opt_default_auth && *opt_default_auth)
mysql_options(mysql, MYSQL_DEFAULT_AUTH, opt_default_auth);

mysql_options(mysql, MYSQL_SET_CHARSET_NAME, default_charset);
if (!strcmp(default_charset,MYSQL_AUTODETECT_CHARSET_NAME))
default_charset= (char *)my_default_csname();
mysql_options(mysql, MYSQL_SET_CHARSET_NAME, my_default_csname());
mysql_options(mysql, MYSQL_OPT_CONNECT_ATTR_RESET, 0);
mysql_options4(mysql, MYSQL_OPT_CONNECT_ATTR_ADD,
"program_name", "mysqlimport");
@@ -135,6 +135,8 @@ int main(int argc, char **argv)
if (shared_memory_base_name)
mysql_options(&mysql,MYSQL_SHARED_MEMORY_BASE_NAME,shared_memory_base_name);
#endif
if (!strcmp(default_charset,MYSQL_AUTODETECT_CHARSET_NAME))
default_charset= (char *)my_default_csname();
mysql_options(&mysql, MYSQL_SET_CHARSET_NAME, default_charset);

if (opt_plugin_dir && *opt_plugin_dir)
@@ -1043,6 +1043,7 @@ extern char *get_tty_password(const char *opt_message);
/* File system character set */
extern CHARSET_INFO *fs_character_set(void);
#endif
extern const char *my_default_csname(void);
extern size_t escape_quotes_for_mysql(CHARSET_INFO *charset_info,
char *to, size_t to_length,
const char *from, size_t length);
@@ -0,0 +1,2 @@
@@character_set_client
cp1257
@@ -0,0 +1,2 @@
--source include/windows.inc
--exec chcp 1257 > NUL && $MYSQL --default-character-set=auto -e "select @@character_set_client"
@@ -20,7 +20,12 @@
#include <m_string.h>
#include <my_dir.h>
#include <my_xml.h>

#ifdef HAVE_LANGINFO_H
#include <langinfo.h>
#endif
#ifdef HAVE_LOCALE_H
#include <locale.h>
#endif

/*
The code below implements this functionality:
@@ -1216,3 +1221,214 @@ size_t escape_quotes_for_mysql(CHARSET_INFO *charset_info,
*to= 0;
return overflow ? (ulong)~0 : (ulong) (to - to_start);
}


/*
  How well a MySQL character set corresponds to the OS character set
  it is mapped from.
*/
typedef enum my_cs_match_type_enum
{
  my_cs_exact= 0,   /* MySQL and OS charsets are fully compatible */
  my_cs_approx= 1,  /* MySQL charset is very close to OS charset */
  my_cs_unsupp= 2   /* MySQL knows this charset, but it is not supported
                       as client character set */
} my_cs_match_type;


/*
  One row of the OS-charset-name to MySQL-charset-name mapping table.
*/
typedef struct str2str_st
{
  const char *os_name;     /* character set name as reported by the OS */
  const char *my_name;     /* corresponding MySQL character set name */
  my_cs_match_type param;  /* quality of the correspondence */
} MY_CSET_OS_NAME;

/*
  Mapping from operating system character set names to MySQL character
  set names.

  On Windows the OS name has the form "cp<codepage number>"; elsewhere it
  is a locale codeset name. The table is terminated by an entry whose
  os_name is NULL. Entries marked my_cs_unsupp are known charsets that
  cannot be used as a client character set.
*/
static const MY_CSET_OS_NAME charsets[] =
{
#ifdef _WIN32
  {"cp437", "cp850", my_cs_approx},
  {"cp850", "cp850", my_cs_exact},
  {"cp852", "cp852", my_cs_exact},
  {"cp858", "cp850", my_cs_approx},
  {"cp866", "cp866", my_cs_exact},
  {"cp874", "tis620", my_cs_approx},
  {"cp932", "cp932", my_cs_exact},
  {"cp936", "gbk", my_cs_approx},
  {"cp949", "euckr", my_cs_approx},
  {"cp950", "big5", my_cs_exact},
  {"cp1200", "utf16le", my_cs_unsupp},
  {"cp1201", "utf16", my_cs_unsupp},
  {"cp1250", "cp1250", my_cs_exact},
  {"cp1251", "cp1251", my_cs_exact},
  {"cp1252", "latin1", my_cs_exact},
  {"cp1253", "greek", my_cs_exact},
  {"cp1254", "latin5", my_cs_exact},
  {"cp1255", "hebrew", my_cs_approx},
  {"cp1256", "cp1256", my_cs_exact},
  {"cp1257", "cp1257", my_cs_exact},
  {"cp10000", "macroman", my_cs_exact},
  {"cp10001", "sjis", my_cs_approx},
  {"cp10002", "big5", my_cs_approx},
  {"cp10008", "gb2312", my_cs_approx},
  {"cp10021", "tis620", my_cs_approx},
  {"cp10029", "macce", my_cs_exact},
  {"cp12001", "utf32", my_cs_unsupp},
  {"cp20107", "swe7", my_cs_exact},
  {"cp20127", "latin1", my_cs_approx},
  {"cp20866", "koi8r", my_cs_exact},
  {"cp20932", "ujis", my_cs_exact},
  {"cp20936", "gb2312", my_cs_approx},
  {"cp20949", "euckr", my_cs_approx},
  {"cp21866", "koi8u", my_cs_exact},
  {"cp28591", "latin1", my_cs_approx},
  {"cp28592", "latin2", my_cs_exact},
  {"cp28597", "greek", my_cs_exact},
  {"cp28598", "hebrew", my_cs_exact},
  {"cp28599", "latin5", my_cs_exact},
  {"cp28603", "latin7", my_cs_exact},
#ifdef UNCOMMENT_THIS_WHEN_WL_4579_IS_DONE
  {"cp28605", "latin9", my_cs_exact},
#endif
  {"cp38598", "hebrew", my_cs_exact},
  {"cp51932", "ujis", my_cs_exact},
  {"cp51936", "gb2312", my_cs_exact},
  {"cp51949", "euckr", my_cs_exact},
  {"cp51950", "big5", my_cs_exact},
#ifdef UNCOMMENT_THIS_WHEN_WL_WL_4024_IS_DONE
  {"cp54936", "gb18030", my_cs_exact},
#endif
  {"cp65001", "utf8", my_cs_exact},

#else /* not Windows */

  {"646", "latin1", my_cs_approx}, /* Default on Solaris */
  {"ANSI_X3.4-1968", "latin1", my_cs_approx},
  {"ansi1251", "cp1251", my_cs_exact},
  {"armscii8", "armscii8", my_cs_exact},
  {"armscii-8", "armscii8", my_cs_exact},
  {"ASCII", "latin1", my_cs_approx},
  {"Big5", "big5", my_cs_exact},
  {"cp1251", "cp1251", my_cs_exact},
  {"cp1255", "hebrew", my_cs_approx},
  {"CP866", "cp866", my_cs_exact},
  {"eucCN", "gb2312", my_cs_exact},
  {"euc-CN", "gb2312", my_cs_exact},
  {"eucJP", "ujis", my_cs_exact},
  {"euc-JP", "ujis", my_cs_exact},
  {"eucKR", "euckr", my_cs_exact},
  {"euc-KR", "euckr", my_cs_exact},
#ifdef UNCOMMENT_THIS_WHEN_WL_WL_4024_IS_DONE
  {"gb18030", "gb18030", my_cs_exact},
#endif
  {"gb2312", "gb2312", my_cs_exact},
  {"gbk", "gbk", my_cs_exact},
  {"georgianps", "geostd8", my_cs_exact},
  {"georgian-ps", "geostd8", my_cs_exact},
  /*
    Windows-1252 is exposed under the name "latin1" (compare the Windows
    "cp1252" entry above); "cp1252" itself is not a usable charset name.
  */
  {"IBM-1252", "latin1", my_cs_exact},

  {"iso88591", "latin1", my_cs_approx},
  {"ISO_8859-1", "latin1", my_cs_approx},
  {"ISO8859-1", "latin1", my_cs_approx},
  {"ISO-8859-1", "latin1", my_cs_approx},

  {"iso885913", "latin7", my_cs_exact},
  {"ISO_8859-13", "latin7", my_cs_exact},
  {"ISO8859-13", "latin7", my_cs_exact},
  {"ISO-8859-13", "latin7", my_cs_exact},

#ifdef UNCOMMENT_THIS_WHEN_WL_4579_IS_DONE
  {"iso885915", "latin9", my_cs_exact},
  {"ISO_8859-15", "latin9", my_cs_exact},
  {"ISO8859-15", "latin9", my_cs_exact},
  {"ISO-8859-15", "latin9", my_cs_exact},
#endif

  {"iso88592", "latin2", my_cs_exact},
  {"ISO_8859-2", "latin2", my_cs_exact},
  {"ISO8859-2", "latin2", my_cs_exact},
  {"ISO-8859-2", "latin2", my_cs_exact},

  {"iso88597", "greek", my_cs_exact},
  {"ISO_8859-7", "greek", my_cs_exact},
  {"ISO8859-7", "greek", my_cs_exact},
  {"ISO-8859-7", "greek", my_cs_exact},

  {"iso88598", "hebrew", my_cs_exact},
  {"ISO_8859-8", "hebrew", my_cs_exact},
  {"ISO8859-8", "hebrew", my_cs_exact},
  {"ISO-8859-8", "hebrew", my_cs_exact},

  {"iso88599", "latin5", my_cs_exact},
  {"ISO_8859-9", "latin5", my_cs_exact},
  {"ISO8859-9", "latin5", my_cs_exact},
  {"ISO-8859-9", "latin5", my_cs_exact},

  {"koi8r", "koi8r", my_cs_exact},
  {"KOI8-R", "koi8r", my_cs_exact},
  {"koi8u", "koi8u", my_cs_exact},
  {"KOI8-U", "koi8u", my_cs_exact},

  {"roman8", "hp8", my_cs_exact}, /* Default on HP UX */

  {"Shift_JIS", "sjis", my_cs_exact},
  {"SJIS", "sjis", my_cs_exact},
  {"shiftjisx0213", "sjis", my_cs_exact},

  {"tis620", "tis620", my_cs_exact},
  {"tis-620", "tis620", my_cs_exact},

  {"ujis", "ujis", my_cs_exact},

  {"US-ASCII", "latin1", my_cs_approx},

  {"utf8", "utf8", my_cs_exact},
  {"utf-8", "utf8", my_cs_exact},
#endif
  /* End-of-table marker: os_name == NULL */
  {NULL, NULL, 0}
};


/*
  Translate an OS character set name into a MySQL character set name.

  The charsets[] table is scanned in order and names are compared
  case-insensitively; the first matching entry decides the result.

  @param csname  OS character set name (must not be NULL)

  @return  MySQL character set name for an exact or approximate match,
           NULL if the name is not in the table or the matching entry
           is not usable as a client character set.
*/
static const char*
my_os_charset_to_mysql_charset(const char* csname)
{
  const MY_CSET_OS_NAME *entry= charsets;

  while (entry->os_name)
  {
    if (strcasecmp(entry->os_name, csname) == 0)
    {
      /*
        An approximate match is accepted silently for now. Maybe we
        should print a warning eventually: character set correspondence
        is not exact.
      */
      if (entry->param == my_cs_exact || entry->param == my_cs_approx)
        return entry->my_name;

      /* Known to the table, but not supported as client charset */
      return NULL;
    }
    entry++;
  }
  return NULL;
}

/**
  Detect the default client character set from the operating system.

  On Windows the console code page is used, falling back to the ANSI
  code page when GetConsoleCP() returns 0; the number is formatted as
  "cp<number>" and looked up in the OS-to-MySQL charset table.

  On other systems, when both setlocale() and nl_langinfo() are
  available, LC_CTYPE is set from the environment and the reported
  CODESET is looked up in the same table.

  @note On non-Windows builds this calls setlocale(LC_CTYPE, ""), which
        changes the LC_CTYPE locale of the calling process.

  @return  MySQL character set name; MYSQL_DEFAULT_CHARSET_NAME if the
           OS charset could not be determined or has no usable mapping.
           The result never points into a local buffer.
*/
const char *my_default_csname(void)
{
  const char *csname= NULL;
#ifdef _WIN32
  char cpbuf[64];
  int cp= GetConsoleCP();
  if (cp == 0)
    cp= GetACP();          /* no console code page: use the ANSI one */
  snprintf(cpbuf, sizeof(cpbuf), "cp%d", (int)cp);
  /* cpbuf is only the lookup key; the result points into the table */
  csname= my_os_charset_to_mysql_charset(cpbuf);
#elif defined(HAVE_SETLOCALE) && defined(HAVE_NL_LANGINFO)
  if (setlocale(LC_CTYPE, "") && (csname= nl_langinfo(CODESET)))
    csname= my_os_charset_to_mysql_charset(csname);
#endif
  return csname ? csname : MYSQL_DEFAULT_CHARSET_NAME;
}

0 comments on commit 5804bb4

Please sign in to comment.
You can’t perform that action at this time.