From 2dac22d2c95816cbe88bab851c36517d3562532f Mon Sep 17 00:00:00 2001 From: Frediano Ziglio Date: Sun, 24 Feb 2013 11:43:29 +0000 Subject: [PATCH] Fix double iconv conversion Double iconv is used if system iconv is not able to convert directly from one charset to another. This happens with some iconv implementations (like Solaris or HP-UX). Always use tds_iconv instead of tds_sys_iconv (mainly in tds_iconv_fread). Do not read too many characters in tds_iconv, only the ones required to be able to continue reading correctly on the next call. --- include/tdsiconv.h | 26 ++- src/dblib/bcp.c | 15 +- src/odbc/convert_tds2sql.c | 10 +- src/odbc/odbc_util.c | 2 +- src/odbc/sql2tds.c | 4 +- src/tds/challenge.c | 2 +- src/tds/iconv.c | 370 ++++++++++++++++++++------------ src/tds/login.c | 2 +- src/tds/query.c | 4 +- src/tds/read.c | 4 +- src/tds/token.c | 14 +- src/tds/unittests/iconv_fread.c | 26 ++- src/tds/unittests/utf8_3.c | 2 +- src/tds/write.c | 3 +- 14 files changed, 294 insertions(+), 190 deletions(-) diff --git a/include/tdsiconv.h b/include/tdsiconv.h index 107610d638..98b379c44c 100644 --- a/include/tdsiconv.h +++ b/include/tdsiconv.h @@ -85,22 +85,27 @@ typedef struct tds_errno_message_flags { unsigned int einval:1; } TDS_ERRNO_MESSAGE_FLAGS; +typedef struct tdsiconvdir +{ + TDS_ENCODING charset; + + iconv_t cd; + iconv_t cd2; + + unsigned char num_got; + unsigned char num_left; + char left[6]; +} TDSICONVDIR; + struct tdsiconvinfo { - TDS_ENCODING client_charset; - TDS_ENCODING server_charset; + struct tdsiconvdir to, from; #define TDS_ENCODING_INDIRECT 1 #define TDS_ENCODING_SWAPBYTE 2 #define TDS_ENCODING_MEMCPY 4 unsigned int flags; - iconv_t to_wire; /* conversion from client charset to server's format */ - iconv_t from_wire; /* conversion from server's format to client charset */ - - iconv_t to_wire2; /* conversion from client charset to server's format - indirect */ - iconv_t from_wire2; /* conversion from server's format to client charset - indirect */ - /* 
Suppress error messages that would otherwise be emitted by tds_iconv(). * Functions that process large buffers ask tds_iconv to convert it in "chunks". @@ -110,6 +115,7 @@ struct tdsiconvinfo * can prepopulate it. */ TDS_ERRNO_MESSAGE_FLAGS suppress; + }; /* We use ICONV_CONST for tds_iconv(), even if we don't have iconv() */ @@ -117,8 +123,8 @@ struct tdsiconvinfo # define ICONV_CONST const #endif -size_t tds_iconv_fread(iconv_t cd, FILE * stream, size_t field_len, size_t term_len, char *outbuf, size_t * outbytesleft); -size_t tds_iconv(TDSSOCKET * tds, const TDSICONV * char_conv, TDS_ICONV_DIRECTION io, +size_t tds_iconv_fread(TDSSOCKET * tds, TDSICONV * conv, FILE * stream, size_t field_len, size_t term_len, char *outbuf, size_t * outbytesleft); +size_t tds_iconv(TDSSOCKET * tds, TDSICONV * char_conv, TDS_ICONV_DIRECTION io, const char **inbuf, size_t * inbytesleft, char **outbuf, size_t * outbytesleft); const char *tds_canonical_charset_name(const char *charset_name); const char *tds_sybase_charset_name(const char *charset_name); diff --git a/src/dblib/bcp.c b/src/dblib/bcp.c index bb725a527a..9035d98bca 100644 --- a/src/dblib/bcp.c +++ b/src/dblib/bcp.c @@ -1230,7 +1230,7 @@ _bcp_read_hostfile(DBPROCESS * dbproc, FILE * hostfile, int *row_error) int file_len; size_t col_bytes_left; offset_type file_bytes_left, len; - iconv_t cd; + TDSICONV * conv; len = _bcp_measure_terminated_field(hostfile, hostcol->terminator, hostcol->term_len); if (len > 0x7fffffffl || len < 0) { @@ -1250,14 +1250,13 @@ _bcp_read_hostfile(DBPROCESS * dbproc, FILE * hostfile, int *row_error) */ file_len = collen; if (bcpcol->char_conv) { - TDSICONV *char_conv = bcpcol->char_conv; - collen *= char_conv->server_charset.max_bytes_per_char; - collen += char_conv->client_charset.min_bytes_per_char - 1; - collen /= char_conv->client_charset.min_bytes_per_char; - cd = char_conv->to_wire; + conv = bcpcol->char_conv; + collen *= conv->to.charset.max_bytes_per_char; + collen += 
conv->from.charset.min_bytes_per_char - 1; + collen /= conv->from.charset.min_bytes_per_char; tdsdump_log(TDS_DBG_FUNC, "Adjusted collen is %d.\n", collen); } else { - cd = (iconv_t) - 1; + conv = NULL; } coldata = calloc(1, 1 + collen); @@ -1273,7 +1272,7 @@ _bcp_read_hostfile(DBPROCESS * dbproc, FILE * hostfile, int *row_error) */ col_bytes_left = collen; /* TODO make tds_iconv_fread handle terminator directly to avoid fseek in _bcp_measure_terminated_field */ - file_bytes_left = tds_iconv_fread(cd, hostfile, file_len, hostcol->term_len, coldata, &col_bytes_left); + file_bytes_left = tds_iconv_fread(NULL, conv, hostfile, file_len, hostcol->term_len, coldata, &col_bytes_left); collen -= (int)col_bytes_left; /* tdsdump_log(TDS_DBG_FUNC, "collen is %d after tds_iconv_fread()\n", collen); */ diff --git a/src/odbc/convert_tds2sql.c b/src/odbc/convert_tds2sql.c index af052a0dc9..b13086b67f 100644 --- a/src/odbc/convert_tds2sql.c +++ b/src/odbc/convert_tds2sql.c @@ -64,12 +64,12 @@ odbc_convert_char(TDS_STMT * stmt, TDSCOLUMN * curcol, TDS_CHAR * src, TDS_UINT conv = tds->char_convs[client2server_chardata]; if (desttype == SQL_C_WCHAR) { /* SQL_C_WCHAR, convert to wide encode */ - conv = tds_iconv_get(tds, ODBC_WIDE_NAME, conv->server_charset.name); + conv = tds_iconv_get(tds, ODBC_WIDE_NAME, conv->to.charset.name); if (!conv) conv = tds_iconv_get(tds, ODBC_WIDE_NAME, "ISO-8859-1"); #ifdef ENABLE_ODBC_WIDE } else { - conv = tds_iconv_get(tds, tds_dstr_cstr(&stmt->dbc->original_charset), conv->server_charset.name); + conv = tds_iconv_get(tds, tds_dstr_cstr(&stmt->dbc->original_charset), conv->to.charset.name); if (!conv) conv = tds_iconv_get(tds, tds_dstr_cstr(&stmt->dbc->original_charset), "ISO-8859-1"); if (!conv) @@ -95,9 +95,9 @@ odbc_convert_char(TDS_STMT * stmt, TDSCOLUMN * curcol, TDS_CHAR * src, TDS_UINT } /* returned size have to take into account buffer left unconverted */ - if (il == 0 || (conv->client_charset.min_bytes_per_char == 
conv->client_charset.max_bytes_per_char - && conv->server_charset.min_bytes_per_char == conv->server_charset.max_bytes_per_char)) { - ol += il * conv->client_charset.min_bytes_per_char / conv->server_charset.min_bytes_per_char; + if (il == 0 || (conv->from.charset.min_bytes_per_char == conv->from.charset.max_bytes_per_char + && conv->to.charset.min_bytes_per_char == conv->to.charset.max_bytes_per_char)) { + ol += il * conv->from.charset.min_bytes_per_char / conv->to.charset.min_bytes_per_char; } else { /* TODO convert and discard ?? or return proper SQL_NO_TOTAL values ?? */ return SQL_NO_TOTAL; diff --git a/src/odbc/odbc_util.c b/src/odbc/odbc_util.c index f13837de7e..3ca110dffd 100644 --- a/src/odbc/odbc_util.c +++ b/src/odbc/odbc_util.c @@ -268,7 +268,7 @@ odbc_mb2utf(TDS_DBC *dbc, const char *s, int len) il = len; /* allocate needed buffer (+1 is to exclude 0 case) */ - ol = il * char_conv->server_charset.max_bytes_per_char / char_conv->client_charset.min_bytes_per_char + 1; + ol = il * char_conv->to.charset.max_bytes_per_char / char_conv->from.charset.min_bytes_per_char + 1; assert(ol > 0); buf = (char *) malloc(ol); if (!buf) diff --git a/src/odbc/sql2tds.c b/src/odbc/sql2tds.c index 584e99fba3..c9242c845a 100644 --- a/src/odbc/sql2tds.c +++ b/src/odbc/sql2tds.c @@ -204,7 +204,7 @@ odbc_sql2tds(TDS_STMT * stmt, const struct _drecord *drec_ipd, const struct _dre tds_set_param_type(tds, curcol, dest_type); - curcol->char_conv = tds_iconv_get(tds, ODBC_WIDE_NAME, conv->server_charset.name); + curcol->char_conv = tds_iconv_get(tds, ODBC_WIDE_NAME, conv->to.charset.name); memcpy(curcol->column_collation, tds->collation, sizeof(tds->collation)); } else { #ifdef ENABLE_ODBC_WIDE @@ -214,7 +214,7 @@ odbc_sql2tds(TDS_STMT * stmt, const struct _drecord *drec_ipd, const struct _dre tds_set_param_type(tds, curcol, dest_type); /* use binary format for binary to char */ if (is_char_type(dest_type)) - curcol->char_conv = sql_src_type == SQL_C_BINARY ? 
NULL : tds_iconv_get(tds, tds_dstr_cstr(&dbc->original_charset), conv->server_charset.name); + curcol->char_conv = sql_src_type == SQL_C_BINARY ? NULL : tds_iconv_get(tds, tds_dstr_cstr(&dbc->original_charset), conv->to.charset.name); #else tds_set_param_type(dbc->tds_socket, curcol, dest_type); /* use binary format for binary to char */ diff --git a/src/tds/challenge.c b/src/tds/challenge.c index d66f6a7151..0728382f5f 100644 --- a/src/tds/challenge.c +++ b/src/tds/challenge.c @@ -120,7 +120,7 @@ convert_to_usc2le_string(TDSSOCKET * tds, const char *s, size_t len, char *out) char *ob; size_t il, ol; - const TDSICONV * char_conv = tds->char_convs[client2ucs2]; + TDSICONV * char_conv = tds->char_convs[client2ucs2]; /* char_conv is only mostly const */ TDS_ERRNO_MESSAGE_FLAGS *suppress = (TDS_ERRNO_MESSAGE_FLAGS *) & char_conv->suppress; diff --git a/src/tds/iconv.c b/src/tds/iconv.c index 5c1cf988a8..683a36d236 100644 --- a/src/tds/iconv.c +++ b/src/tds/iconv.c @@ -248,17 +248,17 @@ tds_iconv_reset(TDSICONV *conv) * (min|max)_bytes_per_char can be used to divide * so init to safe values */ - conv->server_charset.min_bytes_per_char = 1; - conv->server_charset.max_bytes_per_char = 1; - conv->client_charset.min_bytes_per_char = 1; - conv->client_charset.max_bytes_per_char = 1; - - conv->server_charset.name = conv->client_charset.name = ""; - conv->server_charset.canonic = conv->client_charset.canonic = 0; - conv->to_wire = (iconv_t) -1; - conv->to_wire2 = (iconv_t) -1; - conv->from_wire = (iconv_t) -1; - conv->from_wire2 = (iconv_t) -1; + conv->to.charset.min_bytes_per_char = 1; + conv->to.charset.max_bytes_per_char = 1; + conv->from.charset.min_bytes_per_char = 1; + conv->from.charset.max_bytes_per_char = 1; + + conv->to.charset.name = conv->from.charset.name = ""; + conv->to.charset.canonic = conv->from.charset.canonic = 0; + conv->to.cd = (iconv_t) -1; + conv->to.cd2 = (iconv_t) -1; + conv->from.cd = (iconv_t) -1; + conv->from.cd2 = (iconv_t) -1; } /** @@ -324,8 
+324,8 @@ tds_iconv_open(TDSSOCKET * tds, const char *charset) int canonic_env_charset = tds->env.charset ? tds_canonical_charset(tds->env.charset) : -1; int fOK, ret; - TDS_ENCODING *client = &tds->char_convs[client2ucs2]->client_charset; - TDS_ENCODING *server = &tds->char_convs[client2ucs2]->server_charset; + TDS_ENCODING *client = &tds->char_convs[client2ucs2]->from.charset; + TDS_ENCODING *server = &tds->char_convs[client2ucs2]->to.charset; tdsdump_log(TDS_DBG_FUNC, "tds_iconv_open(%p, %s)\n", tds, charset); @@ -375,8 +375,8 @@ tds_iconv_open(TDSSOCKET * tds, const char *charset) if (!fOK) return; } else { - tds->char_convs[client2server_chardata]->client_charset = canonic_charsets[canonic_charset]; - tds->char_convs[client2server_chardata]->server_charset = canonic_charsets[canonic_charset]; + tds->char_convs[client2server_chardata]->from.charset = canonic_charsets[canonic_charset]; + tds->char_convs[client2server_chardata]->to.charset = canonic_charsets[canonic_charset]; } /* @@ -406,13 +406,13 @@ tds_iconv_open(TDSSOCKET * tds, const char *charset) static int tds_iconv_info_init(TDSICONV * char_conv, int client_canonical, int server_canonical) { - TDS_ENCODING *client = &char_conv->client_charset; - TDS_ENCODING *server = &char_conv->server_charset; + TDS_ENCODING *client = &char_conv->from.charset; + TDS_ENCODING *server = &char_conv->to.charset; - assert(char_conv->to_wire == (iconv_t) -1); - assert(char_conv->to_wire2 == (iconv_t) -1); - assert(char_conv->from_wire == (iconv_t) -1); - assert(char_conv->from_wire2 == (iconv_t) -1); + assert(char_conv->to.cd == (iconv_t) -1); + assert(char_conv->to.cd2 == (iconv_t) -1); + assert(char_conv->from.cd == (iconv_t) -1); + assert(char_conv->from.cd2 == (iconv_t) -1); if (client_canonical < 0) { tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: client charset name \"%d\" invalid\n", client_canonical); @@ -429,8 +429,8 @@ tds_iconv_info_init(TDSICONV * char_conv, int client_canonical, int server_canon /* special 
case, same charset, no conversion */ if (client_canonical == server_canonical) { - char_conv->to_wire = (iconv_t) -1; - char_conv->from_wire = (iconv_t) -1; + char_conv->to.cd = (iconv_t) -1; + char_conv->from.cd = (iconv_t) -1; char_conv->flags = TDS_ENCODING_MEMCPY; return 1; } @@ -464,28 +464,28 @@ tds_iconv_info_init(TDSICONV * char_conv, int client_canonical, int server_canon } } - char_conv->to_wire = tds_sys_iconv_open(iconv_names[server_canonical], iconv_names[client_canonical]); - if (char_conv->to_wire == (iconv_t) -1) { + char_conv->to.cd = tds_sys_iconv_open(iconv_names[server_canonical], iconv_names[client_canonical]); + if (char_conv->to.cd == (iconv_t) -1) { tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: cannot convert \"%s\"->\"%s\"\n", client->name, server->name); } - char_conv->from_wire = tds_sys_iconv_open(iconv_names[client_canonical], iconv_names[server_canonical]); - if (char_conv->from_wire == (iconv_t) -1) { + char_conv->from.cd = tds_sys_iconv_open(iconv_names[client_canonical], iconv_names[server_canonical]); + if (char_conv->from.cd == (iconv_t) -1) { tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: cannot convert \"%s\"->\"%s\"\n", server->name, client->name); } /* try indirect conversions */ - if (char_conv->to_wire == (iconv_t) -1 || char_conv->from_wire == (iconv_t) -1) { + if (char_conv->to.cd == (iconv_t) -1 || char_conv->from.cd == (iconv_t) -1) { tds_iconv_info_close(char_conv); /* TODO reuse some conversion, client charset is usually constant in all connection (or ISO8859-1) */ - char_conv->to_wire = tds_sys_iconv_open(iconv_names[POS_UTF8], iconv_names[client_canonical]); - char_conv->to_wire2 = tds_sys_iconv_open(iconv_names[server_canonical], iconv_names[POS_UTF8]); - char_conv->from_wire = tds_sys_iconv_open(iconv_names[POS_UTF8], iconv_names[server_canonical]); - char_conv->from_wire2 = tds_sys_iconv_open(iconv_names[client_canonical], iconv_names[POS_UTF8]); + char_conv->to.cd = tds_sys_iconv_open(iconv_names[POS_UTF8], 
iconv_names[client_canonical]); + char_conv->to.cd2 = tds_sys_iconv_open(iconv_names[server_canonical], iconv_names[POS_UTF8]); + char_conv->from.cd = tds_sys_iconv_open(iconv_names[POS_UTF8], iconv_names[server_canonical]); + char_conv->from.cd2 = tds_sys_iconv_open(iconv_names[client_canonical], iconv_names[POS_UTF8]); - if (char_conv->to_wire == (iconv_t) -1 || char_conv->to_wire2 == (iconv_t) -1 - || char_conv->from_wire == (iconv_t) -1 || char_conv->from_wire2 == (iconv_t) -1) { + if (char_conv->to.cd == (iconv_t) -1 || char_conv->to.cd2 == (iconv_t) -1 + || char_conv->from.cd == (iconv_t) -1 || char_conv->from.cd2 == (iconv_t) -1) { tds_iconv_info_close(char_conv); tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: cannot convert \"%s\"->\"%s\" indirectly\n", @@ -518,10 +518,10 @@ _iconv_close(iconv_t * cd) static void tds_iconv_info_close(TDSICONV * char_conv) { - _iconv_close(&char_conv->to_wire); - _iconv_close(&char_conv->to_wire2); - _iconv_close(&char_conv->from_wire); - _iconv_close(&char_conv->from_wire2); + _iconv_close(&char_conv->to.cd); + _iconv_close(&char_conv->to.cd2); + _iconv_close(&char_conv->from.cd); + _iconv_close(&char_conv->from.cd2); } void @@ -552,6 +552,166 @@ tds_iconv_free(TDSSOCKET * tds) tds->char_conv_count = 0; } +static size_t +tds_iconv_indirect(TDSICONVDIR *from, TDSICONVDIR *to, int *peilseq_raised, const char **pb1, size_t * pil1, char **ob3, size_t * pil3) +{ +#if ENABLE_EXTRA_CHECKS + char tmp[8]; +#else + char tmp[256]; +#endif + char *pb2; + size_t il1, il2, l; + int temp_errno; + size_t temp_irreversible, irreversible; + + do { + pb2 = tmp; + il2 = sizeof(tmp); + + if (to->num_left) { + if (*pil1 < to->num_got) { + errno = EINVAL; + return (size_t) -1; + } + *pil1 -= to->num_got; + *pb1 += to->num_got; + to->num_got = 0; + + memcpy(tmp, to->left, to->num_left); + pb2 += to->num_left; + il2 -= to->num_left; + to->num_left = 0; + } + + /* compute maximum to translate, making sure we don't get too much + * we can't read 
more characters than we can put into final output, we have two + * consecutive conversion n * m1..M1 -> n * m2..M2 -> n * m3..M3 + * (n number of characters, m minimum, M maximum), we want to limit n * M3 + * to final output length, so maximum input bytes = n * m1, so + * inputlen = n * m1 = outlen / M3 * m1 + * We must handle the case where we must read a characters but we are + * not sure it fit into output. Ie 1..2 -> 2..4 if we limit input to 1 + * and output to 2 could be that source characters occupy 2 bytes but + * output encoding take 4 bytes. In this case we should record the position + * to return, try to read at least one characters and if we cannot translate + * store it for next step. + */ + il1 = *pil1; + l = *pil3 * from->charset.min_bytes_per_char / to->charset.max_bytes_per_char; + if (il1 > l) + il1 = l; + + errno = 0; + l = il1; + temp_irreversible = tds_sys_iconv(to->cd, (ICONV_CONST char **) pb1, &il1, &pb2, &il2); + *pil1 -= l - il1; + temp_errno = errno; + + /* here are the tricky part, assure we read at least one characters */ + il2 = pb2 - tmp; + pb2 = tmp; + if (il2 == 0) { + l = from->charset.min_bytes_per_char; + while (temp_errno == EINVAL && l < from->charset.max_bytes_per_char && l < *pil1) { + il1 = l; + il2 = sizeof(to->left); + pb2 = to->left; + + errno = 0; + l = il1; + temp_irreversible = tds_sys_iconv(to->cd, (ICONV_CONST char **) pb1, &il1, &pb2, &il2); + to->num_got = l - il1; + if (to->num_got) *pb1 -= to->num_got; + to->num_left = il2 = sizeof(to->left) - il2; + pb2 = to->left; + temp_errno = errno; + if (to->num_left) + break; + ++l; + } + } else { + /* avoid EINVAL cause we make input shorter */ + temp_errno = E2BIG; + } + + /* convert partial */ + for (;;) { + errno = 0; + irreversible = tds_sys_iconv(to->cd2, (ICONV_CONST char **) &pb2, &il2, ob3, pil3); + + /* check if we consumed single character from input */ + if (to->num_left && il2 == 0) { + *pil1 -= to->num_got; + *pb1 += to->num_got; + to->num_got = 0; + + 
to->num_left = 0; + } + if (irreversible != (size_t) - 1) { + if (pil1 && *pil1) + break; + return irreversible; + } + /* EINVAL should be impossible, all characters came from previous iconv... */ + if (errno == E2BIG || errno == EINVAL) + return irreversible; + + /* + * error should be EILSEQ, not convertible sequence + * skip UTF-8 sequence, replace with '?' + */ + /* avoid infinite recursion */ + *peilseq_raised = 1; + if (*pb2 == '?') + return irreversible; + *pb2 = (char) 0x80; + while (il2 && (*pb2 & 0xC0) == 0x80) + ++pb2, --il2; + --pb2; + ++il2; + *pb2 = '?'; + } + } while (temp_errno == E2BIG); + errno = temp_errno; + return temp_irreversible; +} + +static size_t +tds_iconv_swap(iconv_t cd, const char **inbuf, size_t * inbytesleft, char **outbuf, size_t * outbytesleft) +{ + /* swap bytes if necessary */ +#if ENABLE_EXTRA_CHECKS + char tmp[8]; +#else + char tmp[256]; +#endif + char *pib; + size_t il, n, irreversible; + + do { + pib = tmp; + il = *inbytesleft > sizeof(tmp) ? sizeof(tmp) : *inbytesleft; + for (n = 0; n < il; n += 2) { + tmp[n] = (*inbuf)[n + 1]; + tmp[n + 1] = (*inbuf)[n]; + } + irreversible = tds_sys_iconv(cd, (ICONV_CONST char **) &pib, &il, outbuf, outbytesleft); + il = pib - tmp; + *inbuf += il; + *inbytesleft -= il; + } while (irreversible != (size_t) - 1 && *inbytesleft); + + return irreversible; +} + +static void +tds_iconv_err(TDSSOCKET *tds, int err) +{ + if (tds) + tdserror(tds->tds_ctx, tds, err, 0); +} + /** * Wrapper around iconv(3). Same parameters, with slightly different behavior. * \param tds state information for the socket and the TDS protocol @@ -577,21 +737,20 @@ tds_iconv_free(TDSSOCKET * tds) * On a write error we emit Msg 2402, Severity 16 (EX_USER): * "Error converting client characters into server's character set. Some character(s) could not be converted." * and return an error code. Client libraries relying on this routine should reflect an error back to the application. 
- * + * * \todo Check for variable multibyte non-UTF-8 input character set. * \todo Use more robust error message generation. * \todo For reads, cope with \a outbuf encodings that don't have the equivalent of an ASCII '?'. * \todo Support alternative to '?' for the replacement character. */ size_t -tds_iconv(TDSSOCKET * tds, const TDSICONV * conv, TDS_ICONV_DIRECTION io, +tds_iconv(TDSSOCKET * tds, TDSICONV * conv, TDS_ICONV_DIRECTION io, const char **inbuf, size_t * inbytesleft, char **outbuf, size_t * outbytesleft) { static const iconv_t invalid = (iconv_t) -1; - const TDS_ENCODING *input_charset = NULL; - const char *output_charset_name = NULL; + TDSICONVDIR *from = NULL; + TDSICONVDIR *to = NULL; - iconv_t cd = invalid, cd2 = invalid; iconv_t error_cd = invalid; char quest_mark[] = "?"; /* best to leave non-const; implementations vary */ @@ -608,16 +767,12 @@ tds_iconv(TDSSOCKET * tds, const TDSICONV * conv, TDS_ICONV_DIRECTION io, switch (io) { case to_server: - cd = conv->to_wire; - cd2 = conv->to_wire2; - input_charset = &conv->client_charset; - output_charset_name = conv->server_charset.name; + from = &conv->from; + to = &conv->to; break; case to_client: - cd = conv->from_wire; - cd2 = conv->from_wire2; - input_charset = &conv->server_charset; - output_charset_name = conv->client_charset.name; + from = &conv->to; + to = &conv->from; break; default: tdsdump_log(TDS_DBG_FUNC, "tds_iconv: unable to determine if %d means in or out. \n", io); @@ -626,7 +781,7 @@ tds_iconv(TDSSOCKET * tds, const TDSICONV * conv, TDS_ICONV_DIRECTION io, } /* silly case, memcpy */ - if (conv->flags & TDS_ENCODING_MEMCPY || cd == invalid) { + if (conv->flags & TDS_ENCODING_MEMCPY || to->cd == invalid) { size_t len = *inbytesleft < *outbytesleft ? 
*inbytesleft : *outbytesleft; memcpy(*outbuf, *inbuf, len); @@ -645,79 +800,11 @@ tds_iconv(TDSSOCKET * tds, const TDSICONV * conv, TDS_ICONV_DIRECTION io, p = *outbuf; for (;;) { if (conv->flags & TDS_ENCODING_INDIRECT) { -#if ENABLE_EXTRA_CHECKS - char tmp[8]; -#else - char tmp[128]; -#endif - char *pb = tmp; - size_t l = sizeof(tmp); - int temp_errno; - size_t temp_irreversible; - - temp_irreversible = tds_sys_iconv(cd, (ICONV_CONST char **) inbuf, inbytesleft, &pb, &l); - temp_errno = errno; - - /* convert partial */ - pb = tmp; - l = sizeof(tmp) - l; - for (;;) { - errno = 0; - irreversible = tds_sys_iconv(cd2, (ICONV_CONST char **) &pb, &l, outbuf, outbytesleft); - if (irreversible != (size_t) - 1) { - if (inbytesleft && *inbytesleft) - break; - goto end_loop; - } - /* EINVAL should be impossible, all characters came from previous iconv... */ - if (errno == E2BIG || errno == EINVAL) - goto end_loop; - - /* - * error should be EILSEQ, not convertible sequence - * skip UTF-8 sequence - */ - /* avoid infinite recursion */ - eilseq_raised = 1; - if (*pb == '?') - goto end_loop; - *pb = (char) 0x80; - while(l && (*pb & 0xC0) == 0x80) - ++pb, --l; - --pb; - ++l; - *pb = '?'; - } - if (temp_errno == E2BIG) { - errno = 0; - continue; - } - errno = temp_errno; - irreversible = temp_irreversible; - break; + irreversible = tds_iconv_indirect(from, to, &eilseq_raised, inbuf, inbytesleft, outbuf, outbytesleft); } else if (io == to_client && conv->flags & TDS_ENCODING_SWAPBYTE && inbuf) { - /* swap bytes if necessary */ -#if ENABLE_EXTRA_CHECKS - char tmp[8]; -#else - char tmp[128]; -#endif - char *pib = tmp; - size_t il = *inbytesleft > sizeof(tmp) ? 
sizeof(tmp) : *inbytesleft; - size_t n; - - for (n = 0; n < il; n += 2) { - tmp[n] = (*inbuf)[n + 1]; - tmp[n + 1] = (*inbuf)[n]; - } - irreversible = tds_sys_iconv(cd, (ICONV_CONST char **) &pib, &il, outbuf, outbytesleft); - il = pib - tmp; - *inbuf += il; - *inbytesleft -= il; - if (irreversible != (size_t) - 1 && *inbytesleft) - continue; + irreversible = tds_iconv_swap(to->cd, inbuf, inbytesleft, outbuf, outbytesleft); } else { - irreversible = tds_sys_iconv(cd, (ICONV_CONST char **) inbuf, inbytesleft, outbuf, outbytesleft); + irreversible = tds_sys_iconv(to->cd, (ICONV_CONST char **) inbuf, inbytesleft, outbuf, outbytesleft); } /* iconv success, return */ if (irreversible != (size_t) - 1) { @@ -742,7 +829,7 @@ tds_iconv(TDSSOCKET * tds, const TDSICONV * conv, TDS_ICONV_DIRECTION io, * Invalid input sequence encountered reading from server. * Skip one input sequence, adjusting pointers. */ - one_character = skip_one_input_sequence(cd, input_charset, inbuf, inbytesleft); + one_character = skip_one_input_sequence(to->cd, &from->charset, inbuf, inbytesleft); if (!one_character) break; @@ -754,7 +841,7 @@ tds_iconv(TDSSOCKET * tds, const TDSICONV * conv, TDS_ICONV_DIRECTION io, * do not convert singlebyte <-> singlebyte. */ if (error_cd == invalid) { - error_cd = tds_sys_iconv_open(output_charset_name, iconv_names[POS_UTF8]); + error_cd = tds_sys_iconv_open(to->charset.name, iconv_names[POS_UTF8]); if (error_cd == invalid) { break; /* what to do? 
*/ } @@ -772,8 +859,7 @@ tds_iconv(TDSSOCKET * tds, const TDSICONV * conv, TDS_ICONV_DIRECTION io, if (!*inbytesleft) break; } -end_loop: - + /* swap bytes if necessary */ if (io == to_server && conv->flags & TDS_ENCODING_SWAPBYTE) { assert((*outbuf - p) % 2 == 0); @@ -789,13 +875,13 @@ tds_iconv(TDSSOCKET * tds, const TDSICONV * conv, TDS_ICONV_DIRECTION io, /* invalid multibyte input sequence encountered */ if (io == to_client) { if (irreversible == (size_t) - 1) { - tdserror(tds->tds_ctx, tds, TDSEICONV2BIG, 0); + tds_iconv_err(tds, TDSEICONV2BIG); } else { - tdserror(tds->tds_ctx, tds, TDSEICONVI, 0); + tds_iconv_err(tds, TDSEICONVI); errno = 0; } } else { - tdserror(tds->tds_ctx, tds, TDSEICONVO, 0); + tds_iconv_err(tds, TDSEICONVO); } suppress->eilseq = 1; } @@ -805,13 +891,13 @@ tds_iconv(TDSSOCKET * tds, const TDSICONV * conv, TDS_ICONV_DIRECTION io, if (suppress->einval) break; /* in chunk conversion this can mean we end a chunk inside a character */ - tdserror(tds->tds_ctx, tds, TDSEICONVAVAIL, 0); + tds_iconv_err(tds, TDSEICONVAVAIL); suppress->einval = 1; break; case E2BIG: /* output buffer has no more room */ if (suppress->e2big) break; - tdserror(tds->tds_ctx, tds, TDSEICONVIU, 0); + tds_iconv_err(tds, TDSEICONVIU); suppress->e2big = 1; break; default: @@ -830,7 +916,7 @@ tds_iconv(TDSSOCKET * tds, const TDSICONV * conv, TDS_ICONV_DIRECTION io, * \return Count of bytes either not read, or read but not converted. Returns zero on success. 
*/ size_t -tds_iconv_fread(iconv_t cd, FILE * stream, size_t field_len, size_t term_len, char *outbuf, size_t * outbytesleft) +tds_iconv_fread(TDSSOCKET * tds, TDSICONV * conv, FILE * stream, size_t field_len, size_t term_len, char *outbuf, size_t * outbytesleft) { #ifdef ENABLE_EXTRA_CHECKS char buffer[16]; @@ -841,9 +927,9 @@ tds_iconv_fread(iconv_t cd, FILE * stream, size_t field_len, size_t term_len, ch size_t isize = 0, nonreversible_conversions = 0; /* - * If cd isn't valid, it's just an indication that this column needs no conversion. + * If conv isn't valid, it's just an indication that this column needs no conversion. */ - if (cd == (iconv_t) -1) { + if (conv == NULL) { assert(field_len <= *outbytesleft); if (field_len > 0) { if (1 != fread(outbuf, field_len, 1, stream)) { @@ -877,7 +963,7 @@ tds_iconv_fread(iconv_t cd, FILE * stream, size_t field_len, size_t term_len, ch isize += ib - buffer; ib = buffer; - nonreversible_conversions += tds_sys_iconv(cd, (ICONV_CONST char **) &ib, &isize, &outbuf, outbytesleft); + nonreversible_conversions += tds_iconv(tds, conv, to_server, (const char **) &ib, &isize, &outbuf, outbytesleft); if (isize != 0) { memmove(buffer, ib, isize); @@ -923,8 +1009,8 @@ tds_iconv_get_info(TDSSOCKET * tds, int canonic_client, int canonic_server) /* search a charset from already allocated charsets */ for (i = tds->char_conv_count; --i >= initial_char_conv_count;) - if (canonic_client == tds->char_convs[i]->client_charset.canonic - && canonic_server == tds->char_convs[i]->server_charset.canonic) + if (canonic_client == tds->char_convs[i]->from.charset.canonic + && canonic_server == tds->char_convs[i]->to.charset.canonic) return tds->char_convs[i]; /* allocate a new iconv structure */ @@ -987,11 +1073,11 @@ tds_srv_charset_changed_num(TDSSOCKET * tds, int canonic_charset_num) tdsdump_log(TDS_DBG_FUNC, "setting server single-byte charset to \"%s\"\n", canonic_charsets[canonic_charset_num].name); - if (canonic_charset_num == 
char_conv->server_charset.canonic) + if (canonic_charset_num == char_conv->to.charset.canonic) return; /* find and set conversion */ - char_conv = tds_iconv_get_info(tds, tds->char_convs[client2ucs2]->client_charset.canonic, canonic_charset_num); + char_conv = tds_iconv_get_info(tds, tds->char_convs[client2ucs2]->from.charset.canonic, canonic_charset_num); if (char_conv) tds->char_convs[client2server_chardata] = char_conv; @@ -1432,10 +1518,10 @@ tds_iconv_from_collate(TDSSOCKET * tds, TDS_UCHAR collate[5]) int canonic_charset = collate2charset(sql_collate, lcid); /* same as client (usually this is true, so this improve performance) ? */ - if (tds->char_convs[client2server_chardata]->server_charset.canonic == canonic_charset) + if (tds->char_convs[client2server_chardata]->to.charset.canonic == canonic_charset) return tds->char_convs[client2server_chardata]; - return tds_iconv_get_info(tds, tds->char_convs[client2ucs2]->client_charset.canonic, canonic_charset); + return tds_iconv_get_info(tds, tds->char_convs[client2ucs2]->from.charset.canonic, canonic_charset); } /** @} */ diff --git a/src/tds/login.c b/src/tds/login.c index dc9bfdfecb..c72f5531f4 100644 --- a/src/tds/login.c +++ b/src/tds/login.c @@ -405,7 +405,7 @@ tds_connect(TDSSOCKET * tds, TDSCONNECTION * connection, int *p_oserr) #endif /* set up iconv if not already initialized*/ - if (tds->char_convs[client2ucs2]->to_wire == (iconv_t) -1) { + if (tds->char_convs[client2ucs2]->to.cd == (iconv_t) -1) { if (!tds_dstr_isempty(&connection->client_charset)) { tds_iconv_open(tds, tds_dstr_cstr(&connection->client_charset)); } diff --git a/src/tds/query.c b/src/tds/query.c index 67c4380563..461d7b742c 100644 --- a/src/tds/query.c +++ b/src/tds/query.c @@ -120,7 +120,7 @@ tds_ascii_to_ucs2(char *buffer, const char *buf) * \return string allocated (or input pointer if no conversion required) or NULL if error */ static const char * -tds_convert_string(TDSSOCKET * tds, const TDSICONV * char_conv, const char *s, int 
len, size_t *out_len) +tds_convert_string(TDSSOCKET * tds, TDSICONV * char_conv, const char *s, int len, size_t *out_len) { char *buf; @@ -140,7 +140,7 @@ tds_convert_string(TDSSOCKET * tds, const TDSICONV * char_conv, const char *s, i } /* allocate needed buffer (+1 is to exclude 0 case) */ - ol = il * char_conv->server_charset.max_bytes_per_char / char_conv->client_charset.min_bytes_per_char + 1; + ol = il * char_conv->to.charset.max_bytes_per_char / char_conv->from.charset.min_bytes_per_char + 1; buf = (char *) malloc(ol); if (!buf) return NULL; diff --git a/src/tds/read.c b/src/tds/read.c index 602a458315..4867ea72a4 100644 --- a/src/tds/read.c +++ b/src/tds/read.c @@ -49,7 +49,7 @@ TDS_RCSID(var, "$Id: read.c,v 1.112 2010-07-30 07:34:06 freddy77 Exp $"); -static int read_and_convert(TDSSOCKET * tds, const TDSICONV * char_conv, +static int read_and_convert(TDSSOCKET * tds, TDSICONV * char_conv, size_t * wire_size, char **outbuf, size_t * outbytesleft); /** @@ -318,7 +318,7 @@ tds_get_n(TDSSOCKET * tds, void *dest, int need) * moved to the beginning, ptemp is adjusted to point just behind them, and the next chunk is read. */ static int -read_and_convert(TDSSOCKET * tds, const TDSICONV * char_conv, size_t * wire_size, char **outbuf, +read_and_convert(TDSSOCKET * tds, TDSICONV * char_conv, size_t * wire_size, char **outbuf, size_t * outbytesleft) { TEMP_INIT(256); diff --git a/src/tds/token.c b/src/tds/token.c index 507f79994d..5a27ead2f1 100644 --- a/src/tds/token.c +++ b/src/tds/token.c @@ -3397,7 +3397,7 @@ adjust_character_column_size(TDSSOCKET * tds, TDSCOLUMN * curcol) static const char sybase_utf[] = "UTF-16LE"; #endif - curcol->char_conv = tds_iconv_get(tds, tds->char_convs[client2ucs2]->client_charset.name, sybase_utf); + curcol->char_conv = tds_iconv_get(tds, tds->char_convs[client2ucs2]->from.charset.name, sybase_utf); /* fallback to UCS-2LE */ /* FIXME should be useless. 
Does not works always */ @@ -3420,9 +3420,9 @@ adjust_character_column_size(TDSSOCKET * tds, TDSCOLUMN * curcol) "\tServer column_size: %d\n" "\tClient charset: %s\n" "\tClient column_size: %d\n", - curcol->char_conv->server_charset.name, + curcol->char_conv->to.charset.name, curcol->on_server.column_size, - curcol->char_conv->client_charset.name, + curcol->char_conv->from.charset.name, curcol->column_size); } @@ -3443,10 +3443,10 @@ determine_adjusted_size(const TDSICONV * char_conv, int size) if (size >= 0x10000000) return 0x7fffffff; - size *= char_conv->client_charset.max_bytes_per_char; - if (size % char_conv->server_charset.min_bytes_per_char) - size += char_conv->server_charset.min_bytes_per_char; - size /= char_conv->server_charset.min_bytes_per_char; + size *= char_conv->from.charset.max_bytes_per_char; + if (size % char_conv->to.charset.min_bytes_per_char) + size += char_conv->to.charset.min_bytes_per_char; + size /= char_conv->to.charset.min_bytes_per_char; return size; } diff --git a/src/tds/unittests/iconv_fread.c b/src/tds/unittests/iconv_fread.c index 1e692f3e77..cf5ba5ff8b 100644 --- a/src/tds/unittests/iconv_fread.c +++ b/src/tds/unittests/iconv_fread.c @@ -35,15 +35,25 @@ static void *no_unused_var_warn[] = { software_version, no_unused_var_warn }; int main(int argc, char **argv) { - iconv_t cd = tds_sys_iconv_open("ISO-8859-1", "UTF-8"); static const char out_file[] = "iconv_fread.out"; char buf[256]; int i; FILE *f; + TDSCONTEXT *ctx = tds_alloc_context(NULL); + TDSSOCKET *tds = tds_alloc_socket(ctx, 512); + TDSICONV * conv; - if (cd == (iconv_t) - 1) { + if (!ctx || !tds) { + fprintf(stderr, "Error creating socket!\n"); + return 1; + } + + tds_iconv_open(tds, "ISO-8859-1"); + + conv = tds_iconv_get(tds, "UTF-8", "ISO-8859-1"); + if (conv == NULL) { fprintf(stderr, "Error creating conversion, giving up!\n"); - return 0; + return 1; } f = fopen(out_file, "w+b"); @@ -74,19 +84,23 @@ main(int argc, char **argv) /* convert it */ memset(out, 'x', 
sizeof(out)); - res = tds_iconv_fread(cd, f, i+2, 0, out, &out_len); + res = tds_iconv_fread(NULL, conv, f, i+2, 0, out, &out_len); printf("res %u out_len %u\n", (unsigned int) res, (unsigned int) out_len); /* test */ memset(buf, 'a', i); buf[i] = 0x90; assert(res == 0); - assert(sizeof(out) - out_len == i+1); + if (sizeof(out) - out_len != i+1) { + fprintf(stderr, "out %u bytes expected %d\n", (unsigned) (sizeof(out) - out_len), i+1); + return 1; + } assert(memcmp(out, buf, i+1) == 0); } fclose(f); unlink(out_file); - tds_sys_iconv_close(cd); + tds_free_socket(tds); + tds_free_context(ctx); return 0; } diff --git a/src/tds/unittests/utf8_3.c b/src/tds/unittests/utf8_3.c index 97920bd8b7..2a0a66813c 100644 --- a/src/tds/unittests/utf8_3.c +++ b/src/tds/unittests/utf8_3.c @@ -84,7 +84,7 @@ test(const char *buf) l = (sizeof(query) -1); strncpy(query, curcol->column_name, l); query[l] = 0; - fprintf(stderr, "Wrong result Got: '%s' len %d\n Expected: '%s' len %u\n", query, + fprintf(stderr, "Wrong result Got: '%s' len %d\n Expected: '%s' len %u\n", query, curcol->column_namelen, tmp, (unsigned int) strlen(tmp)); exit(1); } diff --git a/src/tds/write.c b/src/tds/write.c index c40fcb13ed..deb932ab64 100644 --- a/src/tds/write.c +++ b/src/tds/write.c @@ -101,8 +101,7 @@ tds_put_string(TDSSOCKET * tds, const char *s, int len) char outbuf[256], *poutbuf; size_t inbytesleft, outbytesleft, bytes_out = 0; - client = &tds->char_convs[client2ucs2]->client_charset; - server = &tds->char_convs[client2ucs2]->server_charset; + client = &tds->char_convs[client2ucs2]->from.charset; if (len < 0) { if (client->min_bytes_per_char == 1) { /* ascii or UTF-8 */