Skip to content

Commit

Permalink
Fix double iconv conversion
Browse files Browse the repository at this point in the history
Double iconv is used if the system iconv is not able to convert directly from
one charset to another. This happens with some iconv implementations (like
those on Solaris or HP-UX).
Always use tds_iconv instead of tds_sys_iconv (mainly in tds_iconv_fread).
Do not read too many characters in tds_iconv; read only those required to be able
to continue reading correctly on the next call.
  • Loading branch information
freddy77 committed Feb 27, 2013
1 parent a5325a9 commit 2dac22d
Show file tree
Hide file tree
Showing 14 changed files with 294 additions and 190 deletions.
26 changes: 16 additions & 10 deletions include/tdsiconv.h
Expand Up @@ -85,22 +85,27 @@ typedef struct tds_errno_message_flags {
unsigned int einval:1;
} TDS_ERRNO_MESSAGE_FLAGS;

/*
 * Per-direction state for a character-set conversion.
 * struct tdsiconvinfo embeds two of these, named "to" and "from".
 */
typedef struct tdsiconvdir
{
/* Charset description for this direction; callers read its name and min/max_bytes_per_char. */
TDS_ENCODING charset;

/*
 * iconv descriptors for this direction.
 * NOTE(review): cd2 is presumably the second step of an indirect ("double")
 * conversion, used when iconv cannot convert directly -- confirm in the implementation.
 */
iconv_t cd;
iconv_t cd2;

/*
 * NOTE(review): these look like bookkeeping for bytes carried over between
 * calls (left[] as a small holding buffer, num_left as its fill count);
 * exact semantics of num_got/num_left are not visible here -- confirm in the implementation.
 */
unsigned char num_got;
unsigned char num_left;
char left[6];
} TDSICONVDIR;

struct tdsiconvinfo
{
TDS_ENCODING client_charset;
TDS_ENCODING server_charset;
struct tdsiconvdir to, from;

#define TDS_ENCODING_INDIRECT 1
#define TDS_ENCODING_SWAPBYTE 2
#define TDS_ENCODING_MEMCPY 4
unsigned int flags;

iconv_t to_wire; /* conversion from client charset to server's format */
iconv_t from_wire; /* conversion from server's format to client charset */

iconv_t to_wire2; /* conversion from client charset to server's format - indirect */
iconv_t from_wire2; /* conversion from server's format to client charset - indirect */

/*
* Suppress error messages that would otherwise be emitted by tds_iconv().
* Functions that process large buffers ask tds_iconv to convert it in "chunks".
Expand All @@ -110,15 +115,16 @@ struct tdsiconvinfo
* can prepopulate it.
*/
TDS_ERRNO_MESSAGE_FLAGS suppress;

};

/* We use ICONV_CONST for tds_iconv(), even if we don't have iconv() */
#ifndef ICONV_CONST
# define ICONV_CONST const
#endif

size_t tds_iconv_fread(iconv_t cd, FILE * stream, size_t field_len, size_t term_len, char *outbuf, size_t * outbytesleft);
size_t tds_iconv(TDSSOCKET * tds, const TDSICONV * char_conv, TDS_ICONV_DIRECTION io,
size_t tds_iconv_fread(TDSSOCKET * tds, TDSICONV * conv, FILE * stream, size_t field_len, size_t term_len, char *outbuf, size_t * outbytesleft);
size_t tds_iconv(TDSSOCKET * tds, TDSICONV * char_conv, TDS_ICONV_DIRECTION io,
const char **inbuf, size_t * inbytesleft, char **outbuf, size_t * outbytesleft);
const char *tds_canonical_charset_name(const char *charset_name);
const char *tds_sybase_charset_name(const char *charset_name);
Expand Down
15 changes: 7 additions & 8 deletions src/dblib/bcp.c
Expand Up @@ -1230,7 +1230,7 @@ _bcp_read_hostfile(DBPROCESS * dbproc, FILE * hostfile, int *row_error)
int file_len;
size_t col_bytes_left;
offset_type file_bytes_left, len;
iconv_t cd;
TDSICONV * conv;

len = _bcp_measure_terminated_field(hostfile, hostcol->terminator, hostcol->term_len);
if (len > 0x7fffffffl || len < 0) {
Expand All @@ -1250,14 +1250,13 @@ _bcp_read_hostfile(DBPROCESS * dbproc, FILE * hostfile, int *row_error)
*/
file_len = collen;
if (bcpcol->char_conv) {
TDSICONV *char_conv = bcpcol->char_conv;
collen *= char_conv->server_charset.max_bytes_per_char;
collen += char_conv->client_charset.min_bytes_per_char - 1;
collen /= char_conv->client_charset.min_bytes_per_char;
cd = char_conv->to_wire;
conv = bcpcol->char_conv;
collen *= conv->to.charset.max_bytes_per_char;
collen += conv->from.charset.min_bytes_per_char - 1;
collen /= conv->from.charset.min_bytes_per_char;
tdsdump_log(TDS_DBG_FUNC, "Adjusted collen is %d.\n", collen);
} else {
cd = (iconv_t) - 1;
conv = NULL;
}

coldata = calloc(1, 1 + collen);
Expand All @@ -1273,7 +1272,7 @@ _bcp_read_hostfile(DBPROCESS * dbproc, FILE * hostfile, int *row_error)
*/
col_bytes_left = collen;
/* TODO make tds_iconv_fread handle terminator directly to avoid fseek in _bcp_measure_terminated_field */
file_bytes_left = tds_iconv_fread(cd, hostfile, file_len, hostcol->term_len, coldata, &col_bytes_left);
file_bytes_left = tds_iconv_fread(NULL, conv, hostfile, file_len, hostcol->term_len, coldata, &col_bytes_left);
collen -= (int)col_bytes_left;

/* tdsdump_log(TDS_DBG_FUNC, "collen is %d after tds_iconv_fread()\n", collen); */
Expand Down
10 changes: 5 additions & 5 deletions src/odbc/convert_tds2sql.c
Expand Up @@ -64,12 +64,12 @@ odbc_convert_char(TDS_STMT * stmt, TDSCOLUMN * curcol, TDS_CHAR * src, TDS_UINT
conv = tds->char_convs[client2server_chardata];
if (desttype == SQL_C_WCHAR) {
/* SQL_C_WCHAR, convert to wide encode */
conv = tds_iconv_get(tds, ODBC_WIDE_NAME, conv->server_charset.name);
conv = tds_iconv_get(tds, ODBC_WIDE_NAME, conv->to.charset.name);
if (!conv)
conv = tds_iconv_get(tds, ODBC_WIDE_NAME, "ISO-8859-1");
#ifdef ENABLE_ODBC_WIDE
} else {
conv = tds_iconv_get(tds, tds_dstr_cstr(&stmt->dbc->original_charset), conv->server_charset.name);
conv = tds_iconv_get(tds, tds_dstr_cstr(&stmt->dbc->original_charset), conv->to.charset.name);
if (!conv)
conv = tds_iconv_get(tds, tds_dstr_cstr(&stmt->dbc->original_charset), "ISO-8859-1");
if (!conv)
Expand All @@ -95,9 +95,9 @@ odbc_convert_char(TDS_STMT * stmt, TDSCOLUMN * curcol, TDS_CHAR * src, TDS_UINT
}

/* returned size have to take into account buffer left unconverted */
if (il == 0 || (conv->client_charset.min_bytes_per_char == conv->client_charset.max_bytes_per_char
&& conv->server_charset.min_bytes_per_char == conv->server_charset.max_bytes_per_char)) {
ol += il * conv->client_charset.min_bytes_per_char / conv->server_charset.min_bytes_per_char;
if (il == 0 || (conv->from.charset.min_bytes_per_char == conv->from.charset.max_bytes_per_char
&& conv->to.charset.min_bytes_per_char == conv->to.charset.max_bytes_per_char)) {
ol += il * conv->from.charset.min_bytes_per_char / conv->to.charset.min_bytes_per_char;
} else {
/* TODO convert and discard ?? or return proper SQL_NO_TOTAL values ?? */
return SQL_NO_TOTAL;
Expand Down
2 changes: 1 addition & 1 deletion src/odbc/odbc_util.c
Expand Up @@ -268,7 +268,7 @@ odbc_mb2utf(TDS_DBC *dbc, const char *s, int len)
il = len;

/* allocate needed buffer (+1 is to exclude 0 case) */
ol = il * char_conv->server_charset.max_bytes_per_char / char_conv->client_charset.min_bytes_per_char + 1;
ol = il * char_conv->to.charset.max_bytes_per_char / char_conv->from.charset.min_bytes_per_char + 1;
assert(ol > 0);
buf = (char *) malloc(ol);
if (!buf)
Expand Down
4 changes: 2 additions & 2 deletions src/odbc/sql2tds.c
Expand Up @@ -204,7 +204,7 @@ odbc_sql2tds(TDS_STMT * stmt, const struct _drecord *drec_ipd, const struct _dre

tds_set_param_type(tds, curcol, dest_type);

curcol->char_conv = tds_iconv_get(tds, ODBC_WIDE_NAME, conv->server_charset.name);
curcol->char_conv = tds_iconv_get(tds, ODBC_WIDE_NAME, conv->to.charset.name);
memcpy(curcol->column_collation, tds->collation, sizeof(tds->collation));
} else {
#ifdef ENABLE_ODBC_WIDE
Expand All @@ -214,7 +214,7 @@ odbc_sql2tds(TDS_STMT * stmt, const struct _drecord *drec_ipd, const struct _dre
tds_set_param_type(tds, curcol, dest_type);
/* use binary format for binary to char */
if (is_char_type(dest_type))
curcol->char_conv = sql_src_type == SQL_C_BINARY ? NULL : tds_iconv_get(tds, tds_dstr_cstr(&dbc->original_charset), conv->server_charset.name);
curcol->char_conv = sql_src_type == SQL_C_BINARY ? NULL : tds_iconv_get(tds, tds_dstr_cstr(&dbc->original_charset), conv->to.charset.name);
#else
tds_set_param_type(dbc->tds_socket, curcol, dest_type);
/* use binary format for binary to char */
Expand Down
2 changes: 1 addition & 1 deletion src/tds/challenge.c
Expand Up @@ -120,7 +120,7 @@ convert_to_usc2le_string(TDSSOCKET * tds, const char *s, size_t len, char *out)
char *ob;
size_t il, ol;

const TDSICONV * char_conv = tds->char_convs[client2ucs2];
TDSICONV * char_conv = tds->char_convs[client2ucs2];

/* char_conv is only mostly const */
TDS_ERRNO_MESSAGE_FLAGS *suppress = (TDS_ERRNO_MESSAGE_FLAGS *) & char_conv->suppress;
Expand Down

0 comments on commit 2dac22d

Please sign in to comment.