Skip to content

Commit

Permalink
CsvImport - Fix mishandling of quoted empty fields
Browse files Browse the repository at this point in the history
  • Loading branch information
gjanssens committed May 27, 2019
1 parent ed42f8a commit e557b02
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 4 deletions.
16 changes: 14 additions & 2 deletions gnucash/import-export/csv-imp/gnc-tokenizer-csv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,20 @@ int GncCsvTokenizer::tokenize()
bs_pos = line.find ("\"\"");
while (bs_pos != std::string::npos)
{
line.replace (bs_pos, 2, "\\\"");
bs_pos = line.find ("\"\"");
// Only make changes in case the double quotes are part of a larger field.
// In other words, a field which only contains two double quotes represents an
// empty field. We don't need to touch those.
// The way to determine whether the double quotes represent an empty field
// is by checking whether the characters in front of and after them are each
// either a field separator or the beginning or end of the line.
if (!(((bs_pos == 0) || // quotes are at start of line
(m_sep_str.find (line[bs_pos-1]) != std::string::npos)) // quotes preceeded by field separator
&&
((bs_pos + 2 >= line.length()) || // quotes are at end of line
(m_sep_str.find (line[bs_pos+2]) != std::string::npos)))) // quotes followed by field separator
// Only make changes in case the double quotes are not an empty field
line.replace (bs_pos, 2, "\\\"");
bs_pos = line.find ("\"\"", bs_pos + 2);
}

Tokenizer tok(line, sep);
Expand Down
4 changes: 2 additions & 2 deletions gnucash/import-export/csv-imp/test/test-tokenizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ GncTokenizerTest::test_gnc_tokenize_helper (const std::string& separators, token

static tokenize_csv_test_data comma_separated [] = {
{ "Date,Num,Description,Notes,Account,Deposit,Withdrawal,Balance", 8, { "Date","Num","Description","Notes","Account","Deposit","Withdrawal","Balance" } },
{ "05/01/15,45,Acme Inc.,,Miscellaneous,,\"1,100.00\",", 8, { "05/01/15","45","Acme Inc.","","Miscellaneous","","1,100.00","" } },
{ "05/01/15,45,Typical csv import line - including quoted empty field,,Miscellaneous,\"\",\"1,100.00\",", 8, { "05/01/15","45","Acme Inc.","","Miscellaneous","","1,100.00","" } },
{ "05/01/15,45,Acme Inc.,,Miscellaneous,", 6, { "05/01/15","45","Acme Inc.","","Miscellaneous","",NULL,NULL } },
{ "Test\\ with backslash,nextfield", 2, { "Test\\ with backslash","nextfield",NULL,NULL,NULL,NULL,NULL,NULL } },
{ "Test with \\\" escaped quote,nextfield", 2, { "Test with \" escaped quote","nextfield",NULL,NULL,NULL,NULL,NULL,NULL } },
Expand All @@ -188,7 +188,7 @@ TEST_F (GncTokenizerTest, tokenize_comma_sep)

// Table of test rows for semicolon-separated input.
// Each row appears to hold: the raw input line, a field count, and up to
// eight field strings (unused slots are NULL) -- the layout is defined by
// tokenize_csv_test_data, which is declared elsewhere; confirm there.
// The final all-NULL row with count 0 looks like an end-of-table sentinel.
static tokenize_csv_test_data semicolon_separated [] = {
{ "Date;Num;Description;Notes;Account;Deposit;Withdrawal;Balance", 8, { "Date","Num","Description","Notes","Account","Deposit","Withdrawal","Balance" } },
{ "05/01/15;45;Acme Inc.;;Miscellaneous;;\"1,100.00\";", 8, { "05/01/15","45","Acme Inc.","","Miscellaneous","","1,100.00","" } },
// Row with a quoted empty field (\"\") in the sixth position.
// NOTE(review): the third input field is "Typical csv import line - including
// quoted empty field" but the third listed field string is "Acme Inc." --
// verify against the test helper whether this mismatch is intended.
{ "05/01/15;45;Typical csv import line - including quoted empty field;;Miscellaneous;\"\";\"1,100.00\";", 8, { "05/01/15","45","Acme Inc.","","Miscellaneous","","1,100.00","" } },
// Shorter line: six fields, the last one empty after the trailing separator.
{ "05/01/15;45;Acme Inc.;;Miscellaneous;", 6, { "05/01/15","45","Acme Inc.","","Miscellaneous","",NULL,NULL } },
{ NULL, 0, { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL } },
};
Expand Down

0 comments on commit e557b02

Please sign in to comment.