Skip to content

Commit

Permalink
fixed multiple double quote bug, set default bufferSize to 2^16, added bufferSize to options
Browse files Browse the repository at this point in the history
  • Loading branch information
FourierTransformer committed Mar 30, 2020
1 parent 2d900a2 commit 4732944
Show file tree
Hide file tree
Showing 6 changed files with 70 additions and 35 deletions.
17 changes: 15 additions & 2 deletions encoder.lua
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,7 @@ local function getHeadersFromOptions(options)
return headers
end

-- works really quickly with luajit-2.1, because table.concat life
local function encode(inputTable, delimiter, options)
local function initializeGenerator(inputTable, delimiter, options)
-- delimiter MUST be one character
assert(#delimiter == 1 and type(delimiter) == "string", "the delimiter must be of string type and exactly one character")

Expand All @@ -112,6 +111,12 @@ local function encode(inputTable, delimiter, options)

local escapedHeaders = escapeHeadersForOutput(headers)
local output = initializeOutputWithEscapedHeaders(escapedHeaders, delimiter)
return output, headers
end

-- works really quickly with luajit-2.1, because table.concat life
local function encode(inputTable, delimiter, options)
local output, headers = initializeGenerator(inputTable, delimiter, options)

for i, line in csvLineGenerator(inputTable, delimiter, headers) do
output[i+1] = line
Expand All @@ -121,4 +126,12 @@ local function encode(inputTable, delimiter, options)
return table.concat(output)
end

-- Unfinished stub for a line-at-a-time variant of encode.
-- It currently only runs initializeGenerator (which validates the delimiter
-- and builds the initial output table and the header list) and then returns
-- nothing. It is not exported: this module's return value is `encode`.
local function encodeLine(inputTable, delimiter, options)
local output, headers = initializeGenerator(inputTable, delimiter, options)
-- TODO: build and return an iterator function. Its first call should return
-- output[1] (presumably the escaped header line -- confirm against
-- initializeOutputWithEscapedHeaders); subsequent calls should return the
-- lines produced by csvLineGenerator(inputTable, delimiter, headers).

end

return encode
56 changes: 34 additions & 22 deletions ftcsv.lua
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ else
end
difference = j - i
if difference >= 1 then doubleQuoteEscape = true end
if difference == 1 then
if difference % 2 == 1 then
return luaCompatibility.findClosingQuote(j+1, inputLength, inputString, quote, doubleQuoteEscape)
end
return j-1, doubleQuoteEscape
Expand Down Expand Up @@ -258,7 +258,7 @@ local function parseString(inputString, i, options)
if fieldNum < totalColumnCount then
-- sometimes in buffered mode, the buffer starts with a newline
-- this skips the newline and lets the parsing continue.
if buffered and lineNum == 1 and fieldNum == 1 then
if buffered and lineNum == 1 and fieldNum == 1 and field == "" then
fieldStart = i + 1 + skipChar
lineStart = fieldStart
else
Expand Down Expand Up @@ -393,7 +393,7 @@ local function initializeInputFromStringOrFile(inputFile, options, amount)
return inputString, file
end

local function parseOptions(delimiter, options)
local function parseOptions(delimiter, options, fromParseLine)
-- delimiter MUST be one character
assert(#delimiter == 1 and type(delimiter) == "string", "the delimiter must be of string type and exactly one character")

Expand Down Expand Up @@ -430,11 +430,18 @@ local function parseOptions(delimiter, options)
else
assert(type(options.ignoreQuotes) == "boolean", "ftcsv only takes a boolean value for optional parameter 'ignoreQuotes'. You passed in '" .. tostring(options.ignoreQuotes) .. "' of type '" .. type(options.ignoreQuotes) .. "'.")
end
if options.bufferSize ~= nil then
assert(type(options.bufferSize) == "number", "ftcsv only takes a number value for optional parameter 'bufferSize'. You passed in '" .. tostring(options.bufferSize) .. "' of type '" .. type(options.bufferSize) .. "'.")
if fromParseLine == false then
error("ftcsv: bufferSize can only be specified using 'parseLine'. When using 'parse', the entire file is read into memory")
end
end
else
options = {
["headers"] = true,
["loadFromString"] = false,
["ignoreQuotes"] = false
["ignoreQuotes"] = false,
["bufferSize"] = 2^16
}
end

Expand Down Expand Up @@ -519,7 +526,7 @@ end

-- runs the show!
function ftcsv.parse(inputFile, delimiter, options)
local options, fieldsToKeep = parseOptions(delimiter, options)
local options, fieldsToKeep = parseOptions(delimiter, options, false)

local inputString = initializeInputFromStringOrFile(inputFile, options, "*all")

Expand All @@ -545,20 +552,26 @@ local function determineAtEndOfFile(file, fileSize)
end
end

local function initializeInputFile(inputString, options, bufferSize)
local function initializeInputFile(inputString, options)
if options.loadFromString == true then
error("ftcsv: parseLine currently doesn't support loading from string")
end
return initializeInputFromStringOrFile(inputString, options, bufferSize)
return initializeInputFromStringOrFile(inputString, options, options.bufferSize)
end

function ftcsv.parseLine(inputFile, delimiter, bufferSize, userOptions)
local options, fieldsToKeep = parseOptions(delimiter, userOptions)
function ftcsv.parseLine(inputFile, delimiter, userOptions)
local options, fieldsToKeep = parseOptions(delimiter, userOptions, true)
local inputString, file = initializeInputFile(inputFile, options)

local inputString, file = initializeInputFile(inputFile, options, bufferSize)

local fileSize = getFileSize(file)
local atEndOfFile = determineAtEndOfFile(file, fileSize)
local fileSize, atEndOfFile = 0, false
if options.bufferSize == "*all" then
file = nil
atEndOfFile = true
else
fileSize = getFileSize(file)
atEndOfFile = determineAtEndOfFile(file, fileSize)
end

local endOfHeaders, parserArgs, _ = parseHeadersAndSetupArgs(inputString, delimiter, options, fieldsToKeep, atEndOfFile)
parserArgs.buffered = true
Expand All @@ -581,17 +594,16 @@ function ftcsv.parseLine(inputFile, delimiter, bufferSize, userOptions)
end

-- read more of the input
buffer = file:read(bufferSize)
if not buffer then
file:close()
return nil
else
-- TODO: see if there's a noticeable difference between the
-- function call and doing it directly.
-- parserArgs.endOfFile = determineAtEndOfFile(file, fileSize)
if file:seek() == fileSize then
parserArgs.endOfFile = true
if file then
buffer = file:read(options.bufferSize)
if not buffer then
file:close()
return nil
else
parserArgs.endOfFile = determineAtEndOfFile(file, fileSize)
end
else
return nil
end

-- appends the new input to what was left over
Expand Down
4 changes: 2 additions & 2 deletions spec/error_spec.lua
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ describe("parseLine features small, nonworking buffer size", function()
it("should error out when trying to load from string", function()
local test = function()
local parse = {}
for i, line in ftcsv.parseLine("a,b,c\n1,2,3", ",", 63, {loadFromString=true}) do
for i, line in ftcsv.parseLine("a,b,c\n1,2,3", ",", {loadFromString=true}) do
parse[i] = line
end
return parse
Expand All @@ -61,4 +61,4 @@ it("should error when dealing with quotes", function()
local actual = ftcsv.parse('a,b,c\n"apple,banana,carrot', ",", {loadFromString=true})
end
assert.has_error(test, "ftcsv: can't find closing quote in row 1. Try running with the option ignoreQuotes=true if the source incorrectly uses quotes.")
end)
end)
12 changes: 11 additions & 1 deletion spec/feature_spec.lua
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,16 @@ describe("csv features", function()
assert.are.same(expected, actual)
end)

it("should handle escaped doublequotes", function()
    -- Inside a quoted field every doubled quote ("") decodes to one literal
    -- quote, so runs of 2, 4 and 6 quotes become 1, 2 and 3 quotes.
    local want = {
        {
            a = 'A"B""C',
            b = 'A""B"C',
            c = 'A"""B""C',
        },
    }
    local csv = 'a;b;c\n"A""B""""C";"A""""B""C";"A""""""B""""C"'
    local parsed = ftcsv.parse(csv, ";", {loadFromString=true})
    assert.are.same(want, parsed)
end)

it("should handle renaming a field", function()
local expected = {}
expected[1] = {}
Expand Down Expand Up @@ -328,4 +338,4 @@ describe("csv features", function()
assert.are.same(expected, actual)
end)

end)
end)
12 changes: 6 additions & 6 deletions spec/parseLine_spec.lua
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ describe("parseLine features small, working buffer size", function()
local json = loadFile("spec/json/correctness.json")
json = cjson.decode(json)
local parse = {}
for i, line in ftcsv.parseLine("spec/csvs/correctness.csv", ",", 52) do
for i, line in ftcsv.parseLine("spec/csvs/correctness.csv", ",", {bufferSize=52}) do
assert.are.same(json[i], line)
parse[i] = line
end
Expand All @@ -27,7 +27,7 @@ describe("parseLine features small, nonworking buffer size", function()
it("should handle correctness", function()
local test = function()
local parse = {}
for i, line in ftcsv.parseLine("spec/csvs/correctness.csv", ",", 63) do
for i, line in ftcsv.parseLine("spec/csvs/correctness.csv", ",", {bufferSize=63}) do
parse[i] = line
end
return parse
Expand All @@ -40,7 +40,7 @@ describe("parseLine features smaller, nonworking buffer size", function()
it("should handle correctness", function()
local test = function()
local parse = {}
for i, line in ftcsv.parseLine("spec/csvs/correctness.csv", ",", 50) do
for i, line in ftcsv.parseLine("spec/csvs/correctness.csv", ",", {bufferSize=50}) do
parse[i] = line
end
return parse
Expand All @@ -53,7 +53,7 @@ describe("smaller bufferSize than header and incorrect number of fields", functi
it("should handle correctness", function()
local test = function()
local parse = {}
for i, line in ftcsv.parseLine("spec/csvs/correctness.csv", ",", 23) do
for i, line in ftcsv.parseLine("spec/csvs/correctness.csv", ",", {bufferSize=23}) do
parse[i] = line
end
return parse
Expand All @@ -66,11 +66,11 @@ describe("smaller bufferSize than header, but with correct field numbers", funct
it("should handle correctness", function()
local test = function()
local parse = {}
for i, line in ftcsv.parseLine("spec/csvs/correctness.csv", ",", 30) do
for i, line in ftcsv.parseLine("spec/csvs/correctness.csv", ",", {bufferSize=30}) do
parse[i] = line
end
return parse
end
assert.has_error(test, "ftcsv: bufferSize needs to be larger to parse this file")
end)
end)
end)
4 changes: 2 additions & 2 deletions spec/parse_encode_spec.lua
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ describe("csv parseLine decode", function()
local json = loadFile("spec/json/" .. value .. ".json")
json = cjson.decode(json)
local parse = {}
for i, v in ftcsv.parseLine("spec/csvs/" .. value .. ".csv", ",", 1024) do
for i, v in ftcsv.parseLine("spec/csvs/" .. value .. ".csv", ",") do
parse[i] = v
assert.are.same(json[i], v)
end
Expand Down Expand Up @@ -86,4 +86,4 @@ describe("csv encode", function()
assert.are.same(jsonDecode, reEncoded)
end)
end
end)
end)

0 comments on commit 4732944

Please sign in to comment.