From 6f1b192b85da646eb6afe947fbd95db032e49605 Mon Sep 17 00:00:00 2001 From: Shakil Thakur Date: Fri, 29 Mar 2019 23:29:20 -0500 Subject: [PATCH] moved out/refactored a lot of the encoder --- encoder.lua | 122 ++++++++++++++++++++++++++++++++++++++++++++++++++++ ftcsv.lua | 105 ++------------------------------------------ 2 files changed, 126 insertions(+), 101 deletions(-) create mode 100644 encoder.lua diff --git a/encoder.lua b/encoder.lua new file mode 100644 index 0000000..fd8f737 --- /dev/null +++ b/encoder.lua @@ -0,0 +1,122 @@ +-- CSV Encoder for ftcsv + +-- lua/luajit load compat +local M = {} +if type(jit) == 'table' or _ENV then + M.load = _G.load +else + M.load = loadstring +end + +local function delimitField(field) + field = tostring(field) + if field:find('"') then + return field:gsub('"', '""') + else + return field + end +end + +local function escapeHeadersForLuaGenerator(headers) + local escapedHeaders = {} + for i = 1, #headers do + if headers[i]:find('"') then + escapedHeaders[i] = headers[i]:gsub('"', '\\"') + else + escapedHeaders[i] = headers[i] + end + end + return escapedHeaders +end + +-- a function that compiles some lua code to quickly print out the csv +local function csvLineGenerator(inputTable, delimiter, headers) + local escapedHeaders = escapeHeadersForLuaGenerator(headers) + + local outputFunc = [[ + local args, i = ... + i = i + 1; + if i > ]] .. #inputTable .. [[ then return nil end; + return i, '"' .. args.delimitField(args.t[i]["]] .. + table.concat(escapedHeaders, [["]) .. '"]] .. + delimiter .. [["' .. args.delimitField(args.t[i]["]]) .. + [["]) .. 
'"\r\n']] + + local arguments = {} + arguments.t = inputTable + -- we shouldn't redefine delimitField for every line in + -- the csv, so we'll just pass it in here and reference it + arguments.delimitField = delimitField + + return M.load(outputFunc), arguments, 0 + +end + +local function validateHeaders(headers, inputTable) + for i = 1, #headers do + if inputTable[1][headers[i]] == nil then + error("ftcsv: the field '" .. headers[i] .. "' doesn't exist in the inputTable") + end + end +end + +local function initializeOutputWithEscapedHeaders(escapedHeaders, delimiter) + local output = {} + output[1] = '"' .. table.concat(escapedHeaders, '"' .. delimiter .. '"') .. '"\r\n' + return output +end + +local function escapeHeadersForOutput(headers) + local escapedHeaders = {} + for i = 1, #headers do + escapedHeaders[i] = delimitField(headers[i]) + end + return escapedHeaders +end + +local function extractHeadersFromTable(inputTable) + local headers = {} + for key, _ in pairs(inputTable[1]) do + headers[#headers+1] = key + end + + -- lets make the headers alphabetical + table.sort(headers) + + return headers +end + +local function getHeadersFromOptions(options) + local headers = nil + if options then + if options.fieldsToKeep ~= nil then + assert(type(options.fieldsToKeep) == "table", "ftcsv only takes in a list (as a table) for the optional parameter 'fieldsToKeep'. You passed in '" .. tostring(options.fieldsToKeep) .. "' of type '" .. type(options.fieldsToKeep) .. 
"'.") + headers = options.fieldsToKeep + end + end + return headers +end + +-- works really quickly with luajit-2.1, because table.concat life +local function encode(inputTable, delimiter, options) + -- delimiter MUST be a one-character string (type is checked first so a non-string fails with the message below instead of a raw length error) + assert(type(delimiter) == "string" and #delimiter == 1, "the delimiter must be of string type and exactly one character") + + local headers = getHeadersFromOptions(options) + if headers == nil then + headers = extractHeadersFromTable(inputTable) + end + validateHeaders(headers, inputTable) + + local escapedHeaders = escapeHeadersForOutput(headers) + local output = initializeOutputWithEscapedHeaders(escapedHeaders, delimiter) + + for i, line in csvLineGenerator(inputTable, delimiter, headers) do + output[i+1] = line + end + + -- combine and return final string + return table.concat(output) +end + +return encode diff --git a/ftcsv.lua b/ftcsv.lua index 19bb48e..2299869 100644 --- a/ftcsv.lua +++ b/ftcsv.lua @@ -1,5 +1,5 @@ local ftcsv = { - _VERSION = 'ftcsv 1.1.5', + _VERSION = 'ftcsv 1.2.0', _DESCRIPTION = 'CSV library for Lua', _URL = 'https://github.com/FourierTransformer/ftcsv', _LICENSE = [[ @@ -24,16 +24,12 @@ local ftcsv = { LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- ]] + ]], + encode = require("encoder") } --- lua 5.1 load compat +-- luajit/lua compatibility layer local M = {} -if type(jit) == 'table' or _ENV then - M.load = _G.load -else - M.load = loadstring -end -- perf local sbyte = string.byte @@ -541,98 +537,5 @@ function ftcsv.parseLine(inputFile, delimiter, bufferSize, options) end end --- a function that delimits " to "", used by the writer -local function delimitField(field) - field = tostring(field) - if field:find('"') then - return field:gsub('"', '""') - else - return field - end -end - --- a function that compiles some lua code to quickly print out the csv -local function writer(inputTable, dilimeter, headers) - -- they get re-created here if they need to be escaped so lua understands it based on how - -- they came in - for i = 1, #headers do - if inputTable[1][headers[i]] == nil then - error("ftcsv: the field '" .. headers[i] .. "' doesn't exist in the inputTable") - end - if headers[i]:find('"') then - headers[i] = headers[i]:gsub('"', '\\"') - end - end - - local outputFunc = [[ - local state, i = ... - local d = state.delimitField - i = i + 1; - if i > state.tableSize then return nil end; - return i, '"' .. d(state.t[i]["]] .. table.concat(headers, [["]) .. '"]] .. dilimeter .. [["' .. d(state.t[i]["]]) .. [["]) .. 
'"\r\n']] - - -- print(outputFunc) - - local state = {} - state.t = inputTable - state.tableSize = #inputTable - state.delimitField = delimitField - - return M.load(outputFunc), state, 0 - -end - --- takes the values from the headers in the first row of the input table -local function extractHeaders(inputTable) - local headers = {} - for key, _ in pairs(inputTable[1]) do - headers[#headers+1] = key - end - - -- lets make the headers alphabetical - table.sort(headers) - - return headers -end - --- turns a lua table into a csv --- works really quickly with luajit-2.1, because table.concat life -function ftcsv.encode(inputTable, delimiter, options) - local output = {} - - -- dilimeter MUST be one character - assert(#delimiter == 1 and type(delimiter) == "string", "the delimiter must be of string type and exactly one character") - - -- grab the headers from the options if they are there - local headers = nil - if options then - if options.fieldsToKeep ~= nil then - assert(type(options.fieldsToKeep) == "table", "ftcsv only takes in a list (as a table) for the optional parameter 'fieldsToKeep'. You passed in '" .. tostring(options.headers) .. "' of type '" .. type(options.headers) .. "'.") - headers = options.fieldsToKeep - end - end - if headers == nil then - headers = extractHeaders(inputTable) - end - - -- newHeaders are needed if there are quotes within the header - -- because they need to be escaped - local newHeaders = {} - for i = 1, #headers do - if headers[i]:find('"') then - newHeaders[i] = headers[i]:gsub('"', '""') - else - newHeaders[i] = headers[i] - end - end - output[1] = '"' .. table.concat(newHeaders, '"' .. delimiter .. '"') .. '"\r\n' - - -- add each line by line. - for i, line in writer(inputTable, delimiter, headers) do - output[i+1] = line - end - return table.concat(output) -end - return ftcsv