diff --git a/.travis.yml b/.travis.yml
index 2014ce0..35bed36 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -3,6 +3,5 @@ node_js:
   - "10"
   - "8"
   - "6"
-  - "4"
 after_success: 'npm run coveralls'
 script: "npm run travis"
\ No newline at end of file
diff --git a/bin/options.json b/bin/options.json
index d0db175..220b457 100644
--- a/bin/options.json
+++ b/bin/options.json
@@ -76,6 +76,14 @@
     "--alwaysSplitAtEOL":{
       "desc": "Always interpret each line (as defined by eol) as a row. This will prevent eol characters from being used within a row (even inside a quoted field). This ensures that misplaced quotes only break on row, and not all ensuing rows.",
       "type": "boolean"
+    },
+    "--nullObject":{
+      "desc":"How to parse a csv cell that contains 'null'. Default is false, which keeps 'null' as a string. Change to true if a null object is needed.",
+      "type":"boolean"
+    },
+    "--downstreamFormat":{
+      "desc":"Sets the JSON array format pushed to downstream. 'line' (also called ndjson) writes one JSON object per line, without square brackets or commas. 'array' writes a complete JSON array string to downstream (suitable for a file writable stream etc.). Default is 'line'.",
+      "type":"string"
     }
   },
   "examples": [
diff --git a/readme.md b/readme.md
index 2c8c239..b41bd3d 100644
--- a/readme.md
+++ b/readme.md
@@ -246,8 +246,10 @@ Following parameters are supported:
 * **includeColumns**: This parameter instructs the parser to include only those columns as specified by the regular expression. Example: /(name|age)/ will parse and include columns whose header contains "name" or "age"
 * **ignoreColumns**: This parameter instructs the parser to ignore columns as specified by the regular expression. Example: /(name|age)/ will ignore columns whose header contains "name" or "age"
 * **colParser**: Allows override parsing logic for a specific column. It accepts a JSON object with fields like: `headName: ` . e.g. {field1:'number'} will use built-in number parser to convert value of the `field1` column to number. For more information See [details below](#column-parser)
-* **alwaysSplitAtEOL**: Always interpret each line (as defined by `eol`) as a row. This will prevent `eol` characters from being used within a row (even inside a quoted field). This ensures that misplaced quotes only break on row, and not all ensuing rows.
-
+* **alwaysSplitAtEOL**: Always interpret each line (as defined by `eol`, like `\n`) as a row. This will prevent `eol` characters from being used within a row (even inside a quoted field). Default is false. Change to true if you are confident there are no inline line breaks (e.g. a line break inside a cell that contains multi-line text).
+* **nullObject**: How to parse a csv cell that contains "null". Default is false, which keeps "null" as a string. Change to true if a null object is needed.
+* **downstreamFormat**: Sets the JSON array format pushed to downstream. "line" (also called ndjson) writes one JSON object per line, without square brackets or commas. "array" writes a complete JSON array string to downstream (suitable for a file writable stream etc.). Default is "line".
+* **needEmitAll**: The parser will build the full JSON result if `.then` is called (or `await` is used). If this is not desired, set this to false. Default is true.
 
 All parameters can be used in Command Line tool.
 
 ## Asynchronouse Result Process
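For quick orientation, here is a minimal usage sketch of the new readme options (assuming the `csvtojson` package entry point; the inline CSV and logged output are illustrative):

```ts
import csv from "csvtojson";

// nullObject turns the literal cell "null" into a real null;
// needEmitAll (default true) keeps building the full array for .then().
csv({ nullObject: true, needEmitAll: true })
  .fromString("a,b\nnull,2")
  .then((rows) => {
    console.log(rows); // [ { a: null, b: "2" } ]
  });
```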
diff --git a/src/Converter.ts b/src/Converter.ts
index 4d84593..83f076e 100644
--- a/src/Converter.ts
+++ b/src/Converter.ts
@@ -16,7 +16,7 @@ import { bufFromString } from "./util";
-export class Converter extends Transform implements PromiseLike> {
+export class Converter extends Transform implements PromiseLike {
   preRawData(onRawData: PreRawDataCallback): Converter {
     this.runtime.preRawDataHook = onRawData;
     return this;
   }
@@ -115,11 +115,11 @@ export class Converter extends Transform implements PromiseLike> {
     // }
     this.once("error", (err: any) => {
       // console.log("BBB");
-
-      setTimeout(() => {
+      //wait for next cycle to emit the errors.
+      setImmediate(() => {
         this.result.processError(err);
         this.emit("done", err);
-      }, 0);
+      });
     });
 
     this.once("done", () => {
diff --git a/src/Parameters.ts b/src/Parameters.ts
index 6515f0b..0c42cbb 100644
--- a/src/Parameters.ts
+++ b/src/Parameters.ts
@@ -66,13 +66,26 @@ export interface CSVParseParam {
    */
   eol?: string;
   /**
-   * Always interpret each line (as defined by eol) as a row. This will prevent eol characters from being used within a row (even inside a quoted field). This ensures that misplaced quotes only break on row, and not all ensuing rows.
+   * Always interpret each line (as defined by eol) as a row. This will prevent eol characters from being used within a row (even inside a quoted field). Default is false. Change to true if you are confident there are no inline line breaks (e.g. a line break inside a cell that contains multi-line text).
    */
   alwaysSplitAtEOL: boolean;
   /**
    * The format to be converted to. "json" (default) -- convert csv to json. "csv" -- convert csv to csv row array. "line" -- convert csv to csv line string
    */
   output: "json" | "csv" | "line";
+
+  /**
+   * Convert the string "null" to a null object in JSON outputs. Default is false.
+   */
+  nullObject:boolean;
+  /**
+   * Define the format required by downstream (this parameter does not work if objectMode is on). `line` -- each JSON object is emitted on its own line, separated by a line break, like "json1\njson2". `array` -- downstream requires array format like "[json1,json2]". Default is "line".
+   */
+  downstreamFormat: "line" | "array";
+  /**
+   * Define whether .then(callback) returns all JSON data in its callback. Default is true. Change to false to save memory when subscribing to json lines.
+   */
+  needEmitAll: boolean;
 }
 
 export type CellParser = (item: string, head: string, resultRow: any, row: string[], columnIndex: number) => any;
@@ -101,7 +114,10 @@ export function mergeParams(params?: Partial<CSVParseParam>): CSVParseParam {
     colParser: {},
     eol: undefined,
     alwaysSplitAtEOL: false,
-    output: "json"
+    output: "json",
+    nullObject: false,
+    downstreamFormat:"line",
+    needEmitAll:true
   }
   if (!params) {
     params = {};
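The `downstreamFormat` parameter above targets stream consumers; here is a small sketch of the kind of pipeline it is meant for (assuming the `csvtojson` entry point and Node's `fs`; file paths are illustrative):

```ts
import csv from "csvtojson";
import { createWriteStream } from "fs";

// With "array" the emitted chunks are wrapped in "[" ... "]" and rows are
// comma-separated, so the written file is one valid JSON array rather than ndjson lines.
csv({ downstreamFormat: "array" })
  .fromFile("./input.csv")
  .pipe(createWriteStream("./output.json"));
```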
diff --git a/src/Result.test.ts b/src/Result.test.ts
new file mode 100644
index 0000000..9797c07
--- /dev/null
+++ b/src/Result.test.ts
@@ -0,0 +1,22 @@
+import {Result} from "./Result";
+import { Converter } from "./Converter";
+import P from "bluebird";
+import {readFileSync} from "fs";
+import path from "path";
+import assert from "assert";
+import { JSONResult } from "./lineToJson";
+const dataDir=path.join(__dirname,"../test/data/");
+
+describe("Result",()=>{
+  it ("should return need push downstream based on needEmitAll parameter",function (){
+    const conv=new Converter();
+    const res=new Result(conv);
+    assert.equal(res["needEmitAll"],false);
+    conv.then();
+    assert.equal(res["needEmitAll"],true);
+    conv.parseParam.needEmitAll=false;
+    assert.equal(res["needEmitAll"],false);
+  });
+
+})
+
diff --git a/src/Result.ts b/src/Result.ts
index 63ffc10..7c376ab 100644
--- a/src/Result.ts
+++ b/src/Result.ts
@@ -2,7 +2,7 @@ import { Converter } from "./Converter";
 import { ProcessLineResult } from "./Processor";
 import P from "bluebird";
 import CSVError from "./CSVError";
-
+import { EOL } from "os";
 export class Result {
   private get needEmitLine(): boolean {
     return !!this.converter.parseRuntime.subscribe && !!this.converter.parseRuntime.subscribe.onNext || this.needPushDownstream
@@ -15,12 +15,18 @@ export class Result {
     return this._needPushDownstream;
   }
   private get needEmitAll(): boolean {
-    return !!this.converter.parseRuntime.then;
+    return !!this.converter.parseRuntime.then && this.converter.parseParam.needEmitAll;
+    // return !!this.converter.parseRuntime.then;
   }
   private finalResult: any[] = [];
   constructor(private converter: Converter) { }
   processResult(resultLines: ProcessLineResult[]): P {
     const startPos = this.converter.parseRuntime.parsedLineNumber;
+    if (this.needPushDownstream && this.converter.parseParam.downstreamFormat === "array") {
+      if (startPos === 0) {
+        pushDownstream(this.converter, "[" + EOL);
+      }
+    }
     // let prom: P;
     return new P((resolve, reject) => {
       if (this.needEmitLine) {
@@ -60,14 +66,20 @@ export class Result {
     }
   }
   endProcess() {
-    if (this.needEmitAll) {
     if (this.converter.parseRuntime.then && this.converter.parseRuntime.then.onfulfilled) {
-      this.converter.parseRuntime.then.onfulfilled(this.finalResult);
+      if (this.needEmitAll) {
+        this.converter.parseRuntime.then.onfulfilled(this.finalResult);
+      }else{
+        this.converter.parseRuntime.then.onfulfilled([]);
+      }
     }
-    }
     if (this.converter.parseRuntime.subscribe && this.converter.parseRuntime.subscribe.onCompleted) {
       this.converter.parseRuntime.subscribe.onCompleted();
     }
+    if (this.needPushDownstream && this.converter.parseParam.downstreamFormat === "array") {
+      pushDownstream(this.converter, "]" + EOL);
+    }
   }
 }
@@ -94,15 +106,15 @@ function processLineByLine(
     }, cb);
   } else {
     // processRecursive(lines, hook, conv, offset, needPushDownstream, cb, nextLine, false);
-    if (needPushDownstream){
-      pushDownstream(conv,nextLine);
+    if (needPushDownstream) {
+      pushDownstream(conv, nextLine);
     }
-    while (offset
[... truncated in the source: the rest of this hunk and the beginning of the src/lineToJson.ts diff are missing ...]
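With the `needEmitAll` change above, `.then`/`await` resolves with an empty array when the option is turned off; a sketch of the memory-saving pattern this enables (assuming csvtojson's `subscribe` API; the file path and handler are illustrative):

```ts
import csv from "csvtojson";

async function run() {
  // With needEmitAll: false the parser no longer accumulates finalResult,
  // so the awaited value is [] and each row is handled in the subscriber instead.
  const rows = await csv({ needEmitAll: false })
    .fromFile("./big.csv")
    .subscribe((json) => {
      // handle one parsed row at a time
    });
  console.log(rows.length); // 0
}
```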
       if (head.indexOf(".") > -1) {
-        if (conv.parseParam.colParser[head] && (conv.parseParam.colParser[head] as ColumnParam).flat) {
+        const headArr=head.split(".");
+        let jsonHead=true;
+        while(headArr.length>0){
+          const headCom=headArr.shift();
+          if (headCom!.length===0){
+            jsonHead=false;
+            break;
+          }
+        }
+        if (!jsonHead || conv.parseParam.colParser[head] && (conv.parseParam.colParser[head] as ColumnParam).flat) {
           conv.parseRuntime.columnValueSetter[headIdx] = flatSetter;
         } else {
           conv.parseRuntime.columnValueSetter[headIdx] = jsonSetter;
@@ -125,6 +135,9 @@ function setPath(resultJson: any, head: string, value: any, conv: Converter,headIdx: number) {
       }
     }
   }
+  if (conv.parseParam.nullObject ===true && value ==="null"){
+    value=null;
+  }
   conv.parseRuntime.columnValueSetter[headIdx](resultJson, head, value);
   // flatSetter(resultJson, head, value);
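To make the head-splitting logic above concrete (illustrative input; the behaviour mirrors the "should process period properly" test below):

```ts
import csv from "csvtojson";

// "person.name" has only non-empty dot-separated parts, so it nests as JSON;
// "a.." contains an empty part, so the new check falls back to a flat key.
csv()
  .fromString("person.name,a..\nAlice,1")
  .then((rows) => {
    console.log(rows[0]); // { person: { name: "Alice" }, "a..": "1" }
  });
```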
diff --git a/test/testCSVConverter3.ts b/test/testCSVConverter3.ts
index b52b43f..6fb9c05 100644
--- a/test/testCSVConverter3.ts
+++ b/test/testCSVConverter3.ts
@@ -1,5 +1,5 @@
 import csv from "../src";
-var assert = require("assert");
+import assert from "assert";
 var fs = require("fs");
 import { sandbox } from "sinon";
 import CSVError from "../src/CSVError";
@@ -231,13 +231,93 @@ describe("testCSVConverter3", function () {
   it("should parse header with quotes correctly", function () {
     var testData = __dirname + "/data/csvWithUnclosedHeader";
     return csv({
-      headers:["exam_date","sample_no","status","sample_type","patient_id","last_name","first_name","gender_of_patient","patient_birth_date","patient_note","patient_department","accession_number","sample_site","physician","operator","department","note","test_order_code","draw_time","approval_status","approval_time","report_layout","patient_account_number","none_1","errors_detected_during_measurement","age","error_code_01","weight","error_code_02","height","error_code_03","hcg_beta_p","error_code_04","troponin_i_p","error_code_05","ck_mb_p","error_code_06","d_dimer_p","error_code_07","hscrp_p","error_code_08","myoglobin_p","error_code_09","nt_probnp","error_code_10","crp","error_code_11","bnp","error_code_12","tnt","error_code_13","demo_p","error_code_14","pct","error_code_15"]
+      headers: ["exam_date", "sample_no", "status", "sample_type", "patient_id", "last_name", "first_name", "gender_of_patient", "patient_birth_date", "patient_note", "patient_department", "accession_number", "sample_site", "physician", "operator", "department", "note", "test_order_code", "draw_time", "approval_status", "approval_time", "report_layout", "patient_account_number", "none_1", "errors_detected_during_measurement", "age", "error_code_01", "weight", "error_code_02", "height", "error_code_03", "hcg_beta_p", "error_code_04", "troponin_i_p", "error_code_05", "ck_mb_p", "error_code_06", "d_dimer_p", "error_code_07", "hscrp_p", "error_code_08", "myoglobin_p", "error_code_09", "nt_probnp", "error_code_10", "crp", "error_code_11", "bnp", "error_code_12", "tnt", "error_code_13", "demo_p", "error_code_14", "pct", "error_code_15"]
     })
-    .fromFile(testData)
+      .fromFile(testData)
+      .then((d) => {
+        assert.equal(d.length, 2);
+        assert.equal(d[0].sample_no, "12669");
+      })
+
+  });
+  it ("should stream json string correctly",function(done){
+    const data=`a,b,c
+1,2,3
+4,5,6`
+    let hasLeftBracket=false;
+    let hasRightBracket=false;
+    csv({
+      downstreamFormat:"array"
+    })
+      .fromString(data)
+      .on("data",(d)=>{
+        const str=d.toString();
+        if (str[0]==="[" && str.length ===2){
+          hasLeftBracket=true;
+        }else if (str[0]==="]" && str.length===2){
+          hasRightBracket=true;
+        }else{
+          assert.equal(str[str.length-2],",");
+        }
+
+      })
+      .on("end",()=>{
+        assert.equal(hasLeftBracket,true);
+        assert.equal(hasRightBracket,true);
+        done();
+      })
+  })
+  it ("should stream json line correctly",function(done){
+    const data=`a,b,c
+1,2,3
+4,5,6`
+    csv({
+      downstreamFormat:"line"
+    })
+      .fromString(data)
+      .on("data",(d)=>{
+        const str=d.toString();
+
+        assert.notEqual(str[str.length-2],",");
+      })
+      .on("end",()=>{
+        done();
+      })
+  })
+  it ("should not send json if needEmitAll is false",async function(){
+    const data=`a,b,c
+1,2,3
+4,5,6`
+    return csv({
+      needEmitAll:false
+    })
+      .fromString(data)
     .then((d)=>{
-      assert.equal(d.length,2);
-      assert.equal(d[0].sample_no,"12669");
+      assert(d.length===0);
     })
-
-  });
+  })
+  it ("should convert null to null object",async function(){
+    const data=`a,b,c
+null,2,3
+4,5,6`
+    return csv({
+      nullObject:true
+    })
+      .fromString(data)
+      .then((d)=>{
+        assert.equal(d[0].a,null)
+      })
+  })
+  it ("should process period properly",async function(){
+    const data=`a..,b,c
+1,2,3
+4,5,6`
+    return csv({
+    })
+      .fromString(data)
+      .then((d)=>{
+        assert.equal(d[0]["a.."],1);
+        assert.equal(d[1]["a.."],4);
+      })
+  })
 });
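A note on the two streaming tests: a chunk of length 2 is how the brackets are recognised there. With `downstreamFormat: "array"` the first and last chunks are `"["` and `"]"` followed by `os.EOL` (a single `"\n"` on the platforms the tests assume), and every data chunk in between carries a trailing comma before its line break; with `"line"`, data chunks are plain ndjson rows, so the character before the line break is never a comma.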