Navigation Menu

Skip to content

Commit

Permalink
fix bugs; added array format ; added nullobject ; added emitall
Browse files Browse the repository at this point in the history
  • Loading branch information
Keyang committed Jun 26, 2019
1 parent 6a2d572 commit 306c123
Show file tree
Hide file tree
Showing 9 changed files with 184 additions and 31 deletions.
1 change: 0 additions & 1 deletion .travis.yml
Expand Up @@ -3,6 +3,5 @@ node_js:
- "10"
- "8"
- "6"
- "4"
after_success: 'npm run coveralls'
script: "npm run travis"
8 changes: 8 additions & 0 deletions bin/options.json
Expand Up @@ -76,6 +76,14 @@
"--alwaysSplitAtEOL":{
"desc": "Always interpret each line (as defined by eol) as a row. This will prevent eol characters from being used within a row (even inside a quoted field). This ensures that misplaced quotes only break on row, and not all ensuing rows.",
"type": "boolean"
},
"--nullObject":{
"desc":"How to parse if a csv cell contains 'null'. Default false will keep 'null' as string. Change to true if a null object is needed.",
"type":"boolean"
},
"--downstreamFormat":{
"desc":"Option to set what JSON array format is needed by downstream. 'line' is also called ndjson format. This format will write lines of JSON (without square brackets and commas) to downstream. 'array' will write complete JSON array string to downstream (suitable for file writable stream etc). Default 'line'",
"type":"string"
}
},
"examples": [
Expand Down
6 changes: 4 additions & 2 deletions readme.md
Expand Up @@ -246,8 +246,10 @@ Following parameters are supported:
* **includeColumns**: This parameter instructs the parser to include only those columns as specified by the regular expression. Example: /(name|age)/ will parse and include columns whose header contains "name" or "age"
* **ignoreColumns**: This parameter instructs the parser to ignore columns as specified by the regular expression. Example: /(name|age)/ will ignore columns whose header contains "name" or "age"
* **colParser**: Allows override parsing logic for a specific column. It accepts a JSON object with fields like: `headName: <String | Function | ColParser>` . e.g. {field1:'number'} will use built-in number parser to convert value of the `field1` column to number. For more information See [details below](#column-parser)
* **alwaysSplitAtEOL**: Always interpret each line (as defined by `eol`) as a row. This will prevent `eol` characters from being used within a row (even inside a quoted field). This ensures that misplaced quotes only break on row, and not all ensuing rows.

* **alwaysSplitAtEOL**: Always interpret each line (as defined by `eol`, e.g. `\n`) as a row. This will prevent `eol` characters from being used within a row (even inside a quoted field). Default is false. Change to true only if you are confident the data contains no inline line breaks (e.g. a cell holding multi-line text).
* **nullObject**: How to parse if a csv cell contains "null". Default false will keep "null" as string. Change to true if a null object is needed.
* **downstreamFormat**: Option to set what JSON array format is needed by downstream. "line" is also called ndjson format. This format will write lines of JSON (without square brackets and commas) to downstream. "array" will write complete JSON array string to downstream (suitable for file writable stream etc). Default "line"
* **needEmitAll**: Parser will build the full JSON result if `.then` is called (or `await` is used). If this is not desired, set this to false. Default is true.
All parameters can be used in Command Line tool.

## Asynchronouse Result Process
Expand Down
8 changes: 4 additions & 4 deletions src/Converter.ts
Expand Up @@ -16,7 +16,7 @@ import { bufFromString } from "./util";



export class Converter extends Transform implements PromiseLike<Array<any>> {
export class Converter extends Transform implements PromiseLike<any[]> {
preRawData(onRawData: PreRawDataCallback): Converter {
this.runtime.preRawDataHook = onRawData;
return this;
Expand Down Expand Up @@ -115,11 +115,11 @@ export class Converter extends Transform implements PromiseLike<Array<any>> {
// }
this.once("error", (err: any) => {
// console.log("BBB");

setTimeout(() => {
//wait for next cycle to emit the errors.
setImmediate(() => {
this.result.processError(err);
this.emit("done", err);
}, 0);
});

});
this.once("done", () => {
Expand Down
20 changes: 18 additions & 2 deletions src/Parameters.ts
Expand Up @@ -66,13 +66,26 @@ export interface CSVParseParam {
*/
eol?: string;
/**
* Always interpret each line (as defined by eol) as a row. This will prevent eol characters from being used within a row (even inside a quoted field). This ensures that misplaced quotes only break on row, and not all ensuing rows.
* Always interpret each line (as defined by eol) as a row. This will prevent eol characters from being used within a row (even inside a quoted field). Default is false. Change to true only if you are confident the data contains no inline line breaks (e.g. a cell holding multi-line text).
*/
alwaysSplitAtEOL: boolean;
/**
* The format to be converted to. "json" (default) -- convert csv to json. "csv" -- convert csv to csv row array. "line" -- convert csv to csv line string
*/
output: "json" | "csv" | "line";

/**
* Convert string "null" to null object in JSON outputs. Default is false.
*/
nullObject:boolean;
/**
* Define the format required by downstream (this parameter does not work if objectMode is on). `line` -- each JSON object is emitted on its own line, separated by a line break, like "json1\njson2". `array` -- downstream requires array format like "[json1,json2]". Default is line.
*/
downstreamFormat: "line" | "array";
/**
* Define whether .then(callback) returns all JSON data in its callback. Default is true. Change to false to save memory if subscribing json lines.
*/
needEmitAll: boolean;
}

export type CellParser = (item: string, head: string, resultRow: any, row: string[], columnIndex: number) => any;
Expand Down Expand Up @@ -101,7 +114,10 @@ export function mergeParams(params?: Partial<CSVParseParam>): CSVParseParam {
colParser: {},
eol: undefined,
alwaysSplitAtEOL: false,
output: "json"
output: "json",
nullObject: false,
downstreamFormat:"line",
needEmitAll:true
}
if (!params) {
params = {};
Expand Down
22 changes: 22 additions & 0 deletions src/Result.test.ts
@@ -0,0 +1,22 @@
import {Result} from "./Result";
import { Converter } from "./Converter";
import P from "bluebird";
import {readFileSync} from "fs";
import path from "path";
import assert from "assert";
import { JSONResult } from "./lineToJson";
const dataDir=path.join(__dirname,"../test/data/");

/**
 * Unit tests for the private `needEmitAll` getter of `Result`.
 *
 * The getter is read through bracket access so the `private` modifier does not
 * produce a type error.
 */
describe("Result", () => {
  it("should return need push downstream based on needEmitAll parameter", function () {
    const conv = new Converter();
    const res = new Result(conv);
    // The getter requires `parseRuntime.then` to be set, so it starts out false.
    assert.strictEqual(res["needEmitAll"], false);
    // Presumably `then()` registers `parseRuntime.then`; with the default
    // `needEmitAll: true` the getter is then expected to report true.
    conv.then();
    assert.strictEqual(res["needEmitAll"], true);
    // Switching the parameter off must win even though `.then` was called.
    conv.parseParam.needEmitAll = false;
    assert.strictEqual(res["needEmitAll"], false);
  });
});

41 changes: 27 additions & 14 deletions src/Result.ts
Expand Up @@ -2,7 +2,7 @@ import { Converter } from "./Converter";
import { ProcessLineResult } from "./Processor";
import P from "bluebird";
import CSVError from "./CSVError";

import { EOL } from "os";
export class Result {
private get needEmitLine(): boolean {
return !!this.converter.parseRuntime.subscribe && !!this.converter.parseRuntime.subscribe.onNext || this.needPushDownstream
Expand All @@ -15,12 +15,18 @@ export class Result {
return this._needPushDownstream;
}
private get needEmitAll(): boolean {
return !!this.converter.parseRuntime.then;
return !!this.converter.parseRuntime.then && this.converter.parseParam.needEmitAll;
// return !!this.converter.parseRuntime.then;
}
private finalResult: any[] = [];
constructor(private converter: Converter) { }
processResult(resultLines: ProcessLineResult[]): P<any> {
const startPos = this.converter.parseRuntime.parsedLineNumber;
if (this.needPushDownstream && this.converter.parseParam.downstreamFormat === "array") {
if (startPos === 0) {
pushDownstream(this.converter, "[" + EOL);
}
}
// let prom: P<any>;
return new P((resolve, reject) => {
if (this.needEmitLine) {
Expand Down Expand Up @@ -60,14 +66,20 @@ export class Result {
}
}
endProcess() {
if (this.needEmitAll) {

if (this.converter.parseRuntime.then && this.converter.parseRuntime.then.onfulfilled) {
this.converter.parseRuntime.then.onfulfilled(this.finalResult);
if (this.needEmitAll) {
this.converter.parseRuntime.then.onfulfilled(this.finalResult);
}else{
this.converter.parseRuntime.then.onfulfilled([]);
}
}
}
if (this.converter.parseRuntime.subscribe && this.converter.parseRuntime.subscribe.onCompleted) {
this.converter.parseRuntime.subscribe.onCompleted();
}
if (this.needPushDownstream && this.converter.parseParam.downstreamFormat === "array") {
pushDownstream(this.converter, "]" + EOL);
}
}
}

Expand All @@ -94,15 +106,15 @@ function processLineByLine(
}, cb);
} else {
// processRecursive(lines, hook, conv, offset, needPushDownstream, cb, nextLine, false);
if (needPushDownstream){
pushDownstream(conv,nextLine);
if (needPushDownstream) {
pushDownstream(conv, nextLine);
}
while (offset<lines.length){
const line=lines[offset];
while (offset < lines.length) {
const line = lines[offset];
hook(line, conv.parseRuntime.parsedLineNumber + offset);
offset++;
if (needPushDownstream){
pushDownstream(conv,line);
if (needPushDownstream) {
pushDownstream(conv, line);
}
}
cb();
Expand All @@ -116,11 +128,11 @@ function processLineByLine(
// }
} else {
if (needPushDownstream) {
while (offset<lines.length) {
while (offset < lines.length) {
const line = lines[offset++];
pushDownstream(conv, line);
}

}
cb();
}
Expand All @@ -144,7 +156,8 @@ function processRecursive(
}
/**
 * Push one result to the readable side of the converter.
 *
 * When `res` is an object and the stream is not in object mode, it is
 * serialized with `JSON.stringify` and pushed exactly once as a UTF-8 string,
 * followed by a terminator chosen from `parseParam.downstreamFormat`:
 * `"," + EOL` for `"array"`, plain `EOL` otherwise. Any other value is pushed
 * unchanged.
 *
 * NOTE(review): in `"array"` mode every record, including the last one, is
 * followed by a comma, so together with the closing "]" written by
 * `endProcess` the output ends in `,` + EOL + `]` — confirm that downstream
 * consumers tolerate the trailing comma.
 *
 * @param conv converter whose readable side receives the data
 * @param res  the processed line result to forward
 */
function pushDownstream(conv: Converter, res: ProcessLineResult) {
  if (typeof res === "object" && !conv.options.objectMode) {
    const data = JSON.stringify(res);
    const terminator = conv.parseParam.downstreamFormat === "array" ? "," + EOL : EOL;
    conv.push(data + terminator, "utf8");
  } else {
    conv.push(res);
  }
}
15 changes: 14 additions & 1 deletion src/lineToJson.ts
Expand Up @@ -114,8 +114,18 @@ function setPath(resultJson: any, head: string, value: any, conv: Converter,head
if (conv.parseParam.flatKeys) {
conv.parseRuntime.columnValueSetter[headIdx] = flatSetter;
} else {

if (head.indexOf(".") > -1) {
if (conv.parseParam.colParser[head] && (conv.parseParam.colParser[head] as ColumnParam).flat) {
const headArr=head.split(".");
let jsonHead=true;
while(headArr.length>0){
const headCom=headArr.shift();
if (headCom!.length===0){
jsonHead=false;
break;
}
}
if (!jsonHead || conv.parseParam.colParser[head] && (conv.parseParam.colParser[head] as ColumnParam).flat) {
conv.parseRuntime.columnValueSetter[headIdx] = flatSetter;
} else {
conv.parseRuntime.columnValueSetter[headIdx] = jsonSetter;
Expand All @@ -125,6 +135,9 @@ function setPath(resultJson: any, head: string, value: any, conv: Converter,head
}
}
}
if (conv.parseParam.nullObject ===true && value ==="null"){
value=null;
}
conv.parseRuntime.columnValueSetter[headIdx](resultJson, head, value);
// flatSetter(resultJson, head, value);

Expand Down
94 changes: 87 additions & 7 deletions test/testCSVConverter3.ts
@@ -1,5 +1,5 @@
import csv from "../src";
var assert = require("assert");
import assert from "assert";
var fs = require("fs");
import { sandbox } from "sinon";
import CSVError from "../src/CSVError";
Expand Down Expand Up @@ -231,13 +231,93 @@ describe("testCSVConverter3", function () {
it("should parse header with quotes correctly", function () {
var testData = __dirname + "/data/csvWithUnclosedHeader";
return csv({
headers:["exam_date","sample_no","status","sample_type","patient_id","last_name","first_name","gender_of_patient","patient_birth_date","patient_note","patient_department","accession_number","sample_site","physician","operator","department","note","test_order_code","draw_time","approval_status","approval_time","report_layout","patient_account_number","none_1","errors_detected_during_measurement","age","error_code_01","weight","error_code_02","height","error_code_03","hcg_beta_p","error_code_04","troponin_i_p","error_code_05","ck_mb_p","error_code_06","d_dimer_p","error_code_07","hscrp_p","error_code_08","myoglobin_p","error_code_09","nt_probnp","error_code_10","crp","error_code_11","bnp","error_code_12","tnt","error_code_13","demo_p","error_code_14","pct","error_code_15"]
headers: ["exam_date", "sample_no", "status", "sample_type", "patient_id", "last_name", "first_name", "gender_of_patient", "patient_birth_date", "patient_note", "patient_department", "accession_number", "sample_site", "physician", "operator", "department", "note", "test_order_code", "draw_time", "approval_status", "approval_time", "report_layout", "patient_account_number", "none_1", "errors_detected_during_measurement", "age", "error_code_01", "weight", "error_code_02", "height", "error_code_03", "hcg_beta_p", "error_code_04", "troponin_i_p", "error_code_05", "ck_mb_p", "error_code_06", "d_dimer_p", "error_code_07", "hscrp_p", "error_code_08", "myoglobin_p", "error_code_09", "nt_probnp", "error_code_10", "crp", "error_code_11", "bnp", "error_code_12", "tnt", "error_code_13", "demo_p", "error_code_14", "pct", "error_code_15"]
})
.fromFile(testData)
.fromFile(testData)
.then((d) => {
assert.equal(d.length, 2);
assert.equal(d[0].sample_no, "12669");
})

});
// Verifies the "array" downstream format: the stream must open with "[",
// close with "]", and every record chunk in between must end with a comma.
it("should stream json string correctly", function (done) {
  const data = "a,b,c\n1,2,3\n4,5,6";
  let hasLeftBracket = false;
  let hasRightBracket = false;
  csv({
    downstreamFormat: "array"
  })
    .fromString(data)
    .on("data", (d) => {
      // The producer terminates chunks with os.EOL, which is two characters
      // on some platforms; trim it so the checks do not depend on its length.
      const str = d.toString().trim();
      if (str === "[") {
        hasLeftBracket = true;
      } else if (str === "]") {
        hasRightBracket = true;
      } else {
        assert.strictEqual(str[str.length - 1], ",");
      }
    })
    .on("end", () => {
      assert.strictEqual(hasLeftBracket, true);
      assert.strictEqual(hasRightBracket, true);
      done();
    });
});
// Verifies the "line" (ndjson) downstream format: no chunk may end with the
// comma separator that the "array" format uses.
it("should stream json line correctly", function (done) {
  const data = "a,b,c\n1,2,3\n4,5,6";
  csv({
    downstreamFormat: "line"
  })
    .fromString(data)
    .on("data", (d) => {
      // Trim the platform line terminator first; otherwise, with a
      // two-character EOL, the inspected character would be "\r" and the
      // assertion could never fail.
      const str = d.toString().trim();
      assert.notStrictEqual(str[str.length - 1], ",");
    })
    .on("end", () => {
      done();
    });
});
// With `needEmitAll: false` the converter must not accumulate rows for the
// `.then` callback; the callback still fires, but with an empty array.
it("should not send json if needEmitAll is false", async function () {
  const data = "a,b,c\n1,2,3\n4,5,6";
  return csv({
    needEmitAll: false
  })
    .fromString(data)
    .then((d) => {
      assert.strictEqual(d.length, 0);
    });
});
// With `nullObject: true` a cell containing the text "null" must become a
// real `null` value in the JSON output.
it("should convert null to null object", async function () {
  const data = "a,b,c\nnull,2,3\n4,5,6";
  return csv({
    nullObject: true
  })
    .fromString(data)
    .then((d) => {
      // Strict comparison: a loose `== null` check would also accept a
      // missing (`undefined`) property.
      assert.strictEqual(d[0].a, null);
    });
});
// A header made of a name followed only by dots ("a..") must be kept as a
// flat key instead of being expanded into a nested path.
it("should process period properly", async function () {
  const csvText = "a..,b,c\n1,2,3\n4,5,6";
  const rows = await csv({}).fromString(csvText);
  // Loose comparison on purpose: cell values are compared against numbers.
  assert.equal(rows[0]["a.."], 1);
  assert.equal(rows[1]["a.."], 4);
});
});

0 comments on commit 306c123

Please sign in to comment.