diff --git a/.npmignore b/.npmignore deleted file mode 100644 index fe5d48e2..00000000 --- a/.npmignore +++ /dev/null @@ -1,8 +0,0 @@ -benchmark -test -node_modules -.travis.yml -History.md -examples -.nyc_output -coverage diff --git a/History.md b/History.md index 03f87eb5..cd6818be 100644 --- a/History.md +++ b/History.md @@ -1,3 +1,11 @@ +# v3.6.0 + +* [ADDED] `maxRows` option to limit the number of rows parsed. [#275](https://github.com/C2FO/fast-csv/issues/275) [#277](https://github.com/C2FO/fast-csv/pull/277) - [@cbrittingham](https://github.com/cbrittingham) +* [ADDED] `skipRows` to allow skipping parsed rows see [parsing.md](./docs/parsing.md) +* [ADDED] `skipLines` to allow skipping entire lines of a csv [parsing.md](./docs/parsing.md) [#267](https://github.com/C2FO/fast-csv/issues/267) +* Exported formatting and parsing types. +* Removed `.npmignore` in favor of `package.json` files + # v3.5.0 * Upgraded dependencies diff --git a/benchmark/.eslintrc.js b/benchmark/.eslintrc.js index 19068549..377d30db 100644 --- a/benchmark/.eslintrc.js +++ b/benchmark/.eslintrc.js @@ -1,6 +1,8 @@ module.exports = { + parserOptions: { + project: null, + }, rules: { "no-console": 0, - "@typescript-eslint/no-var-requires": 0 }, }; diff --git a/benchmark/index.js b/benchmark/index.js index b60a7ccf..f4a4ded5 100644 --- a/benchmark/index.js +++ b/benchmark/index.js @@ -2,7 +2,6 @@ const path = require('path'); const fs = require('fs'); const fastCsv = require('..'); - function camelize(str) { return str.replace(/_(.)/g, (a, b) => b.toUpperCase()); } @@ -11,7 +10,7 @@ const promisfyStream = (stream, expectedRows) => { let count = 0; return new Promise((res, rej) => { stream - .on('data', (row) => { + .on('data', row => { count += 1; }) .on('end', () => { @@ -25,13 +24,14 @@ const promisfyStream = (stream, expectedRows) => { }); }; -const benchmarkFastCsv = type => (num) => { +const benchmarkFastCsv = type => num => { const file = path.resolve(__dirname, 
`./assets/${num}.${type}.csv`); - const stream = fs.createReadStream(file) - .pipe(fastCsv.parse({ headers: true })) - .transform((data) => { + const stream = fs + .createReadStream(file) + .pipe(fastCsv.parse({ headers: true })) + .transform(data => { const ret = {}; - [ 'first_name', 'last_name', 'email_address' ].forEach((prop) => { + ['first_name', 'last_name', 'email_address'].forEach(prop => { ret[camelize(prop)] = data[prop]; }); ret.address = data.address; @@ -47,7 +47,7 @@ async function benchmarkRun(title, num, m) { for (let i = 0; i < howMany; i += 1) { // eslint-disable-next-line no-await-in-loop await m(num); - console.log('%s: RUN(%d lines) 1 %dms', title, num, (new Date() - runStart)); + console.log('%s: RUN(%d lines) 1 %dms', title, num, new Date() - runStart); runStart = new Date(); } console.log('%s: 3xAVG for %d lines %dms', title, num, (new Date() - start) / howMany); @@ -55,7 +55,7 @@ async function benchmarkRun(title, num, m) { function runBenchmarks(num, type) { console.log(`\nRUNNING ${num}.${type}.csv benchmarks`, num); - return benchmarkRun('fast-csv', num, benchmarkFastCsv(type)) + return benchmarkRun('fast-csv', num, benchmarkFastCsv(type)); } function benchmarks(type) { @@ -67,7 +67,7 @@ function benchmarks(type) { benchmarks('nonquoted') .then(() => benchmarks('quoted')) .then(() => process.exit()) - .catch((e) => { + .catch(e => { console.error(e.stack); return process.exit(1); }); diff --git a/docs/parsing.md b/docs/parsing.md index 5bfb55af..dc091da3 100644 --- a/docs/parsing.md +++ b/docs/parsing.md @@ -17,6 +17,9 @@ * [Ignoring Empty Rows](#csv-parse-ignoring-empty-rows) * [Transforming Rows](#csv-parse-transforming) * [Validating Rows](#csv-parse-validation) + * [Max Rows](#max-rows) + * [Skip Rows](#skip-rows) + * [Skip Lines](#skip-lines) ## Options @@ -45,6 +48,9 @@ * `rtrim: {boolean} = false`: Set to `true` to right trim all fields. * `ltrim: {boolean} = false`: Set to `true` to left trim all fields. 
* `encoding: {string} = 'utf8'`: Passed to [StringDecoder](https://nodejs.org/api/string_decoder.html#string_decoder_new_stringdecoder_encoding) when decoding incoming buffers. Change if incoming content is not 'utf8' encoded. +* `maxRows: {number} = 0`: If number is `> 0` the specified number of rows will be parsed. (e.g. `100` would return the first 100 rows of data). +* `skipRows: {number} = 0`: If number is `> 0` the specified number of **parsed** rows will be skipped. +* `skipLines: {number} = 0`: If number is `> 0` the specified number of lines will be skipped. ## Events @@ -585,3 +591,123 @@ Valid [row={"firstName":"timmy","lastName":"yukon"}] Parsed 2 rows ``` + +[`examples/parsing/max_rows.example.js`](../examples/parsing/max_rows.example.js) + +In the following example there are 10 rows, but only 5 will be parsed because of the `maxRows` option. + +```javascript +const rows = [ + 'header1,header2\n', + 'col1,col1\n', + 'col2,col2\n', + 'col3,col3\n', + 'col4,col4\n', + 'col5,col5\n', + 'col6,col6\n', + 'col7,col7\n', + 'col8,col8\n', + 'col9,col9\n', + 'col10,col10', +]; + +const stream = csv + .parse({ headers: true, maxRows: 5 }) + .on('error', error => console.error(error)) + .on('data', row => console.log(row)) + .on('end', rowCount => console.log(`Parsed ${rowCount} rows`)); + +rows.forEach(row => stream.write(row)); +stream.end(); +``` + +Expected output + +``` +{ header1: 'col1', header2: 'col1' } +{ header1: 'col2', header2: 'col2' } +{ header1: 'col3', header2: 'col3' } +{ header1: 'col4', header2: 'col4' } +{ header1: 'col5', header2: 'col5' } +Parsed 5 rows +``` + + +[`examples/parsing/skip_rows.example.js`](../examples/parsing/skip_rows.example.js) + +In the following example the first 2 rows are skipped. + +**NOTE** Notice how the header row is not skipped, only the data rows. 
+ +```javascript +const rows = [ + 'header1,header2\n', + 'col1,col1\n', + 'col2,col2\n', + 'col3,col3\n', + 'col4,col4\n', + 'col5,col5\n', + 'col6,col6\n', +]; + +const stream = csv + .parse({ headers: true, skipRows: 2 }) + .on('error', error => console.error(error)) + .on('data', row => console.log(row)) + .on('end', rowCount => console.log(`Parsed ${rowCount} rows`)); + +rows.forEach(row => stream.write(row)); +stream.end(); +``` + +Expected output + +``` +{ header1: 'col3', header2: 'col3' } +{ header1: 'col4', header2: 'col4' } +{ header1: 'col5', header2: 'col5' } +{ header1: 'col6', header2: 'col6' } +Parsed 4 rows +``` + + +[`examples/parsing/skip_lines.example.js`](../examples/parsing/skip_lines.example.js) + +In the following example the first 2 lines are skipped. + +**NOTE** Notice how the headers come from the third line because the first two are skipped. + +```javascript +const csv = require('../../'); + +const rows = [ + 'skip1_header1,skip1_header2\n', + 'skip2_header1,skip2_header2\n', + 'header1,header2\n', + 'col1,col1\n', + 'col2,col2\n', + 'col3,col3\n', + 'col4,col4\n', +]; + +const stream = csv + .parse({ headers: true, skipLines: 2 }) + .on('error', error => console.error(error)) + .on('data', row => console.log(row)) + .on('end', rowCount => console.log(`Parsed ${rowCount} rows`)); + +rows.forEach(row => stream.write(row)); +stream.end(); +``` + +Expected output + +``` +{ header1: 'col1', header2: 'col1' } +{ header1: 'col2', header2: 'col2' } +{ header1: 'col3', header2: 'col3' } +{ header1: 'col4', header2: 'col4' } +Parsed 4 rows +``` + + diff --git a/examples/parsing/max_rows.example.js b/examples/parsing/max_rows.example.js new file mode 100644 index 00000000..261f92bf --- /dev/null +++ b/examples/parsing/max_rows.example.js @@ -0,0 +1,24 @@ +const csv = require('../../'); + +const rows = [ + 'header1,header2\n', + 'col1,col1\n', + 'col2,col2\n', + 'col3,col3\n', + 'col4,col4\n', + 'col5,col5\n', + 'col6,col6\n', + 
'col7,col7\n', + 'col8,col8\n', + 'col9,col9\n', + 'col10,col10', +]; + +const stream = csv + .parse({ headers: true, maxRows: 5 }) + .on('error', error => console.error(error)) + .on('data', row => console.log(row)) + .on('end', rowCount => console.log(`Parsed ${rowCount} rows`)); + +rows.forEach(row => stream.write(row)); +stream.end(); diff --git a/examples/parsing/skip_lines.example.js b/examples/parsing/skip_lines.example.js new file mode 100644 index 00000000..d43eae1d --- /dev/null +++ b/examples/parsing/skip_lines.example.js @@ -0,0 +1,20 @@ +const csv = require('../../'); + +const rows = [ + 'skip1_header1,skip1_header2\n', + 'skip2_header1,skip2_header2\n', + 'header1,header2\n', + 'col1,col1\n', + 'col2,col2\n', + 'col3,col3\n', + 'col4,col4\n', +]; + +const stream = csv + .parse({ headers: true, skipLines: 2 }) + .on('error', error => console.error(error)) + .on('data', row => console.log(row)) + .on('end', rowCount => console.log(`Parsed ${rowCount} rows`)); + +rows.forEach(row => stream.write(row)); +stream.end(); diff --git a/examples/parsing/skip_rows.example.js b/examples/parsing/skip_rows.example.js new file mode 100644 index 00000000..69a8e6af --- /dev/null +++ b/examples/parsing/skip_rows.example.js @@ -0,0 +1,20 @@ +const csv = require('../../'); + +const rows = [ + 'header1,header2\n', + 'col1,col1\n', + 'col2,col2\n', + 'col3,col3\n', + 'col4,col4\n', + 'col5,col5\n', + 'col6,col6\n', +]; + +const stream = csv + .parse({ headers: true, skipRows: 2 }) + .on('error', error => console.error(error)) + .on('data', row => console.log(row)) + .on('end', rowCount => console.log(`Parsed ${rowCount} rows`)); + +rows.forEach(row => stream.write(row)); +stream.end(); diff --git a/package.json b/package.json index a37fb6a8..d86fa29f 100644 --- a/package.json +++ b/package.json @@ -1,11 +1,11 @@ { "name": "fast-csv", - "version": "3.5.0", + "version": "3.6.0", "description": "CSV parser and writer", "main": "./build/src/index.js", "types": 
"./build/src/index.d.ts", "scripts": { - "prepublish": "npm run build", + "prepare": "npm run build", "build": "tsc", "mocha": "nyc mocha", "test": "npm run lint && npm run mocha", @@ -14,6 +14,7 @@ "benchmark": "node ./benchmark", "coverage": "nyc report --reporter=text-lcov | coveralls" }, + "files": ["build/src/**"], "repository": { "type": "git", "url": "git@github.com:C2FO/fast-csv.git" diff --git a/src/formatter/formatter/index.ts b/src/formatter/formatter/index.ts index e4bb1d09..2879426e 100644 --- a/src/formatter/formatter/index.ts +++ b/src/formatter/formatter/index.ts @@ -1,5 +1,2 @@ -import RowFormatter from './RowFormatter'; - -export default { - RowFormatter, -}; +export { default as RowFormatter } from './RowFormatter'; +export { default as FieldFormatter } from './FieldFormatter'; diff --git a/src/formatter/index.ts b/src/formatter/index.ts index f297b031..d769dfd3 100644 --- a/src/formatter/index.ts +++ b/src/formatter/index.ts @@ -8,6 +8,7 @@ import CsvFormatterStream from './CsvFormatterStream'; export { default as CsvFormatterStream } from './CsvFormatterStream'; export * from './types'; export * from './FormatterOptions'; +export * from './formatter'; export const format = (options?: FormatterOptionsArgs): CsvFormatterStream => new CsvFormatterStream(new FormatterOptions(options)); diff --git a/src/index.ts b/src/index.ts index a9d5d2bd..3f137496 100644 --- a/src/index.ts +++ b/src/index.ts @@ -6,10 +6,41 @@ */ import { deprecate } from 'util'; -import { parseStream, parseString, parseFile } from './parser'; +import { parseStream, parseString, parseFile, RowValidateCallback } from './parser'; -export { format, write, writeToStream, writeToBuffer, writeToString, writeToPath } from './formatter'; -export { parse, parseString, parseStream, parseFile } from './parser'; +export { + format, + write, + writeToStream, + writeToBuffer, + writeToString, + writeToPath, + FormatterOptionsArgs, + Row as FormatterRow, + RowMap as FormatterRowMap, + RowArray 
as FormatterRowArray, + RowHashArray as FormatterRowHashArray, + RowTransformCallback as FormatterRowTransformCallback, + RowTransformFunction as FormatterRowTransformFunction, +} from './formatter'; +export { + parse, + parseString, + parseStream, + parseFile, + ParserOptionsArgs, + Row as ParserRow, + RowMap as ParserRowMap, + RowArray as ParserRowArray, + RowValidateCallback as ParserRowValidateCallback, + SyncRowValidate as ParserSyncRowValidate, + AsyncRowValidate as ParserAsyncRowValidate, + RowValidate as ParserRowValidate, + RowTransformCallback as ParserRowTransformCallback, + SyncRowTransform as ParserSyncRowTransform, + AsyncRowTransform as ParserAsyncRowTransform, + RowTransformFunction as ParserRowTransformFunction, +} from './parser'; export const fromString = deprecate(parseString, 'csv.fromString has been deprecated in favor of csv.parseString'); export const fromStream = deprecate(parseStream, 'csv.fromStream has been deprecated in favor of csv.parseStream'); diff --git a/src/parser/CsvParserStream.ts b/src/parser/CsvParserStream.ts index b740f317..a3af59b7 100644 --- a/src/parser/CsvParserStream.ts +++ b/src/parser/CsvParserStream.ts @@ -3,7 +3,7 @@ import { Transform, TransformCallback } from 'stream'; import { ParserOptions } from './ParserOptions'; import { HeaderTransformer, RowTransformerValidator } from './transforms'; import { Parser } from './parser'; -import { RowArray, RowTransformFunction, RowValidate, RowValidatorCallback } from './types'; +import { Row, RowArray, RowTransformFunction, RowValidate, RowValidatorCallback } from './types'; export default class CsvParserStream extends Transform { private readonly parserOptions: ParserOptions; @@ -20,6 +20,10 @@ export default class CsvParserStream extends Transform { private rowCount = 0; + private parsedRowCount = 0; + + private parsedLineCount = 0; + private endEmitted = false; public constructor(parserOptions: ParserOptions) { @@ -31,6 +35,18 @@ export default class CsvParserStream 
extends Transform { this.rowTransformerValidator = new RowTransformerValidator(); } + private get hasHitRowLimit(): boolean { + return this.parserOptions.limitRows && this.rowCount >= this.parserOptions.maxRows; + } + + private get shouldEmitRows(): boolean { + return this.parsedRowCount > this.parserOptions.skipRows; + } + + private get shouldSkipLine(): boolean { + return this.parsedLineCount <= this.parserOptions.skipLines; + } + public transform(transformFunction: RowTransformFunction): CsvParserStream { this.rowTransformerValidator.rowTransform = transformFunction; return this; @@ -54,23 +70,31 @@ export default class CsvParserStream extends Transform { } public _transform(data: Buffer, encoding: string, done: TransformCallback): void { + // if we have hit our maxRows parsing limit then skip parsing + if (this.hasHitRowLimit) { + return done(); + } try { const { lines } = this; const newLine = lines + this.decoder.write(data); const rows = this.parse(newLine, true); - this.processRows(rows, done); + return this.processRows(rows, done); } catch (e) { - done(e); + return done(e); } } public _flush(done: TransformCallback): void { + // if we have hit our maxRows parsing limit then skip parsing + if (this.hasHitRowLimit) { + return done(); + } try { const newLine = this.lines + this.decoder.end(); const rows = this.parse(newLine, false); - this.processRows(rows, done); + return this.processRows(rows, done); } catch (e) { - done(e); + return done(e); } } @@ -86,11 +110,18 @@ export default class CsvParserStream extends Transform { private processRows(rows: string[][], cb: TransformCallback): void { const rowsLength = rows.length; const iterate = (i: number): void => { - if (i >= rowsLength) { + // if we have emitted all rows or we have hit the maxRows limit option + // then end + if (i >= rowsLength || this.hasHitRowLimit) { return cb(); } + this.parsedLineCount += 1; + if (this.shouldSkipLine) { + return iterate(i + 1); + } const row = rows[i]; this.rowCount += 1; 
+ this.parsedRowCount += 1; const nextRowCount = this.rowCount; return this.transformRow(row, (err, transformResult): void => { if (err) { @@ -102,12 +133,8 @@ export default class CsvParserStream extends Transform { } if (!transformResult.isValid) { this.emit('data-invalid', transformResult.row, nextRowCount, transformResult.reason); - } else if (!transformResult.row) { - this.rowCount -= 1; - } else if (!this.parserOptions.objectMode) { - this.push(JSON.stringify(transformResult.row)); - } else { - this.push(transformResult.row); + } else if (transformResult.row) { + this.pushRow(transformResult.row); } if (i % 100 === 0) { // incase the transform are sync insert a next tick to prevent stack overflow @@ -133,12 +160,28 @@ export default class CsvParserStream extends Transform { return cb(null, { isValid: false, row: parsedRow }); } if (withHeaders.row) { - return this.rowTransformerValidator.transformAndValidate(withHeaders.row, cb); + if (this.shouldEmitRows) { + return this.rowTransformerValidator.transformAndValidate(withHeaders.row, cb); + } + // skipped because of skipRows option remove from total row count + this.rowCount -= 1; + return cb(null, { row: null, isValid: true }); } + // this is a header row dont include in the rowCount or parsedRowCount + this.rowCount -= 1; + this.parsedRowCount -= 1; return cb(null, { row: null, isValid: true }); }); } catch (e) { cb(e); } } + + private pushRow(row: Row): void { + if (!this.parserOptions.objectMode) { + this.push(JSON.stringify(row)); + } else { + this.push(row); + } + } } diff --git a/src/parser/ParserOptions.ts b/src/parser/ParserOptions.ts index a3b2c562..8f4e0624 100644 --- a/src/parser/ParserOptions.ts +++ b/src/parser/ParserOptions.ts @@ -16,6 +16,9 @@ export interface ParserOptionsArgs { ltrim?: boolean; rtrim?: boolean; encoding?: string; + maxRows?: number; + skipLines?: number; + skipRows?: number; } export class ParserOptions { @@ -57,6 +60,14 @@ export class ParserOptions { public readonly 
encoding: string = 'utf8'; + public readonly limitRows: boolean = false; + + public readonly maxRows: number = 0; + + public readonly skipLines: number = 0; + + public readonly skipRows: number = 0; + public constructor(opts?: ParserOptionsArgs) { Object.assign(this, opts || {}); if (this.delimiter.length > 1) { @@ -66,5 +77,9 @@ export class ParserOptions { this.escapeChar = this.escape ?? this.quote; this.supportsComments = !isNil(this.comment); this.NEXT_TOKEN_REGEXP = new RegExp(`([^\\s]|\\r\\n|\\n|\\r|${this.escapedDelimiter})`); + + if (this.maxRows > 0) { + this.limitRows = true; + } } } diff --git a/src/parser/index.ts b/src/parser/index.ts index b16e11a0..2279d91f 100644 --- a/src/parser/index.ts +++ b/src/parser/index.ts @@ -6,6 +6,8 @@ import CsvParserStream from './CsvParserStream'; export { default as CsvParserStream } from './CsvParserStream'; export * from './types'; export * from './ParserOptions'; +export * from './parser'; +export * from './transforms'; export const parse = (args?: ParserOptionsArgs): CsvParserStream => new CsvParserStream(new ParserOptions(args)); diff --git a/src/parser/parser/index.ts b/src/parser/parser/index.ts index faf6c2d1..e360e451 100644 --- a/src/parser/parser/index.ts +++ b/src/parser/parser/index.ts @@ -1,2 +1,5 @@ export { default as Parser } from './Parser'; export { default as RowParser } from './RowParser'; +export { Scanner } from './Scanner'; +export { Token, MaybeToken } from './Token'; +export { ColumnParser, NonQuotedColumnParser, QuotedColumnParser } from './column'; diff --git a/src/parser/types.ts b/src/parser/types.ts index 314b3638..5520021d 100644 --- a/src/parser/types.ts +++ b/src/parser/types.ts @@ -2,7 +2,7 @@ export interface RowMap { [s: string]: string; } export type RowArray = string[]; -export type Row = string[] | object; +export type Row = RowMap | RowArray; export interface RowValidationResult { row: Row | null; diff --git a/test/formatter/CsvFormatterStream.test.ts 
b/test/formatter/CsvFormatterStream.test.ts index 5d9e57bb..0a093017 100644 --- a/test/formatter/CsvFormatterStream.test.ts +++ b/test/formatter/CsvFormatterStream.test.ts @@ -2,15 +2,7 @@ import * as assert from 'assert'; import * as fs from 'fs'; import * as path from 'path'; import * as csv from '../../src'; -import { - FormatterOptions, - CsvFormatterStream, - Row, - RowArray, - RowHashArray, - RowMap, - FormatterOptionsArgs, -} from '../../src/formatter'; +import { FormatterOptions, CsvFormatterStream } from '../../src/formatter'; import RecordingStream from '../RecordingStream'; describe('CsvFormatterStream', () => { @@ -36,7 +28,7 @@ describe('CsvFormatterStream', () => { ], ]; - const pipeToRecordingStream = (formatter: CsvFormatterStream, rows: Row[]) => + const pipeToRecordingStream = (formatter: CsvFormatterStream, rows: csv.FormatterRow[]) => new Promise((res, rej) => { const rs = new RecordingStream(); formatter @@ -49,7 +41,7 @@ describe('CsvFormatterStream', () => { formatter.end(); }); - const formatRows = (rows: Row[], options: FormatterOptionsArgs = {}) => + const formatRows = (rows: csv.FormatterRow[], options: csv.FormatterOptionsArgs = {}) => pipeToRecordingStream(csv.format(options), rows); it('should write an array of arrays', () => @@ -66,16 +58,16 @@ describe('CsvFormatterStream', () => { it('should support transforming an array of arrays', () => formatRows(arrayRows, { headers: true, - transform(row: Row) { - return (row as RowArray).map(entry => entry.toUpperCase()); + transform(row: csv.FormatterRow) { + return (row as csv.FormatterRowArray).map(entry => entry.toUpperCase()); }, }).then(written => assert.deepStrictEqual(written, ['A,B', '\nA1,B1', '\nA2,B2']))); it('should support transforming an array of multi-dimensional arrays', () => formatRows(multiDimensionalRows, { headers: true, - transform(row: Row) { - return (row as RowHashArray).map(entry => [entry[0], entry[1].toUpperCase()]); + transform(row: csv.FormatterRow) { + return 
(row as csv.FormatterRowHashArray).map(entry => [entry[0], entry[1].toUpperCase()]); }, }).then(written => { assert.deepStrictEqual(written, ['a,b', '\nA1,B1', '\nA2,B2']); @@ -84,16 +76,16 @@ describe('CsvFormatterStream', () => { it('should support transforming an array of objects', () => formatRows(objectRows, { headers: true, - transform(row: RowMap) { + transform(row: csv.FormatterRowMap) { return { A: row.a, B: row.b }; }, }).then(written => assert.deepStrictEqual(written, ['A,B', '\na1,b1', '\na2,b2']))); }); describe('#transform', () => { it('should support transforming an array of arrays', () => { - const formatter = new CsvFormatterStream(new FormatterOptions({ headers: true })).transform((row: Row) => - (row as RowArray).map(entry => entry.toUpperCase()), - ); + const formatter = new CsvFormatterStream( + new FormatterOptions({ headers: true }), + ).transform((row: csv.FormatterRow) => (row as csv.FormatterRowArray).map(entry => entry.toUpperCase())); return pipeToRecordingStream(formatter, arrayRows).then(written => assert.deepStrictEqual(written, ['A,B', '\nA1,B1', '\nA2,B2']), ); @@ -101,7 +93,8 @@ describe('CsvFormatterStream', () => { it('should support transforming an array of multi-dimensional arrays', () => { const formatter = new CsvFormatterStream(new FormatterOptions({ headers: true })).transform( - (row: Row): Row => (row as RowHashArray).map(entry => [entry[0], entry[1].toUpperCase()]), + (row: csv.FormatterRow): csv.FormatterRow => + (row as csv.FormatterRowHashArray).map(entry => [entry[0], entry[1].toUpperCase()]), ); return pipeToRecordingStream(formatter, multiDimensionalRows).then(written => assert.deepStrictEqual(written, ['a,b', '\nA1,B1', '\nA2,B2']), @@ -110,7 +103,10 @@ describe('CsvFormatterStream', () => { it('should support transforming an array of objects', () => { const formatter = new CsvFormatterStream(new FormatterOptions({ headers: true })).transform( - (row: Row): Row => ({ A: (row as RowMap).a, B: (row as RowMap).b }), 
+ (row: csv.FormatterRow): csv.FormatterRow => ({ + A: (row as csv.FormatterRowMap).a, + B: (row as csv.FormatterRowMap).b, + }), ); return pipeToRecordingStream(formatter, objectRows).then(written => assert.deepStrictEqual(written, ['A,B', '\na1,b1', '\na2,b2']), @@ -120,7 +116,7 @@ describe('CsvFormatterStream', () => { it('should error if the transform fails', () => { const formatter = new CsvFormatterStream(new FormatterOptions({ headers: true })).transform( // eslint-disable-next-line @typescript-eslint/no-unused-vars - (row: Row): Row => { + (row: csv.FormatterRow): csv.FormatterRow => { throw new Error('Expected error'); }, ); @@ -348,8 +344,8 @@ describe('CsvFormatterStream', () => { csv .writeToString(arrayRows, { headers: true, - transform(row: Row): Row { - return (row as RowArray).map(entry => entry.toUpperCase()); + transform(row: csv.FormatterRow): csv.FormatterRow { + return (row as csv.FormatterRowArray).map(entry => entry.toUpperCase()); }, }) .then(formatted => assert.strictEqual(formatted, 'A,B\nA1,B1\nA2,B2'))); @@ -363,8 +359,8 @@ describe('CsvFormatterStream', () => { csv .writeToString(multiDimensionalRows, { headers: true, - transform(row: Row) { - return (row as RowHashArray).map(col => [col[0], col[1].toUpperCase()]); + transform(row: csv.FormatterRow) { + return (row as csv.FormatterRowHashArray).map(col => [col[0], col[1].toUpperCase()]); }, }) .then(formatted => assert.strictEqual(formatted, 'a,b\nA1,B1\nA2,B2'))); @@ -373,7 +369,7 @@ describe('CsvFormatterStream', () => { csv .writeToString(objectRows, { headers: true, - transform(row: RowMap) { + transform(row: csv.FormatterRowMap) { return { A: row.a, B: row.b, @@ -451,8 +447,8 @@ describe('CsvFormatterStream', () => { csv .writeToBuffer(arrayRows, { headers: true, - transform(row: Row): Row { - return (row as RowArray).map(entry => entry.toUpperCase()); + transform(row: csv.FormatterRow): csv.FormatterRow { + return (row as csv.FormatterRowArray).map(entry => entry.toUpperCase()); 
}, }) .then(formatted => assert.deepStrictEqual(formatted, Buffer.from('A,B\nA1,B1\nA2,B2')))); @@ -466,8 +462,8 @@ describe('CsvFormatterStream', () => { csv .writeToBuffer(multiDimensionalRows, { headers: true, - transform(row: Row) { - return (row as RowHashArray).map(col => [col[0], col[1].toUpperCase()]); + transform(row: csv.FormatterRow) { + return (row as csv.FormatterRowHashArray).map(col => [col[0], col[1].toUpperCase()]); }, }) .then(formatted => assert.deepStrictEqual(formatted, Buffer.from('a,b\nA1,B1\nA2,B2')))); @@ -476,7 +472,7 @@ describe('CsvFormatterStream', () => { csv .writeToBuffer(objectRows, { headers: true, - transform(row: RowMap): Row { + transform(row: csv.FormatterRowMap): csv.FormatterRow { return { A: row.a, B: row.b, @@ -545,7 +541,7 @@ describe('CsvFormatterStream', () => { }); describe('.write', () => { - const writeToRecordingStream = (rows: Row[], options = {}) => + const writeToRecordingStream = (rows: csv.FormatterRow[], options = {}) => new Promise((res, rej) => { const rs = new RecordingStream(); csv.write(rows, options) @@ -564,8 +560,8 @@ describe('CsvFormatterStream', () => { it('should support transforming an array of arrays', () => writeToRecordingStream(arrayRows, { headers: true, - transform(row: Row) { - return (row as RowArray).map(entry => entry.toUpperCase()); + transform(row: csv.FormatterRow) { + return (row as csv.FormatterRowArray).map(entry => entry.toUpperCase()); }, }).then(data => assert.deepStrictEqual(data, ['A,B', '\nA1,B1', '\nA2,B2']))); @@ -577,8 +573,8 @@ describe('CsvFormatterStream', () => { it('should support transforming an array of multi-dimensional arrays', () => writeToRecordingStream(multiDimensionalRows, { headers: true, - transform(row: Row) { - return (row as RowHashArray).map(col => [col[0], col[1].toUpperCase()]); + transform(row: csv.FormatterRow) { + return (row as csv.FormatterRowHashArray).map(col => [col[0], col[1].toUpperCase()]); }, }).then(data => assert.deepStrictEqual(data, 
['a,b', '\nA1,B1', '\nA2,B2']))); @@ -590,7 +586,7 @@ describe('CsvFormatterStream', () => { it('should support transforming an array of objects', () => writeToRecordingStream(objectRows, { headers: true, - transform(row: RowMap) { + transform(row: csv.FormatterRowMap) { return { A: row.a, B: row.b, @@ -622,7 +618,7 @@ describe('CsvFormatterStream', () => { }); describe('.writeToPath', () => { - const writeToPath = (rows: Row[], options = {}) => + const writeToPath = (rows: csv.FormatterRow[], options = {}) => new Promise((res, rej) => { const csvPath = path.resolve(__dirname, 'assets/test_output.csv'); csv.writeToPath(csvPath, rows, options) @@ -652,15 +648,15 @@ describe('CsvFormatterStream', () => { it('should support transforming an array of arrays', () => writeToPath(arrayRows, { headers: true, - transform(row: Row) { - return (row as RowArray).map(entry => entry.toUpperCase()); + transform(row: csv.FormatterRow) { + return (row as csv.FormatterRowArray).map(entry => entry.toUpperCase()); }, }).then(content => assert.deepStrictEqual(content, Buffer.from('A,B\nA1,B1\nA2,B2')))); it('should transforming an array of objects', () => writeToPath(objectRows, { headers: true, - transform(row: RowMap) { + transform(row: csv.FormatterRowMap) { return { A: row.a, B: row.b, @@ -671,8 +667,8 @@ describe('CsvFormatterStream', () => { it('should transforming an array of multi-dimensional array', () => writeToPath(multiDimensionalRows, { headers: true, - transform(row: Row) { - return (row as RowHashArray).map(col => [col[0], col[1].toUpperCase()]); + transform(row: csv.FormatterRow) { + return (row as csv.FormatterRowHashArray).map(col => [col[0], col[1].toUpperCase()]); }, }).then(content => assert.deepStrictEqual(content, Buffer.from('a,b\nA1,B1\nA2,B2')))); @@ -700,7 +696,7 @@ describe('CsvFormatterStream', () => { }); describe('.write', () => { - const writeToRecordingStream = (rows: Row[], options = {}) => + const writeToRecordingStream = (rows: csv.FormatterRow[], 
options = {}) => new Promise((res, rej) => { const rs = new RecordingStream(); csv.write(rows, options) @@ -719,8 +715,8 @@ describe('CsvFormatterStream', () => { it('should support transforming an array of arrays', () => writeToRecordingStream(arrayRows, { headers: true, - transform(row: Row) { - return (row as RowArray).map(entry => entry.toUpperCase()); + transform(row: csv.FormatterRow) { + return (row as csv.FormatterRowArray).map(entry => entry.toUpperCase()); }, }).then(data => assert.deepStrictEqual(data, ['A,B', '\nA1,B1', '\nA2,B2']))); @@ -732,8 +728,8 @@ describe('CsvFormatterStream', () => { it('should support transforming an array of multi-dimensional arrays', () => writeToRecordingStream(multiDimensionalRows, { headers: true, - transform(row: Row) { - return (row as RowHashArray).map(col => [col[0], col[1].toUpperCase()]); + transform(row: csv.FormatterRow) { + return (row as csv.FormatterRowHashArray).map(col => [col[0], col[1].toUpperCase()]); }, }).then(data => assert.deepStrictEqual(data, ['a,b', '\nA1,B1', '\nA2,B2']))); @@ -745,7 +741,7 @@ describe('CsvFormatterStream', () => { it('should support transforming an array of objects', () => writeToRecordingStream(objectRows, { headers: true, - transform(row: RowMap) { + transform(row: csv.FormatterRowMap) { return { A: row.a, B: row.b, @@ -777,7 +773,7 @@ describe('CsvFormatterStream', () => { }); describe('.writeToStream', () => { - const writeToStream = (rows: Row[], options: FormatterOptionsArgs = {}) => + const writeToStream = (rows: csv.FormatterRow[], options: csv.FormatterOptionsArgs = {}) => new Promise((res, rej) => { const rs = new RecordingStream(); csv.writeToStream(rs, rows, options); @@ -804,15 +800,15 @@ describe('CsvFormatterStream', () => { it('should support transforming an array of arrays', () => writeToStream(arrayRows, { headers: true, - transform(row: Row): Row { - return (row as RowArray).map(entry => entry.toUpperCase()); + transform(row: csv.FormatterRow): csv.FormatterRow 
{ + return (row as csv.FormatterRowArray).map(entry => entry.toUpperCase()); }, }).then(content => assert.deepStrictEqual(content, ['A,B', '\nA1,B1', '\nA2,B2']))); it('should transforming an array of objects', () => writeToStream(objectRows, { headers: true, - transform(row: RowMap): Row { + transform(row: csv.FormatterRowMap): csv.FormatterRow { return { A: row.a, B: row.b, @@ -823,8 +819,8 @@ describe('CsvFormatterStream', () => { it('should transforming an array of multi-dimensional array', () => writeToStream(multiDimensionalRows, { headers: true, - transform(row: Row): Row { - return (row as RowHashArray).map(col => [col[0], col[1].toUpperCase()]); + transform(row: csv.FormatterRow): csv.FormatterRow { + return (row as csv.FormatterRowHashArray).map(col => [col[0], col[1].toUpperCase()]); }, }).then(content => assert.deepStrictEqual(content, ['a,b', '\nA1,B1', '\nA2,B2']))); diff --git a/test/formatter/FormatterOptions.test.ts b/test/formatter/FormatterOptions.test.ts index 25cee6d8..90b6b0a9 100644 --- a/test/formatter/FormatterOptions.test.ts +++ b/test/formatter/FormatterOptions.test.ts @@ -1,5 +1,6 @@ import * as assert from 'assert'; -import { FormatterOptions, FormatterOptionsArgs } from '../../src/formatter'; +import { FormatterOptionsArgs } from '../../src'; +import { FormatterOptions } from '../../src/formatter'; describe('FormatterOptions', () => { const createOptions = (opts: FormatterOptionsArgs = {}) => new FormatterOptions(opts); diff --git a/test/formatter/formatter/FieldFormatter.test.ts b/test/formatter/formatter/FieldFormatter.test.ts index a3925dea..4412119d 100644 --- a/test/formatter/formatter/FieldFormatter.test.ts +++ b/test/formatter/formatter/FieldFormatter.test.ts @@ -1,10 +1,10 @@ import * as assert from 'assert'; -import FieldFormatter from '../../../src/formatter/formatter/FieldFormatter'; -import { FormatterOptions } from '../../../src/formatter/FormatterOptions'; +import { FormatterOptionsArgs } from '../../../src'; +import { 
FormatterOptions, FieldFormatter } from '../../../src/formatter'; describe('FieldFormatter', () => { describe('#format', () => { - const createFormatter = (formatterOptions = {}, headers?: string[]) => { + const createFormatter = (formatterOptions: FormatterOptionsArgs = {}, headers?: string[]) => { const formatter = new FieldFormatter(new FormatterOptions(formatterOptions)); if (headers) { formatter.headers = headers; diff --git a/test/formatter/formatter/RowFormatter.test.ts b/test/formatter/formatter/RowFormatter.test.ts index aa3268de..88ad133d 100644 --- a/test/formatter/formatter/RowFormatter.test.ts +++ b/test/formatter/formatter/RowFormatter.test.ts @@ -1,6 +1,12 @@ import * as assert from 'assert'; -import { FormatterOptions, Row, RowArray, RowHashArray, RowMap, RowTransformCallback } from '../../../src/formatter'; -import RowFormatter from '../../../src/formatter/formatter/RowFormatter'; +import { + FormatterRow as Row, + FormatterRowArray as RowArray, + FormatterRowHashArray as RowHashArray, + FormatterRowMap as RowMap, + FormatterRowTransformCallback as RowTransformCallback, +} from '../../../src'; +import { RowFormatter, FormatterOptions } from '../../../src/formatter'; describe('RowFormatter', () => { const createFormatter = (formatterOptions = {}): RowFormatter => diff --git a/test/issues/issue111.test.ts b/test/issues/issue111.test.ts index 1af27e2b..85937a1c 100644 --- a/test/issues/issue111.test.ts +++ b/test/issues/issue111.test.ts @@ -1,6 +1,5 @@ import * as assert from 'assert'; -import { Parser } from '../../src/parser/parser'; -import { ParserOptions } from '../../src/parser'; +import { ParserOptions, Parser } from '../../src/parser'; describe('Issue #111 - https://github.com/C2FO/fast-csv/issues/111', () => { const createParser = (parserOptions = {}) => new Parser(new ParserOptions(parserOptions)); diff --git a/test/issues/issue131.test.ts b/test/issues/issue131.test.ts index 1c82a3d6..a5243ed9 100644 --- a/test/issues/issue131.test.ts +++ 
b/test/issues/issue131.test.ts @@ -1,7 +1,6 @@ import * as assert from 'assert'; import { EOL } from 'os'; import * as csv from '../../src'; -import { RowMap } from '../../src/parser'; describe('Issue #131 - https://github.com/C2FO/fast-csv/issues/131', () => { const csvWithBom = [ @@ -10,7 +9,7 @@ describe('Issue #131 - https://github.com/C2FO/fast-csv/issues/131', () => { ].join(EOL); it('should parse a csv with a UTF-8 Byte Order Mark', next => { - const actual: RowMap[] = []; + const actual: csv.ParserRowMap[] = []; csv.parseString(csvWithBom, { headers: true }) .on('data', data => actual.push(data)) .on('end', (count: number) => { diff --git a/test/issues/issue150.test.ts b/test/issues/issue150.test.ts index 3d4b9ad8..4fd0cc04 100644 --- a/test/issues/issue150.test.ts +++ b/test/issues/issue150.test.ts @@ -1,6 +1,5 @@ import * as assert from 'assert'; -import { Parser } from '../../src/parser/parser'; -import { ParserOptions } from '../../src/parser'; +import { ParserOptions, Parser } from '../../src/parser'; describe('Issue #150 - https://github.com/C2FO/fast-csv/issues/150', () => { const createParser = (parserOptions = {}) => new Parser(new ParserOptions(parserOptions)); diff --git a/test/issues/issue174.test.ts b/test/issues/issue174.test.ts index 864946b8..2e249095 100644 --- a/test/issues/issue174.test.ts +++ b/test/issues/issue174.test.ts @@ -1,7 +1,7 @@ import { EOL } from 'os'; import * as assert from 'assert'; -import { Parser } from '../../src/parser/parser'; -import { ParserOptions, ParserOptionsArgs } from '../../src/parser'; +import { ParserOptionsArgs } from '../../src'; +import { Parser, ParserOptions } from '../../src/parser'; describe('Issue #174 - https://github.com/C2FO/fast-csv/issues/174', () => { const createParser = (parserOptions: ParserOptionsArgs = {}) => new Parser(new ParserOptions(parserOptions)); diff --git a/test/issues/issue214.test.ts b/test/issues/issue214.test.ts index 5661ae51..72730894 100644 --- 
a/test/issues/issue214.test.ts +++ b/test/issues/issue214.test.ts @@ -1,7 +1,6 @@ import { EOL } from 'os'; import * as assert from 'assert'; import * as csv from '../../src'; -import { RowMap } from '../../src/parser'; describe('Issue #214 - https://github.com/C2FO/fast-csv/issues/214', () => { const CSV_CONTENT = [ @@ -20,9 +19,9 @@ describe('Issue #214 - https://github.com/C2FO/fast-csv/issues/214', () => { ]; it('should emit data when using the on method', next => { - const rows: RowMap[] = []; + const rows: csv.ParserRowMap[] = []; csv.parseString(CSV_CONTENT, { headers: true }) - .on('data', (r: RowMap) => rows.push(r)) + .on('data', (r: csv.ParserRowMap) => rows.push(r)) .on('error', next) .on('end', (count: number) => { assert.deepStrictEqual(rows, expectedRows); @@ -32,9 +31,9 @@ describe('Issue #214 - https://github.com/C2FO/fast-csv/issues/214', () => { }); it('should emit data when using the addListener method', next => { - const rows: RowMap[] = []; + const rows: csv.ParserRowMap[] = []; csv.parseString(CSV_CONTENT, { headers: true }) - .addListener('data', (r: RowMap) => rows.push(r)) + .addListener('data', (r: csv.ParserRowMap) => rows.push(r)) .on('error', next) .on('end', (count: number) => { assert.deepStrictEqual(rows, expectedRows); diff --git a/test/issues/issue223.test.ts b/test/issues/issue223.test.ts index cd5da64e..29948a1b 100644 --- a/test/issues/issue223.test.ts +++ b/test/issues/issue223.test.ts @@ -1,7 +1,6 @@ import { EOL } from 'os'; import * as assert from 'assert'; -import { Parser } from '../../src/parser/parser'; -import { ParserOptions } from '../../src/parser'; +import { Parser, ParserOptions } from '../../src/parser'; describe('Issue #223 - https://github.com/C2FO/fast-csv/issues/223', () => { const createParser = (parserOptions = {}) => new Parser(new ParserOptions(parserOptions)); diff --git a/test/issues/issue87.test.ts b/test/issues/issue87.test.ts index d8a72b78..236f423d 100644 --- a/test/issues/issue87.test.ts +++ 
b/test/issues/issue87.test.ts @@ -3,7 +3,6 @@ import * as fs from 'fs'; import * as path from 'path'; import { Transform, TransformCallback } from 'stream'; import * as csv from '../../src'; -import { Row } from '../../src/parser'; describe('Issue #87 - https://github.com/C2FO/fast-csv/issues/87', () => { class MyStream extends Transform { @@ -19,7 +18,7 @@ describe('Issue #87 - https://github.com/C2FO/fast-csv/issues/87', () => { this.rowCount = 0; } - private transform(data: Row, encoding: string, done: TransformCallback) { + private transform(data: csv.ParserRow, encoding: string, done: TransformCallback) { this.rowCount += 1; if (this.rowCount % 2 === 0) { setTimeout(() => done(), 10); diff --git a/test/parser/CsvParsingStream.test.ts b/test/parser/CsvParsingStream.test.ts index b4018d14..4a94ace5 100644 --- a/test/parser/CsvParsingStream.test.ts +++ b/test/parser/CsvParsingStream.test.ts @@ -5,7 +5,7 @@ import * as domain from 'domain'; import partition from 'lodash.partition'; import * as csv from '../../src'; import assets, { PathAndContent } from './assets'; -import { CsvParserStream, ParserOptionsArgs, Row, RowMap, RowValidateCallback } from '../../src/parser'; +import { CsvParserStream } from '../../src/parser'; import Done = Mocha.Done; @@ -25,17 +25,17 @@ describe('CsvParserStream', () => { interface ParseResults { count: number; - rows: Row[]; - invalidRows: Row[]; + rows: csv.ParserRow[]; + invalidRows: csv.ParserRow[]; } const collectData = (stream: CsvParserStream): Promise => new Promise((res, rej) => { - const rows: Row[] = []; - const invalidRows: Row[] = []; + const rows: csv.ParserRow[] = []; + const invalidRows: csv.ParserRow[] = []; stream - .on('data', (row: Row) => rows.push(row)) - .on('data-invalid', (row: Row) => invalidRows.push(row)) + .on('data', (row: csv.ParserRow) => rows.push(row)) + .on('data-invalid', (row: csv.ParserRow) => invalidRows.push(row)) .on('error', rej) .on('end', (count: number) => { res({ count, rows, invalidRows 
}); @@ -44,8 +44,8 @@ describe('CsvParserStream', () => { const parseContentAndCollectFromStream = (data: PathAndContent, parser: CsvParserStream): Promise => new Promise((res, rej) => { - const rows: Row[] = []; - const invalidRows: Row[] = []; + const rows: csv.ParserRow[] = []; + const invalidRows: csv.ParserRow[] = []; parser .on('data', row => rows.push(row)) .on('data-invalid', row => invalidRows.push(row)) @@ -57,10 +57,10 @@ describe('CsvParserStream', () => { parser.end(); }); - const parseContentAndCollect = (data: PathAndContent, options: ParserOptionsArgs = {}): Promise => + const parseContentAndCollect = (data: PathAndContent, options: csv.ParserOptionsArgs = {}): Promise => new Promise((res, rej) => { - const rows: Row[] = []; - const invalidRows: Row[] = []; + const rows: csv.ParserRow[] = []; + const invalidRows: csv.ParserRow[] = []; const parser = csv .parse(options) .on('data', row => rows.push(row)) @@ -80,7 +80,7 @@ describe('CsvParserStream', () => { })); it('should emit a readable event ', next => { - const actual: Row[] = []; + const actual: csv.ParserRow[] = []; const parser = csv.parse({ headers: true }); const stream = parser.on('error', next).on('end', (count: number) => { assert.deepStrictEqual(actual, assets.withHeaders.parsed); @@ -267,6 +267,221 @@ describe('CsvParserStream', () => { }); }); + describe('maxRows option', () => { + it('should parse up to the specified number of maxRows', () => { + const maxRows = 3; + return parseContentAndCollect(assets.withHeaders, { headers: true, maxRows }).then(({ count, rows }) => { + assert.deepStrictEqual(rows, assets.withHeaders.parsed.slice(0, maxRows)); + assert.strictEqual(count, maxRows); + }); + }); + + it('should parse all rows if maxRows === 0', () => { + const maxRows = 0; + return parseContentAndCollect(assets.withHeaders, { headers: true, maxRows }).then(({ count, rows }) => { + assert.deepStrictEqual(rows, assets.withHeaders.parsed); + assert.strictEqual(count, rows.length); + }); + 
}); + }); + + describe('skipLines option', () => { + it('should skip up to the specified number of rows using the first non-skipped line as headers', () => { + const skipLines = 2; + return parseContentAndCollect(assets.withHeadersSkippedLines, { + headers: true, + skipLines, + }).then(({ count, rows }) => { + assert.deepStrictEqual(rows, assets.withHeadersSkippedLines.parsed); + assert.strictEqual(count, rows.length); + }); + }); + + it('should skip up to the specified number of rows not withoutHeaders', () => { + const skipLines = 2; + return parseContentAndCollect(assets.skipLines, { skipLines }).then(({ count, rows }) => { + assert.deepStrictEqual(rows, assets.skipLines.parsed); + assert.strictEqual(count, rows.length); + }); + }); + + describe('with transform', () => { + it('should not transform skipped rows', () => { + let transformedRows: csv.ParserRow[] = []; + const transformer = (row: csv.ParserRow): csv.ParserRow => { + const transformed = { + firstName: (row as csv.ParserRowMap).first_name, + lastName: (row as csv.ParserRowMap).last_name, + emailAddress: (row as csv.ParserRowMap).email_address, + }; + transformedRows.push(transformed); + return transformed; + }; + const skipLines = 2; + const expected = assets.withHeadersSkippedLines.parsed.map(transformer); + transformedRows = []; + const parser = csv.parse({ headers: true, skipLines }).transform(transformer); + return parseContentAndCollectFromStream(assets.withHeadersSkippedLines, parser).then( + ({ count, rows }) => { + assert.deepStrictEqual(rows, expected); + assert.deepStrictEqual(transformedRows, expected); + assert.strictEqual(count, expected.length); + }, + ); + }); + }); + + describe('with validate', () => { + it('should not validate skipped rows', () => { + let validatedRows: csv.ParserRow[] = []; + const validator = (row: csv.ParserRow): boolean => { + validatedRows.push(row); + return (validatedRows.length - 1) % 2 === 0; + }; + const skipLines = 2; + const nonSkippedRows = 
assets.withHeadersSkippedLines.parsed; + const expected = nonSkippedRows.filter(validator); + validatedRows = []; + const parser = csv.parse({ headers: true, skipLines }).validate(validator); + return parseContentAndCollectFromStream(assets.withHeadersSkippedLines, parser).then( + ({ count, rows }) => { + assert.deepStrictEqual(rows, expected); + assert.deepStrictEqual(validatedRows, nonSkippedRows); + assert.strictEqual(count, nonSkippedRows.length); + }, + ); + }); + }); + + it('should parse all rows if maxRows === 0', () => { + const skipLines = 0; + return parseContentAndCollect(assets.withHeaders, { headers: true, skipLines }).then(({ count, rows }) => { + assert.deepStrictEqual(rows, assets.withHeaders.parsed); + assert.strictEqual(count, rows.length); + }); + }); + }); + + describe('skipRows option', () => { + describe('with headers', () => { + it('should skip up to the specified number of rows not including the header row in the count', () => { + const skipRows = 3; + return parseContentAndCollect(assets.withHeaders, { + headers: true, + skipRows, + }).then(({ count, rows }) => { + assert.deepStrictEqual(rows, assets.withHeaders.parsed.slice(skipRows)); + assert.strictEqual(count, rows.length); + }); + }); + + it('should skip up to the specified number of rows and allow renaming the headers', () => { + const skipRows = 3; + return parseContentAndCollect(assets.withHeaders, { + headers: ['h1', 'h2', 'h3'], + renameHeaders: true, + skipRows, + }).then(({ count, rows }) => { + assert.deepStrictEqual( + rows, + assets.withHeaders.parsed.slice(skipRows).map(r => { + return { + h1: r.first_name, + h2: r.last_name, + h3: r.email_address, + }; + }), + ); + assert.strictEqual(count, rows.length); + }); + }); + }); + + describe('without headers', () => { + it('should skip up to the specified number of rows without headers', () => { + const skipRows = 3; + return parseContentAndCollect(assets.noHeadersAndQuotes, { skipRows }).then(({ count, rows }) => { + 
assert.deepStrictEqual(rows, assets.noHeadersAndQuotes.parsed.slice(skipRows)); + assert.strictEqual(count, rows.length); + }); + }); + + it('should skip up to the specified number of rows without headers and allow specifying headers', () => { + const skipRows = 3; + return parseContentAndCollect(assets.noHeadersAndQuotes, { + headers: ['h1', 'h2', 'h3', 'h4'], + skipRows, + }).then(({ count, rows }) => { + assert.deepStrictEqual( + rows, + assets.noHeadersAndQuotes.parsed.slice(skipRows).map(r => { + return { + h1: r[0], + h2: r[1], + h3: r[2], + h4: r[3], + }; + }), + ); + assert.strictEqual(count, rows.length); + }); + }); + }); + + describe('with transform', () => { + it('should not transform skipped rows', () => { + let transformedRows: csv.ParserRow[] = []; + const transformer = (row: csv.ParserRow): csv.ParserRow => { + const transformed = { + firstName: (row as csv.ParserRowMap).first_name, + lastName: (row as csv.ParserRowMap).last_name, + emailAddress: (row as csv.ParserRowMap).email_address, + address: (row as csv.ParserRowMap).address, + }; + transformedRows.push(transformed); + return transformed; + }; + const skipRows = 3; + const expected = assets.withHeaders.parsed.slice(skipRows).map(transformer); + transformedRows = []; + const parser = csv.parse({ headers: true, skipRows }).transform(transformer); + return parseContentAndCollectFromStream(assets.withHeaders, parser).then(({ count, rows }) => { + assert.deepStrictEqual(rows, expected); + assert.deepStrictEqual(transformedRows, expected); + assert.strictEqual(count, expected.length); + }); + }); + }); + + describe('with validate', () => { + it('should not validate skipped rows', () => { + let validatedRows: csv.ParserRow[] = []; + const validator = (row: csv.ParserRow): boolean => { + validatedRows.push(row); + return (validatedRows.length - 1) % 2 === 0; + }; + const skipRows = 3; + const nonSkippedRows = assets.withHeaders.parsed.slice(skipRows); + const expected = 
nonSkippedRows.filter(validator); + validatedRows = []; + const parser = csv.parse({ headers: true, skipRows }).validate(validator); + return parseContentAndCollectFromStream(assets.withHeaders, parser).then(({ count, rows }) => { + assert.deepStrictEqual(rows, expected); + assert.deepStrictEqual(validatedRows, nonSkippedRows); + assert.strictEqual(count, nonSkippedRows.length); + }); + }); + }); + + it('should parse all rows if maxRows === 0', () => { + const skipRows = 0; + return parseContentAndCollect(assets.withHeaders, { headers: true, skipRows }).then(({ count, rows }) => { + assert.deepStrictEqual(rows, assets.withHeaders.parsed); + assert.strictEqual(count, rows.length); + }); + }); + }); + it('should emit an error for malformed rows', next => { assets.write(assets.malformed); const stream = csv.parseFile(assets.malformed.path, { headers: true }); @@ -274,9 +489,9 @@ describe('CsvParserStream', () => { }); describe('#validate', () => { - const syncValidator = (row: Row): boolean => - parseInt((row as RowMap).first_name.replace(/^First/, ''), 10) % 2 === 1; - const asyncValidator = (row: Row, cb: RowValidateCallback) => { + const syncValidator = (row: csv.ParserRow): boolean => + parseInt((row as csv.ParserRowMap).first_name.replace(/^First/, ''), 10) % 2 === 1; + const asyncValidator = (row: csv.ParserRow, cb: csv.ParserRowValidateCallback) => { cb(null, syncValidator(row)); }; @@ -292,8 +507,8 @@ describe('CsvParserStream', () => { }); it('should allow async validation of rows', () => { - const validator = (row: Row): boolean => - parseInt((row as RowMap).first_name.replace(/^First/, ''), 10) % 2 !== 0; + const validator = (row: csv.ParserRow): boolean => + parseInt((row as csv.ParserRowMap).first_name.replace(/^First/, ''), 10) % 2 !== 0; const invalidValid = partition(assets.withHeaders.parsed, validator); const parser = csv.parse({ headers: true }).validate(asyncValidator); @@ -309,7 +524,7 @@ describe('CsvParserStream', () => { let index = -1; const 
stream = csv .parseFile(assets.withHeaders.path, { headers: true }) - .validate((data: Row, validateNext): void => { + .validate((data: csv.ParserRow, validateNext): void => { setImmediate(() => { index += 1; if (index === 8) { @@ -361,11 +576,11 @@ describe('CsvParserStream', () => { }); describe('#transform', () => { - const transformer = (row: Row): Row => ({ - firstName: (row as RowMap).first_name, - lastName: (row as RowMap).last_name, - emailAddress: (row as RowMap).email_address, - address: (row as RowMap).address, + const transformer = (row: csv.ParserRow): csv.ParserRow => ({ + firstName: (row as csv.ParserRowMap).first_name, + lastName: (row as csv.ParserRowMap).last_name, + emailAddress: (row as csv.ParserRowMap).email_address, + address: (row as csv.ParserRowMap).address, }); it('should allow transforming of data', () => { @@ -446,7 +661,7 @@ describe('CsvParserStream', () => { it('should support pausing a stream', () => { assets.write(assets.withHeaders); return new Promise((res, rej) => { - const rows: Row[] = []; + const rows: csv.ParserRow[] = []; let paused = false; const stream = csv.parse({ headers: true }); fs.createReadStream(assets.withHeaders.path) @@ -524,7 +739,7 @@ describe('CsvParserStream', () => { describe('.parseString', () => { it('should accept a csv string', next => { - const actual: Row[] = []; + const actual: csv.ParserRow[] = []; csv.parseString(assets.withHeaders.content, { headers: true }) .on('data', data => actual.push(data)) .on('end', (count: number) => { diff --git a/test/parser/ParserOptions.test.ts b/test/parser/ParserOptions.test.ts index c3bea9b5..e9492556 100644 --- a/test/parser/ParserOptions.test.ts +++ b/test/parser/ParserOptions.test.ts @@ -1,5 +1,6 @@ import * as assert from 'assert'; -import { ParserOptions, ParserOptionsArgs } from '../../src/parser'; +import { ParserOptionsArgs } from '../../src'; +import { ParserOptions } from '../../src/parser'; describe('ParserOptions', () => { const createOptions = (opts: 
ParserOptionsArgs = {}) => new ParserOptions(opts); @@ -163,4 +164,48 @@ describe('ParserOptions', () => { assert.strictEqual(createOptions({ renameHeaders: false }).renameHeaders, false); }); }); + + describe('#maxRows', () => { + it('should default maxRows 0 and limitRows to false', () => { + const opts = createOptions(); + assert.strictEqual(opts.maxRows, 0); + assert.strictEqual(opts.limitRows, false); + }); + + it('should set maxRows to the provided option and limitRows to true if maxRows > 0', () => { + const opts = createOptions({ maxRows: 1 }); + assert.strictEqual(opts.maxRows, 1); + assert.strictEqual(opts.limitRows, true); + }); + + it('should set maxRows to the provided option and limitRows to true if maxRows === 0', () => { + const opts = createOptions({ maxRows: 0 }); + assert.strictEqual(opts.maxRows, 0); + assert.strictEqual(opts.limitRows, false); + }); + }); + + describe('#skipLines', () => { + it('should default skipLines to 0', () => { + const opts = createOptions(); + assert.strictEqual(opts.skipLines, 0); + }); + + it('should set skipLines to the user provided option', () => { + const opts = createOptions({ skipLines: 10 }); + assert.strictEqual(opts.skipLines, 10); + }); + }); + + describe('#skipRows', () => { + it('should default skipLines to 0', () => { + const opts = createOptions(); + assert.strictEqual(opts.skipRows, 0); + }); + + it('should set skipLines to the user provided option', () => { + const opts = createOptions({ skipRows: 10 }); + assert.strictEqual(opts.skipRows, 10); + }); + }); }); diff --git a/test/parser/assets/index.ts b/test/parser/assets/index.ts index d1d0f9be..5b7f2f33 100644 --- a/test/parser/assets/index.ts +++ b/test/parser/assets/index.ts @@ -2,11 +2,13 @@ import { existsSync, mkdirSync, writeFileSync } from 'fs'; import * as path from 'path'; import alternateEncoding from './alternateEncoding'; import noHeadersAndQuotes from './noHeadersAndQuotes'; +import skipLines from './skipLines'; import withHeaders from 
'./withHeaders'; import withHeadersAndQuotes from './withHeadersAndQuotes'; import withHeadersAndAlternateQuote from './withHeadersAndAlternateQuote'; import withHeadersAndMissingColumns from './withHeadersAndMissingColumns'; import withHeadersAlternateDelimiter from './withHeadersAlternateDelimiter'; +import withHeadersSkippedLines from './withHeadersSkippedLines'; import headerColumnMismatch from './headerColumnMismatch'; import malformed from './malformed'; import trailingComma from './trailingComma'; @@ -32,11 +34,13 @@ const write = (opts: PathAndContent): void => { export default { write, alternateEncoding, + skipLines, withHeaders, withHeadersAndQuotes, withHeadersAndAlternateQuote, withHeadersAndMissingColumns, withHeadersAlternateDelimiter, + withHeadersSkippedLines, noHeadersAndQuotes, headerColumnMismatch, malformed, diff --git a/test/parser/assets/skipLines.ts b/test/parser/assets/skipLines.ts new file mode 100644 index 00000000..d94b48bc --- /dev/null +++ b/test/parser/assets/skipLines.ts @@ -0,0 +1,33 @@ +/* eslint-disable @typescript-eslint/camelcase */ +import { resolve } from 'path'; +import { EOL } from 'os'; + +export default { + path: resolve(__dirname, 'tmp', 'skip_lines.csv'), + + content: [ + 'Skip First1,Last1,skip.email2@email.com', + 'Skip First2,Skip Last2,skip.email2@email.com', + 'First1,Last1,email1@email.com', + 'First2,Last2,email2@email.com', + 'First3,Last3,email3@email.com', + 'First4,Last4,email4@email.com', + 'First5,Last5,email5@email.com', + 'First6,Last6,email6@email.com', + 'First7,Last7,email7@email.com', + 'First8,Last8,email8@email.com', + 'First9,Last9,email9@email.com', + ].join(EOL), + + parsed: [ + ['First1', 'Last1', 'email1@email.com'], + ['First2', 'Last2', 'email2@email.com'], + ['First3', 'Last3', 'email3@email.com'], + ['First4', 'Last4', 'email4@email.com'], + ['First5', 'Last5', 'email5@email.com'], + ['First6', 'Last6', 'email6@email.com'], + ['First7', 'Last7', 'email7@email.com'], + ['First8', 'Last8', 
'email8@email.com'], + ['First9', 'Last9', 'email9@email.com'], + ], +}; diff --git a/test/parser/assets/withHeadersSkippedLines.ts b/test/parser/assets/withHeadersSkippedLines.ts new file mode 100644 index 00000000..1b29f4cf --- /dev/null +++ b/test/parser/assets/withHeadersSkippedLines.ts @@ -0,0 +1,70 @@ +/* eslint-disable @typescript-eslint/camelcase */ +import { resolve } from 'path'; +import { EOL } from 'os'; + +export default { + path: resolve(__dirname, 'tmp', 'with_headers_skip_lines.csv'), + + content: [ + 'skip_one_first_name,skip_one_last_name,skip_one_email_address', + 'skip_two_first_name,skip_two_last_name,skip_two_email_address', + 'first_name,last_name,email_address', + 'First1,Last1,email1@email.com', + 'First2,Last2,email2@email.com', + 'First3,Last3,email3@email.com', + 'First4,Last4,email4@email.com', + 'First5,Last5,email5@email.com', + 'First6,Last6,email6@email.com', + 'First7,Last7,email7@email.com', + 'First8,Last8,email8@email.com', + 'First9,Last9,email9@email.com', + ].join(EOL), + + parsed: [ + { + first_name: 'First1', + last_name: 'Last1', + email_address: 'email1@email.com', + }, + { + first_name: 'First2', + last_name: 'Last2', + email_address: 'email2@email.com', + }, + { + first_name: 'First3', + last_name: 'Last3', + email_address: 'email3@email.com', + }, + { + first_name: 'First4', + last_name: 'Last4', + email_address: 'email4@email.com', + }, + { + first_name: 'First5', + last_name: 'Last5', + email_address: 'email5@email.com', + }, + { + first_name: 'First6', + last_name: 'Last6', + email_address: 'email6@email.com', + }, + { + first_name: 'First7', + last_name: 'Last7', + email_address: 'email7@email.com', + }, + { + first_name: 'First8', + last_name: 'Last8', + email_address: 'email8@email.com', + }, + { + first_name: 'First9', + last_name: 'Last9', + email_address: 'email9@email.com', + }, + ], +}; diff --git a/test/parser/parser/Parser.test.ts b/test/parser/parser/Parser.test.ts index 46897584..524cec76 100644 --- 
a/test/parser/parser/Parser.test.ts +++ b/test/parser/parser/Parser.test.ts @@ -1,6 +1,6 @@ import * as assert from 'assert'; -import { ParserOptions, ParserOptionsArgs } from '../../../src/parser'; -import { Parser } from '../../../src/parser/parser'; +import { ParserOptionsArgs } from '../../../src'; +import { ParserOptions, Parser } from '../../../src/parser'; describe('Parser', () => { const createParser = (parserOptions: ParserOptionsArgs = {}) => new Parser(new ParserOptions(parserOptions)); diff --git a/test/parser/parser/RowParser.test.ts b/test/parser/parser/RowParser.test.ts index aebaa8cd..49da09e1 100644 --- a/test/parser/parser/RowParser.test.ts +++ b/test/parser/parser/RowParser.test.ts @@ -1,7 +1,6 @@ import * as assert from 'assert'; -import { ParserOptions, ParserOptionsArgs } from '../../../src/parser'; -import RowParser from '../../../src/parser/parser/RowParser'; -import { Scanner } from '../../../src/parser/parser/Scanner'; +import { ParserOptionsArgs } from '../../../src'; +import { ParserOptions, Scanner, RowParser } from '../../../src/parser'; describe('RowParser', () => { const parse = (line: string, hasMoreData = false, parserOpts: ParserOptionsArgs = {}) => { diff --git a/test/parser/parser/Scanner.test.ts b/test/parser/parser/Scanner.test.ts index 605dd412..b5a1db80 100644 --- a/test/parser/parser/Scanner.test.ts +++ b/test/parser/parser/Scanner.test.ts @@ -1,7 +1,6 @@ import * as assert from 'assert'; -import { ParserOptions, ParserOptionsArgs } from '../../../src/parser'; -import { Scanner } from '../../../src/parser/parser/Scanner'; -import { MaybeToken, Token } from '../../../src/parser/parser/Token'; +import { ParserOptionsArgs } from '../../../src'; +import { ParserOptions, Scanner, Token, MaybeToken } from '../../../src/parser'; const createOptions = (opts: ParserOptionsArgs = {}) => new ParserOptions(opts); describe('Scanner', () => { diff --git a/test/parser/parser/column/ColumnParser.test.ts 
b/test/parser/parser/column/ColumnParser.test.ts index a00bd259..3553b32e 100644 --- a/test/parser/parser/column/ColumnParser.test.ts +++ b/test/parser/parser/column/ColumnParser.test.ts @@ -1,8 +1,6 @@ import * as assert from 'assert'; import * as sinon from 'sinon'; -import { ParserOptions } from '../../../../src/parser'; -import { ColumnParser } from '../../../../src/parser/parser/column'; -import { Scanner } from '../../../../src/parser/parser/Scanner'; +import { ParserOptions, Scanner, ColumnParser } from '../../../../src/parser'; describe('ColumnParser', () => { describe('#parse', () => { diff --git a/test/parser/parser/column/NonQuotedColumnParser.test.ts b/test/parser/parser/column/NonQuotedColumnParser.test.ts index c24e7629..19f0d6fd 100644 --- a/test/parser/parser/column/NonQuotedColumnParser.test.ts +++ b/test/parser/parser/column/NonQuotedColumnParser.test.ts @@ -1,7 +1,6 @@ import * as assert from 'assert'; -import { ParserOptions, ParserOptionsArgs } from '../../../../src/parser'; -import { NonQuotedColumnParser } from '../../../../src/parser/parser/column'; -import { Scanner } from '../../../../src/parser/parser/Scanner'; +import { ParserOptionsArgs } from '../../../../src'; +import { ParserOptions, Scanner, NonQuotedColumnParser } from '../../../../src/parser'; describe('NonQuotedColumnParser', () => { const parse = (line: string, hasMoreData = false, parserOpts: ParserOptionsArgs = {}) => { diff --git a/test/parser/parser/column/QuotedColumnParser.test.ts b/test/parser/parser/column/QuotedColumnParser.test.ts index 49e121ad..6fdc206d 100644 --- a/test/parser/parser/column/QuotedColumnParser.test.ts +++ b/test/parser/parser/column/QuotedColumnParser.test.ts @@ -1,7 +1,6 @@ import * as assert from 'assert'; -import { ParserOptions, ParserOptionsArgs } from '../../../../src/parser'; -import { QuotedColumnParser } from '../../../../src/parser/parser/column'; -import { Scanner } from '../../../../src/parser/parser/Scanner'; +import { ParserOptionsArgs 
} from '../../../../src'; +import { ParserOptions, QuotedColumnParser, Scanner } from '../../../../src/parser'; describe('QuotedColumnParser', () => { const parse = (line: string, hasMoreData = false, parserOpts: ParserOptionsArgs = {}) => { diff --git a/test/parser/transforms/HeaderTransformer.test.ts b/test/parser/transforms/HeaderTransformer.test.ts index 4d2341b4..e226a5b1 100644 --- a/test/parser/transforms/HeaderTransformer.test.ts +++ b/test/parser/transforms/HeaderTransformer.test.ts @@ -1,6 +1,6 @@ import * as assert from 'assert'; -import { HeaderTransformer } from '../../../src/parser/transforms'; -import { ParserOptions, ParserOptionsArgs, RowArray, RowValidationResult } from '../../../src/parser'; +import { ParserOptionsArgs, ParserRowArray as RowArray } from '../../../src'; +import { ParserOptions, RowValidationResult, HeaderTransformer } from '../../../src/parser'; describe('HeaderTransformer', () => { const createHeaderTransformer = (opts?: ParserOptionsArgs) => new HeaderTransformer(new ParserOptions(opts)); diff --git a/test/parser/transforms/RowTransformerValidator.test.ts b/test/parser/transforms/RowTransformerValidator.test.ts index a7490d14..e52a4a7b 100644 --- a/test/parser/transforms/RowTransformerValidator.test.ts +++ b/test/parser/transforms/RowTransformerValidator.test.ts @@ -1,6 +1,6 @@ import * as assert from 'assert'; -import { Row, RowArray, RowValidationResult } from '../../../src/parser'; -import { RowTransformerValidator } from '../../../src/parser/transforms'; +import { ParserRow as Row, ParserRowArray as RowArray } from '../../../src'; +import { RowValidationResult, RowTransformerValidator } from '../../../src/parser'; describe('RowTransformerValidator', () => { const createRowTransformerValidator = () => new RowTransformerValidator();