A Node.js micro-module that scans through a file and identifies the positions at which to cleanly split the file into multiple chunks, based on the line delimiter character.
npm install cleancut
var cleancut = require('cleancut');
var filename = './mockaroo_mockdata.csv';
var opts = {
maxChunks : 10, // default 2
minSize : 1048576, // default 1048576 bytes = 1 MiB
scanSize : 10240, // default 10240 bytes = 10 kb
linebreak : '\n' // default '\n'
};
var results = cleancut(filename, opts);
console.log(results.splitAt);
cleancut(filename, opts, true)
.then(function(results){
console.log(results.splitAt);
});
cleancut(filename, opts,
function(err,results){
console.log(results.splitAt);
});
[ { _id: 0, start: 0, end: 6358 },
{ _id: 1, start: 6359, end: 12725 },
{ _id: 2, start: 12726, end: 19124 },
{ _id: 3, start: 19125, end: 25433 },
{ _id: 4, start: 25434, end: 31815 },
{ _id: 5, start: 31816, end: 38130 },
{ _id: 6, start: 38131, end: 44506 },
{ _id: 7, start: 44507, end: 50845 },
{ _id: 8, start: 50846, end: 57229 },
{ _id: 9, start: 57230, end: 63533 } ]
- `filename`: the source file to cut cleanly (e.g. a very big CSV file)
- `opts`: configuration object defining how to cut cleanly
  - `maxChunks`: the maximum number of chunks to cut the file into (default: 2)
  - `minSize`: the minimum size each chunk must be, in bytes (default: 1048576 bytes)
  - `scanSize`: the number of bytes to sample at each cut point (default: 10240 bytes)
  - `linebreak`: the line delimiter (default: '\n')
- `callback(err, results)` (optional): callback function with `err` and `results` arguments
  - `err`: error message, if any
  - `results`: result object
    - `srcfile`: the source file to be cut
    - `linebreak`: the line delimiter for the cut
    - `splitAt`: array of objects specifying the cut points
      - `_id`: chunk id
      - `start`: start position in bytes
      - `end`: end position in bytes
- return: either a `Promise<results>` or `results`, as described above under `callback`
If `callback` is not defined, cleancut will be a synchronous function returning `results`.
If `callback` is defined or `true`, a `Promise` will be returned.