New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
sheet_to_json use lots of memory #764
Comments
@smallst can you share a sample file? |
thanks for replying. this is a sample file. |
The root cause of the issue is a bad reported sheet size: If you unzip the file (XLSX is really a zip format) and look at the xml in <dimension ref="A1:W1048576"/>
<row r="1048576" customFormat="false" ht="12.8" hidden="false" customHeight="false" outlineLevel="0" collapsed="false"/> The first line says that the file has over 1M records and the second line is completely unnecessary (guessing it was exported from a third party tool?) Can you try overwriting the range using the function update_sheet_range(ws) {
var range = {s:{r:20000000, c:20000000},e:{r:0,c:0}};
Object.keys(ws).filter(function(x) { return x.charAt(0) != "!"; }).map(XLSX.utils.decode_cell).forEach(function(x) {
range.s.c = Math.min(range.s.c, x.c); range.s.r = Math.min(range.s.r, x.r);
range.e.c = Math.max(range.e.c, x.c); range.e.r = Math.max(range.e.r, x.r);
});
ws['!ref'] = XLSX.utils.encode_range(range);
} Call that function on the worksheet just before passing it to |
how foolish i am! i edit the xlsx with libreoffice in linux, i nerver consider about file format problems. |
@smallst some third party tools try to cut corners :( But this is really not your fault -- we came across a V8 / nodejs bug! nodejs/node#14652 |
I understand that the bug is from nodejs/v8 but is there some workaround ? I tried using the update_sheet_range function before sheet_to_csv but without success. I get the high memory bug in all recent nodejs versions (latest 8.x 10.x and 11.x). A 40mb xls files goes over the heap size limit with max-old-space-size=1024. |
It seems to be beter by using type Map; (node verison v14.9.0) const process = require('process')
console.log('before', process.memoryUsage())
console.time('test')
// const o = {}
const o = new Map();
for (let R = 1; R <= 1048575; ++R) {
for (let C = 0; C <= 22; ++C) {
// o['!' + C + R];
o.get('!' + C + R)
}
}
console.timeEnd('test')
console.log('after', process.memoryUsage()) this may be helpful: function sheet_to_json(_sheet, opts) {
const sheet = new Map(Object.entries(_sheet))
if(sheet == null || sheet.get("!ref") == null) return [];
var val = {t:'n',v:0}, header = 0, offset = 1, hdr = [], v=0, vv="";
var r = {s:{r:0,c:0},e:{r:0,c:0}};
var o = opts || {};
var range = o.range != null ? o.range : sheet.get("!ref");
if(o.header === 1) header = 1;
else if(o.header === "A") header = 2;
else if(Array.isArray(o.header)) header = 3;
else if(o.header == null) header = 0;
switch(typeof range) {
case 'string': r = safe_decode_range(range); break;
case 'number': r = safe_decode_range(sheet.get("!ref")); r.s.r = range; break;
default: r = range;
}
if(header > 0) offset = 0;
var rr = encode_row(r.s.r);
var cols = [];
var out = [];
var outi = 0, counter = 0;
var dense = Array.isArray(sheet);
var R = r.s.r, C = 0, CC = 0;
if(dense && !sheet.get(R)) sheet.set(R, []);
for(C = r.s.c; C <= r.e.c; ++C) {
cols[C] = encode_col(C);
val = dense ? sheet.get(R)[C] : sheet.get(cols[C] + rr);
switch(header) {
case 1: hdr[C] = C - r.s.c; break;
case 2: hdr[C] = cols[C]; break;
case 3: hdr[C] = o.header[C - r.s.c]; break;
default:
if(val == null) val = {w: "__EMPTY", t: "s"};
vv = v = format_cell(val, null, o);
counter = 0;
for(CC = 0; CC < hdr.length; ++CC) if(hdr[CC] == vv) vv = v + "_" + (++counter);
hdr[C] = vv;
}
}
for (R = r.s.r + offset; R <= r.e.r; ++R) {
var row = make_json_row(sheet, r, R, cols, header, hdr, dense, o);
if((row.isempty === false) || (header === 1 ? o.blankrows !== false : !!o.blankrows)) out[outi++] = row.row;
}
out.length = outi;
return out;
}
function make_json_row(sheet, r, R, cols, header, hdr, dense, o) {
var rr = encode_row(R);
var defval = o.defval, raw = o.raw || !Object.prototype.hasOwnProperty.call(o, "raw");
var isempty = true;
var row = (header === 1) ? [] : {};
if(header !== 1) {
if(Object.defineProperty) try { Object.defineProperty(row, '__rowNum__', {value:R, enumerable:false}); } catch(e) { row.__rowNum__ = R; }
else row.__rowNum__ = R;
}
if(!dense || sheet.get(R)) for (var C = r.s.c; C <= r.e.c; ++C) {
var val = dense ? sheet.get(R)[C] : sheet.get(cols[C] + rr);
if(val === undefined || val.t === undefined) {
if(defval === undefined) continue;
if(hdr[C] != null) { row[hdr[C]] = defval; }
continue;
}
var v = val.v;
switch(val.t){
case 'z': if(v == null) break; continue;
case 'e': v = void 0; break;
case 's': case 'd': case 'b': case 'n': break;
default: throw new Error('unrecognized type ' + val.t);
}
if(hdr[C] != null) {
if(v == null) {
if(defval !== undefined) row[hdr[C]] = defval;
else if(raw && v === null) row[hdr[C]] = null;
else continue;
} else {
row[hdr[C]] = raw || (o.rawNumbers && val.t == "n") ? v : format_cell(val,v,o);
}
if(v != null) isempty = false;
}
}
return { row: row, isempty: isempty };
} reference link: |
i upload a 11 kb xlsx file and want to convert it to json in my server. my server only has 512mb memory, and i got node process killed when execute
sheet_to_json
method. the scripts works well in my pc which has 8gb memory.is
sheet_to_json
function use a lot of memory which is much bigger than file itself ? is there any tips to solve memory problems ? thanksThe text was updated successfully, but these errors were encountered: