Skip to content

Commit

Permalink
version bump 0.5.4: more options
Browse files Browse the repository at this point in the history
- cellHTML controls HTML generation
- cellFormula controls formula output
- sheetStubs now defaults to false

- cleaned up unnecessary CSV quotes (fixed #45)
- updated test_files to 20140211
- updated SSF to 0.5.7
- removed unused main function
- removed some dead code
  • Loading branch information
SheetJSDev committed Feb 12, 2014
1 parent 27af8a6 commit 7e9f218
Show file tree
Hide file tree
Showing 21 changed files with 236 additions and 297 deletions.
24 changes: 15 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,6 @@ In the browser:

## Usage

The node version installs a binary `xlsx2csv` which can read XLSX/XLSM/XLSB files and output the contents in various formats. The source is available at `xlsx2csv.njs` in the bin directory.

See <http://oss.sheetjs.com/js-xlsx/> for a browser example.

Note that older versions of IE do not support the HTML5 File API, so the base64 mode is provided for testing. On OSX you can get the base64 encoding by running:

$ <target_file.xlsx base64 | pbcopy # the pbcopy puts the content in the clipboard

Simple usage (walks through every cell of every sheet and dumps the values):

var XLSX = require('xlsx')
Expand All @@ -37,6 +29,14 @@ Simple usage (walks through every cell of every sheet and dumps the values):
}
});

The node version installs a binary `xlsx2csv` which can read XLSX/XLSM/XLSB files and output the contents in various formats. The source is available at `xlsx2csv.njs` in the bin directory.

See <http://oss.sheetjs.com/js-xlsx/> for a browser example.

Note that older versions of IE do not support the HTML5 File API, so the base64 mode is provided for testing. On OSX you can get the base64 encoding by running:

$ <target_file.xlsx base64 | pbcopy # the pbcopy puts the content in the clipboard

Some helper functions in `XLSX.utils` generate different views of the sheets:

- `XLSX.utils.sheet_to_csv` generates CSV
Expand Down Expand Up @@ -75,8 +75,14 @@ The exported `read` and `readFile` functions accept an options argument:

| Option Name | Default | Description |
| :---------- | ------: | :---------- |
| cellFormula | true | Save formulae to the .f field ** |
| cellHTML | true | Parse rich text and save HTML to the .h field |
| cellNF | false | Save number format string to the .z field |
| sheetStubs | true | Create cell objects for stub cells |
| sheetStubs | false | Create cell objects for stub cells |

- `cellFormula` only applies to constructing XLSB formulae. XLSX/XLSM formulae
are stored in plaintext, but XLSB formulae are stored in a binary format.
- Even if `cellNF` is false, formatted text (.w) will be generated

The defaults are enumerated in bits/84_defaults.js

Expand Down
40 changes: 22 additions & 18 deletions bits/10_ssf.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ var _strrev = function(x) { return String(x).split("").reverse().join("");};
function fill(c,l) { return new Array(l+1).join(c); }
function pad(v,d,c){var t=String(v);return t.length>=d?t:(fill(c||0,d-t.length)+t);}
function rpad(v,d,c){var t=String(v);return t.length>=d?t:(t+fill(c||0,d-t.length));}
SSF.version = '0.5.4';
SSF.version = '0.5.7';
/* Options */
var opts_fmt = {};
function fixopts(o){for(var y in opts_fmt) if(o[y]===undefined) o[y]=opts_fmt[y];}
Expand Down Expand Up @@ -147,23 +147,20 @@ var write_date = function(type, fmt, val) {
switch(type) {
case 'y': switch(fmt) { /* year */
case 'y': case 'yy': return pad(val.y % 100,2);
case 'yyy': case 'yyyy': return pad(val.y % 10000,4);
default: throw 'bad year format: ' + fmt;
default: return pad(val.y % 10000,4);
}
case 'm': switch(fmt) { /* month */
case 'm': return val.m;
case 'mm': return pad(val.m,2);
case 'mmm': return months[val.m-1][1];
case 'mmmm': return months[val.m-1][2];
case 'mmmmm': return months[val.m-1][0];
default: throw 'bad month format: ' + fmt;
default: return months[val.m-1][2];
}
case 'd': switch(fmt) { /* day */
case 'd': return val.d;
case 'dd': return pad(val.d,2);
case 'ddd': return days[val.q][0];
case 'dddd': return days[val.q][1];
default: throw 'bad day format: ' + fmt;
default: return days[val.q][1];
}
case 'h': switch(fmt) { /* 12-hour */
case 'h': return 1+(val.H+11)%12;
Expand Down Expand Up @@ -196,7 +193,6 @@ var write_date = function(type, fmt, val) {
} return fmt.length === 3 ? o : pad(o, 2);
/* TODO: handle the ECMA spec format ee -> yy */
case 'e': { return val.y; } break;
default: throw 'bad format type ' + type + ' in ' + fmt;
}
};
/*jshint +W086 */
Expand All @@ -212,14 +208,20 @@ var write_num = function(type, fmt, val) {
if(mul !== 0) return write_num(type, fmt, val * Math.pow(10,2*mul)) + fill("%",mul);
if(fmt.indexOf("E") > -1) {
var idx = fmt.indexOf("E") - fmt.indexOf(".") - 1;
if(fmt == '##0.0E+0') {
var period = fmt.length - 5;
if(fmt.match(/^#+0.0E\+0$/)) {
var period = fmt.indexOf("."); if(period === -1) period=fmt.indexOf('E');
var ee = (Number(val.toExponential(0).substr(2+(val<0))))%period;
if(ee < 0) ee += period;
o = (val/Math.pow(10,ee)).toPrecision(idx+1+(period+ee)%period);
if(!o.match(/[Ee]/)) {
var fakee = (Number(val.toExponential(0).substr(2+(val<0))));
if(o.indexOf(".") === -1) o = o[0] + "." + o.substr(1) + "E+" + (fakee - o.length+ee);
else throw "missing E |" + o;
else o += "E+" + (fakee - ee);
while(o.substr(0,2) === "0.") {
o = o[0] + o.substr(2,period) + "." + o.substr(2+period);
o = o.replace(/^0+([1-9])/,"$1").replace(/^0+\./,"0.");
}
o = o.replace(/\+-/,"-");
}
o = o.replace(/^([+-]?)([0-9]*)\.([0-9]*)[Ee]/,function($$,$1,$2,$3) { return $1 + $2 + $3.substr(0,(period+ee)%period) + "." + $3.substr(ee) + "E"; });
} else o = val.toExponential(idx);
Expand All @@ -234,6 +236,7 @@ var write_num = function(type, fmt, val) {
var myn = (rnd - base*den), myd = den;
return sign + (base?base:"") + " " + (myn === 0 ? fill(" ", r[1].length + 1 + r[4].length) : pad(myn,r[1].length," ") + r[2] + "/" + r[3] + pad(myd,r[4].length));
}
if(fmt.match(/^#+0+$/)) fmt = fmt.replace(/#/g,"");
if(fmt.match(/^00+$/)) return (val<0?"-":"")+pad(Math.round(aval),fmt.length);
if(fmt.match(/^[#?]+$/)) return String(Math.round(val)).replace(/^0$/,"");
if((r = fmt.match(/^#*0+\.(0+)/))) {
Expand Down Expand Up @@ -323,7 +326,8 @@ function eval_fmt(fmt, v, opts, flen) {
out.push(q); lst = c; break;
case '[': /* TODO: Fix this -- ignore all conditionals and formatting */
o = c;
while(fmt[i++] !== ']') o += fmt[i];
while(fmt[i++] !== ']' && i < fmt.length) o += fmt[i];
if(o.substr(-1) !== ']') throw 'unterminated "[" block: |' + o + '|';
if(o.match(/\[[HhMmSs]*\]/)) {
if(!dt) dt = parse_date_code(v, opts);
if(!dt) return "";
Expand All @@ -344,7 +348,7 @@ function eval_fmt(fmt, v, opts, flen) {
out.push({t:'D', v:o}); break;
case ' ': out.push({t:c,v:c}); ++i; break;
default:
if(",$-+/():!^&'~{}<>=".indexOf(c) === -1)
if(",$-+/():!^&'~{}<>=".indexOf(c) === -1)
throw 'unrecognized character ' + fmt[i] + ' in ' + fmt;
out.push({t:'t', v:c}); ++i; break;
}
Expand All @@ -362,21 +366,20 @@ function eval_fmt(fmt, v, opts, flen) {
/* replace fields */
for(i=0; i < out.length; ++i) {
switch(out[i].t) {
case 't': case 'T': case ' ': break;
case 'd': case 'm': case 'y': case 'h': case 'H': case 'M': case 's': case 'A': case 'e': case 'Z':
case 't': case 'T': case ' ': case 'D': break;
case 'd': case 'm': case 'y': case 'h': case 'H': case 'M': case 's': case 'e': case 'Z':
out[i].v = write_date(out[i].t, out[i].v, dt);
out[i].t = 't'; break;
case 'n': case '(': case '?':
var jj = i+1;
while(out[jj] && ("?D".indexOf(out[jj].t) > -1 || (" t".indexOf(out[jj].t) > -1 && "?t".indexOf((out[jj+1]||{}).t)>-1 && (out[jj+1].t == '?' || out[jj+1].v == '/')) || out[i].t == '(' && (out[jj].t == ')' || out[jj].t == 'n') || out[jj].t == 't' && (out[jj].v == '/' || out[jj].v == '$' || (out[jj].v == ' ' && (out[jj+1]||{}).t == '?')))) {
while(out[jj] && ("?D".indexOf(out[jj].t) > -1 || (" t".indexOf(out[jj].t) > -1 && "?t".indexOf((out[jj+1]||{}).t)>-1 && (out[jj+1].t == '?' || out[jj+1].v == '/')) || out[i].t == '(' && (out[jj].t == ')' || out[jj].t == 'n') || out[jj].t == 't' && (out[jj].v == '/' || '$€'.indexOf(out[jj].v) > -1 || (out[jj].v == ' ' && (out[jj+1]||{}).t == '?')))) {
out[i].v += out[jj].v;
delete out[jj]; ++jj;
}
out[i].v = write_num(out[i].t, out[i].v, v);
out[i].v = write_num(out[i].t, out[i].v, (flen >1 && v < 0 && i>0 && out[i-1].v == "-" ? -v:v));
out[i].t = 't';
i = jj-1; break;
case 'G': out[i].t = 't'; out[i].v = general_fmt(v,opts); break;
default: console.error(out); throw "unrecognized type " + out[i].t;
}
}
return out.map(function(x){return x.v;}).join("");
Expand All @@ -386,6 +389,7 @@ function choose_fmt(fmt, v, o) {
if(typeof fmt === 'number') fmt = ((o&&o.table) ? o.table : table_fmt)[fmt];
if(typeof fmt === "string") fmt = split_fmt(fmt);
var l = fmt.length;
if(l<4 && fmt[l-1].indexOf("@")>-1) --l;
switch(fmt.length) {
case 1: fmt = fmt[0].indexOf("@")>-1 ? ["General", "General", "General", fmt[0]] : [fmt[0], fmt[0], fmt[0], "@"]; break;
case 2: fmt = fmt[1].indexOf("@")>-1 ? [fmt[0], fmt[0], fmt[0], fmt[1]] : [fmt[0], fmt[1], fmt[0], "@"]; break;
Expand Down
2 changes: 1 addition & 1 deletion bits/31_version.js
Original file line number Diff line number Diff line change
@@ -1 +1 @@
XLSX.version = '0.5.3';
XLSX.version = '0.5.4';
68 changes: 0 additions & 68 deletions bits/37_xlsbutils.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,40 +16,9 @@ function readIEEE754(buf, idx, isLE, nl, ml) {
return (s ? -1 : 1) * m * Math.pow(2, e - ml);
}

function s2a(s) {
if(typeof Buffer !== 'undefined') return new Buffer(s, "binary");
var w = s.split("").map(function(x){ return x.charCodeAt(0) & 0xff; });
return w;
}

var __toBuffer;
if(typeof Buffer !== "undefined") {
Buffer.prototype.hexlify= function() { return this.toString('hex'); };
Buffer.prototype.utf16le= function(s,e){return this.toString('utf16le',s,e).replace(/\u0000/,'').replace(/[\u0001-\u0006]/,'!');};
Buffer.prototype.utf8 = function(s,e) { return this.toString('utf8',s,e); };
Buffer.prototype.lpstr = function(i) { var len = this.readUInt32LE(i); return len > 0 ? this.utf8(i+4,i+4+len-1) : "";};
Buffer.prototype.lpwstr = function(i) { var len = 2*this.readUInt32LE(i); return this.utf8(i+4,i+4+len-1);};
if(typeof cptable !== "undefined") Buffer.prototype.lpstr = function(i) {
var len = this.readUInt32LE(i);
if(len === 0) return "";
if(typeof current_cptable === "undefined") return this.utf8(i+4,i+4+len-1);
var t = Array(this.slice(i+4,i+4+len-1));
//1console.log("start", this.l, len, t);
var c, j = i+4, o = "", cc;
for(;j!=i+4+len;++j) {
c = this.readUInt8(j);
cc = current_cptable.dec[c];
if(typeof cc === 'undefined') {
c = c*256 + this.readUInt8(++j);
cc = current_cptable.dec[c];
}
if(typeof cc === 'undefined') throw "Unrecognized character " + c.toString(16);
if(c === 0) break;
o += cc;
//1console.log(cc, cc.charCodeAt(0), o, this.l);
}
return o;
};
__toBuffer = function(bufs) { return Buffer.concat(bufs[0]); };
} else {
__toBuffer = function(bufs) {
Expand All @@ -66,16 +35,6 @@ var __readUInt32LE = function(b, idx) { return b.readUInt32LE ? b.readUInt32LE(i
var __readInt32LE = function(b, idx) { if(b.readInt32LE) return b.readInt32LE(idx); var u = __readUInt32LE(b,idx); if(!(u & 0x80000000)) return u; return (0xffffffff - u + 1) * -1; };
var __readDoubleLE = function(b, idx) { return b.readDoubleLE ? b.readDoubleLE(idx) : readIEEE754(b, idx||0);};

var __hexlify = function(b) { return b.map(function(x){return (x<16?"0":"") + x.toString(16);}).join(""); };

var __utf16le = function(b,s,e) { if(b.utf16le) return b.utf16le(s,e); var str = ""; for(var i=s; i<e; i+=2) str += String.fromCharCode(__readUInt16LE(b,i)); return str.replace(/\u0000/,'').replace(/[\u0001-\u0006]/,'!'); };

var __utf8 = function(b,s,e) { if(b.utf8) return b.utf8(s,e); var str = ""; for(var i=s; i<e; i++) str += String.fromCharCode(__readUInt8(b,i)); return str; };

var __lpstr = function(b,i) { if(b.lpstr) return b.lpstr(i); var len = __readUInt32LE(b,i); return len > 0 ? __utf8(b, i+4,i+4+len-1) : "";};
var __lpwstr = function(b,i) { if(b.lpwstr) return b.lpwstr(i); var len = 2*__readUInt32LE(b,i); return __utf8(b, i+4,i+4+len-1);};

function bconcat(bufs) { return (typeof Buffer !== 'undefined') ? Buffer.concat(bufs) : [].concat.apply([], bufs); }

function ReadShift(size, t) {
var o, w, vv, i, loc; t = t || 'u';
Expand All @@ -88,46 +47,19 @@ function ReadShift(size, t) {
/* falls through */
case 16: o = this.toString('hex', this.l,this.l+size); break;

case 'utf8': size = t; o = __utf8(this, this.l, this.l + size); break;
case 'utf16le': size=2*t; o = __utf16le(this, this.l, this.l + size); break;

/* [MS-OLEDS] 2.1.4 LengthPrefixedAnsiString */
case 'lpstr': o = __lpstr(this, this.l); size = 5 + o.length; break;

case 'lpwstr': o = __lpwstr(this, this.l); size = 5 + o.length; if(o[o.length-1] == '\u0000') size += 2; break;

/* sbcs and dbcs support continue records in the SST way TODO codepages */
/* TODO: DBCS http://msdn.microsoft.com/en-us/library/cc194788.aspx */
case 'dbcs': size = 2*t; o = ""; loc = this.l;
for(i = 0; i != t; ++i) {
if(this.lens && this.lens.indexOf(loc) !== -1) {
w = __readUInt8(this, loc);
this.l = loc + 1;
vv = ReadShift.call(this, w ? 'dbcs' : 'sbcs', t-i);
return o + vv;
}
o += _getchar(__readUInt16LE(this, loc));
loc+=2;
} break;

case 'sbcs': size = t; o = ""; loc = this.l;
for(i = 0; i != t; ++i) {
if(this.lens && this.lens.indexOf(loc) !== -1) {
w = __readUInt8(this, loc);
this.l = loc + 1;
vv = ReadShift.call(this, w ? 'dbcs' : 'sbcs', t-i);
return o + vv;
}
o += _getchar(__readUInt8(this, loc));
loc+=1;
} break;

case 'cstr': size = 0; o = "";
while((w=__readUInt8(this, this.l + size++))!==0) o+= _getchar(w);
break;
case 'wstr': size = 0; o = "";
while((w=__readUInt16LE(this,this.l +size))!==0){o+= _getchar(w);size+=2;}
size+=2; break;
}
this.l+=size; return o;
}
Expand Down
3 changes: 2 additions & 1 deletion bits/38_recordhopper.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ var recordhopper = function(data, cb) {
var RT = data.read_shift(1);
if(RT & 0x80) RT = (RT & 0x7F) + ((data.read_shift(1) & 0x7F)<<7);
var R = RecordEnum[RT] || RecordEnum[0xFFFF];
length = tmpbyte = data.read_shift(1);
tmpbyte = data.read_shift(1);
length = tmpbyte & 0x7F;
for(cntbyte = 1; cntbyte <4 && (tmpbyte & 0x80); ++cntbyte) length += ((tmpbyte = data.read_shift(1)) & 0x7F)<<(7*cntbyte);
var d = R.f(data, length);
if(cb(d, R, RT)) return;
Expand Down
8 changes: 8 additions & 0 deletions bits/39_parsestructs.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,16 @@

/* [MS-XLSB] 2.5.143 */
var parse_StrRun = function(data, length) {
return { ich: data.read_shift(2), ifnt: data.read_shift(2) };
}

/* [MS-XLSB] 2.1.7.121 */
var parse_RichStr = function(data, length) {
var start = data.l;
var flags = data.read_shift(1);
var fRichStr = flags & 1, fExtStr = flags & 2;
var str = parse_XLWideString(data);
var rgsStrRun = [];
var z = {
t: str,
r:"<t>" + escapexml(str) + "</t>",
Expand All @@ -12,6 +19,7 @@ var parse_RichStr = function(data, length) {
if(fRichStr) {
/* TODO: formatted string */
var dwSizeStrRun = data.read_shift(4);
for(var i = 0; i != dwSizeStrRun; ++i) rgsStrRun.push(parse_StrRun(data));
}
if(fExtStr) {
/* TODO: phonetic string */
Expand Down
11 changes: 6 additions & 5 deletions bits/52_sstxml.js
Original file line number Diff line number Diff line change
Expand Up @@ -105,35 +105,36 @@ var parse_rs = (function() {
})();

/* 18.4.8 si CT_Rst */
var parse_si = function(x) {
var parse_si = function(x, opts) {
var html = opts ? opts.cellHTML : true;
var z = {};
if(!x) return null;
var y;
/* 18.4.12 t ST_Xstring (Plaintext String) */
if(x[1] === 't') {
z.t = utf8read(unescapexml(x.substr(x.indexOf(">")+1).split(/<\/t>/)[0]));
z.r = x;
z.h = z.t;
if(html) z.h = z.t;
}
/* 18.4.4 r CT_RElt (Rich Text Run) */
else if((y = x.match(/<r>/))) {
z.r = x;
/* TODO: properly parse (note: no other valid child can have body text) */
z.t = utf8read(unescapexml(x.replace(/<[^>]*>/gm,"")));
z.h = parse_rs(x);
if(html) z.h = parse_rs(x);
}
/* 18.4.3 phoneticPr CT_PhoneticPr (TODO: needed for Asian support) */
/* 18.4.6 rPh CT_PhoneticRun (TODO: needed for Asian support) */
return z;
};

/* 18.4 Shared String Table */
var parse_sst_xml = function(data) {
var parse_sst_xml = function(data, opts) {
var s = [];
/* 18.4.9 sst CT_Sst */
var sst = data.match(new RegExp("<sst([^>]*)>([\\s\\S]*)<\/sst>","m"));
if(isval(sst)) {
s = sst[2].replace(/<(?:si|sstItem)>/g,"").split(/<\/(?:si|sstItem)>/).map(parse_si).filter(function(x) { return x; });
s = sst[2].replace(/<(?:si|sstItem)>/g,"").split(/<\/(?:si|sstItem)>/).map(function(x) { return parse_si(x, opts); }).filter(function(x) { return x; });
sst = parsexmltag(sst[1]); s.Count = sst.count; s.Unique = sst.uniqueCount;
}
return s;
Expand Down
4 changes: 0 additions & 4 deletions bits/54_sst.js

This file was deleted.

0 comments on commit 7e9f218

Please sign in to comment.