Skip to content

Commit

Permalink
version bump 0.6.8: first options
Browse files Browse the repository at this point in the history
- preparing for options (paralleling js-xlsx)
- cellNF option
- all relevant cell types emit formats (dead code to be removed later)
- test structure converging to js-xlsx
- miscellaneous cleanup
  • Loading branch information
SheetJSDev committed Feb 16, 2014
1 parent d00488f commit ebeadff
Show file tree
Hide file tree
Showing 12 changed files with 196 additions and 66 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -1 +1,2 @@
node_modules
misc/coverage.html
29 changes: 21 additions & 8 deletions README.md
Expand Up @@ -26,7 +26,7 @@ See <http://oss.sheetjs.com/js-xls/> for a browser example.

Some helper functions in `XLS.utils` generate different views of the sheets:

- `XLS.utils.sheet_to_csv` generates CSV
- `XLS.utils.sheet_to_csv` generates CSV
- `XLS.utils.sheet_to_row_object_array` interprets sheets as tables with a header column and generates an array of objects
- `XLS.utils.get_formulae` generates a list of formulae

Expand All @@ -36,21 +36,33 @@ For more details:
- `index.html` is the live demo
- `bits/80_xls.js` contains the logic for generating CSV and JSON from sheets

## Cell Object Description
## Cell Object Description

`.SheetNames` is an ordered list of the sheets in the workbook

`.Sheets[sheetname]` returns a data structure representing the sheet

`.Sheets[sheetname]` returns a data structure representing the sheet. Each key
that does not start with `!` corresponds to a cell (using `A1` notation).

`.Sheets[sheetname][address]` returns the specified cell:

- `.v` : the raw value of the cell
- `.w` : the formatted text of the cell (if applicable)
- `.t` : the type of the cell (constrained to the enumeration `ST_CellType` as documented in page 4215 of ISO/IEC 29500-1:2012(E) )
- `.f` : the formula of the cell (if applicable)
- `.z` : the number format string associated with the cell (if requested)

For dates, `.v` holds the raw date code from the sheet and `.w` holds the formatted text.

## Options

The exported `read` and `readFile` functions accept an options argument:

| Option Name | Default | Description |
| :---------- | ------: | :---------- |
| cellNF | false | Save number format string to the .z field |

- Even if `cellNF` is false, formatted text (.w) will be generated

## Other Notes

`CFB` refers to the Microsoft Compound File Binary Format, a container format for XLS as well as DOC and other pre-OOXML data formats.
Expand All @@ -67,15 +79,15 @@ Tests utilize the mocha testing framework. Travis-CI and Sauce Labs links:

- <https://travis-ci.org/SheetJS/js-xls> for XLS module in node
- <https://travis-ci.org/SheetJS/SheetJS.github.io> for XLS* modules
- <https://saucelabs.com/u/sheetjs> for XLS* modules using Sauce Labs
- <https://saucelabs.com/u/sheetjs> for XLS* modules using Sauce Labs

## Contributing

Due to the precarious nature of the Open Specifications Promise, it is very important to ensure code is cleanroom. Consult CONTRIBUTING.md
Due to the precarious nature of the Open Specifications Promise, it is very important to ensure code is cleanroom. Consult CONTRIBUTING.md

## XLSX/XLSM/XLSB Support

XLSX/XLSM/XLSB support is available in [js-xlsx](https://github.com/SheetJS/js-xlsx).
XLSX/XLSM/XLSB support is available in [js-xlsx](https://github.com/SheetJS/js-xlsx).

## License

Expand All @@ -95,7 +107,8 @@ OSP-covered specifications:
- [MS-OLEDS]: Object Linking and Embedding (OLE) Data Structures
- [MS-OLEPS]: Object Linking and Embedding (OLE) Property Set Data Structures
- [MS-OSHARED]: Office Common Data Types and Objects Structures
- [MS-OVBA]: Office VBA File Format Structure
- [MS-OVBA]: Office VBA File Format Structure
- [MS-OE376]: Office Implementation Information for ECMA-376 Standards Support
- [XLS]: Microsoft Office Excel 97-2007 Binary File Format Specification

Certain features are shared with the Office Open XML File Formats, covered in:
Expand Down
12 changes: 8 additions & 4 deletions bin/xls2csv.njs
Expand Up @@ -44,12 +44,16 @@ if(!fs.existsSync(filename)) {
process.exit(2);
}

if(program.dev) X.verbose = 2;
var opts = {}, wb;
if(program.listSheets) opts.bookSheets = true;

var wb;
if(program.dev) wb = X.readFile(filename);
if(program.dev) {
X.verbose = 2;
opts.WTF = true;
wb = X.readFile(filename, opts);
}
else try {
wb = X.readFile(filename);
wb = X.readFile(filename, opts);
} catch(e) {
var msg = (program.quiet) ? "" : n + "2csv: error parsing ";
msg += filename + ": " + e;
Expand Down
2 changes: 1 addition & 1 deletion bits/01_version.js
@@ -1 +1 @@
XLS.version = '0.6.7';
XLS.version = '0.6.8';
9 changes: 1 addition & 8 deletions bits/08_blob.js
@@ -1,5 +1,5 @@
function readIEEE754(buf, idx, isLE, nl, ml) {
if(isLE === undefined) isLE = true;
if(typeof isLE === 'undefined') isLE = true;
if(!nl) nl = 8;
if(!ml && nl === 8) ml = 52;
var e, m, el = nl * 8 - ml - 1, eMax = (1 << el) - 1, eBias = eMax >> 1;
Expand Down Expand Up @@ -179,13 +179,6 @@ function CheckField(hexstr, fld) {
this.l += hexstr.length/2;
}

/*
 * Compare the bytes at the current offset (this.l) against an expected hex
 * string.  On mismatch a message is written with console.error; the offset
 * is advanced by the width of the field (hexstr.length/2) in either case.
 *
 * Must be invoked with `this` set to the blob being read.
 *   hexstr: expected contents, as a hex string
 *   fld:    optional label prepended to the message
 */
function WarnField(hexstr, fld) {
	var nbytes = hexstr.length/2;
	var chunk = this.slice(this.l, this.l + nbytes);
	var seen;
	/* prefer the chunk's own hexlify when it has one */
	if(chunk.hexlify) seen = chunk.hexlify();
	else seen = __hexlify(chunk);
	if(seen !== hexstr) {
		var label = fld || "";
		console.error(label + 'Expected ' + hexstr + ' saw ' + seen);
	}
	this.l += nbytes;
}

function prep_blob(blob, pos) {
blob.read_shift = ReadShift.bind(blob);
blob.chk = CheckField;
Expand Down
2 changes: 0 additions & 2 deletions bits/18_cfb.js
Expand Up @@ -26,7 +26,6 @@ var fat_addrs = []; // locations of FAT sectors
var blob = file.slice(0,512);
prep_blob(blob);
var read = ReadShift.bind(blob), chk = CheckField.bind(blob);
//var wrn = WarnField.bind(blob);
var j = 0, q;

// header signature 8
Expand All @@ -36,7 +35,6 @@ chk(HEADER_SIGNATURE, 'Header Signature: ');
chk(HEADER_CLSID, 'CLSID: ');

// minor version 2
//wrn(HEADER_MINOR_VERSION, 'Minor Version: ');
read(2);

// major version 3
Expand Down
61 changes: 48 additions & 13 deletions bits/80_xls.js
Expand Up @@ -26,7 +26,7 @@ function parse_compobj(obj) {
}


function parse_xlscfb(cfb) {
function parse_xlscfb(cfb, options) {
reset_cp();
var CompObj = cfb.find('!CompObj');
var Summary = cfb.find('!SummaryInformation');
Expand Down Expand Up @@ -56,7 +56,7 @@ function slurp(R, blob, length, opts) {
}

// 2.3.2
function parse_workbook(blob) {
function parse_workbook(blob, options) {
var wb = {opts:{}};
var Sheets = {};
var out = [];
Expand Down Expand Up @@ -265,40 +265,63 @@ function parse_workbook(blob) {
temp_val = {ixfe: val.ixfe, XF: XFs[val.ixfe], v:val.val, t:'n'};
if(temp_val.XF) try {
temp_val.w=SSF.format(temp_val.XF.ifmt||0, temp_val.v);
} catch(e) { }
if(opts.cellNF) temp_val.z = SSF._table[temp_val.XF.ifmt||0];
} catch(e) { if(opts.WTF) throw e; }
addline({c:val.c, r:val.r}, temp_val);
} break;
case 'BoolErr': {
temp_val = {ixfe: val.ixfe, XF: XFs[val.ixfe], v:val.val, t:val.t};
if(temp_val.XF) try {
temp_val.w=SSF.format(temp_val.XF.ifmt||0, temp_val.v);
} catch(e) { }
if(opts.cellNF) temp_val.z = SSF._table[temp_val.XF.ifmt||0];
} catch(e) { if(opts.WTF) throw e; }
addline({c:val.c, r:val.r}, temp_val);
} break;
case 'RK': {
temp_val = {ixfe: val.ixfe, XF: XFs[val.ixfe], v:val.rknum, t:'n'};
if(temp_val.XF) try {
temp_val.w=SSF.format(temp_val.XF.ifmt||0, temp_val.v);
} catch(e) { }
if(opts.cellNF) temp_val.z = SSF._table[temp_val.XF.ifmt||0];
} catch(e) { if(opts.WTF) throw e; }
addline({c:val.c, r:val.r}, temp_val);
} break;
case 'MulRk': {
for(var j = val.c; j <= val.C; ++j) {
var ixfe = val.rkrec[j-val.c][0];
addline({c:j, r:val.r}, {ixfe: ixfe, XF: XFs[ixfe], v:val.rkrec[j-val.c][1], t:'n'});
temp_val= {ixfe:ixfe, XF:XFs[ixfe], v:val.rkrec[j-val.c][1], t:'n'};
if(temp_val.XF) try {
temp_val.w=SSF.format(temp_val.XF.ifmt||0, temp_val.v);
if(opts.cellNF) temp_val.z = SSF._table[temp_val.XF.ifmt||0];
} catch(e) { if(opts.WTF) throw e; }
addline({c:j, r:val.r}, temp_val);
}
} break;
case 'Formula': {
switch(val.val) {
case 'String': last_formula = val; break;
case 'Array Formula': throw "Array Formula unsupported";
default: addline(val.cell, {v:val.val, f:stringify_formula(val.formula, range, val.cell, supbooks), ixfe: val.cell.ixfe, XF:XFs[val.cell.ixfe], t:'n'}); // TODO: infer type from formula
default: // TODO: infer type from formula
temp_val = {v:val.val, ixfe:val.cell.ixfe, t:'n'};
temp_val.XF = XFs[temp_val.ixfe];
temp_val.f = stringify_formula(val.formula,range,val.cell,supbooks);
if(temp_val.XF) try {
temp_val.w=SSF.format(temp_val.XF.ifmt||0, temp_val.v);
if(opts.cellNF) temp_val.z = SSF._table[temp_val.XF.ifmt||0];
} catch(e) { if(opts.WTF) throw e; }
addline(val.cell, temp_val);
}
} break;
case 'String': {
if(last_formula) {
last_formula.val = val;
addline(last_formula.cell, {v:last_formula.val, f:stringify_formula(last_formula.formula, range, last_formula.cell, supbooks), ixfe: last_formula.cell.ixfe, t:'s'});
temp_val = {v:last_formula.val, ixfe:last_formula.cell.ixfe, t:'s'};
temp_val.XF = XFs[temp_val.ixfe];
temp_val.f = stringify_formula(last_formula.formula, range, last_formula.cell, supbooks);
if(temp_val.XF) try {
temp_val.w=SSF.format(temp_val.XF.ifmt||0, temp_val.v);
if(opts.cellNF) temp_val.z = SSF._table[temp_val.XF.ifmt||0];
} catch(e) { if(opts.WTF) throw e; }
addline(last_formula.cell, temp_val);
last_formula = null;
}
} break;
Expand All @@ -310,11 +333,22 @@ function parse_workbook(blob) {
shared_formulae[last_cell] = val[0];
} break;
case 'LabelSst': {
addline({c:val.c, r:val.r}, {v:sst[val.isst].t, ixfe:val.ixfe, t:'s'});
temp_val={v:sst[val.isst].t, ixfe:val.ixfe, t:'s'};
temp_val.XF = XFs[temp_val.ixfe];
if(temp_val.XF) try {
temp_val.w=SSF.format(temp_val.XF.ifmt||0, temp_val.v);
if(opts.cellNF) temp_val.z = SSF._table[temp_val.XF.ifmt||0];
} catch(e) { if(opts.WTF) throw e; }
addline({c:val.c, r:val.r}, temp_val);
} break;
case 'Label': {
/* Some writers erroneously write Label */
addline({c:val.c, r:val.r}, {v:val.val, ixfe:val.ixfe, t:'s'});
temp_val = {v:val.val, ixfe:val.ixfe, XF:XFs[val.ixfe], t:'s'};
if(temp_val.XF) try {
temp_val.w=SSF.format(temp_val.XF.ifmt||0, temp_val.v);
if(opts.cellNF) temp_val.z = SSF._table[temp_val.XF.ifmt||0];
} catch(e) { if(opts.WTF) throw e; }
addline({c:val.c, r:val.r}, temp_val);
} break;
case 'Dimensions': {
range = val;
Expand Down Expand Up @@ -480,7 +514,7 @@ function parse_workbook(blob) {
if(opts.enc) wb.Encryption = opts.enc;
return wb;
}
if(Workbook) WorkbookP = parse_workbook(Workbook.content);
if(Workbook) WorkbookP = parse_workbook(Workbook.content, options);
else throw new Error("Cannot find Workbook stream");
if(CompObj) CompObjP = parse_compobj(CompObj);

Expand Down Expand Up @@ -563,6 +597,7 @@ function get_formulae(ws) {
var x = ws[y];
var val = "";
if(x.f) val = x.f;
else if(typeof x.w !== 'undefined') val = "'" + x.w;
else if(typeof x.v === 'number') val = x.v;
else val = x.v;
cmds.push(y + "=" + val);
Expand All @@ -588,6 +623,6 @@ var utils = {
};

function xlsread(f, options) {
return parse_xlscfb(CFB.read(f, options));
return parse_xlscfb(CFB.read(f, options), options);
}
var readFile = function(f) { return parse_xlscfb(CFB.read(f, {type:'file'})); };
var readFile = function(f,o){return parse_xlscfb(CFB.read(f,{type:'file'}),o);};
2 changes: 1 addition & 1 deletion package.json
@@ -1,6 +1,6 @@
{
"name": "xlsjs",
"version": "0.6.7",
"version": "0.6.8",
"author": "sheetjs",
"description": "(one day) a full-featured XLS parser and writer. For now, primitive parser",
"keywords": [ "xls", "office", "excel", "spreadsheet" ],
Expand Down
66 changes: 62 additions & 4 deletions test.js
Expand Up @@ -3,6 +3,9 @@ var XLS;
var fs = require('fs'), assert = require('assert');
describe('source',function(){ it('should load', function(){ XLS = require('./'); });});

var opts = {};
if(process.env.WTF) opts.WTF = true;

var files = (fs.existsSync('tests.lst') ? fs.readFileSync('tests.lst', 'utf-8').split("\n") : fs.readdirSync('test_files')).filter(function(x){return x.substr(-4)==".xls" || x.substr(-8)==".xls.b64";});

/* Excel enforces 31 character sheet limit, although technical file limit is 255 */
Expand Down Expand Up @@ -64,17 +67,72 @@ function parsetest(x, wb) {
describe('should parse test files', function() {
files.forEach(function(x) {
it(x, x.substr(-8) == ".pending" ? null : function() {
var wb = x.substr(-4) == ".b64" ? XLS.read(fs.readFileSync(dir + x, 'utf8'), {type: 'base64'}) : XLS.readFile(dir + x);
var wb = x.substr(-4) == ".b64" ? XLS.read(fs.readFileSync(dir + x, 'utf8'), {type: 'base64'}) : XLS.readFile(dir + x, opts);
if(x.substr(-4) === ".xls") parsetest(x, wb);
});
});
});

describe('other features', function() {
describe('options', function() {
before(function() {
XLS = require('./');
});
describe('cell', function() {
it('should generate formulae by default', function() {
var wb = XLS.readFile(dir + 'formula_stress_test.xls');
var found = false;
wb.SheetNames.forEach(function(s) {
var ws = wb.Sheets[s];
Object.keys(ws).forEach(function(addr) {
if(addr[0] === "!" || !ws.hasOwnProperty(addr)) return;
if(typeof ws[addr].f !== 'undefined') return found = true;
});
});
assert(found);
});
it('should not generate number formats by default', function() {
var wb = XLS.readFile(dir+'number_format.xls');
wb.SheetNames.forEach(function(s) {
var ws = wb.Sheets[s];
Object.keys(ws).forEach(function(addr) {
if(addr[0] === "!" || !ws.hasOwnProperty(addr)) return;
assert(typeof ws[addr].z === 'undefined');
});
});
});
it('should generate number formats when requested', function() {
var wb = XLS.readFile(dir+'number_format.xls', {cellNF: true});
wb.SheetNames.forEach(function(s) {
var ws = wb.Sheets[s];
Object.keys(ws).forEach(function(addr) {
if(addr[0] === "!" || !ws.hasOwnProperty(addr)) return;
assert(typeof ws[addr].t!== 'n' || typeof ws[addr].z !== 'undefined');
});
});
});
});
});

describe('input formats', function() {
it('should read binary strings', function() {
XLS.read(fs.readFileSync(dir+'formula_stress_test.xls', 'binary'), {type:'binary'});
});
it('should read base64 strings', function() {
XLS.read(fs.readFileSync(dir+'comments_stress_test.xls', 'base64'), {type: 'base64'});
});
});

describe('invalid files', function() {
it('should fail on passwords', function() {
assert.throws(function() { XLS.readFile(dir + 'apachepoi_password.xls'); });
});
it('should read binary strings', function() {
XLS.read(fs.readFileSync(dir + 'formula_stress_test.xls', 'binary'), {type:'binary'});
it('should fail on XLSX files', function() {
assert.throws(function() { XLS.readFile(dir + 'roo_type_excelx.xls'); });
});
it('should fail on ODS files', function() {
assert.throws(function() { XLS.readFile(dir + 'roo_type_openoffice.xls');});
});
it('should fail on DOC files', function() {
assert.throws(function() { XLS.readFile(dir + 'word_doc.doc');});
});
});
2 changes: 2 additions & 0 deletions tests.lst
Expand Up @@ -309,6 +309,7 @@ apachepoi_unicodeNameRecord.xls
apachepoi_xor-encryption-abc.xls.pending
apachepoi_yearfracExamples.xls
comments_stress_test.xls
custom_properties.xls
formula_stress_test.xls
jxls-core_array.xls
jxls-core_beandata.xls
Expand Down Expand Up @@ -460,3 +461,4 @@ xlrd_picture_in_cell.xls
xlrd_profiles.xls
xlrd_ragged.xls
xlrd_xf_class.xls
xlsx-stream-d-date-cell.xls

0 comments on commit ebeadff

Please sign in to comment.