Skip to content

Commit

Permalink
version bump 0.5.0: changes to cell interface
Browse files Browse the repository at this point in the history
Cell object stores the formatted text in the `.w` field
- Raw format and types are preserved in the `.t`, `.v` fields
- Accessors have been updated to use the field

updates for older OOXML:
- ECMA-376 makes reference to old sstItem shared string item
- [MS-XLSX] xmlns for workbook
  • Loading branch information
SheetJSDev committed Feb 4, 2014
1 parent 5c4bf62 commit a96d896
Show file tree
Hide file tree
Showing 11 changed files with 72 additions and 39 deletions.
33 changes: 26 additions & 7 deletions README.md
@@ -1,6 +1,6 @@
# xlsx

Currently a parser for XLSX/XLSM/XLSB files. Cleanroom implementation from the
Currently a parser for XLSX/XLSM/XLSB files. Cleanroom implementation from the
ISO 29500 Office Open XML specifications, [MS-XLSB], and related documents.

## Installation
Expand All @@ -19,7 +19,7 @@ In the browser:

The node version installs a binary `xlsx2csv` which can read XLSX/XLSM/XLSB files and output the contents in various formats. The source is available at `xlsx2csv.njs` in the bin directory.

See <http://oss.sheetjs.com/js-xlsx/> for a browser example.
See <http://oss.sheetjs.com/js-xlsx/> for a browser example.

Note that older versions of IE do not support the HTML5 File API, so the base64 mode is provided for testing. On OSX you can get the base64 encoding by running:

Expand All @@ -39,18 +39,24 @@ Simple usage (walks through every cell of every sheet and dumps the values):

Some helper functions in `XLSX.utils` generate different views of the sheets:

- `XLSX.utils.sheet_to_csv` generates CSV
- `XLSX.utils.sheet_to_csv` generates CSV
- `XLSX.utils.sheet_to_row_object_array` interprets sheets as tables with a header row and generates an array of objects
- `XLSX.utils.get_formulae` generates a list of formulae

## Notes
## Notes

`.SheetNames` is an ordered list of the sheets in the workbook

`.Sheets[sheetname]` returns a data structure representing the sheet. Each key
that does not start with `!` corresponds to a cell (using `A-1` notation).
that does not start with `!` corresponds to a cell (using `A-1` notation).

`.Sheets[sheetname][address].v` returns the value of the specified cell and `.Sheets[sheetname][address].t` returns the type of the cell (constrained to the enumeration `ST_CellType` as documented in page 4215 of ISO/IEC 29500-1:2012(E) )
`.Sheets[sheetname][address]` returns the specified cell:

- `.v` returns the raw value of the cell
- `.w` returns the formatted text of the cell
- `.t` returns the type of the cell (constrained to the enumeration `ST_CellType` as documented on page 4215 of ISO/IEC 29500-1:2012(E))

For dates, `.v` holds the raw date code from the sheet and `.w` holds the formatted text.

For more details:

Expand All @@ -69,12 +75,25 @@ Tests utilize the mocha testing framework. Travis-CI and Sauce Labs links:

- <https://travis-ci.org/SheetJS/js-xlsx> for XLSX module in node
- <https://travis-ci.org/SheetJS/SheetJS.github.io> for XLS* modules
- <https://saucelabs.com/u/sheetjs> for XLS* modules using Sauce Labs
- <https://saucelabs.com/u/sheetjs> for XLS* modules using Sauce Labs

## Test Files

Test files are housed in [another repo](https://github.com/SheetJS/test_files).

## Testing

`make test` will run the node-based tests. To run the in-browser tests, clone
[the oss.sheetjs.com repo](https://github.com/SheetJS/SheetJS.github.io) and
replace the xlsx.js file (then fire up the browser and go to `stress.html`):

```
$ cp xlsx.js ../SheetJS.github.io
$ cd ../SheetJS.github.io
$ simplehttpserver # or "python -mSimpleHTTPServer" or "serve"
$ open -a Chromium.app http://localhost:8000/stress.html
```

## XLS Support

XLS is available in [js-xls](https://github.com/SheetJS/js-xls).
Expand Down
2 changes: 1 addition & 1 deletion bits/31_version.js
@@ -1 +1 @@
XLSX.version = '0.4.3';
XLSX.version = '0.5.0';
2 changes: 1 addition & 1 deletion bits/52_sstxml.js
Expand Up @@ -133,7 +133,7 @@ var parse_sst_xml = function(data) {
/* 18.4.9 sst CT_Sst */
var sst = data.match(new RegExp("<sst([^>]*)>([\\s\\S]*)<\/sst>","m"));
if(isval(sst)) {
s = sst[2].replace(/<si>/g,"").split(/<\/si>/).map(parse_si).filter(function(x) { return x; });
s = sst[2].replace(/<(?:si|sstItem)>/g,"").split(/<\/(?:si|sstItem)>/).map(parse_si).filter(function(x) { return x; });
sst = parsexmltag(sst[1]); s.Count = sst.count; s.Unique = sst.uniqueCount;
}
return s;
Expand Down
7 changes: 1 addition & 6 deletions bits/72_wsxml.js
Expand Up @@ -70,12 +70,7 @@ function parse_worksheet(data) {
var cf = styles.CellXf[cell.s];
if(cf && cf.numFmtId) fmtid = cf.numFmtId;
}
p.raw = p.v;
p.rawt = p.t;
try {
p.v = SSF.format(fmtid,p.v,_ssfopts);
p.t = 'str';
} catch(e) { p.v = p.raw; p.t = p.rawt; }
try { p.w = SSF.format(fmtid,p.v,_ssfopts); } catch(e) { }

s[cell.r] = p;
});
Expand Down
8 changes: 6 additions & 2 deletions bits/77_wbxml.js
@@ -1,4 +1,8 @@
var XMLNS_WB = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main';
var XMLNS_WB = [
'http://schemas.openxmlformats.org/spreadsheetml/2006/main',
'http://schemas.microsoft.com/office/excel/2006/main',
'http://schemas.microsoft.com/office/excel/2006/2'
];

/* 18.2 Workbook */
function parse_workbook(data) {
Expand Down Expand Up @@ -102,7 +106,7 @@ function parse_workbook(data) {
case '</mc:AlternateContent>': pass=false; break;
}
});
if(wb.xmlns !== XMLNS_WB) throw new Error("Unknown Namespace: " + wb.xmlns);
if(XMLNS_WB.indexOf(wb.xmlns) === -1) throw new Error("Unknown Namespace: " + wb.xmlns);

var z;
/* defaults */
Expand Down
7 changes: 5 additions & 2 deletions bits/85_parsezip.js
Expand Up @@ -16,9 +16,12 @@ function parseZip(zip) {
if(dir.style) styles = parse_sty(getdata(getzipfile(zip, dir.style.replace(/^\//,''))),dir.style);

var wb = parse_wb(getdata(getzipfile(zip, dir.workbooks[0].replace(/^\//,''))), dir.workbooks[0]);
var propdata = dir.coreprops.length !== 0 ? getdata(getzipfile(zip, dir.coreprops[0].replace(/^\//,''))) : "";
var props = {}, propdata = "";
try {
propdata = dir.coreprops.length !== 0 ? getdata(getzipfile(zip, dir.coreprops[0].replace(/^\//,''))) : "";
propdata += dir.extprops.length !== 0 ? getdata(getzipfile(zip, dir.extprops[0].replace(/^\//,''))) : "";
var props = propdata !== "" ? parseProps(propdata) : {};
props = propdata !== "" ? parseProps(propdata) : {};
} catch(e) { }
var deps = {};
if(dir.calcchain) deps=parseDeps(getdata(getzipfile(zip, dir.calcchain.replace(/^\//,''))));
var sheets = {}, i=0;
Expand Down
5 changes: 3 additions & 2 deletions bits/90_utils.js
Expand Up @@ -20,7 +20,8 @@ function sheet_to_row_object_array(sheet, opts){
for(R=r.s.r, C = r.s.c; C <= r.e.c; ++C) {
val = sheet[encode_cell({c:C,r:R})];
if(!val) continue;
switch(val.t) {
if(val.w) hdr[C] = val.w;
else switch(val.t) {
case 's': case 'str': hdr[C] = val.v; break;
case 'n': hdr[C] = val.v; break;
}
Expand All @@ -33,7 +34,7 @@ function sheet_to_row_object_array(sheet, opts){
for (C = r.s.c; C <= r.e.c; ++C) {
val = sheet[encode_cell({c: C,r: R})];
if(!val || !val.t) continue;
if(typeof val.w !== 'undefined') { row[hdr[C]] = val.w; isempty = false; }
if(typeof val.w !== 'undefined' && !opts.raw) { row[hdr[C]] = val.w; isempty = false; }
else switch(val.t){
case 's': case 'str': case 'b': case 'n':
if(val.v !== undefined) {
Expand Down
2 changes: 1 addition & 1 deletion package.json
@@ -1,6 +1,6 @@
{
"name": "xlsx",
"version": "0.4.3",
"version": "0.5.0",
"author": "sheetjs",
"description": "XLSB / XLSX / XLSM parser",
"keywords": [ "xlsx", "xlsb", "xlsm", "office", "excel", "spreadsheet" ],
Expand Down
12 changes: 10 additions & 2 deletions tests.lst
Expand Up @@ -51,6 +51,7 @@ apachepoi_52348.xlsx
apachepoi_52716.xlsx
apachepoi_53101.xlsx
apachepoi_53282.xlsx
apachepoi_53282b.xlsx
apachepoi_53568.xlsx
apachepoi_53734.xlsx
apachepoi_53798.xlsx
Expand All @@ -66,6 +67,12 @@ apachepoi_54607.xlsx
apachepoi_55640.xlsx
apachepoi_55745.xlsx
apachepoi_55850.xlsx
apachepoi_55923.xlsx
apachepoi_55924.xlsx
apachepoi_55926.xlsx
apachepoi_55927.xlsx
apachepoi_56011.xlsx
apachepoi_56017.xlsx
apachepoi_AverageTaxRates.xlsx
apachepoi_Booleans.xlsx
apachepoi_BrNotClosed.xlsx
Expand Down Expand Up @@ -111,9 +118,10 @@ apachepoi_WithVariousData.xlsx
apachepoi_atp.xlsx
apachepoi_chart_sheet.xlsx.pending
apachepoi_comments.xlsx
apachepoi_headerFooterTest.xlsx
apachepoi_picture.xlsx
apachepoi_reordered_sheets.xlsx
apachepoi_sample-beta.xlsx.pending
apachepoi_sample-beta.xlsx
apachepoi_sample.xlsx
apachepoi_shared_formulas.xlsx
apachepoi_sheetProtection_allLocked.xlsx
Expand Down Expand Up @@ -154,7 +162,7 @@ openpyxl_g_empty-with-styles.xlsx
openpyxl_g_empty.xlsx
openpyxl_g_empty_libre.xlsx
openpyxl_g_empty_no_dimensions.xlsx
openpyxl_g_empty_with_no_properties.xlsx.pending
openpyxl_g_empty_with_no_properties.xlsx
openpyxl_g_guess_types.xlsx
openpyxl_g_libreoffice_nrt.xlsx
openpyxl_g_merge_range.xlsx
Expand Down
2 changes: 1 addition & 1 deletion tests/files
Submodule files updated 97 files
+1 −0 2011/LONumbers-2010.xls.sheetnames
+1 −0 2011/LONumbers-2010.xlsx.sheetnames
+1 −0 2011/LONumbers-2011.xls.sheetnames
+1 −0 2011/LONumbers-2011.xlsx.sheetnames
+1 −0 2011/LONumbers.xls.sheetnames
+1 −0 2011/LONumbers.xlsx.sheetnames
+3 −0 2011/apachepoi_51585.xlsx.sheetnames
+3 −0 2011/apachepoi_53282b.xlsx.sheetnames
+1 −0 2011/apachepoi_56011.xlsx.sheetnames
+3 −0 2011/apachepoi_56017.xlsx.sheetnames
+1 −0 2011/apachepoi_headerFooterTest.xlsx.sheetnames
+3 −0 2011/apachepoi_sample-beta.xlsx.sheetnames
+0 −0 2011/apachepoi_xor-encryption-abc.xls.sheetnames
+5 −0 2011/comments_stress_test.xls.sheetnames
+5 −0 2011/comments_stress_test.xlsx.sheetnames
+1 −0 2011/excel-reader-xlsx_error03.xlsx.sheetnames
+2 −0 2011/foobar.xls.sheetnames
+1 −0 2011/merge_cells.xls.sheetnames
+1 −0 2011/merge_cells.xlsx.sheetnames
+1 −0 2011/number_format.xls.sheetnames
+4 −0 2011/openpyxl_g_empty_with_no_properties.xlsx.sheetnames
+1 −0 2011/rich_text_stress.xls.sheetnames
+1 −0 2011/rich_text_stress.xlsx.sheetnames
+3 −0 2011/roo_1900_base.xls.sheetnames
+1 −0 2011/roo_1900_base.xlsx.sheetnames
+1 −0 2011/roo_1904_base.xls.sheetnames
+1 −0 2011/roo_1904_base.xlsx.sheetnames
+1 −0 2011/roo_Bibelbund.xls.sheetnames
+1 −0 2011/roo_Bibelbund.xlsx.sheetnames
+3 −0 2011/roo_Pfand_from_windows_phone.xlsx.sheetnames
+1 −0 2011/roo_bad_excel_date.xls.sheetnames
+3 −0 2011/roo_bbu.xls.sheetnames
+3 −0 2011/roo_bbu.xlsx.sheetnames
+3 −0 2011/roo_boolean.xls.sheetnames
+3 −0 2011/roo_boolean.xlsx.sheetnames
+3 −0 2011/roo_borders.xls.sheetnames
+3 −0 2011/roo_borders.xlsx.sheetnames
+1 −0 2011/roo_bug-numbered-sheet-names.xlsx.sheetnames
+3 −0 2011/roo_bug-row-column-fixnum-float.xls.sheetnames
+3 −0 2011/roo_comments.xls.sheetnames
+3 −0 2011/roo_comments.xlsx.sheetnames
+3 −0 2011/roo_datetime.xls.sheetnames
+3 −0 2011/roo_datetime.xlsx.sheetnames
+3 −0 2011/roo_datetime_floatconv.xls.sheetnames
+3 −0 2011/roo_emptysheets.xls.sheetnames
+3 −0 2011/roo_emptysheets.xlsx.sheetnames
+3 −0 2011/roo_false_encoding.xls.sheetnames
+2 −0 2011/roo_file_item_error.xlsx.sheetnames
+3 −0 2011/roo_formula.xls.sheetnames
+3 −0 2011/roo_formula.xlsx.sheetnames
+1 −0 2011/roo_formula_parse_error.xls.sheetnames
+3 −0 2011/roo_formula_string_error.xlsx.sheetnames
+1 −0 2011/roo_link.xls.sheetnames
+1 −0 2011/roo_link.xlsx.sheetnames
+3 −0 2011/roo_matrix.xls.sheetnames
+3 −0 2011/roo_named_cells.xls.sheetnames
+3 −0 2011/roo_named_cells.xlsx.sheetnames
+5 −0 2011/roo_numbers1.xls.sheetnames
+5 −0 2011/roo_numbers1.xlsx.sheetnames
+1 −0 2011/roo_numeric-link.xlsx.sheetnames
+1 −0 2011/roo_only_one_sheet.xls.sheetnames
+1 −0 2011/roo_only_one_sheet.xlsx.sheetnames
+3 −0 2011/roo_paragraph.xls.sheetnames
+3 −0 2011/roo_paragraph.xlsx.sheetnames
+1 −0 2011/roo_prova.xls.sheetnames
+3 −0 2011/roo_simple_spreadsheet.xls.sheetnames
+3 −0 2011/roo_simple_spreadsheet.xlsx.sheetnames
+3 −0 2011/roo_simple_spreadsheet_from_italo.xls.sheetnames
+3 −0 2011/roo_style.xls.sheetnames
+3 −0 2011/roo_style.xlsx.sheetnames
+3 −0 2011/roo_time-test.xls.sheetnames
+3 −0 2011/roo_time-test.xlsx.sheetnames
+0 −0 2011/roo_type_excel.xlsx.sheetnames
+3 −0 2011/roo_type_excelx.xls.sheetnames
+0 −0 2011/roo_type_openoffice.xls.sheetnames
+0 −0 2011/roo_type_openoffice.xlsx.sheetnames
+3 −0 2011/roo_whitespace.xls.sheetnames
+3 −0 2011/roo_whitespace.xlsx.sheetnames
+1 −0 2011/spreadsheet-parsexlsx_Test.xlsx.sheetnames
+4 −0 2011/spreadsheet-parsexlsx_bug-10.xlsx.sheetnames
+1 −0 2011/spreadsheet-parsexlsx_bug-11.xlsx.sheetnames
+1 −0 2011/spreadsheet-parsexlsx_bug-12.xlsx.sheetnames
+3 −0 2011/spreadsheet-parsexlsx_bug-13.xlsx.sheetnames
+1 −0 2011/spreadsheet-parsexlsx_bug-14.xlsx.sheetnames
+4 −0 2011/spreadsheet-parsexlsx_bug-15.xlsx.sheetnames
+3 −0 2011/spreadsheet-parsexlsx_bug-2.xlsx.sheetnames
+1 −0 2011/spreadsheet-parsexlsx_bug-3.xlsx.sheetnames
+3 −0 2011/spreadsheet-parsexlsx_bug-4.xlsx.sheetnames
+1 −0 2011/spreadsheet-parsexlsx_bug-5.xlsx.sheetnames
+9 −0 2011/spreadsheet-parsexlsx_bug-6-2.xlsx.sheetnames
+8 −0 2011/spreadsheet-parsexlsx_bug-6.xlsx.sheetnames
+3 −0 2011/spreadsheet-parsexlsx_bug-7.xlsx.sheetnames
+3 −0 2011/spreadsheet-parsexlsx_bug-8.xlsx.sheetnames
+4 −0 2011/xlrd_merged_cells.xlsx.sheetnames
+4 −0 2011/xlrd_test_comments_excel.xlsx.sheetnames
+1 −0 2011/xlrd_test_comments_gdocs.xlsx.sheetnames
+13 −3 test.sh
31 changes: 17 additions & 14 deletions xlsx.js
Expand Up @@ -420,7 +420,7 @@ SSF.load_table = function(tbl) { for(var i=0; i!=0x0188; ++i) if(tbl[i]) SSF.loa
make_ssf(SSF);
var XLSX = {};
(function(XLSX){
XLSX.version = '0.4.3';
XLSX.version = '0.5.0';
var current_codepage, current_cptable, cptable;
if(typeof module !== "undefined" && typeof require !== 'undefined') {
if(typeof cptable === 'undefined') cptable = require('codepage');
Expand Down Expand Up @@ -932,7 +932,7 @@ var parse_sst_xml = function(data) {
/* 18.4.9 sst CT_Sst */
var sst = data.match(new RegExp("<sst([^>]*)>([\\s\\S]*)<\/sst>","m"));
if(isval(sst)) {
s = sst[2].replace(/<si>/g,"").split(/<\/si>/).map(parse_si).filter(function(x) { return x; });
s = sst[2].replace(/<(?:si|sstItem)>/g,"").split(/<\/(?:si|sstItem)>/).map(parse_si).filter(function(x) { return x; });
sst = parsexmltag(sst[1]); s.Count = sst.count; s.Unique = sst.uniqueCount;
}
return s;
Expand Down Expand Up @@ -1385,12 +1385,7 @@ function parse_worksheet(data) {
var cf = styles.CellXf[cell.s];
if(cf && cf.numFmtId) fmtid = cf.numFmtId;
}
p.raw = p.v;
p.rawt = p.t;
try {
p.v = SSF.format(fmtid,p.v,_ssfopts);
p.t = 'str';
} catch(e) { p.v = p.raw; p.t = p.rawt; }
try { p.w = SSF.format(fmtid,p.v,_ssfopts); } catch(e) { }

s[cell.r] = p;
});
Expand Down Expand Up @@ -1635,7 +1630,11 @@ var CustomWBViewDef = {
xWindow: '0',
yWindow: '0'
};
var XMLNS_WB = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main';
var XMLNS_WB = [
'http://schemas.openxmlformats.org/spreadsheetml/2006/main',
'http://schemas.microsoft.com/office/excel/2006/main',
'http://schemas.microsoft.com/office/excel/2006/2'
];

/* 18.2 Workbook */
function parse_workbook(data) {
Expand Down Expand Up @@ -1739,7 +1738,7 @@ function parse_workbook(data) {
case '</mc:AlternateContent>': pass=false; break;
}
});
if(wb.xmlns !== XMLNS_WB) throw new Error("Unknown Namespace: " + wb.xmlns);
if(XMLNS_WB.indexOf(wb.xmlns) === -1) throw new Error("Unknown Namespace: " + wb.xmlns);

var z;
/* defaults */
Expand Down Expand Up @@ -2656,9 +2655,12 @@ function parseZip(zip) {
if(dir.style) styles = parse_sty(getdata(getzipfile(zip, dir.style.replace(/^\//,''))),dir.style);

var wb = parse_wb(getdata(getzipfile(zip, dir.workbooks[0].replace(/^\//,''))), dir.workbooks[0]);
var propdata = dir.coreprops.length !== 0 ? getdata(getzipfile(zip, dir.coreprops[0].replace(/^\//,''))) : "";
var props = {}, propdata = "";
try {
propdata = dir.coreprops.length !== 0 ? getdata(getzipfile(zip, dir.coreprops[0].replace(/^\//,''))) : "";
propdata += dir.extprops.length !== 0 ? getdata(getzipfile(zip, dir.extprops[0].replace(/^\//,''))) : "";
var props = propdata !== "" ? parseProps(propdata) : {};
props = propdata !== "" ? parseProps(propdata) : {};
} catch(e) { }
var deps = {};
if(dir.calcchain) deps=parseDeps(getdata(getzipfile(zip, dir.calcchain.replace(/^\//,''))));
var sheets = {}, i=0;
Expand Down Expand Up @@ -2755,7 +2757,8 @@ function sheet_to_row_object_array(sheet, opts){
for(R=r.s.r, C = r.s.c; C <= r.e.c; ++C) {
val = sheet[encode_cell({c:C,r:R})];
if(!val) continue;
switch(val.t) {
if(val.w) hdr[C] = val.w;
else switch(val.t) {
case 's': case 'str': hdr[C] = val.v; break;
case 'n': hdr[C] = val.v; break;
}
Expand All @@ -2768,7 +2771,7 @@ function sheet_to_row_object_array(sheet, opts){
for (C = r.s.c; C <= r.e.c; ++C) {
val = sheet[encode_cell({c: C,r: R})];
if(!val || !val.t) continue;
if(typeof val.w !== 'undefined') { row[hdr[C]] = val.w; isempty = false; }
if(typeof val.w !== 'undefined' && !opts.raw) { row[hdr[C]] = val.w; isempty = false; }
else switch(val.t){
case 's': case 'str': case 'b': case 'n':
if(val.v !== undefined) {
Expand Down

0 comments on commit a96d896

Please sign in to comment.