Permalink
Browse files

version bump 0.4.6: mo' tests, mo' problems

- different password record (1)
- updated CFB to version 0.6 (leveraging `find` to replace WORKBOOK patch)
- new tests for xls integrity
- EOF hack implemented properly
- nested files are skipped (have to do something real here at some point)
  • Loading branch information...
1 parent 1594129 commit b337d8c5eae9f1b28c3e1fec120c43713c8c0860 @SheetJSDev SheetJSDev committed Oct 30, 2013
Showing with 72 additions and 29 deletions.
  1. +4 −0 .jshintrc
  2. +1 −1 bits/08_blob.js
  3. +13 −1 bits/18_cfb.js
  4. +12 −10 bits/80_xls.js
  5. +2 −2 package.json
  6. +14 −3 test.js
  7. +26 −12 xls.js
View
@@ -0,0 +1,4 @@
+{
+ "bitwise": false,
+ "curly": false
+}
View
@@ -109,7 +109,7 @@ Array.prototype.utf16le = function(s,e) { var str = ""; for(var i=s; i<e; i+=2)
Array.prototype.utf8 = function(s,e) { var str = ""; for(var i=s; i<e; i++) str += String.fromCharCode(this.readUInt8(i)); return str; }; /* builds a string with one character per byte in [s,e); no multi-byte UTF-8 decoding is performed here */
-Array.prototype.lpstr = function(i) { var len = this.readUInt32LE(i); return this.utf8(i+4,i+4+len-1);};
+Array.prototype.lpstr = function(i) { var len = this.readUInt32LE(i); return len > 0 ? this.utf8(i+4,i+4+len-1) : "";}; /* length-prefixed string: 32-bit LE length at i, data starts at i+4; the final byte is excluded (presumably a NUL terminator -- TODO confirm); a length that is not > 0 yields "" */
Array.prototype.lpwstr = function(i) { var len = 2*this.readUInt32LE(i); return this.utf8(i+4,i+4+len-1);}; /* NOTE(review): the length is doubled (wide characters) but the bytes are still read through utf8 rather than utf16le, and there is no zero-length guard like lpstr -- confirm this is intended */
function bconcat(bufs) { return (typeof Buffer !== 'undefined') ? Buffer.concat(bufs) : [].concat.apply([], bufs); } /* joins the chunks in bufs: Buffer.concat when Buffer is defined, otherwise flattens the arrays with Array concat */
View
@@ -222,6 +222,7 @@ function read_directory(idx) {
}
read_directory(dir_start);
+/* [MS-CFB] 2.6.4 Red-Black Tree */
function build_full_paths(Dir, pathobj, paths, patharr) {
var i;
var dad = new Array(patharr.length);
@@ -256,13 +257,24 @@ build_full_paths(FileIndex, FullPathDir, FullPaths, Paths);
var root_name = Paths.shift();
Paths.root = root_name;
+/* [MS-CFB] 2.6.4 (Unicode 3.0.1 case conversion) */
+function find_path(path) { /* case-insensitive lookup of an entry by bare name or by full path; returns the matching entry, or null when nothing matches */
+ if(path[0] === "/") path = root_name + path; /* a leading "/" is treated as root-relative: prefix the root entry name */
+ var UCNames = (path.indexOf("/") !== -1 ? FullPaths : Paths).map(function(x) { return x.toUpperCase(); }); /* paths containing "/" are compared against FullPaths, bare names against Paths; both sides are upper-cased */
+ var UCPath = path.toUpperCase();
+ var w = UCNames.indexOf(UCPath); /* position of the first upper-cased match, -1 if none */
+ if(w === -1) return null;
+ return path.indexOf("/") !== -1 ? FileIndex[w] : files[Paths[w]]; /* full-path hits index FileIndex directly (assumes FullPaths and FileIndex share indices -- TODO confirm); bare-name hits go through files using the original-case name */
+}
+
var rval = {
raw: {header: header, sectors: sectors},
Paths: Paths,
FileIndex: FileIndex,
FullPaths: FullPaths,
FullPathDir: FullPathDir,
- Directory: files
+ Directory: files,
+ find: find_path
};
for(var name in files) {
View
@@ -13,7 +13,7 @@ function parse_compobj(obj) {
case 0x00000000: break;
case 0xffffffff: case 0xfffffffe: l+=4; break;
default:
- if(m > 0x190) throw "Unsupported Clipboard: " + m;
+ if(m > 0x190) throw "Unsupported Clipboard: " + m.toString(16);
l += m;
}
@@ -27,10 +27,8 @@ function parse_compobj(obj) {
function parse_xlscfb(cfb) {
var CompObj = cfb.Directory['!CompObj'];
var Summary = cfb.Directory['!SummaryInformation'];
-var Workbook = cfb.Directory.Workbook;
-if(!Workbook) Workbook = cfb.Directory.WORKBOOK;
-if(!Workbook) Workbook = cfb.Directory.Book;
-if(!Workbook) Workbook = cfb.Directory.BOOK;
+var Workbook = cfb.find('/Workbook');
+if(!Workbook) Workbook = cfb.find('/Book');
var CompObjP, SummaryP, WorkbookP;
@@ -90,14 +88,16 @@ function parse_workbook(blob) {
var sbc = 0, sbci = 0, sbcli = 0;
supbooks.SheetNames = opts.snames;
supbooks.sharedf = opts.sharedf;
+ var last_Rn = '';
+ var file_depth = 0; /* TODO: make a real stack */
while(blob.l < blob.length - 1) {
var s = blob.l;
var RecordType = read(2);
- if(RecordType === 0) break; /* TODO: can padding occur before EOF ? */
- /* In an effort to save two bytes, implied zero length for EOF */
+ if(RecordType === 0 && last_Rn === 'EOF') break;
var length = (blob.l === blob.length ? 0 : read(2)), y;
var R = RecordEnum[RecordType];
if(R && R.f) {
+ last_Rn = R.n;
if(R.r === 2 || R.r == 12) {
var rt = read(2); length -= 2;
if(!opts.enc && rt !== RecordType) throw "rt mismatch";
@@ -111,7 +111,7 @@ function parse_workbook(blob) {
/* Workbook Options */
case 'Date1904': wb.opts.Date1904 = val; break;
case 'WriteProtect': wb.opts.WriteProtect = true; break;
- case 'FilePass': opts.enc = val; if(XLS.verbose >= 2) console.error(val); break;
+ case 'FilePass': opts.enc = val; if(XLS.verbose >= 2) console.error(val); throw new Error("Password protection unsupported"); break;
case 'WriteAccess': opts.lastuser = val; break;
case 'FileSharing': break; //TODO
case 'CodePage':
@@ -227,14 +227,15 @@ function parse_workbook(blob) {
case 'ExternSheet': supbooks[sbc] = supbooks[sbc].concat(val); sbci += val.length; break;
case 'Protect': out["!protect"] = val; break; /* for sheet or book */
- case 'Password': if(val !== 0) throw new Error("Password protection unsupported: " + val); break;
+ case 'Password': if(val !== 0 && XLS.verbose >= 2) console.error("Password verifier: " + val); break;
case 'Prot4Rev': case 'Prot4RevPass': break; /*TODO: Revision Control*/
case 'BoundSheet8': {
Directory[val.pos] = val;
opts.snames.push(val.name);
} break;
case 'EOF': {
+ if(--file_depth) break;
var nout = {};
if(range.e) {
out["!range"] = range;
@@ -248,6 +249,7 @@ function parse_workbook(blob) {
if(cur_sheet === "") Preamble = nout; else Sheets[cur_sheet] = nout;
} break;
case 'BOF': {
+ if(file_depth++) break;
out = {};
cur_sheet = (Directory[s] || {name:""}).name;
lst.push([R.n, s, val, Directory[s]]);
@@ -367,7 +369,7 @@ function parse_workbook(blob) {
case 'LineFormat': case 'AreaFormat':
case 'Chart': case 'Chart3d': case 'Chart3DBarShape': case 'ChartFormat': case 'ChartFrtInfo': break;
case 'PlotArea': case 'PlotGrowth': break;
- case 'SeriesList': break;
+ case 'SeriesList': case 'SerParent': case 'SerAuxTrend': break;
case 'DataFormat': case 'SerToCrt': case 'FontX': break;
case 'CatSerRange': case 'AxcExt': case 'SerFmt': break;
case 'ShtProps': break;
View
@@ -1,6 +1,6 @@
{
"name": "xlsjs",
- "version": "0.4.5",
+ "version": "0.4.6",
"author": "Niggler",
"description": "(one day) a full-featured XLS parser and writer. For now, primitive parser",
"keywords": [
@@ -11,7 +11,7 @@
},
"main": "./xls",
"dependencies": {
- "cfb":">=0.5.0",
+ "cfb":">=0.6.0",
"codepage":"",
"commander":""
},
View
17 test.js
@@ -1,26 +1,37 @@
/* vim: set ts=2: */
var XLS;
-var fs = require('fs');
+var fs = require('fs'), assert = require('assert');
describe('source', function() { it('should load', function() { XLS = require('./'); }); });
var files = fs.readdirSync('test_files').filter(function(x){return x.substr(-4)==".xls";});
function parsetest(x, wb) { // registers describe/it suites that check workbook wb (parsed from test file x) against fixture files under ./test_files
+ describe(x + ' should have all bits', function() {
+ var sname = './test_files/' + x + '.sheetnames'; // optional fixture holding the expected sheet names
+ it('should have all sheets', function() {
+ wb.SheetNames.forEach(function(y) { assert(wb.Sheets[y], 'bad sheet ' + y); }); // every listed sheet name must have a truthy entry in wb.Sheets
+ });
+ it('should have the right sheet names', fs.existsSync(sname) ? function() { // when the fixture is missing, null is passed as the test body (presumably reported as pending -- TODO confirm)
+ var file = fs.readFileSync(sname, 'utf-8');
+ var names = wb.SheetNames.join("\n") + "\n"; // the fixture is compared against one name per line, each newline-terminated
+ assert.equal(file, names);
+ } : null);
+ });
describe(x + ' should generate correct output', function() {
wb.SheetNames.forEach(function(ws, i) {
var name = ('./test_files/' + x + '.' + i + '.csv'); // per-sheet CSV fixture, addressed by sheet index i
it('#' + i + ' (' + ws + ')', fs.existsSync(name) ? function() {
var file = fs.readFileSync(name, 'utf-8');
var csv = XLS.utils.make_csv(wb.Sheets[ws]);
- if(file.replace(/"/g,"") != csv.replace(/"/g,"")) throw "CSV badness";
+ assert.equal(file.replace(/"/g,""), csv.replace(/"/g,""), "CSV badness"); // double quotes are stripped from both sides before comparing
} : null);
});
});
}
describe('should parse test files', function() {
files.forEach(function(x) {
- it('should parse ' + x, function() {
+ it(x, function() {
var wb = XLS.readFile('./test_files/' + x);
parsetest(x, wb);
});
View
38 xls.js
@@ -147,7 +147,7 @@ Array.prototype.utf16le = function(s,e) { var str = ""; for(var i=s; i<e; i+=2)
Array.prototype.utf8 = function(s,e) { var str = ""; for(var i=s; i<e; i++) str += String.fromCharCode(this.readUInt8(i)); return str; }; /* builds a string with one character per byte in [s,e); no multi-byte UTF-8 decoding is performed here */
-Array.prototype.lpstr = function(i) { var len = this.readUInt32LE(i); return this.utf8(i+4,i+4+len-1);};
+Array.prototype.lpstr = function(i) { var len = this.readUInt32LE(i); return len > 0 ? this.utf8(i+4,i+4+len-1) : "";}; /* length-prefixed string: 32-bit LE length at i, data starts at i+4; the final byte is excluded (presumably a NUL terminator -- TODO confirm); a length that is not > 0 yields "" */
Array.prototype.lpwstr = function(i) { var len = 2*this.readUInt32LE(i); return this.utf8(i+4,i+4+len-1);}; /* NOTE(review): the length is doubled (wide characters) but the bytes are still read through utf8 rather than utf16le, and there is no zero-length guard like lpstr -- confirm this is intended */
function bconcat(bufs) { return (typeof Buffer !== 'undefined') ? Buffer.concat(bufs) : [].concat.apply([], bufs); } /* joins the chunks in bufs: Buffer.concat when Buffer is defined, otherwise flattens the arrays with Array concat */
@@ -1030,6 +1030,7 @@ function read_directory(idx) {
}
read_directory(dir_start);
+/* [MS-CFB] 2.6.4 Red-Black Tree */
function build_full_paths(Dir, pathobj, paths, patharr) {
var i;
var dad = new Array(patharr.length);
@@ -1064,13 +1065,24 @@ build_full_paths(FileIndex, FullPathDir, FullPaths, Paths);
var root_name = Paths.shift();
Paths.root = root_name;
+/* [MS-CFB] 2.6.4 (Unicode 3.0.1 case conversion) */
+function find_path(path) { /* case-insensitive lookup of an entry by bare name or by full path; returns the matching entry, or null when nothing matches */
+ if(path[0] === "/") path = root_name + path; /* a leading "/" is treated as root-relative: prefix the root entry name */
+ var UCNames = (path.indexOf("/") !== -1 ? FullPaths : Paths).map(function(x) { return x.toUpperCase(); }); /* paths containing "/" are compared against FullPaths, bare names against Paths; both sides are upper-cased */
+ var UCPath = path.toUpperCase();
+ var w = UCNames.indexOf(UCPath); /* position of the first upper-cased match, -1 if none */
+ if(w === -1) return null;
+ return path.indexOf("/") !== -1 ? FileIndex[w] : files[Paths[w]]; /* full-path hits index FileIndex directly (assumes FullPaths and FileIndex share indices -- TODO confirm); bare-name hits go through files using the original-case name */
+}
+
var rval = {
raw: {header: header, sectors: sectors},
Paths: Paths,
FileIndex: FileIndex,
FullPaths: FullPaths,
FullPathDir: FullPathDir,
- Directory: files
+ Directory: files,
+ find: find_path
};
for(var name in files) {
@@ -4222,7 +4234,7 @@ function parse_compobj(obj) {
case 0x00000000: break;
case 0xffffffff: case 0xfffffffe: l+=4; break;
default:
- if(m > 0x190) throw "Unsupported Clipboard: " + m;
+ if(m > 0x190) throw "Unsupported Clipboard: " + m.toString(16);
l += m;
}
@@ -4236,10 +4248,8 @@ function parse_compobj(obj) {
function parse_xlscfb(cfb) {
var CompObj = cfb.Directory['!CompObj'];
var Summary = cfb.Directory['!SummaryInformation'];
-var Workbook = cfb.Directory.Workbook;
-if(!Workbook) Workbook = cfb.Directory.WORKBOOK;
-if(!Workbook) Workbook = cfb.Directory.Book;
-if(!Workbook) Workbook = cfb.Directory.BOOK;
+var Workbook = cfb.find('/Workbook');
+if(!Workbook) Workbook = cfb.find('/Book');
var CompObjP, SummaryP, WorkbookP;
@@ -4299,14 +4309,16 @@ function parse_workbook(blob) {
var sbc = 0, sbci = 0, sbcli = 0;
supbooks.SheetNames = opts.snames;
supbooks.sharedf = opts.sharedf;
+ var last_Rn = '';
+ var file_depth = 0; /* TODO: make a real stack */
while(blob.l < blob.length - 1) {
var s = blob.l;
var RecordType = read(2);
- if(RecordType === 0) break; /* TODO: can padding occur before EOF ? */
- /* In an effort to save two bytes, implied zero length for EOF */
+ if(RecordType === 0 && last_Rn === 'EOF') break;
var length = (blob.l === blob.length ? 0 : read(2)), y;
var R = RecordEnum[RecordType];
if(R && R.f) {
+ last_Rn = R.n;
if(R.r === 2 || R.r == 12) {
var rt = read(2); length -= 2;
if(!opts.enc && rt !== RecordType) throw "rt mismatch";
@@ -4320,7 +4332,7 @@ function parse_workbook(blob) {
/* Workbook Options */
case 'Date1904': wb.opts.Date1904 = val; break;
case 'WriteProtect': wb.opts.WriteProtect = true; break;
- case 'FilePass': opts.enc = val; if(XLS.verbose >= 2) console.error(val); break;
+ case 'FilePass': opts.enc = val; if(XLS.verbose >= 2) console.error(val); throw new Error("Password protection unsupported"); break;
case 'WriteAccess': opts.lastuser = val; break;
case 'FileSharing': break; //TODO
case 'CodePage':
@@ -4436,14 +4448,15 @@ function parse_workbook(blob) {
case 'ExternSheet': supbooks[sbc] = supbooks[sbc].concat(val); sbci += val.length; break;
case 'Protect': out["!protect"] = val; break; /* for sheet or book */
- case 'Password': if(val !== 0) throw new Error("Password protection unsupported: " + val); break;
+ case 'Password': if(val !== 0 && XLS.verbose >= 2) console.error("Password verifier: " + val); break;
case 'Prot4Rev': case 'Prot4RevPass': break; /*TODO: Revision Control*/
case 'BoundSheet8': {
Directory[val.pos] = val;
opts.snames.push(val.name);
} break;
case 'EOF': {
+ if(--file_depth) break;
var nout = {};
if(range.e) {
out["!range"] = range;
@@ -4457,6 +4470,7 @@ function parse_workbook(blob) {
if(cur_sheet === "") Preamble = nout; else Sheets[cur_sheet] = nout;
} break;
case 'BOF': {
+ if(file_depth++) break;
out = {};
cur_sheet = (Directory[s] || {name:""}).name;
lst.push([R.n, s, val, Directory[s]]);
@@ -4576,7 +4590,7 @@ function parse_workbook(blob) {
case 'LineFormat': case 'AreaFormat':
case 'Chart': case 'Chart3d': case 'Chart3DBarShape': case 'ChartFormat': case 'ChartFrtInfo': break;
case 'PlotArea': case 'PlotGrowth': break;
- case 'SeriesList': break;
+ case 'SeriesList': case 'SerParent': case 'SerAuxTrend': break;
case 'DataFormat': case 'SerToCrt': case 'FontX': break;
case 'CatSerRange': case 'AxcExt': case 'SerFmt': break;
case 'ShtProps': break;

0 comments on commit b337d8c

Please sign in to comment.