Skip to content

Commit

Permalink
Strip quoted delims in file type magic 0x3C
Browse files Browse the repository at this point in the history
  • Loading branch information
ZaheerUdDeen committed Dec 5, 2021
1 parent fcf9182 commit 30c03a2
Show file tree
Hide file tree
Showing 11 changed files with 67 additions and 10 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Expand Up @@ -15,14 +15,15 @@ tmp
*.[pP][dD][fF]
*.[sS][lL][kK]
*.socialcalc
*.[xX][lL][sSwWcCaAtTmM]
*.[xX][lL][sSwWcCaAtTmMrR]
*.[xX][lL][sSaAtT][xXmMbB]
*.[oO][dD][sS]
*.[fF][oO][dD][sS]
*.[xX][mM][lL]
*.[uU][oO][sS]
*.[wW][kKqQbB][S1234567890]
*.[qQ][pP][wW]
*.[fF][mM][3tT]
*.[bB][iI][fF][fF][23458]
*.[rR][tT][fF]
*.[eE][tT][hH]
Expand Down
9 changes: 9 additions & 0 deletions .spelling
Expand Up @@ -59,6 +59,15 @@ webpack
weex

# Other terms
1.x
2.x
3.x
4.x
5.x
6.x
7.x
8.x
9.x
ActiveX
APIs
ArrayBuffer
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Expand Up @@ -73,7 +73,7 @@ DISTHDR=misc/suppress_export.js
.PHONY: dist
dist: dist-deps $(TARGET) bower.json ## Prepare JS files for distribution
mkdir -p dist
<$(TARGET) sed "s/require('stream')/{}/g;s/require('....*')/undefined/g" > dist/$(TARGET)
<$(TARGET) sed "s/require('....*')/undefined/g" > dist/$(TARGET)
cp LICENSE dist/
uglifyjs shim.js $(UGLIFYOPTS) -o dist/shim.min.js --preamble "$$(head -n 1 bits/00_header.js)"
uglifyjs $(DISTHDR) dist/$(TARGET) $(UGLIFYOPTS) -o dist/$(LIB).min.js --source-map dist/$(LIB).min.map --preamble "$$(head -n 1 bits/00_header.js)"
Expand Down
2 changes: 1 addition & 1 deletion bin/xlsx.njs
Expand Up @@ -5,7 +5,7 @@
var n = "xlsx";
var X = require('../');
try { X = require('../xlsx.flow'); } catch(e) {}
require('exit-on-epipe');
try { require('exit-on-epipe'); } catch(e) {}
var fs = require('fs'), program;
try { program = require('commander'); } catch(e) {
[
Expand Down
13 changes: 11 additions & 2 deletions bits/41_lotus.js
Expand Up @@ -34,6 +34,12 @@ var WK_ = /*#__PURE__*/ (function() {
var refguess = {s: {r:0, c:0}, e: {r:0, c:0} };
var sheetRows = o.sheetRows || 0;

if(d[2] == 0x00) {
if(d[3] == 0x08 || d[3] == 0x09) {
if(d.length >= 16 && d[14] == 0x05 && d[15] === 0x6c) throw new Error("Unsupported Works 3 for Mac file");
}
}

if(d[2] == 0x02) {
o.Enum = WK1Enum;
lotushopper(d, function(val, R, RT) { switch(RT) {
Expand Down Expand Up @@ -126,7 +132,8 @@ var WK_ = /*#__PURE__*/ (function() {

write_biff_rec(ba, 0x00, write_BOF_WK1(0x0406));
write_biff_rec(ba, 0x06, write_RANGE(range));
for(var R = range.s.r; R <= range.e.r; ++R) {
var max_R = Math.min(range.e.r, 8191);
for(var R = range.s.r; R <= max_R; ++R) {
var rr = encode_row(R);
for(var C = range.s.c; C <= range.e.c; ++C) {
if(R === range.s.r) cols[C] = encode_col(C);
Expand Down Expand Up @@ -165,7 +172,8 @@ var WK_ = /*#__PURE__*/ (function() {
var range = safe_decode_range(ws["!ref"]);
var dense = Array.isArray(ws);
var cols = [];
for(var R = range.s.r; R <= range.e.r; ++R) {
var max_R = Math.min(range.e.r, 8191);
for(var R = range.s.r; R <= max_R; ++R) {
var rr = encode_row(R);
for(var C = range.s.c; C <= range.e.c; ++C) {
if(R === range.s.r) cols[C] = encode_col(C);
Expand Down Expand Up @@ -211,6 +219,7 @@ var WK_ = /*#__PURE__*/ (function() {
if(rows < range.e.r) rows = range.e.r;
if(cols < range.e.c) cols = range.e.c;
}
if(rows > 8191) rows = 8191;
out.write_shift(2, rows);
out.write_shift(1, wscnt);
out.write_shift(1, cols);
Expand Down
1 change: 1 addition & 0 deletions bits/75_xlml.js
Expand Up @@ -183,6 +183,7 @@ function parse_xlml_xml(d, _opts)/*:Workbook*/ {
else str = utf8read(str);
}
var opening = str.slice(0, 1024).toLowerCase(), ishtml = false;
opening = opening.replace(/".*?"/g, "");
if((opening.indexOf(">") & 1023) > Math.min((opening.indexOf(",") & 1023), (opening.indexOf(";")&1023))) { var _o = dup(opts); _o.type = "string"; return PRN.to_workbook(str, _o); }
if(opening.indexOf("<?xml") == -1) ["html", "table", "head", "meta", "script", "style", "div"].forEach(function(tag) { if(opening.indexOf("<" + tag) >= 0) ishtml = true; });
if(ishtml) return HTML_.to_workbook(str, opts);
Expand Down
2 changes: 2 additions & 0 deletions bits/76_xls.js
Expand Up @@ -950,6 +950,8 @@ else/*:: if(cfb instanceof CFBContainer) */ {
else if((_data=CFB.find(cfb, 'PerfectOffice_MAIN')) && _data.content) WorkbookP = WK_.to_workbook(_data.content, (options.type = T, options));
/* Quattro Pro 9 */
else if((_data=CFB.find(cfb, 'NativeContent_MAIN')) && _data.content) WorkbookP = WK_.to_workbook(_data.content, (options.type = T, options));
/* Works 4 for Mac */
else if((_data=CFB.find(cfb, 'MN0')) && _data.content) throw new Error("Unsupported Works 4 for Mac file");
else throw new Error("Cannot find Workbook stream");
if(options.bookVBA && cfb.FullPaths && CFB.find(cfb, '/_VBA_PROJECT_CUR/VBA/dir')) WorkbookP.vbaraw = make_vba_xls(cfb);
}
Expand Down
14 changes: 11 additions & 3 deletions bits/87_read.js
Expand Up @@ -91,12 +91,20 @@ function readSync(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
case 0x54: if(n[1] === 0x41 && n[2] === 0x42 && n[3] === 0x4C) return DIF.to_workbook(d, o); break;
case 0x50: return (n[1] === 0x4B && n[2] < 0x09 && n[3] < 0x09) ? read_zip(d, o) : read_prn(data, d, o, str);
case 0xEF: return n[3] === 0x3C ? parse_xlml(d, o) : read_prn(data, d, o, str);
case 0xFF: if(n[1] === 0xFE) { return read_utf16(d, o); } break;
case 0x00: if(n[1] === 0x00 && n[2] >= 0x02 && n[3] === 0x00) return WK_.to_workbook(d, o); break;
case 0xFF:
if(n[1] === 0xFE) { return read_utf16(d, o); }
else if(n[1] === 0x00 && n[2] === 0x02 && n[3] === 0x00) return WK_.to_workbook(d, o);
break;
case 0x00:
if(n[1] === 0x00) {
if(n[2] >= 0x02 && n[3] === 0x00) return WK_.to_workbook(d, o);
if(n[2] === 0x00 && (n[3] === 0x08 || n[3] === 0x09)) return WK_.to_workbook(d, o);
}
break;
case 0x03: case 0x83: case 0x8B: case 0x8C: return DBF.to_workbook(d, o);
case 0x7B: if(n[1] === 0x5C && n[2] === 0x72 && n[3] === 0x74) return RTF.to_workbook(d, o); break;
case 0x0A: case 0x0D: case 0x20: return read_plaintext_raw(d, o);
case 0x89: if(n[1] === 0x50 && n[2] === 0x4E && n[3] === 0x47) throw new Error("PNG Image File is not a spreadsheet"); break;
case 0x89: if(n[1] === 0x50 && n[2] === 0x4E && n[3] === 0x47) throw new Error("PNG Image File is not a spreadsheet"); break;
}
if(DBF.versions.indexOf(n[0]) > -1 && n[2] <= 12 && n[3] <= 31) return DBF.to_workbook(d, o);
return read_prn(data, d, o, str);
Expand Down
2 changes: 1 addition & 1 deletion bits/99_footer.js
Expand Up @@ -6,6 +6,6 @@ else if(typeof module !== 'undefined' && module.exports) make_xlsx_lib(module.ex
else if(typeof define === 'function' && define.amd) define('xlsx', function() { if(!XLSX.version) make_xlsx_lib(XLSX); return XLSX; });
else make_xlsx_lib(XLSX);
/* NOTE: the following extra line is needed for "Lightning Locker Service" */
if(typeof window !== 'undefined' && !window.XLSX) window.XLSX = XLSX;
if(typeof window !== 'undefined' && !window.XLSX) try { window.XLSX = XLSX; } catch(e) {}
/*exported XLS, ODS */
var XLS = XLSX, ODS = XLSX;
4 changes: 3 additions & 1 deletion docbits/80_parseopts.md
Expand Up @@ -85,7 +85,7 @@ file but Excel will know how to handle it. This library applies similar logic:

| Byte 0 | Raw File Type | Spreadsheet Types |
|:-------|:--------------|:----------------------------------------------------|
| `0xD0` | CFB Container | BIFF 5/8 or password-protected XLSX/XLSB or WQ3/QPW |
| `0xD0` | CFB Container | BIFF 5/8 or protected XLSX/XLSB or WQ3/QPW or XLR |
| `0x09` | BIFF Stream | BIFF 2/3/4/5 |
| `0x3C` | XML/HTML | SpreadsheetML / Flat ODS / UOS1 / HTML / plain text |
| `0x50` | ZIP Archive | XLSB or XLSX/M or ODS or UOS2 or plain text |
Expand All @@ -102,6 +102,8 @@ file but Excel will know how to handle it. This library applies similar logic:
DBF files are detected based on the first byte as well as the third and fourth
bytes (corresponding to the month and day of the file date).

Works for Windows files are detected based on the BOF record with type `0xFF`.

Plain text format guessing follows the priority order:

| Format | Test |
Expand Down
25 changes: 25 additions & 0 deletions docbits/85_filetype.md
Expand Up @@ -27,6 +27,8 @@ Despite the library name `xlsx`, it supports numerous spreadsheet file formats:
| Lotus 1-2-3 (WK1/WK3) |||
| Lotus 1-2-3 (WKS/WK2/WK4/123) || |
| Quattro Pro Spreadsheet (WQ1/WQ2/WB1/WB2/WB3/QPW) || |
| Works 1.x-3.x DOS / 2.x-5.x Windows Spreadsheet (WKS) || |
| Works 6.x-9.x Spreadsheet (XLR) || |
| **Other Common Spreadsheet Output Formats** |:-----:|:-----:|
| HTML Tables |||
| Rich Text Format tables (RTF) | ||
Expand All @@ -44,6 +46,8 @@ range limits will be silently truncated:
| Excel 4.0 (XLS BIFF4) | IV16384 | 256 | 16384 |
| Excel 3.0 (XLS BIFF3) | IV16384 | 256 | 16384 |
| Excel 2.0/2.1 (XLS BIFF2) | IV16384 | 256 | 16384 |
| Lotus 1-2-3 R2-R5 (WK1/WK3/WK4) | IV8192 | 256 | 8192 |
| Lotus 1-2-3 R1 (WKS) | IV2048 | 256 | 2048 |

Excel 2003 SpreadsheetML range limits are governed by the version of Excel and
are not enforced by the writer.
Expand Down Expand Up @@ -180,6 +184,27 @@ BIFF8 XLS.

</details>

#### Works for DOS / Windows Spreadsheet (WKS/XLR)

<details>
<summary>(click to show)</summary>

All versions of Works were limited to a single worksheet.

Works for DOS 1.x - 3.x and Works for Windows 2.x extend the Lotus WKS format
with additional record types.

Works for Windows 3.x - 5.x uses the same format and WKS extension. The BOF
record has type `0xFF`.

Works for Windows 6.x - 9.x uses the XLR format. XLR is nearly identical to
BIFF8 XLS: it uses the CFB container with a Workbook stream. Works 9 saves the
exact same Workbook stream for both the XLR and the 97-2003 XLS export. Works 6
XLS includes two empty worksheets but the main worksheet has an identical
encoding. XLR also includes a `WksSSWorkBook` stream similar to Lotus FM3/FMT
files.

</details>

#### OpenDocument Spreadsheet (ODS/FODS)

<details>
Expand Down

0 comments on commit 30c03a2

Please sign in to comment.