diff --git a/src/84from.js b/src/84from.js index 3eb6dea3c1..2345a46df5 100755 --- a/src/84from.js +++ b/src/84from.js @@ -440,6 +440,71 @@ function XLSXLSX(X, filename, opts, cb, idx, query) { return text; } } + + function processSheet(workbook, sheetid, sheetOpt) { + var range; + var sheetRes = []; + if (typeof sheetOpt.range === 'undefined') { + range = workbook.Sheets[sheetid]['!ref']; + } else { + range = sheetOpt.range; + if (workbook.Sheets[sheetid][range]) { + range = workbook.Sheets[sheetid][range]; + } + } + // if range has some value then data is present in the current sheet + // else current sheet is empty + if (range) { + var rg = range.split(':'); + var col0 = rg[0].match(/[A-Z]+/)[0]; + var row0 = +rg[0].match(/[0-9]+/)[0]; + var col1 = rg[1].match(/[A-Z]+/)[0]; + var row1 = +rg[1].match(/[0-9]+/)[0]; + + var hh = {}; + var xlscnCol0 = alasql.utils.xlscn(col0); + var xlscnCol1 = alasql.utils.xlscn(col1); + for (var j = xlscnCol0; j <= xlscnCol1; j++) { + var col = alasql.utils.xlsnc(j); + if (sheetOpt.headers) { + if (workbook.Sheets[sheetid][col + '' + row0]) { + hh[col] = getHeaderText(workbook.Sheets[sheetid][col + '' + row0].v); + } else { + hh[col] = getHeaderText(col); + } + } else { + hh[col] = col; + } + } + if (sheetOpt.headers) { + row0++; + } + for (var i = row0; i <= row1; i++) { + var row = {}; + for (var j = xlscnCol0; j <= xlscnCol1; j++) { + var col = alasql.utils.xlsnc(j); + if (workbook.Sheets[sheetid][col + '' + i]) { + row[hh[col]] = workbook.Sheets[sheetid][col + '' + i].v; + } + } + sheetRes.push(row); + } + } else { + sheetRes.push([]); + } + + // Remove last empty line (issue #548) + if ( + sheetRes.length > 0 && + sheetRes[sheetRes.length - 1] && + Object.keys(sheetRes[sheetRes.length - 1]).length == 0 + ) { + sheetRes.pop(); + } + + return sheetRes; + } + filename = alasql.utils.autoExtFilename(filename, 'xls', opts); alasql.utils.loadBinaryFile( filename, @@ -460,71 +525,44 @@ function XLSXLSX(X, filename, opts, cb, idx, query) { ...opts, }); } - // console.log(workbook); - var sheetid; - if (typeof opt.sheetid === 'undefined') { - sheetid = workbook.SheetNames[0]; - } else if (typeof opt.sheetid === 'number') { - sheetid = workbook.SheetNames[opt.sheetid]; - } else { - sheetid = opt.sheetid; - } - var range; - var res = []; - if (typeof opt.range === 'undefined') { - range = workbook.Sheets[sheetid]['!ref']; - } else { - range = opt.range; - if (workbook.Sheets[sheetid][range]) { - range = workbook.Sheets[sheetid][range]; - } - } - // if range has some value then data is present in the current sheet - // else current sheet is empty - if (range) { - var rg = range.split(':'); - var col0 = rg[0].match(/[A-Z]+/)[0]; - var row0 = +rg[0].match(/[0-9]+/)[0]; - var col1 = rg[1].match(/[A-Z]+/)[0]; - var row1 = +rg[1].match(/[0-9]+/)[0]; - // console.log(114,rg,col0,col1,row0,row1); - // console.log(114,rg,alasql.utils.xlscn(col0),alasql.utils.xlscn(col1)); - - var hh = {}; - var xlscnCol0 = alasql.utils.xlscn(col0); - var xlscnCol1 = alasql.utils.xlscn(col1); - for (var j = xlscnCol0; j <= xlscnCol1; j++) { - var col = alasql.utils.xlsnc(j); - if (opt.headers) { - if (workbook.Sheets[sheetid][col + '' + row0]) { - hh[col] = getHeaderText(workbook.Sheets[sheetid][col + '' + row0].v); - } else { - hh[col] = getHeaderText(col); - } - } else { - hh[col] = col; - } - } - if (opt.headers) { - row0++; - } - for (var i = row0; i <= row1; i++) { - var row = {}; - for (var j = xlscnCol0; j <= xlscnCol1; j++) { - var col = alasql.utils.xlsnc(j); - if (workbook.Sheets[sheetid][col + '' + i]) { - row[hh[col]] = workbook.Sheets[sheetid][col + '' + i].v; + + // Check if we should process multiple sheets + var shouldProcessMultipleSheets = + opt.sheetid === '*' || (Array.isArray(opt.sheetid) && opt.sheetid.length > 0); + + if (shouldProcessMultipleSheets) { + // Process multiple sheets and combine into a single array + res = []; + var sheetsToProcess = opt.sheetid === '*' ? workbook.SheetNames : opt.sheetid; + + for (var s = 0; s < sheetsToProcess.length; s++) { + var currentSheetId = + opt.sheetid === '*' + ? sheetsToProcess[s] + : typeof sheetsToProcess[s] === 'number' + ? workbook.SheetNames[sheetsToProcess[s]] + : sheetsToProcess[s]; + + if (workbook.Sheets[currentSheetId]) { + var sheetData = processSheet(workbook, currentSheetId, opt); + // Add sheet name to each row + for (var r = 0; r < sheetData.length; r++) { + sheetData[r]._sheet = currentSheetId; } + res = res.concat(sheetData); } - res.push(row); } } else { - res.push([]); - } - - // Remove last empty line (issue #548) - if (res.length > 0 && res[res.length - 1] && Object.keys(res[res.length - 1]).length == 0) { - res.pop(); + // Process single sheet (original behavior) + var sheetid; + if (typeof opt.sheetid === 'undefined') { + sheetid = workbook.SheetNames[0]; + } else if (typeof opt.sheetid === 'number') { + sheetid = workbook.SheetNames[opt.sheetid]; + } else { + sheetid = opt.sheetid; + } + res = processSheet(workbook, sheetid, opt); } if (cb) { diff --git a/test/test848.js b/test/test848.js new file mode 100644 index 0000000000..24e5180b29 --- /dev/null +++ b/test/test848.js @@ -0,0 +1,136 @@ +if (typeof exports === 'object') { + var assert = require('assert'); + var alasql = require('..'); + var path = require('path'); + var dirname = path.normalize(__dirname) + '/'; +} else { + var dirname = './'; +} + +describe('Test 848 - Multi-sheet XLSX import', function () { + it('1. Import all sheets using sheetid: "*"', function (done) { + alasql( + 'select * from xlsx("' + dirname + 'test848.xlsx", {headers:true, sheetid:"*"})', + [], + function (data) { + // Should return a flat array with _sheet property on each row + assert(Array.isArray(data)); + assert(data.length > 0); + // Check that _sheet property exists + assert(data[0]._sheet); + // Should have rows from both sheets + var sheet1Rows = data.filter(function (row) { + return row._sheet === 'Sheet1'; + }); + var sheet2Rows = data.filter(function (row) { + return row._sheet === 'Sheet2'; + }); + assert(sheet1Rows.length === 3); + assert(sheet2Rows.length === 4); + done(); + } + ); + }); + + it('2. Import multiple specific sheets using sheetid array', function (done) { + alasql( + 'select * from xlsx("' + dirname + 'test848.xlsx", {headers:true, sheetid:?})', + [['Sheet1', 'Sheet2']], + function (data) { + // Should return a flat array with _sheet property + assert(Array.isArray(data)); + assert(data.length === 7); // 3 + 4 rows + assert(data[0]._sheet); + done(); + } + ); + }); + + it('3. Import specific sheets by index using sheetid array', function (done) { + alasql( + 'select * from xlsx("' + dirname + 'test848.xlsx", {headers:true, sheetid:?})', + [[0, 1]], + function (data) { + // Should return a flat array with _sheet property + assert(Array.isArray(data)); + assert(data.length === 7); // 3 + 4 rows + assert(data[0]._sheet === 'Sheet1'); + done(); + } + ); + }); + + it('4. Original single sheet behavior should still work', function (done) { + alasql( + 'select * from xlsx("' + dirname + 'test848.xlsx", {headers:true})', + [], + function (data) { + // Should return an array (original behavior) + assert(Array.isArray(data)); + assert(data.length === 3); + // Should not have _sheet property + assert(!data[0]._sheet); + done(); + } + ); + }); + + it('5. Original single sheet with explicit sheetid should still work', function (done) { + alasql( + 'select * from xlsx("' + dirname + 'test848.xlsx", {headers:true, sheetid:"Sheet2"})', + [], + function (data) { + // Should return an array (original behavior) + assert(Array.isArray(data)); + assert(data.length === 4); + assert(data[3].five === 800); + // Should not have _sheet property + assert(!data[0]._sheet); + done(); + } + ); + }); + + it('6. Query rows from specific sheet using WHERE clause', function (done) { + alasql( + 'select * from xlsx("' + + dirname + + 'test848.xlsx", {headers:true, sheetid:"*"}) WHERE _sheet = "Sheet2"', + [], + function (data) { + // Should only return rows from Sheet2 + assert(Array.isArray(data)); + assert(data.length === 4); + assert( + data.every(function (row) { + return row._sheet === 'Sheet2'; + }) + ); + done(); + } + ); + }); + + it('7. Count rows per sheet', function (done) { + // First get the data, then query it + alasql( + 'SELECT * FROM xlsx("' + dirname + 'test848.xlsx", {headers:true, sheetid:"*"})', + [], + function (allData) { + // Now count using a separate query + var counts = alasql('SELECT [_sheet], COUNT(*) FROM ? GROUP BY [_sheet]', [allData]); + assert(Array.isArray(counts)); + assert(counts.length === 2); + var sheet1 = counts.find(function (row) { + return row._sheet === 'Sheet1'; + }); + var sheet2 = counts.find(function (row) { + return row._sheet === 'Sheet2'; + }); + assert(sheet1['COUNT(*)'] === 3); + assert(sheet2['COUNT(*)'] === 4); + done(); + } + ); + }); +}); diff --git a/test/test848.xlsx b/test/test848.xlsx new file mode 100644 index 0000000000..44aea2dc86 Binary files /dev/null and b/test/test848.xlsx differ