Skip to content

Commit

Permalink
fix(osx): specials characters osx handling
Browse files Browse the repository at this point in the history
  • Loading branch information
avallete committed Jul 20, 2022
1 parent d6f0952 commit a68d765
Show file tree
Hide file tree
Showing 6 changed files with 52 additions and 1 deletion.
34 changes: 33 additions & 1 deletion index.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,37 @@ const path = require('path')
const { promisify } = require('util')
const stream = require('stream')
const yauzl = require('yauzl')
const chardet = require('chardet')

const openZip = promisify(yauzl.open)
const pipeline = promisify(stream.pipeline)

// Code page 437 lookup table: the character at index N is the Unicode
// equivalent of CP437 byte N, so the string must hold exactly 256 characters.
// Bytes 0xA5 and 0xA6 are 'Ñ' and 'ª'; the last entry (0xFF) is a no-break
// space, written as an escape so it cannot be confused with a regular space.
const cp437 = '\u0000☺☻♥♦♣♠•◘○◙♂♀♪♫☼►◄↕‼¶§▬↨↑↓→←∟↔▲▼ !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~⌂ÇüéâäàåçêëèïîìÄÅÉæÆôöòûùÿÖÜ¢£¥₧ƒáíóúñÑªº¿⌐¬½¼¡«»░▒▓│┤╡╢╖╕╣║╗╝╜╛┐└┴┬├─┼╞╟╚╔╩╦╠═╬╧╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀αßΓπΣσµτΦΘΩδ∞φε∩≡±≥≤⌠⌡÷≈°∙·√ⁿ²■\u00A0'
/**
 * Decode a slice of a raw zip metadata buffer (entry name or comment) to a string.
 *
 * The slice is decoded as UTF-8 when the entry's UTF-8 flag is set, or when
 * chardet reports UTF-8 as its top match with a confidence above 90.
 * Otherwise each byte is mapped through the cp437 lookup table.
 *
 * @param {Buffer} buffer - raw bytes to decode
 * @param {number} start - index of the first byte to decode (inclusive)
 * @param {number} end - index just past the last byte to decode (exclusive)
 * @param {boolean} entryUtf8Flag - whether the entry declares its strings as UTF-8
 * @returns {string} the decoded text
 */
function decodeBuffer (buffer, start, end, entryUtf8Flag) {
  // Work on the requested window only, so that detection and decoding agree
  // on the bytes they look at. subarray() clamps out-of-range bounds.
  const bytes = buffer.subarray(start, end)

  let chardetIsUtf8 = false
  // Detection is only needed when the entry itself does not claim UTF-8.
  if (!entryUtf8Flag) {
    const candidates = chardet.analyse(bytes)
    if (candidates.length > 0) {
      const best = candidates[0]
      debug(`analyzed encoding via chardet is ${best.name} confidence ${best.confidence}`)
      // Archives created on macOS do not carry the UTF-8 encoding flag, which makes
      // the classic yauzl encoding detection fail. That is why chardet is consulted;
      // when it does not report UTF-8 we fall back to yauzl's default encoding (cp437).
      chardetIsUtf8 = best.name === 'UTF-8' && best.confidence > 90
    }
  }

  if (entryUtf8Flag || chardetIsUtf8) {
    debug('decoding buffer as utf8')
    return bytes.toString('utf8')
  }

  debug('decoding buffer as cp437')
  let result = ''
  for (const byte of bytes) {
    result += cp437[byte]
  }
  return result
}

class Extractor {
constructor (zipPath, opts) {
this.zipPath = zipPath
Expand All @@ -19,7 +46,7 @@ class Extractor {
async extract () {
debug('opening', this.zipPath, 'with opts', this.opts)

this.zipfile = await openZip(this.zipPath, { lazyEntries: true })
this.zipfile = await openZip(this.zipPath, { lazyEntries: true, decodeStrings: false })
this.canceled = false

return new Promise((resolve, reject) => {
Expand All @@ -43,6 +70,11 @@ class Extractor {
return
}

// We need to decode the entry name manually, because yauzl fails to decode it properly for archives created on
// macOS whose entry names contain special characters.
const entryUtf8Flag = (entry.generalPurposeBitFlag & 0x800) !== 0
entry.fileName = decodeBuffer(entry.fileName, 0, entry.fileNameLength, entryUtf8Flag)
entry.comment = decodeBuffer(entry.comment, 0, entry.fileCommentLength, entryUtf8Flag)
debug('zipfile entry', entry.fileName)

if (entry.fileName.startsWith('__MACOSX/')) {
Expand Down
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
"dependencies": {
"debug": "^4.1.1",
"get-stream": "^5.1.0",
"chardet": "1.4.0",
"yauzl": "^2.10.0"
},
"optionalDependencies": {
Expand Down
Binary file added test/accents_osx_native.zip
Binary file not shown.
Binary file added test/accents_windows_7zip.zip
Binary file not shown.
Binary file added test/accents_windows_winrar.zip
Binary file not shown.
18 changes: 18 additions & 0 deletions test/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ const subdirZip = path.join(__dirname, 'file-in-subdir-without-subdir-entry.zip'
const symlinkDestZip = path.join(__dirname, 'symlink-dest.zip')
const symlinkZip = path.join(__dirname, 'symlink.zip')
const brokenZip = path.join(__dirname, 'broken.zip')
const accentsWindowsWinrarZip = path.join(__dirname, 'accents_windows_winrar.zip')
const accentsWindows7Zip = path.join(__dirname, 'accents_windows_7zip.zip')
const accentsOsxNative = path.join(__dirname, 'accents_osx_native.zip')

const relativeTarget = './cats'

Expand Down Expand Up @@ -45,6 +48,21 @@ test('files', async t => {
await pathExists(t, path.join(dirPath, 'cats', 'gJqEYBs.jpg'), 'file created')
})

// Extracts the WinRAR fixture and checks that the accented entry name
// survives decoding (fixture origin inferred from its file name).
test('windows winrar accents', async (t) => {
  const extractedDir = await tempExtract(t, 'windows_accents_winrar', accentsWindowsWinrarZip)
  const accentedPath = path.join(extractedDir, 'Archive', 'àâæçéèêëïîôœùûüÿ')
  await pathExists(t, accentedPath, 'directory created')
})

// Extracts the 7-Zip fixture and checks that the accented entry name
// survives decoding (fixture origin inferred from its file name).
test('windows 7zip accents', async t => {
  // Label spelled 'windows_…' to match the sibling accent tests.
  const dirPath = await tempExtract(t, 'windows_accents_7zip', accentsWindows7Zip)
  await pathExists(t, path.join(dirPath, 'Archive', 'àâæçéèêëïîôœùûüÿ'), 'directory created')
})

// Extracts the macOS-native fixture and checks that the accented entry name
// survives decoding (fixture origin inferred from its file name).
test('osx native accents', async (t) => {
  const extractedDir = await tempExtract(t, 'osx_accents_native', accentsOsxNative)
  const accentedPath = path.join(extractedDir, 'Archive', 'àâæçéèêëïîôœùûüÿ')
  await pathExists(t, accentedPath, 'directory created')
})

test('symlinks', async t => {
const dirPath = await tempExtract(t, 'symlinks', catsZip)
const symlink = path.join(dirPath, 'cats', 'orange_symlink')
Expand Down

0 comments on commit a68d765

Please sign in to comment.