Skip to content

Commit

Permalink
lib: improve normalize encoding performance
Browse files Browse the repository at this point in the history
This focuses on the common case by making sure they are prioritized.
It also changes some typeof checks to test for undefined since
that is faster and it adds a benchmark.

PR-URL: nodejs#18790
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Benjamin Gruenbaum <benjamingr@gmail.com>
Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
  • Loading branch information
BridgeAR committed Mar 2, 2018
1 parent 876836b commit 341770f
Show file tree
Hide file tree
Showing 4 changed files with 101 additions and 33 deletions.
43 changes: 43 additions & 0 deletions benchmark/buffers/buffer-normalize-encoding.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
'use strict';

const common = require('../common.js');

const bench = common.createBenchmark(main, {
encoding: [
'ascii',
'ASCII',
'base64',
'BASE64',
'binary',
'BINARY',
'hex',
'HEX',
'latin1',
'LATIN1',
'ucs-2',
'UCS-2',
'ucs2',
'UCS2',
'utf-16le',
'UTF-16LE',
'utf-8',
'UTF-8',
'utf16le',
'UTF16LE',
'utf8',
'UTF8'
],
n: [1e6]
}, {
flags: ['--expose-internals']
});

function main({ encoding, n }) {
const { normalizeEncoding } = require('internal/util');

bench.start();
for (var i = 0; i < n; i++) {
normalizeEncoding(encoding);
}
bench.end(n);
}
4 changes: 2 additions & 2 deletions lib/buffer.js
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ function assertSize(size) {
err = new errors.RangeError('ERR_INVALID_OPT_VALUE', 'size', size);
}

if (err) {
if (err !== null) {
Error.captureStackTrace(err, assertSize);
throw err;
}
Expand Down Expand Up @@ -428,7 +428,7 @@ Buffer.compare = function compare(a, b) {

Buffer.isEncoding = function isEncoding(encoding) {
return typeof encoding === 'string' &&
typeof normalizeEncoding(encoding) === 'string';
normalizeEncoding(encoding) !== undefined;
};
Buffer[kIsEncodingSymbol] = Buffer.isEncoding;

Expand Down
77 changes: 50 additions & 27 deletions lib/internal/util.js
Original file line number Diff line number Diff line change
Expand Up @@ -96,36 +96,59 @@ function assertCrypto() {
throw new errors.Error('ERR_NO_CRYPTO');
}

// The loop should only run at most twice, retrying with lowercased enc
// if there is no match in the first pass.
// We use a loop instead of branching to retry with a helper
// function in order to avoid the performance hit.
// Return undefined if there is no match.
// Move the "slow cases" to a separate function to make sure this function gets
// inlined properly. That prioritizes the common case.
function normalizeEncoding(enc) {
if (enc == null || enc === '') return 'utf8';
let retried;
while (true) {
switch (enc) {
case 'utf8':
case 'utf-8':
return 'utf8';
case 'ucs2':
case 'ucs-2':
case 'utf16le':
case 'utf-16le':
if (enc == null || enc === 'utf8' || enc === 'utf-8') return 'utf8';
return slowCases(enc);
}

function slowCases(enc) {
switch (enc.length) {
case 4:
if (enc === 'UTF8') return 'utf8';
if (enc === 'ucs2' || enc === 'UCS2') return 'utf16le';
enc = `${enc}`.toLowerCase();
if (enc === 'utf8') return 'utf8';
if (enc === 'ucs2' || enc === 'UCS2') return 'utf16le';
break;
case 3:
if (enc === 'hex' || enc === 'HEX' || `${enc}`.toLowerCase() === 'hex')
return 'hex';
break;
case 5:
if (enc === 'ascii') return 'ascii';
if (enc === 'ucs-2') return 'utf16le';
if (enc === 'UTF-8') return 'utf8';
if (enc === 'ASCII') return 'ascii';
if (enc === 'UCS-2') return 'utf16le';
enc = `${enc}`.toLowerCase();
if (enc === 'utf-8') return 'utf8';
if (enc === 'ascii') return 'ascii';
if (enc === 'usc-2') return 'utf16le';
break;
case 6:
if (enc === 'base64') return 'base64';
if (enc === 'latin1' || enc === 'binary') return 'latin1';
if (enc === 'BASE64') return 'base64';
if (enc === 'LATIN1' || enc === 'BINARY') return 'latin1';
enc = `${enc}`.toLowerCase();
if (enc === 'base64') return 'base64';
if (enc === 'latin1' || enc === 'binary') return 'latin1';
break;
case 7:
if (enc === 'utf16le' || enc === 'UTF16LE' ||
`${enc}`.toLowerCase() === 'utf16le')
return 'utf16le';
case 'latin1':
case 'binary':
return 'latin1';
case 'base64':
case 'ascii':
case 'hex':
return enc;
default:
if (retried) return; // undefined
enc = ('' + enc).toLowerCase();
retried = true;
}
break;
case 8:
if (enc === 'utf-16le' || enc === 'UTF-16LE' ||
`${enc}`.toLowerCase() === 'utf-16le')
return 'utf16le';
break;
default:
if (enc === '') return 'utf8';
}
}

Expand Down
10 changes: 6 additions & 4 deletions lib/string_decoder.js
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,12 @@ const kNativeDecoder = Symbol('kNativeDecoder');
// modules monkey-patch it to support additional encodings
function normalizeEncoding(enc) {
const nenc = internalUtil.normalizeEncoding(enc);
if (typeof nenc !== 'string' &&
(Buffer.isEncoding === isEncoding || !Buffer.isEncoding(enc)))
throw new errors.TypeError('ERR_UNKNOWN_ENCODING', enc);
return nenc || enc;
if (nenc === undefined) {
if (Buffer.isEncoding === isEncoding || !Buffer.isEncoding(enc))
throw new errors.TypeError('ERR_UNKNOWN_ENCODING', enc);
return enc;
}
return nenc;
}

const encodingsMap = {};
Expand Down

0 comments on commit 341770f

Please sign in to comment.