From 496b9e7ddbbe1367ba21bd19c98e9f439b5cb904 Mon Sep 17 00:00:00 2001 From: Mithgol the Webmaster Date: Thu, 23 Apr 2015 00:28:33 +0300 Subject: [PATCH] =?UTF-8?q?dirty=C2=A0hacks=20for=20ashtuchkin/iconv-lite#?= =?UTF-8?q?73=20and=20ashtuchkin/iconv-lite#96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 13 +++++++--- fiunis.js | 74 ++++++++++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 74 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 18a04e2..796f4ba 100644 --- a/README.md +++ b/README.md @@ -34,13 +34,18 @@ Replaces all occurrences of Fidonet Unicode substrings in the given text by  For example, `.decode('The video “&+mAJcFlwNbHpOS3p/iTJbUHvH-;” is interesting.')` returns `'The video “頂尖對決之穿褲子篇” is interesting.'`. -### encode(text) +### encode(text, encoding) -Returns the Fidonet Unicode substring equivalent of the given text. +This method makes one or more Fidonet Unicode substrings from the given `text`. This method has two different possible behaviours: -For example, `.encode('頂尖對決之穿褲子篇')` returns `'&+mAJcFlwNbHpOS3p/iTJbUHvH-;'`. +* If `encoding` is `undefined`, this method returns a JavaScript **string** with the Fidonet Unicode substring equivalent of the given `text`. + * For example, `.encode('頂尖對決之穿褲子篇')` returns `'&+mAJcFlwNbHpOS3p/iTJbUHvH-;'`. + * The whole given `text` is converted (including ASCII characters). You should either detect (beforehand) which substrings should be given to this encoder (and what other substrings would be left to a traditional 8-bit encoding) or use the other encoder that decides that automatically (i.e. also give some `encoding` to the method, see below). -Any given text is converted (including ASCII characters). You should detect (beforehand) which substrings should be given to this encoder and what other substrings would be left to a traditional 8-bit encoding. 
+* If `encoding` is not `undefined`, this method returns a Node.js **Buffer** with the given `text` converted to the given `encoding`. + * If some fragments of the given `text` consist of characters that cannot be represented in the given `encoding`, then Fidonet Unicode substrings are used to encode such fragments. + * The given `encoding` must be an encoding understood by the [`iconv-lite`](https://github.com/ashtuchkin/iconv-lite) module (otherwise an error is thrown). + * The given `encoding` should also be a single-byte encoding (that generates exactly one byte for each of the characters it can represent). Otherwise the behaviour of this method is not reliable. (An error is thrown only if a multi-byte encoding can be detected, i.e. if the Buffer's length is not equal to the text's length.) ## Testing Fiunis diff --git a/fiunis.js b/fiunis.js index 98b5ba4..ca29617 100644 --- a/fiunis.js +++ b/fiunis.js @@ -1,4 +1,5 @@ -require('iconv-lite').extendNodeEncodings(); +var iconvLite = require('iconv-lite'); +iconvLite.extendNodeEncodings(); var Fiunis = function(){ if (!(this instanceof Fiunis)) return new Fiunis(); @@ -14,15 +15,70 @@ Fiunis.prototype.decode = function(text){ }).join(''); }; -Fiunis.prototype.encode = function(text){ - var base64string = Buffer(text, 'utf16be').toString('base64'); - while( - base64string.length > 0 && - base64string.charAt(base64string.length - 1) === '=' - ){ - base64string = base64string.slice(0, -1); +Fiunis.prototype.encode = function(text, targetEncoding){ + if( typeof targetEncoding === 'undefined' ){ // encode whole string + var base64string = Buffer(text, 'utf16be').toString('base64'); + while( + base64string.length > 0 && + base64string.charAt(base64string.length - 1) === '=' + ){ + base64string = base64string.slice(0, -1); + } + return '&+' + base64string + '-;'; + } + + // otherwise detect and render Fidonet Unicode substrings + if( !iconvLite.encodingExists(targetEncoding) ){ + throw new 
Error(this.errors.UNKNOWN_ENCODING); + } + if( iconvLite.defaultCharSingleByte !== '?' ){ + throw new Error(this.errors.ICONVLITE_TAINTED); + } + + var primalBuffer = Buffer(text, targetEncoding); + if( primalBuffer.length !== text.length ){ + throw new Error(this.errors.MULTIBYTE_ENCODING); } - return '&+' + base64string + '-;'; + var zebra = primalBuffer.toString(targetEncoding).split( /(\?+)/ ); + if( zebra.length < 2 ){ // zero non-encodable substrings + return primalBuffer; + } else primalBuffer = void 0; + + var _here = this; + var remainingStr = text; + var collected = []; + zebra.forEach(function(zebraLine, IDX){ + if( IDX % 2 === 0 ){ // encodable substring's index: 0, 2, 4... + collected = collected.concat([zebraLine]); + remainingStr = remainingStr.slice(zebraLine.length); + } else { // non-encodable substring's index: 1, 3, 5... + var srcZebra = remainingStr.slice(0, zebraLine.length).split( + /(\?+)/ + ).map(function(srcLine, srcIDX){ + if( srcIDX % 2 === 0 ){ + // actual non-encodable substring's index: 0, 2, 4... + if( srcLine.length < 1 ) return srcLine; + return _here.encode(srcLine); + } else { + // encodable source defaultCharSingleByte index: 1, 3, 5... + return srcLine; + } + }); + collected = collected.concat(srcZebra); + remainingStr = remainingStr.slice(zebraLine.length); + } + }); + return Buffer(collected.join(''), targetEncoding); +}; + +Fiunis.prototype.errors = { + UNKNOWN_ENCODING: 'The given encoding is unknown!', + MULTIBYTE_ENCODING: 'The given encoding is not a single-byte encoding!', + ICONVLITE_TAINTED: [ + 'The issue ', + 'https://github.com/ashtuchkin/iconv-lite/issues/96', + ' is possible!' + ].join('') }; module.exports = new Fiunis(); \ No newline at end of file