Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Respect LZ4 end of block conditions #13

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 65 additions & 67 deletions lz4.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ var util = require('./util.js');

// Compression format parameters/constants.
// Number of bytes compared as a single 32-bit word to establish a match; the
// encoder advances both cursors by this amount once a match is found.
var minMatch = 4;
// Smallest block length for which the match search is attempted (used by the
// `sLength >= minLength` guard in compressBlock).
var minLength = 13;
// Number of bytes at the end of the block excluded from the search and
// match-extension loops that reference this constant.
var searchLimit = 5;
// A match may only start while `sIndex <= sEnd - matchSearchLimit`, i.e. at
// least this many bytes before the end of the block.
var matchSearchLimit = 12;
// Match extension stops at `sEnd - minTrailingLitterals`, so at least this many
// bytes remain after the last match.
// NOTE(review): "Litterals" is a misspelling of "Literals"; renaming it means
// updating every use of the identifier.
var minTrailingLitterals = 5;
// Shift applied to the miss counter to derive the search step: the counter
// starts at (1 << skipTrigger) + 3 and grows by one per failed lookup, so the
// step increases as consecutive misses accumulate.
var skipTrigger = 6;
// Presumably the number of hash-table slots (hashes are reduced to 16 bits
// before indexing) - TODO confirm at the hash-table allocation site.
var hashSize = 1 << 16;

Expand Down Expand Up @@ -265,11 +265,11 @@ exports.decompressBlock = function decompressBlock (src, dst, sIndex, sLength, d
dst.copyWithin(dIndex, dIndex - mOffset, dIndex - mOffset + mLength);
dIndex += mLength;
} else {
for (i = dIndex - mOffset, n = i + mLength; i < n;) {
dst[dIndex++] = dst[i++] | 0;
}
for (i = dIndex - mOffset, n = i + mLength; i < n;) {
dst[dIndex++] = dst[i++] | 0;
}
}
}

return dIndex;
};
Expand All @@ -284,83 +284,81 @@ exports.compressBlock = function compressBlock (src, dst, sIndex, sLength, hashT
sEnd = sLength + sIndex;
mAnchor = sIndex;

// Process only if block is large enough.
if (sLength >= minLength) {
var searchMatchCount = (1 << skipTrigger) + 3;
var searchMatchCount = (1 << skipTrigger) + 3;

// Consume until last n literals (Lz4 spec limitation.)
while (sIndex + minMatch < sEnd - searchLimit) {
var seq = util.readU32(src, sIndex);
var hash = util.hashU32(seq) >>> 0;
// Search for matches with a limit of matchSearchLimit bytes
// before the end of block (Lz4 spec limitation.)
while (sIndex <= sEnd - matchSearchLimit) {
var seq = util.readU32(src, sIndex);
var hash = util.hashU32(seq) >>> 0;

// Crush hash to 16 bits.
hash = ((hash >> 16) ^ hash) >>> 0 & 0xffff;
// Crush hash to 16 bits.
hash = ((hash >> 16) ^ hash) >>> 0 & 0xffff;

// Look for a match in the hashtable. NOTE: remove one; see below.
mIndex = hashTable[hash] - 1;
// Look for a match in the hashtable. NOTE: remove one; see below.
mIndex = hashTable[hash] - 1;

// Put pos in hash table. NOTE: add one so that zero = invalid.
hashTable[hash] = sIndex + 1;
// Put pos in hash table. NOTE: add one so that zero = invalid.
hashTable[hash] = sIndex + 1;

// Determine if there is a match (within range.)
if (mIndex < 0 || ((sIndex - mIndex) >>> 16) > 0 || util.readU32(src, mIndex) !== seq) {
mStep = searchMatchCount++ >> skipTrigger;
sIndex += mStep;
continue;
}
// Determine if there is a match (within range.)
if (mIndex < 0 || ((sIndex - mIndex) >>> 16) > 0 || util.readU32(src, mIndex) !== seq) {
mStep = searchMatchCount++ >> skipTrigger;
sIndex += mStep;
continue;
}

searchMatchCount = (1 << skipTrigger) + 3;
searchMatchCount = (1 << skipTrigger) + 3;

// Calculate literal count and offset.
literalCount = sIndex - mAnchor;
mOffset = sIndex - mIndex;
// Calculate literal count and offset.
literalCount = sIndex - mAnchor;
mOffset = sIndex - mIndex;

// We've already matched one word, so get that out of the way.
sIndex += minMatch;
mIndex += minMatch;
// We've already matched one word, so get that out of the way.
sIndex += minMatch;
mIndex += minMatch;

// Determine match length.
// N.B.: mLength does not include minMatch, Lz4 adds it back
// in decoding.
mLength = sIndex;
while (sIndex < sEnd - searchLimit && src[sIndex] === src[mIndex]) {
sIndex++;
mIndex++;
}
mLength = sIndex - mLength;

// Write token + literal count.
var token = mLength < mlMask ? mLength : mlMask;
if (literalCount >= runMask) {
dst[dIndex++] = (runMask << mlBits) + token;
for (n = literalCount - runMask; n >= 0xff; n -= 0xff) {
dst[dIndex++] = 0xff;
}
dst[dIndex++] = n;
} else {
dst[dIndex++] = (literalCount << mlBits) + token;
// Determine match length.
// N.B.: mLength does not include minMatch, Lz4 adds it back
// in decoding.
mLength = sIndex;
while (sIndex < sEnd - minTrailingLitterals && src[sIndex] === src[mIndex]) {
sIndex++;
mIndex++;
}
mLength = sIndex - mLength;

// Write token + literal count.
var token = mLength < mlMask ? mLength : mlMask;
if (literalCount >= runMask) {
dst[dIndex++] = (runMask << mlBits) + token;
for (n = literalCount - runMask; n >= 0xff; n -= 0xff) {
dst[dIndex++] = 0xff;
}
dst[dIndex++] = n;
} else {
dst[dIndex++] = (literalCount << mlBits) + token;
}

// Write literals.
for (var i = 0; i < literalCount; i++) {
dst[dIndex++] = src[mAnchor + i];
}
// Write literals.
for (var i = 0; i < literalCount; i++) {
dst[dIndex++] = src[mAnchor + i];
}

// Write offset.
dst[dIndex++] = mOffset;
dst[dIndex++] = (mOffset >> 8);
// Write offset.
dst[dIndex++] = mOffset;
dst[dIndex++] = (mOffset >> 8);

// Write match length.
if (mLength >= mlMask) {
for (n = mLength - mlMask; n >= 0xff; n -= 0xff) {
dst[dIndex++] = 0xff;
}
dst[dIndex++] = n;
// Write match length.
if (mLength >= mlMask) {
for (n = mLength - mlMask; n >= 0xff; n -= 0xff) {
dst[dIndex++] = 0xff;
}

// Move the anchor.
mAnchor = sIndex;
dst[dIndex++] = n;
}

// Move the anchor.
mAnchor = sIndex;
}

// Nothing was encoded.
Expand Down
61 changes: 61 additions & 0 deletions test/cases/lz4.js
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,67 @@ describe('lz4', function () {

expect(lz4.compress(input)).to.be.deep.equal(output);
});

it('should respect the 5 litteral rule on last sequence', function () {
  // Sixteen distinct bytes followed by an exact repeat of the same sixteen.
  var source = byteArray([
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
  ]);

  // NOTE(review): the grouping below is a reading of the expected bytes, not
  // something this test checks on its own - confirm the frame-level fields
  // against the LZ4 frame format.
  var expected = byteArray([
    // Presumably the frame header.
    0x04, 0x22, 0x4d, 0x18, 0x40, 0x70, 0xdf,
    // Presumably the block size (0x1a = 26 bytes).
    0x1a, 0x00, 0x00, 0x00,
    // First sequence: token plus one extra literal-length byte.
    0xf7, 0x01,
    // The sixteen literals of the first run.
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    // Match offset, low byte first (16).
    0x10, 0x00,
    // Last sequence: token followed by the five trailing literals.
    0x50,
    0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    // Presumably the end-of-stream marker.
    0x00, 0x00, 0x00, 0x00
  ]);

  var actual = lz4.compress(source);
  expect(actual).to.be.deep.equal(expected);
});

it('should not create a match starting less than 12 bytes before end of input', function () {
  // Sixteen distinct bytes, then the first eleven of them again: the repeat
  // begins only 11 bytes before the end of the input.
  var source = byteArray([
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a
  ]);

  // NOTE(review): the grouping below is a reading of the expected bytes -
  // confirm the frame-level fields against the LZ4 frame format.
  var expected = byteArray([
    // Presumably the frame header.
    0x04, 0x22, 0x4d, 0x18, 0x40, 0x70, 0xdf,
    // Presumably the block size (0x1b = 27) with the high bit flagging a
    // stored, uncompressed block.
    0x1b, 0x00, 0x00, 0x80,
    // The 27 input bytes, copied through unchanged.
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0a,
    // Presumably the end-of-stream marker.
    0x00, 0x00, 0x00, 0x00
  ]);

  var actual = lz4.compress(source);
  expect(actual).to.be.deep.equal(expected);
});

it('should create a match starting 12 bytes before end of input or more', function () {
  // Sixteen distinct bytes, then the first twelve of them again: the repeat
  // begins exactly 12 bytes before the end of the input.
  var source = byteArray([
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b
  ]);

  // NOTE(review): the grouping below is a reading of the expected bytes -
  // confirm the frame-level fields against the LZ4 frame format.
  var expected = byteArray([
    // Presumably the frame header.
    0x04, 0x22, 0x4d, 0x18, 0x40, 0x70, 0xdf,
    // Presumably the block size (0x1a = 26 bytes).
    0x1a, 0x00, 0x00, 0x00,
    // First sequence: token plus one extra literal-length byte.
    0xf3, 0x01,
    // The sixteen literals of the first run.
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    // Match offset, low byte first (16).
    0x10, 0x00,
    // Last sequence: token followed by the five trailing literals.
    0x50,
    0x07, 0x08, 0x09, 0x0a, 0x0b,
    // Presumably the end-of-stream marker.
    0x00, 0x00, 0x00, 0x00
  ]);

  var actual = lz4.compress(source);
  expect(actual).to.be.deep.equal(expected);
});

});

describe('#compressBlock', function () {
Expand Down