Benzinga · rlespinet · Feb 14, 2023
diff --git a/lz4.js b/lz4.js
@@ -16,8 +16,8 @@ var util = require('./util.js');
 
 // Compression format parameters/constants.
 var minMatch = 4;
-var minLength = 13;
-var searchLimit = 5;
+var matchSearchLimit = 12;
+var minTrailingLitterals = 5;
 var skipTrigger = 6;
 var hashSize = 1 << 16;
 
@@ -265,11 +265,11 @@ exports.decompressBlock = function decompressBlock (src, dst, sIndex, sLength, d
       dst.copyWithin(dIndex, dIndex - mOffset, dIndex - mOffset + mLength);
       dIndex += mLength;
     } else {
-      for (i = dIndex - mOffset, n = i + mLength; i < n;) {
-        dst[dIndex++] = dst[i++] | 0;
-      }
+    for (i = dIndex - mOffset, n = i + mLength; i < n;) {
+      dst[dIndex++] = dst[i++] | 0;
     }
   }
+  }
 
   return dIndex;
 };
@@ -284,83 +284,81 @@ exports.compressBlock = function compressBlock (src, dst, sIndex, sLength, hashT
   sEnd = sLength + sIndex;
   mAnchor = sIndex;
 
-  // Process only if block is large enough.
-  if (sLength >= minLength) {
-    var searchMatchCount = (1 << skipTrigger) + 3;
+  var searchMatchCount = (1 << skipTrigger) + 3;
 
-    // Consume until last n literals (Lz4 spec limitation.)
-    while (sIndex + minMatch < sEnd - searchLimit) {
-      var seq = util.readU32(src, sIndex);
-      var hash = util.hashU32(seq) >>> 0;
+  // Search for matches. A match must start at least matchSearchLimit
+  // bytes before the end of the block (LZ4 spec limitation).
+  while (sIndex <= sEnd - matchSearchLimit) {
+    var seq = util.readU32(src, sIndex);
+    var hash = util.hashU32(seq) >>> 0;
 
-      // Crush hash to 16 bits.
-      hash = ((hash >> 16) ^ hash) >>> 0 & 0xffff;
+    // Crush hash to 16 bits.
+    hash = ((hash >> 16) ^ hash) >>> 0 & 0xffff;
 
-      // Look for a match in the hashtable. NOTE: remove one; see below.
-      mIndex = hashTable[hash] - 1;
+    // Look for a match in the hashtable. NOTE: remove one; see below.
+    mIndex = hashTable[hash] - 1;
 
-      // Put pos in hash table. NOTE: add one so that zero = invalid.
-      hashTable[hash] = sIndex + 1;
+    // Put pos in hash table. NOTE: add one so that zero = invalid.
+    hashTable[hash] = sIndex + 1;
 
-      // Determine if there is a match (within range.)
-      if (mIndex < 0 || ((sIndex - mIndex) >>> 16) > 0 || util.readU32(src, mIndex) !== seq) {
-        mStep = searchMatchCount++ >> skipTrigger;
-        sIndex += mStep;
-        continue;
-      }
+    // Determine if there is a match (within range.)
+    if (mIndex < 0 || ((sIndex - mIndex) >>> 16) > 0 || util.readU32(src, mIndex) !== seq) {
+      mStep = searchMatchCount++ >> skipTrigger;
+      sIndex += mStep;
+      continue;
+    }
 
-      searchMatchCount = (1 << skipTrigger) + 3;
+    searchMatchCount = (1 << skipTrigger) + 3;
 
-      // Calculate literal count and offset.
-      literalCount = sIndex - mAnchor;
-      mOffset = sIndex - mIndex;
+    // Calculate literal count and offset.
+    literalCount = sIndex - mAnchor;
+    mOffset = sIndex - mIndex;
 
-      // We've already matched one word, so get that out of the way.
-      sIndex += minMatch;
-      mIndex += minMatch;
+    // We've already matched one word, so get that out of the way.
+    sIndex += minMatch;
+    mIndex += minMatch;
 
-      // Determine match length.
-      // N.B.: mLength does not include minMatch, Lz4 adds it back
-      // in decoding.
-      mLength = sIndex;
-      while (sIndex < sEnd - searchLimit && src[sIndex] === src[mIndex]) {
-        sIndex++;
-        mIndex++;
-      }
-      mLength = sIndex - mLength;
-
-      // Write token + literal count.
-      var token = mLength < mlMask ? mLength : mlMask;
-      if (literalCount >= runMask) {
-        dst[dIndex++] = (runMask << mlBits) + token;
-        for (n = literalCount - runMask; n >= 0xff; n -= 0xff) {
-          dst[dIndex++] = 0xff;
-        }
-        dst[dIndex++] = n;
-      } else {
-        dst[dIndex++] = (literalCount << mlBits) + token;
+    // Determine match length.
+    // N.B.: mLength does not include minMatch, Lz4 adds it back
+    // in decoding.
+    mLength = sIndex;
+    while (sIndex < sEnd - minTrailingLitterals && src[sIndex] === src[mIndex]) {
+      sIndex++;
+      mIndex++;
+    }
+    mLength = sIndex - mLength;
+
+    // Write token + literal count.
+    var token = mLength < mlMask ? mLength : mlMask;
+    if (literalCount >= runMask) {
+      dst[dIndex++] = (runMask << mlBits) + token;
+      for (n = literalCount - runMask; n >= 0xff; n -= 0xff) {
+        dst[dIndex++] = 0xff;
       }
+      dst[dIndex++] = n;
+    } else {
+      dst[dIndex++] = (literalCount << mlBits) + token;
+    }
 
-      // Write literals.
-      for (var i = 0; i < literalCount; i++) {
-        dst[dIndex++] = src[mAnchor + i];
-      }
+    // Write literals.
+    for (var i = 0; i < literalCount; i++) {
+      dst[dIndex++] = src[mAnchor + i];
+    }
 
-      // Write offset.
-      dst[dIndex++] = mOffset;
-      dst[dIndex++] = (mOffset >> 8);
+    // Write offset.
+    dst[dIndex++] = mOffset;
+    dst[dIndex++] = (mOffset >> 8);
 
-      // Write match length.
-      if (mLength >= mlMask) {
-        for (n = mLength - mlMask; n >= 0xff; n -= 0xff) {
-          dst[dIndex++] = 0xff;
-        }
-        dst[dIndex++] = n;
+    // Write match length.
+    if (mLength >= mlMask) {
+      for (n = mLength - mlMask; n >= 0xff; n -= 0xff) {
+        dst[dIndex++] = 0xff;
       }
-
-      // Move the anchor.
-      mAnchor = sIndex;
+      dst[dIndex++] = n;
     }
+
+    // Move the anchor.
+    mAnchor = sIndex;
   }
 
   // Nothing was encoded.

diff --git a/test/cases/lz4.js b/test/cases/lz4.js
@@ -229,6 +229,67 @@ describe('lz4', function () {
 
       expect(lz4.compress(input)).to.be.deep.equal(output);
     });
+
+    it('should respect the 5 litteral rule on last sequence', function () {
+      var input = byteArray([
+        0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+        0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+        0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+        0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+      ]);
+
+      var output = byteArray([
+        0x04, 0x22, 0x4d, 0x18, 0x40, 0x70, 0xdf, 0x1a,
+        0x00, 0x00, 0x00, 0xf7, 0x01, 0x00, 0x01, 0x02,
+        0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
+        0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x00, 0x50,
+        0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00, 0x00, 0x00,
+        0x00
+      ]);
+
+      expect(lz4.compress(input)).to.be.deep.equal(output);
+    });
+
+    it('should not create a match starting less than 12 bytes before end of input', function () {
+      var input = byteArray([
+        0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+        0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+        0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+        0x08, 0x09, 0x0a,
+      ]);
+
+      var output = byteArray([
+        0x04, 0x22, 0x4d, 0x18, 0x40, 0x70, 0xdf, 0x1b,
+        0x00, 0x00, 0x80, 0x00, 0x01, 0x02, 0x03, 0x04,
+        0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c,
+        0x0d, 0x0e, 0x0f, 0x00, 0x01, 0x02, 0x03, 0x04,
+        0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x00, 0x00,
+        0x00, 0x00
+      ]);
+
+      expect(lz4.compress(input)).to.be.deep.equal(output);
+    });
+
+    it('should create a match starting 12 bytes before end of input or more', function () {
+      var input = byteArray([
+        0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+        0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+        0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+        0x08, 0x09, 0x0a, 0x0b
+      ]);
+
+      var output = byteArray([
+        0x04, 0x22, 0x4d, 0x18, 0x40, 0x70, 0xdf, 0x1a,
+        0x00, 0x00, 0x00, 0xf3, 0x01, 0x00, 0x01, 0x02,
+        0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
+        0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x00, 0x50,
+        0x07, 0x08, 0x09, 0x0a, 0x0b, 0x00, 0x00, 0x00,
+        0x00
+      ]);
+
+      expect(lz4.compress(input)).to.be.deep.equal(output);
+    });
+
   });
 
   describe('#compressBlock', function () {