Skip to content

Commit

Permalink
Use fixed buffers for BigString compression and decompression to redu…
Browse files Browse the repository at this point in the history
…ce GC churn.

fixup! Use fixed buffers for BigString compression and decompression to reduce GC churn.
  • Loading branch information
vlofgren committed Jun 19, 2023
1 parent 379bccc commit e437228
Show file tree
Hide file tree
Showing 7 changed files with 116 additions and 26 deletions.
Expand Up @@ -11,7 +11,5 @@ static BigString encode(String stringValue) {
}
String decode();

byte[] getBytes();

int length();
}
@@ -1,35 +1,23 @@
package nu.marginalia.bigstring;

import net.jpountz.lz4.LZ4Compressor;
import net.jpountz.lz4.LZ4Factory;
import net.jpountz.lz4.LZ4FastDecompressor;

import java.nio.charset.StandardCharsets;
import java.nio.ByteBuffer;

public class CompressedBigString implements BigString {
private final int originalSize;
private final int length;
private final byte[] encoded;
private final ByteBuffer encoded;

private static final LZ4Factory lz4Factory = LZ4Factory.fastestInstance();
private static final LZ4Compressor compressor = lz4Factory.fastCompressor();
private static final LZ4FastDecompressor decompressor = lz4Factory.fastDecompressor();
private final static CompressionBufferPool bufferPool = new CompressionBufferPool();

public CompressedBigString(String stringValue) {
byte[] byteValue = stringValue.getBytes(StandardCharsets.UTF_16);
originalSize = byteValue.length;
encoded = compressor.compress(byteValue);
encoded = bufferPool.bufferForThread().compress(stringValue);
originalSize = encoded.position();
length = stringValue.length();
}

@Override
public String decode() {
return new String(getBytes(), StandardCharsets.UTF_16);
}

@Override
public byte[] getBytes() {
return decompressor.decompress(encoded, originalSize);
return bufferPool.bufferForThread().decompress(encoded, length, originalSize);
}

@Override
Expand Down
@@ -0,0 +1,67 @@
package nu.marginalia.bigstring;

import net.jpountz.lz4.LZ4Compressor;
import net.jpountz.lz4.LZ4Factory;
import net.jpountz.lz4.LZ4FastDecompressor;

import java.nio.ByteBuffer;

/** Buffers for compression and decompression of strings.
* Operations are synchronized on the buffers.
* <p>
* @see CompressionBufferPool CompressionBufferPool */
public class CompressionBuffer {
private static final int BUFFER_SIZE = 8_000_000;
private final ByteBuffer buffer;

private static final LZ4Factory lz4Factory = LZ4Factory.fastestInstance();
private static final LZ4Compressor compressor = lz4Factory.fastCompressor();
private static final LZ4FastDecompressor decompressor = lz4Factory.fastDecompressor();


public CompressionBuffer() {
this.buffer = ByteBuffer.allocateDirect(BUFFER_SIZE);
}

/**
* @param stringValue the string to compress
* @return a compressed version of the string in a newly allocated ByteBuffer
*/
public synchronized ByteBuffer compress(String stringValue) {
final int splitPoint = stringValue.length() * 2;

buffer.clear();

var rawBuffer = buffer.slice(0, splitPoint);
var compressedBuffer = buffer.slice(splitPoint, BUFFER_SIZE - splitPoint);

rawBuffer.clear();
rawBuffer.asCharBuffer().append(stringValue);

// can't flip here because position and limit is in the CharBuffer representation
rawBuffer.position(0);
rawBuffer.limit(stringValue.length() * 2);

compressedBuffer.clear();
compressor.compress(rawBuffer, compressedBuffer);
compressedBuffer.flip();

ByteBuffer retBuffer = ByteBuffer.allocate(compressedBuffer.limit());
retBuffer.put(compressedBuffer);
return retBuffer;
}

public synchronized String decompress(ByteBuffer encoded, int length, int originalSize) {
buffer.position(0);
buffer.limit(length * 2);

encoded.position(0);
encoded.limit(originalSize);

decompressor.decompress(encoded, buffer);

buffer.flip();

return buffer.asCharBuffer().toString();
}
}
@@ -0,0 +1,26 @@
package nu.marginalia.bigstring;

import java.util.Arrays;
import java.util.concurrent.ThreadLocalRandom;

/** To avoid contention on the compression buffer, while keeping allocation churn low,
* we use a pool of buffers, randomly selected allocated upon invocation
* <p>
* @see CompressionBuffer CompressionBuffer
* */
public class CompressionBufferPool {
private static final int BUFFER_COUNT = 16;
private final CompressionBuffer[] destBuffer;

public CompressionBufferPool() {
destBuffer = new CompressionBuffer[BUFFER_COUNT];
Arrays.setAll(destBuffer, i -> new CompressionBuffer());
}

/** Get the buffer for the current thread */
public CompressionBuffer bufferForThread() {
int idx = ThreadLocalRandom.current().nextInt(0, BUFFER_COUNT);

return destBuffer[idx];
}
}
Expand Up @@ -14,11 +14,6 @@ public String decode() {
return value;
}

@Override
public byte[] getBytes() {
return value.getBytes(StandardCharsets.UTF_8);
}

@Override
public int length() {
return value.length();
Expand Down
@@ -0,0 +1,16 @@
package nu.marginalia.bigstring;

import nu.marginalia.bigstring.CompressedBigString;
import org.junit.jupiter.api.Test;

import static org.junit.jupiter.api.Assertions.*;

class CompressedBigStringTest {

@Test
public void testCompressDecompress() {
String testString = "This is a test string that is longer than 64 characters. It should be compressed.";
var bigString = new CompressedBigString(testString);
assertEquals(testString, bigString.decode());
}
}
Expand Up @@ -330,7 +330,7 @@ private Optional<SimpleRobotRules> fetchRobotsForProto(String proto, EdgeDomain

private SimpleRobotRules parseRobotsTxt(CrawledDocument doc) {
return robotsParser.parseContent(doc.url,
doc.documentBody.getBytes(),
doc.documentBody.decode().getBytes(),
doc.contentType,
userAgent);
}
Expand Down

0 comments on commit e437228

Please sign in to comment.