Skip to content

Commit

Permalink
josm-mirror: bumped externals
Browse files Browse the repository at this point in the history
  • Loading branch information
JOSM GitHub mirror committed Jan 21, 2017
1 parent be407a5 commit 6f027ed
Show file tree
Hide file tree
Showing 2 changed files with 200 additions and 9 deletions.
Expand Up @@ -38,6 +38,7 @@
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;
import org.apache.commons.compress.compressors.lz4.BlockLZ4CompressorInputStream;
import org.apache.commons.compress.compressors.lz4.BlockLZ4CompressorOutputStream;
import org.apache.commons.compress.compressors.lzma.LZMACompressorInputStream;
import org.apache.commons.compress.compressors.lzma.LZMACompressorOutputStream;
import org.apache.commons.compress.compressors.lzma.LZMAUtils;
Expand Down Expand Up @@ -511,7 +512,8 @@ public CompressorInputStream createCompressorInputStream(final String name, fina
*
* @param name
* the compressor name, i.e. {@value #GZIP}, {@value #BZIP2},
* {@value #XZ}, {@value #PACK200}, {@value SNAPPY_FRAMED}
* {@value #XZ}, {@value #PACK200}, {@value SNAPPY_FRAMED},
* {@value LZ4_BLOCK}
* or {@value #DEFLATE}
* @param out
* the output stream
Expand Down Expand Up @@ -558,6 +560,10 @@ public CompressorOutputStream createCompressorOutputStream(final String name, fi
return new FramedSnappyCompressorOutputStream(out);
}

if (LZ4_BLOCK.equalsIgnoreCase(name)) {
return new BlockLZ4CompressorOutputStream(out);
}

} catch (final IOException e) {
throw new CompressorException("Could not create CompressorOutputStream", e);
}
Expand Down Expand Up @@ -595,12 +601,12 @@ public Boolean getDecompressUntilEOF() {

@Override
public Set<String> getInputStreamCompressorNames() {
return Sets.newHashSet(GZIP, BZIP2, XZ, LZMA, PACK200, SNAPPY_RAW, SNAPPY_FRAMED, Z, DEFLATE);
return Sets.newHashSet(GZIP, BZIP2, XZ, LZMA, PACK200, SNAPPY_RAW, SNAPPY_FRAMED, Z, DEFLATE, LZ4_BLOCK);
}

@Override
public Set<String> getOutputStreamCompressorNames() {
return Sets.newHashSet(GZIP, BZIP2, XZ, LZMA, PACK200, DEFLATE, SNAPPY_FRAMED);
return Sets.newHashSet(GZIP, BZIP2, XZ, LZMA, PACK200, DEFLATE, SNAPPY_FRAMED, LZ4_BLOCK);
}

/**
Expand Down
Expand Up @@ -85,6 +85,9 @@ public class BlockLZ4CompressorOutputStream extends CompressorOutputStream {
private boolean finished = false;

private Deque<Pair> pairs = new LinkedList<>();
// keeps track of the last window-size bytes (64k) in order to be
// able to expand back-references when needed
private Deque<byte[]> expandedBlocks = new LinkedList<>();

/**
* Creates a new LZ4 output stream.
Expand Down Expand Up @@ -144,12 +147,15 @@ public void finish() throws IOException {

private void addLiteralBlock(LZ77Compressor.LiteralBlock block) throws IOException {
Pair last = writeBlocksAndReturnUnfinishedPair(block.getLength());
last.addLiteral(block);
recordLiteral(last.addLiteral(block));
clearUnusedBlocksAndPairs();
}

private void addBackReference(LZ77Compressor.BackReference block) throws IOException {
Pair last = writeBlocksAndReturnUnfinishedPair(block.getLength());
last.setBackReference(block);
recordBackReference(block);
clearUnusedBlocksAndPairs();
}

private Pair writeBlocksAndReturnUnfinishedPair(int length) throws IOException {
Expand All @@ -162,6 +168,106 @@ private Pair writeBlocksAndReturnUnfinishedPair(int length) throws IOException {
return last;
}

private void recordLiteral(byte[] b) {
expandedBlocks.addFirst(b);
}

private void clearUnusedBlocksAndPairs() {
clearUnusedBlocks();
clearUnusedPairs();
}

private void clearUnusedBlocks() {
int blockLengths = 0;
int blocksToKeep = 0;
for (byte[] b : expandedBlocks) {
blocksToKeep++;
blockLengths += b.length;
if (blockLengths >= BlockLZ4CompressorInputStream.WINDOW_SIZE) {
break;
}
}
final int size = expandedBlocks.size();
for (int i = blocksToKeep; i < size; i++) {
expandedBlocks.removeLast();
}
}

private void recordBackReference(LZ77Compressor.BackReference block) {
expandedBlocks.addFirst(expand(block.getOffset(), block.getLength()));
}

private byte[] expand(final int offset, final int length) {
byte[] expanded = new byte[length];
if (offset == 1) { // surprisingly common special case
byte[] block = expandedBlocks.peekFirst();
byte b = block[block.length - 1];
for (int i = 0; i < expanded.length; i++) {
expanded[i] = b;
}
} else {
expandFromList(expanded, offset, length);
}
return expanded;
}

private void expandFromList(final byte[] expanded, int offset, int length) {
int offsetRemaining = offset;
int lengthRemaining = length;
int writeOffset = 0;
while (lengthRemaining > 0) {
// find block that contains offsetRemaining
byte[] block = null;
int copyLen, copyOffset;
if (offsetRemaining > 0) {
int blockOffset = 0;
for (byte[] b : expandedBlocks) {
if (b.length + blockOffset >= offsetRemaining) {
block = b;
break;
}
blockOffset += b.length;
}
copyOffset = blockOffset + block.length - offsetRemaining;
copyLen = Math.min(lengthRemaining, block.length - copyOffset);
} else {
// offsetRemaining is negative and points into the expanded bytes
block = expanded;
copyOffset = writeOffset + offsetRemaining;
copyLen = Math.min(lengthRemaining, writeOffset + offsetRemaining);
}
if (copyLen < 1) {
throw new IllegalStateException("zero copy");
}
System.arraycopy(block, copyOffset, expanded, writeOffset, copyLen);
offsetRemaining -= copyLen;
lengthRemaining -= copyLen;
writeOffset += copyLen;
}
}

private void clearUnusedPairs() {
int pairLengths = 0;
int pairsToKeep = 0;
for (Iterator<Pair> it = pairs.descendingIterator(); it.hasNext(); ) {
Pair p = it.next();
pairsToKeep++;
pairLengths += p.length();
if (pairLengths >= BlockLZ4CompressorInputStream.WINDOW_SIZE) {
break;
}
}
final int size = pairs.size();
for (int i = pairsToKeep; i < size; i++) {
Pair p = pairs.peekFirst();
if (p.hasBeenWritten()) {
pairs.removeFirst();
} else {
break;
}
}
}

private void writeFinalLiteralBlock() throws IOException {
rewriteLastPairs();
for (Pair p : pairs) {
Expand Down Expand Up @@ -195,16 +301,85 @@ private void writeWritablePairs(int lengthOfBlocksAfterLastPair) throws IOExcept
}

private void rewriteLastPairs() {
LinkedList<Pair> lastPairs = new LinkedList<>();
LinkedList<Integer> pairLength = new LinkedList<>();
int offset = 0;
for (Iterator<Pair> it = pairs.descendingIterator(); it.hasNext(); ) {
Pair p = it.next();
if (p.hasBeenWritten()) {
break;
}
int len = p.length();
pairLength.addFirst(len);
lastPairs.addFirst(p);
offset += len;
if (offset >= MIN_OFFSET_OF_LAST_BACK_REFERENCE) {
break;
}
}
for (Pair p : lastPairs) {
pairs.remove(p);
}
// lastPairs may contain between one and four Pairs:
// * the last pair may be a one byte literal
// * all other Pairs contain a back-reference which must be four bytes long at minimum
// we could merge them all into a single literal block but
// this may harm compression. For example compressing
// "bla.tar" from our tests yields a last block containing a
// back-reference of length > 2k and we'd end up with a last
// literal of that size rather than a 2k back-reference and a
// 12 byte literal at the end.

// Instead we merge all but the first of lastPairs into a new
// literal-only Pair "replacement" and look at the
// back-reference in the first of lastPairs and see if we can
// split it. We can split it if it is longer than 16 -
// replacement.length (i.e. the minimal length of four is kept
// while making sure the last literal is at least twelve bytes
// long). If we can't split it, we expand the first of the pairs
// as well.

// this is not optimal, we could get better compression
// results with more complex approaches as the last literal
// only needs to be five bytes long if the previous
// back-reference has an offset big enough

final int allButFirstOfLastPairs = lastPairs.size() - 1;
int toExpand = 0;
for (int i = 1; i < allButFirstOfLastPairs; i++) {
toExpand += pairLength.get(i);
}
Pair replacement = new Pair();
if (toExpand > 0) {
replacement.prependLiteral(expand(toExpand, toExpand));
}
Pair splitCandidate = lastPairs.get(0);
int splitLen = splitCandidate.length();
int stillNeeded = MIN_OFFSET_OF_LAST_BACK_REFERENCE - toExpand;
if (splitCandidate.hasBackReference()
&& splitCandidate.backReferenceLength() > MIN_BACK_REFERENCE_LENGTH + stillNeeded) {
replacement.prependLiteral(expand(toExpand + stillNeeded, stillNeeded));
pairs.add(splitCandidate.splitWithNewBackReferenceLengthOf(splitCandidate.backReferenceLength()
- stillNeeded));
} else {
replacement.prependLiteral(expand(toExpand + splitLen, splitLen));
}
pairs.add(replacement);
}

final static class Pair {
private final List<byte[]> literals = new LinkedList<>();
private final Deque<byte[]> literals = new LinkedList<>();
private int brOffset, brLength;
private boolean written;

void addLiteral(LZ77Compressor.LiteralBlock block) {
literals.add(Arrays.copyOfRange(block.getData(), block.getOffset(),
block.getOffset() + block.getLength()));
private void prependLiteral(byte[] data) {
literals.addFirst(data);
}
byte[] addLiteral(LZ77Compressor.LiteralBlock block) {
byte[] copy = Arrays.copyOfRange(block.getData(), block.getOffset(),
block.getOffset() + block.getLength());
literals.add(copy);
return copy;
}
void setBackReference(LZ77Compressor.BackReference block) {
if (hasBackReference()) {
Expand All @@ -224,7 +399,7 @@ boolean canBeWritten(int lengthOfBlocksAfterThisPair) {
int length() {
return literalLength() + brLength;
}
boolean hasBeenWritten() {
private boolean hasBeenWritten() {
return written;
}
void writeTo(OutputStream out) throws IOException {
Expand Down Expand Up @@ -264,5 +439,15 @@ private static void writeLength(int length, OutputStream out) throws IOException
}
out.write(length);
}
private int backReferenceLength() {
return brLength;
}
Pair splitWithNewBackReferenceLengthOf(int newBackReferenceLength) {
Pair p = new Pair();
p.literals.addAll(literals);
p.brOffset = brOffset;
p.brLength = newBackReferenceLength;
return p;
}
}
}

0 comments on commit 6f027ed

Please sign in to comment.