Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 72 additions & 0 deletions testsuite/compress-zlib-insert_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#!/usr/bin/env python3
# Regression test for issue #951.
#
# When rsync is built against a system zlib (no bundled Z_INSERT_ONLY
# extension), send_deflated_token() falls back to Z_SYNC_FLUSH to add a
# matched block to the compressor history -- but Z_SYNC_FLUSH emits a flush
# block into a fixed-size obuf. A large incompressible matched block
# overflowed obuf and aborted the transfer with
# "deflate on token returned 0 (N bytes left)" at token.c
# The fix loops, discarding the (never-sent) output, until the input is
# consumed. A bundled-zlib build emits no output here, so this test passes
# on either build; it is RED only on a pre-fix system-zlib build.
#
# The matched-block insert path needs all of: --compress-choice=zlib (the
# only method that feeds matched blocks into the deflate history), a large
# --block-size so a single matched token exceeds obuf, incompressible
# (random) data, and a delta over a real connection (compression is skipped
# for purely local transfers). We assert the upload SUCCEEDS *and* the
# result matches the source, so the fix is verified correct, not merely
# non-crashing.

import filecmp
import shutil
import subprocess

from rsyncfns import (
SCRATCHDIR, make_data_file, makepath, rmtree, rsync_argv,
start_test_daemon, test_fail, write_daemon_conf,
)

DAEMON_PORT = 12922
SIZE = 8 * 1024 * 1024 # enough blocks to exercise many inserts
# 65535 (0xffff) is a single insert fragment larger than the deflate output
# buffer (AVAIL_OUT_SIZE(CHUNK_SIZE) ~= 32816). It exercises both failure
# modes of the pre-fix code: the obuf overflow abort, and -- once that is
# loop-drained -- pending insert output left in the stream that leaks into
# the next send. A block that splits into chunks ending in a tiny fragment
# (e.g. 131072 = 65535+65535+2) would hide the pending case.
BLOCK = 65535

moddir = SCRATCHDIR / 'zmod'
srcdir = SCRATCHDIR / 'zsrc'
rmtree(moddir)
rmtree(srcdir)
makepath(moddir)
makepath(srcdir)

# Source is incompressible. The basis (already in the module) is the same
# data with a few bytes changed in one block, so every other 128KB block
# matches exactly and is sent as a token -> the deflate insert path.
make_data_file(srcdir / 'big.dat', SIZE)
shutil.copy(srcdir / 'big.dat', moddir / 'big.dat')
with open(srcdir / 'big.dat', 'r+b') as f:
f.seek(SIZE // 2 + 1000)
f.write(b'\x00' * 32)

conf = write_daemon_conf([('zmod', {'path': str(moddir), 'read only': 'no'})])
url = start_test_daemon(conf, DAEMON_PORT) + 'zmod/'

# -I forces the delta even though the basis has the same size (otherwise the
# quick check skips the file and the matched-block insert path never runs).
proc = subprocess.run(
rsync_argv('-zI', '--compress-choice=zlib', '--no-whole-file',
f'--block-size={BLOCK}', str(srcdir / 'big.dat'), url),
stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
if proc.returncode != 0:
print(proc.stdout)
test_fail(f"zlib delta upload failed (rc={proc.returncode}); "
"regression of #951 deflate-token overflow")

if not filecmp.cmp(srcdir / 'big.dat', moddir / 'big.dat', shallow=False):
test_fail("uploaded file differs from source -- zlib delta corruption")
31 changes: 23 additions & 8 deletions token.c
Original file line number Diff line number Diff line change
Expand Up @@ -481,14 +481,29 @@ send_deflated_token(int f, int32 token, struct map_struct *buf, OFF_T offset, in
tx_strm.avail_in = n1;
if (protocol_version >= 31) /* Newer protocols avoid a data-duplicating bug */
offset += n1;
tx_strm.next_out = (Bytef *) obuf;
tx_strm.avail_out = AVAIL_OUT_SIZE(CHUNK_SIZE);
r = deflate(&tx_strm, Z_INSERT_ONLY);
if (r != Z_OK || tx_strm.avail_in != 0) {
rprintf(FERROR, "deflate on token returned %d (%d bytes left)\n",
r, tx_strm.avail_in);
exit_cleanup(RERR_STREAMIO);
}
/* With our bundled zlib's Z_INSERT_ONLY this produces no
* output and consumes the input in one call. A build
* against a system zlib lacks Z_INSERT_ONLY and falls back
* to Z_SYNC_FLUSH (see top of file), which emits a flush
* block we discard -- and for an incompressible token that
* block can exceed obuf. Loop, resetting the output buffer,
* until all the input is consumed so a large token can't
* overflow obuf and abort the transfer (#951). Drain until
* avail_out != 0 too: a full output buffer can leave pending
* bytes that would otherwise leak into the next real deflate
* send and corrupt the stream (same condition as the data loop
* above). The discarded output is not sent: the receiver
* rebuilds the matching history itself in see_deflate_token(). */
do {
tx_strm.next_out = (Bytef *) obuf;
tx_strm.avail_out = AVAIL_OUT_SIZE(CHUNK_SIZE);
r = deflate(&tx_strm, Z_INSERT_ONLY);
if (r != Z_OK) {
rprintf(FERROR, "deflate on token returned %d (%d bytes left)\n",
r, tx_strm.avail_in);
exit_cleanup(RERR_STREAMIO);
}
} while (tx_strm.avail_in != 0 || tx_strm.avail_out == 0);
} while (toklen > 0);
}
}
Expand Down
Loading