Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,13 @@ VERSION=@RSYNC_VERSION@
.SUFFIXES:
.SUFFIXES: .c .o

SIMD_x86_64=simd-checksum-x86_64.o lib/md5-asm-x86_64.o
SIMD_x86_64=simd-checksum-x86_64.o simd-md5-parallel-x86_64.o lib/md5-asm-x86_64.o

GENFILES=configure.sh aclocal.m4 config.h.in proto.h proto.h-tstamp rsync.1 rsync.1.html \
rsync-ssl.1 rsync-ssl.1.html rsyncd.conf.5 rsyncd.conf.5.html
HEADERS=byteorder.h config.h errcode.h proto.h rsync.h ifuncs.h itypes.h inums.h \
lib/pool_alloc.h
LIBOBJ=lib/wildmatch.o lib/compat.o lib/snprintf.o lib/mdfour.o lib/md5.o \
LIBOBJ=lib/wildmatch.o lib/compat.o lib/snprintf.o lib/mdfour.o lib/md5.o lib/md5p8.o \
lib/permstring.o lib/pool_alloc.o lib/sysacls.o lib/sysxattrs.o @LIBOBJS@
zlib_OBJS=zlib/deflate.o zlib/inffast.o zlib/inflate.o zlib/inftrees.o \
zlib/trees.o zlib/zutil.o zlib/adler32.o zlib/compress.o zlib/crc32.o
Expand Down Expand Up @@ -133,6 +133,9 @@ rounding.h: rounding.c rsync.h proto.h
simd-checksum-x86_64.o: simd-checksum-x86_64.cpp
$(CXX) $(CXXFLAGS) $(CPPFLAGS) -c -o $@ $(srcdir)/simd-checksum-x86_64.cpp

simd-md5-parallel-x86_64.o: simd-md5-parallel-x86_64.cpp
$(CXX) $(CXXFLAGS) $(CPPFLAGS) -c -o $@ $(srcdir)/simd-md5-parallel-x86_64.cpp

lib/md5-asm-x86_64.o: lib/md5-asm-x86_64.s
$(CC) -c -o $@ $(srcdir)/lib/md5-asm-x86_64.s

Expand Down
44 changes: 42 additions & 2 deletions checksum.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ struct name_num_obj valid_checksums = {
{ CSUM_XXH64, "xxh64", NULL },
{ CSUM_XXH64, "xxhash", NULL },
#endif
{ CSUM_MD5P8, "md5p8", NULL },
{ CSUM_MD5, "md5", NULL },
{ CSUM_MD4, "md4", NULL },
{ CSUM_NONE, "none", NULL },
Expand Down Expand Up @@ -131,6 +132,7 @@ int csum_len_for_type(int cst, BOOL flist_csum)
case CSUM_MD4_OLD:
case CSUM_MD4_BUSTED:
return MD4_DIGEST_LEN;
case CSUM_MD5P8:
case CSUM_MD5:
return MD5_DIGEST_LEN;
#ifdef SUPPORT_XXHASH
Expand All @@ -156,6 +158,7 @@ int canonical_checksum(int csum_type)
case CSUM_MD4_BUSTED:
break;
case CSUM_MD4:
case CSUM_MD5P8:
case CSUM_MD5:
return -1;
#ifdef SUPPORT_XXHASH
Expand All @@ -168,7 +171,9 @@ int canonical_checksum(int csum_type)
return 0;
}

#ifndef HAVE_SIMD /* See simd-checksum-*.cpp. */
#ifdef HAVE_SIMD /* See simd-checksum-*.cpp. */
#define get_checksum2 get_checksum2_nosimd
#else
/*
a simple 32 bit checksum that can be updated from either end
(inspired by Mark Adler's Adler-32 checksum)
Expand All @@ -189,16 +194,26 @@ uint32 get_checksum1(char *buf1, int32 len)
}
return (s1 & 0xffff) + (s2 << 16);
}

/* No-op stub for builds where HAVE_SIMD is not defined (this definition sits
 * in the #else branch of the HAVE_SIMD check). All three arguments are
 * ignored, so callers may invoke it unconditionally. */
void checksum2_enable_prefetch(UNUSED(struct map_struct *map), UNUSED(OFF_T len), UNUSED(int32 blocklen))
{
}

/* No-op stub for builds where HAVE_SIMD is not defined (this definition sits
 * in the #else branch of the HAVE_SIMD check). It takes no arguments and has
 * no effect.  The parameter list is spelled (void) so the definition is a
 * real prototype rather than an old-style unprototyped declarator. */
void checksum2_disable_prefetch(void)
{
}
#endif

void get_checksum2(char *buf, int32 len, char *sum)
/* Renamed to get_checksum2_nosimd() with HAVE_SIMD */
void get_checksum2(char *buf, int32 len, char *sum, UNUSED(OFF_T prefetch_offset))
{
switch (xfersum_type) {
#ifdef SUPPORT_XXHASH
case CSUM_XXH64:
SIVAL64(sum, 0, XXH64(buf, len, checksum_seed));
break;
#endif
case CSUM_MD5P8: // == CSUM_MD5 for checksum2
case CSUM_MD5: {
MD5_CTX m5;
uchar seedbuf[4];
Expand Down Expand Up @@ -314,6 +329,21 @@ void file_checksum(const char *fname, const STRUCT_STAT *st_p, char *sum)
break;
}
#endif
case CSUM_MD5P8: {
MD5P8_CTX m5p8;

MD5P8_Init(&m5p8);

for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE)
MD5P8_Update(&m5p8, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);

remainder = (int32)(len - i);
if (remainder > 0)
MD5P8_Update(&m5p8, (uchar *)map_ptr(buf, i, remainder), remainder);

MD5P8_Final((uchar *)sum, &m5p8);
break;
}
case CSUM_MD5: {
MD5_CTX m5;

Expand Down Expand Up @@ -389,6 +419,7 @@ static union {
#ifdef SUPPORT_XXHASH
static XXH64_state_t* xxh64_state;
#endif
static MD5P8_CTX m5p8;
static int cursum_type;

void sum_init(int csum_type, int seed)
Expand All @@ -407,6 +438,9 @@ void sum_init(int csum_type, int seed)
XXH64_reset(xxh64_state, 0);
break;
#endif
case CSUM_MD5P8:
MD5P8_Init(&m5p8);
break;
case CSUM_MD5:
MD5_Init(&ctx.m5);
break;
Expand Down Expand Up @@ -449,6 +483,9 @@ void sum_update(const char *p, int32 len)
XXH64_update(xxh64_state, p, len);
break;
#endif
case CSUM_MD5P8:
MD5P8_Update(&m5p8, (uchar *)p, len);
break;
case CSUM_MD5:
MD5_Update(&ctx.m5, (uchar *)p, len);
break;
Expand Down Expand Up @@ -503,6 +540,9 @@ int sum_end(char *sum)
SIVAL64(sum, 0, XXH64_digest(xxh64_state));
break;
#endif
case CSUM_MD5P8:
MD5P8_Final((uchar *)sum, &m5p8);
break;
case CSUM_MD5:
MD5_Final((uchar *)sum, &ctx.m5);
break;
Expand Down
3 changes: 3 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,9 @@ if test x"$enable_simd" != x"no"; then
CXX_VERSION=`echo "$CXX_VERSION" | sed 's/.*version //g' | sed 's/\..*//g'`
if test "$CXX_VERSION" -ge "7"; then
CXX_OK=yes

# without -fno-slp-vectorize, clang 10 will halve AVX2 performance
CXXFLAGS="$CXXFLAGS -fno-slp-vectorize"
fi
;;
*)
Expand Down
12 changes: 8 additions & 4 deletions generator.c
Original file line number Diff line number Diff line change
Expand Up @@ -708,10 +708,12 @@ static int generate_and_send_sums(int fd, OFF_T len, int f_out, int f_copy)
if (append_mode > 0 && f_copy < 0)
return 0;

if (len > 0)
if (len > 0) {
mapbuf = map_file(fd, len, MAX_MAP_SIZE, sum.blength);
else
checksum2_enable_prefetch(mapbuf, len, sum.blength);
} else {
mapbuf = NULL;
}

for (i = 0; i < sum.count; i++) {
int32 n1 = (int32)MIN(len, (OFF_T)sum.blength);
Expand All @@ -729,7 +731,7 @@ static int generate_and_send_sums(int fd, OFF_T len, int f_out, int f_copy)
}

sum1 = get_checksum1(map, n1);
get_checksum2(map, n1, sum2);
get_checksum2(map, n1, sum2, offset - n1);

if (DEBUG_GTE(DELTASUM, 3)) {
rprintf(FINFO,
Expand All @@ -741,8 +743,10 @@ static int generate_and_send_sums(int fd, OFF_T len, int f_out, int f_copy)
write_buf(f_out, sum2, sum.s2length);
}

if (mapbuf)
if (mapbuf) {
unmap_file(mapbuf);
checksum2_disable_prefetch();
}

return 0;
}
Expand Down
128 changes: 128 additions & 0 deletions lib/md5p8.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
/*
* MD5-based hash friendly to parallel processing, reference implementation
*
* Author: Jorrit Jongma, 2020
*
* Released in the public domain falling back to the MIT license
* ( http://www.opensource.org/licenses/MIT ) in case public domain does not
* apply in your country.
*/
/*
* MD5P8 is an MD5-based hash friendly to parallel processing. The input
* stream is divided into 8 independent streams. For each 512 bytes of input,
* the first 64 bytes are sent to the first stream, the second 64 bytes to
* the second stream, etc. The input stream is padded with zeros to the next
* multiple of 512 bytes, then a normal MD5 hash is computed on a buffer
* containing the A, B, C, and D states of the 8 individual streams, followed
* by the (unpadded) length of the input.
*
* On non-SIMD accelerated CPUs the performance of MD5P8 is slightly lower
* than normal MD5 (particularly on files smaller than 10 kB), but with
* SIMD-based parallel processing it can be two to six times as fast. Even in
* the best-case scenario, xxHash is still at least twice as fast and should
* be preferred when available.
*/

#include "rsync.h"

#ifdef HAVE_SIMD
#define MD5P8_Init MD5P8_Init_c
#define MD5P8_Update MD5P8_Update_c
#define MD5P8_Final MD5P8_Final_c
#endif

/* Yield a pointer to the index-th MD5_CTX stored inside ctx->context_storage.
 * Each MD5_CTX needs to be 8-byte aligned, so the start address of the
 * storage is rounded up to the next multiple of 8 and every slot occupies
 * sizeof(MD5_CTX) rounded up to a multiple of 8.  The functions in this file
 * use index values 0 through 7. */
#define MD5P8_Contexts_c(ctx, index) ((MD5_CTX*)((((uintptr_t)((ctx)->context_storage) + 7) & ~7) + (index)*((sizeof(MD5_CTX) + 7) & ~7)))

/* Prepare ctx for a new hash: mark the staging buffer as empty, point the
 * round-robin index at stream 0 and initialise all 8 per-stream MD5
 * contexts. */
void MD5P8_Init(MD5P8_CTX *ctx)
{
	int lane = 0;

	ctx->next = 0;
	ctx->used = 0;

	while (lane < 8) {
		MD5_Init(MD5P8_Contexts_c(ctx, lane));
		lane++;
	}
}

/* Feed length bytes from input into the hash.
 *
 * Data is consumed in 64-byte blocks; each complete block is passed to the
 * MD5 context selected by ctx->next, which then advances round-robin through
 * the 8 streams.  Bytes that do not yet make up a full block are kept in
 * ctx->buffer (ctx->used counts them) until a later call completes the
 * block. */
void MD5P8_Update(MD5P8_CTX *ctx, const uchar *input, uint32 length)
{
	uint32 off = 0;			/* bytes of input consumed so far */
	uint32 remaining = length;	/* bytes of input still to consume */

	/* Either a block is already partially buffered, or the input is too
	 * short to form a block on its own: append to the staging buffer. */
	if (ctx->used != 0 || remaining < 64) {
		uint32 room = 64 - ctx->used;
		uint32 take = remaining < room ? remaining : room;

		memmove(ctx->buffer + ctx->used, input, take);
		ctx->used += take;
		off += take;
		remaining -= take;

		if (ctx->used == 64) {
			MD5_Update(MD5P8_Contexts_c(ctx, ctx->next), ctx->buffer, 64);
			ctx->next = (ctx->next + 1) % 8;
			ctx->used = 0;
		}
	}

	/* Hash every whole block straight from the caller's memory. */
	for (; remaining >= 64; off += 64, remaining -= 64) {
		MD5_Update(MD5P8_Contexts_c(ctx, ctx->next), input + off, 64);
		ctx->next = (ctx->next + 1) % 8;
	}

	/* Stash the tail for the next call. */
	if (remaining != 0) {
		memcpy(ctx->buffer, input + off, remaining);
		ctx->used = remaining;
	}
}

/* Finish the hash and store the MD5_DIGEST_LEN-byte result in digest.
 *
 * Steps:
 *  1. Zero-fill any partially buffered block, then feed all-zero blocks
 *     until the round-robin index is back at stream 0.  The number of
 *     padding bytes added is tracked in "sub".
 *  2. Copy the A/B/C/D words of the 8 stream contexts into a 34-word
 *     buffer and sum their length counters into low/high.
 *  3. Subtract the padding from the summed length, store it in the last
 *     two words, and run a plain MD5 over the 34-word buffer. */
void MD5P8_Final(uchar digest[MD5_DIGEST_LEN], MD5P8_CTX *ctx)
{
	uchar state[34*4] = {0};
	uint32 low = 0, high = 0;
	uint32 sub = 0;
	MD5_CTX outer;
	int i;

	if (ctx->used) {
		uchar zeros[64];

		sub = 64 - ctx->used;
		memset(zeros, 0, 64);
		MD5P8_Update(ctx, zeros, 64 - ctx->used);
	}

	memset(ctx->buffer, 0, 64);
	for (; ctx->next != 0; sub += 64)
		MD5P8_Update(ctx, ctx->buffer, 64);

	for (i = 0; i < 8; i++) {
		MD5_CTX *lane = MD5P8_Contexts_c(ctx, i);
		int base = i*16;

		/* 64-bit add of this stream's counter, carrying into high. */
#ifdef USE_OPENSSL
		if (low + lane->Nl < low) high++;
		low += lane->Nl;
		high += lane->Nh;
#else
		if (low + lane->totalN < low) high++;
		low += lane->totalN;
		high += lane->totalN2;
#endif
		SIVALu(state, base, lane->A);
		SIVALu(state, base + 4, lane->B);
		SIVALu(state, base + 8, lane->C);
		SIVALu(state, base + 12, lane->D);
	}

	/* NOTE(review): the non-OpenSSL counters are multiplied by 8 here,
	 * presumably converting a byte count to a bit count, while Nl/Nh are
	 * used as-is -- confirm against both MD5 implementations. */
#ifndef USE_OPENSSL
	high = (low >> 29) | (high << 3);
	low = (low << 3);
#endif

	/* Remove the padding (scaled by 8 as well), borrowing from high. */
	sub <<= 3;
	if (low - sub > low) high--;
	low -= sub;

	SIVALu(state, 32*4, low);
	SIVALu(state, 33*4, high);

	MD5_Init(&outer);
	MD5_Update(&outer, state, 34*4);
	MD5_Final(digest, &outer);
}
12 changes: 12 additions & 0 deletions lib/mdigest.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#define CSUM_MD4 4
#define CSUM_MD5 5
#define CSUM_XXH64 6
#define CSUM_MD5P8 7

typedef struct {
uint32 A, B, C, D;
Expand All @@ -40,3 +41,14 @@ void md5_begin(md_context *ctx);
void md5_update(md_context *ctx, const uchar *input, uint32 length);
void md5_result(md_context *ctx, uchar digest[MD5_DIGEST_LEN]);
#endif

/* Hashing state for MD5P8 (used by MD5P8_Init/MD5P8_Update/MD5P8_Final). */
typedef struct {
/* Raw storage for the 8 per-stream MD5_CTX objects; lib/md5p8.c locates
 * them inside this array at 8-byte-aligned addresses. */
uchar context_storage[1024];
/* Staging area for input that does not yet fill a 64-byte block.
 * NOTE(review): lib/md5p8.c only touches the first 64 bytes; the rest is
 * presumably for the SIMD implementation -- confirm. */
uchar buffer[512];
/* Number of bytes currently held in buffer. */
unsigned int used;
/* Index (0-7) of the stream that receives the next 64-byte block. */
unsigned int next;
} MD5P8_CTX;

void MD5P8_Init(MD5P8_CTX *ctx);
void MD5P8_Update(MD5P8_CTX *ctx, const uchar *input, uint32 length);
void MD5P8_Final(uchar digest[MD5_DIGEST_LEN], MD5P8_CTX *ctx);
8 changes: 6 additions & 2 deletions match.c
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,8 @@ static void hash_search(int f,struct sum_struct *s,
if (DEBUG_GTE(DELTASUM, 3))
rprintf(FINFO, "sum=%.8x k=%ld\n", sum, (long)k);

checksum2_enable_prefetch(buf, len, s->blength);

offset = aligned_offset = aligned_i = 0;

end = len + 1 - s->sums[s->count-1].len;
Expand Down Expand Up @@ -228,7 +230,7 @@ static void hash_search(int f,struct sum_struct *s,

if (!done_csum2) {
map = (schar *)map_ptr(buf,offset,l);
get_checksum2((char *)map,l,sum2);
get_checksum2((char *)map, l, sum2, offset);
done_csum2 = 1;
}

Expand Down Expand Up @@ -270,7 +272,7 @@ static void hash_search(int f,struct sum_struct *s,
sum = get_checksum1((char *)map, l);
if (sum != s->sums[i].sum1)
goto check_want_i;
get_checksum2((char *)map, l, sum2);
get_checksum2((char *)map, l, sum2, aligned_offset);
if (memcmp(sum2, s->sums[i].sum2, s->s2length) != 0)
goto check_want_i;
/* OK, we have a re-alignment match. Bump the offset
Expand Down Expand Up @@ -337,6 +339,8 @@ static void hash_search(int f,struct sum_struct *s,
matched(f, s, buf, offset - s->blength, -2);
} while (++offset < end);

checksum2_disable_prefetch();

matched(f, s, buf, len, -1);
map_ptr(buf, len-1, 1);
}
Expand Down
Loading