diff --git a/build/Makefile.in b/build/Makefile.in index 7ea0cc971c..75f4e86b5f 100644 --- a/build/Makefile.in +++ b/build/Makefile.in @@ -413,6 +413,7 @@ HEADERS = src/moar.h \ src/instrument/line_coverage.h \ src/gen/config.h \ src/debug/debugserver.h \ + src/strings/siphash/csiphash.h \ src/strings/uthash_types.h \ src/strings/uthash.h \ 3rdparty/cmp/cmp.h \ @@ -516,6 +517,7 @@ install: all $(MKPATH) "$(DESTDIR)$(PREFIX)/include/moar/spesh" $(MKPATH) "$(DESTDIR)$(PREFIX)/include/moar/debug" $(MKPATH) "$(DESTDIR)$(PREFIX)/include/moar/strings" + $(MKPATH) "$(DESTDIR)$(PREFIX)/include/moar/strings/siphash" $(MKPATH) "$(DESTDIR)$(PREFIX)/include/moar/jit" $(MKPATH) "$(DESTDIR)$(PREFIX)/include/moar/instrument" $(CP) 3rdparty/*.h "$(DESTDIR)$(PREFIX)/include/moar" @@ -533,6 +535,7 @@ install: all $(CP) src/spesh/*.h "$(DESTDIR)$(PREFIX)/include/moar/spesh" $(CP) src/debug/*.h "$(DESTDIR)$(PREFIX)/include/moar/debug" $(CP) src/strings/*.h "$(DESTDIR)$(PREFIX)/include/moar/strings" + $(CP) src/strings/siphash/*.h "$(DESTDIR)$(PREFIX)/include/moar/strings/siphash" $(CP) src/jit/*.h "$(DESTDIR)$(PREFIX)/include/moar/jit" $(CP) src/instrument/*.h "$(DESTDIR)$(PREFIX)/include/moar/instrument" @install@ diff --git a/src/6model/reprs/MVMString.h b/src/6model/reprs/MVMString.h index 6a55b4ab8f..c559b6bd4a 100644 --- a/src/6model/reprs/MVMString.h +++ b/src/6model/reprs/MVMString.h @@ -47,7 +47,7 @@ struct MVMStringBody { MVMuint16 storage_type; MVMuint16 num_strands; MVMuint32 num_graphs; - MVMhashv cached_hash_code; + MVMHashv cached_hash_code; }; /* A strand of a string. */ diff --git a/src/core/instance.h b/src/core/instance.h index 1817b8a9c4..3ee459a21e 100644 --- a/src/core/instance.h +++ b/src/core/instance.h @@ -509,7 +509,7 @@ struct MVMInstance { /* Flag for if NFA debugging is enabled. */ MVMint8 nfa_debug_enabled; - /* Hash Secret which is used as the hash seed. This is to avoid denial of + /* Hash Secrets which is used as the hash seed. This is to avoid denial of * service type attacks. */ - MVMuint32 hashSecret; + MVMuint64 hashSecrets[2]; }; diff --git a/src/moar.c b/src/moar.c index 6d1ec2804a..90adbec157 100644 --- a/src/moar.c +++ b/src/moar.c @@ -85,7 +85,6 @@ MVMInstance * MVM_vm_create_instance(void) { char *jit_log, *jit_expr_disable, *jit_disable, *jit_bytecode_dir, *jit_last_frame, *jit_last_bb; char *dynvar_log; int init_stat; - MVMuint32 hashSecret; MVMuint64 now = MVM_platform_now(); /* Set up instance data structure. */ @@ -93,9 +92,11 @@ MVMInstance * MVM_vm_create_instance(void) { /* Create the main thread's ThreadContext and stash it. */ instance->main_thread = MVM_tc_create(NULL, instance); - MVM_getrandom(instance->main_thread, &hashSecret, sizeof(MVMuint32)); - instance->hashSecret ^= now; - instance->hashSecret ^= MVM_proc_getpid(instance->main_thread) * now; + /* Get the 128-bit hashSecret */ + MVM_getrandom(instance->main_thread, instance->hashSecrets, sizeof(MVMuint64) * 2); + /* Just in case MVM_getrandom didn't work, XOR it with some poorly randomized data */ + instance->hashSecrets[1] ^= now; + instance->hashSecrets[1] ^= MVM_proc_getpid(instance->main_thread) * now; instance->main_thread->thread_id = 1; /* Next thread to be created gets ID 2 (the main thread got ID 1). */ diff --git a/src/moar.h b/src/moar.h index c88baccd71..68566e90ae 100644 --- a/src/moar.h +++ b/src/moar.h @@ -84,7 +84,7 @@ typedef double MVMnum64; /* stuff for uthash */ #define uthash_fatal(msg) MVM_exception_throw_adhoc(tc, "internal hash error: " msg) -typedef uint32_t MVMhashv; +typedef MVMuint64 MVMHashv; #include "strings/uthash_types.h" diff --git a/src/profiler/heapsnapshot.c b/src/profiler/heapsnapshot.c index f1b0aea0ef..b2b8419038 100644 --- a/src/profiler/heapsnapshot.c +++ b/src/profiler/heapsnapshot.c @@ -176,14 +176,14 @@ static void saw(MVMThreadContext *tc, MVMHeapSnapshotState *ss, void *addr, MVMu MVMHeapSnapshotSeen *seen = MVM_calloc(1, sizeof(MVMHeapSnapshotSeen)); seen->address = addr; seen->idx = idx; - HASH_ADD_KEYPTR(hash_handle, ss->seen, (char *)&(seen->address), sizeof(void *), seen); + HASH_ADD_KEYPTR(hash_handle, ss->seen, &(seen->address), sizeof(void *), seen); } /* Checks for an entry in the seen hash. If we find an entry, write the index * into the index pointer passed. */ static MVMuint32 seen(MVMThreadContext *tc, MVMHeapSnapshotState *ss, void *addr, MVMuint64 *idx) { MVMHeapSnapshotSeen *entry; - HASH_FIND(hash_handle, ss->seen, (char *)&(addr), sizeof(void *), entry); + HASH_FIND(hash_handle, ss->seen, &addr, sizeof(void *), entry); if (entry) { *idx = entry->idx; return 1; diff --git a/src/strings/ops.c b/src/strings/ops.c index 8f07b637ca..ebdda093c4 100644 --- a/src/strings/ops.c +++ b/src/strings/ops.c @@ -2825,124 +2825,72 @@ MVMString * MVM_string_chr(MVMThreadContext *tc, MVMint64 cp) { * cache field of the string. Hashing code is derived from the Jenkins hash * implementation in uthash.h. */ typedef union { - MVMint32 graphs[3]; - unsigned char bytes[12]; + MVMuint32 graphs[2]; + MVMuint64 u64; } MVMJenHashGraphemeView; -MVM_STATIC_INLINE void MVM_hash_add_three (MVMJenHashGraphemeView *hash_block, MVMuint32 *hj_i, MVMuint32 *hj_j, MVMuint32 *hashv) { - *hj_i += hash_block->graphs[0]; - *hj_j += hash_block->graphs[1]; - *hashv += hash_block->graphs[2]; - HASH_JEN_MIX(*hj_i, *hj_j, *hashv); -} -MVM_STATIC_INLINE void MVM_hash_finish (MVMJenHashGraphemeView *hash_block, MVMuint32 *hj_i, MVMuint32 *hj_j, MVMuint32 *hashv, MVMStringIndex sgraphs, MVMStringIndex graphs_remaining) { - /* Mix in key length (in bytes, not graphemes). */ - *hashv += sgraphs * sizeof(MVMGrapheme32); - - /* Now handle trailing graphemes (must be 2, 1, or 0). */ - /* NOTE: this is weird since it changes the order in different cases. This - * is just replicating old functionality. */ - switch (graphs_remaining) { - case 2: - *hj_j += hash_block->graphs[0]; - *hj_i += hash_block->graphs[1]; - break; - /* Fallthrough */ - case 1: - *hj_i += hash_block->graphs[0]; - } - HASH_JEN_MIX(*hj_i, *hj_j, *hashv); - /* Because we check if MVMString->body.cached_hash_code == 0 to tell if - * we have not yet computed the hash code, ensure that hashv is never 0 - * by adding the length of the string to hashv iff hashv == 0. Since both - * the hashv and MVMStringIndex are both uint32, there should never be any - * overflow. Only problematic case is if the string is of length 0 and - * hashv is zero, though this is very very unlikely (if possible at all) - * and it should be very fast to calculate the hash so as to be negligible. */ - if (*hashv == 0) { - *hashv += sgraphs; - } -} +/* To force little endian representation on big endian machines, set + * MVM_HASH_FORCE_LITTLE_ENDIAN in strings/siphash/csiphash.h + * If this isn't set, MVM_MAYBE_TO_LITTLE_ENDIAN_32 does nothing (the default). + * This would mainly be useful for debugging or if there were some other reason + * someone cared that hashes were identical on different endian platforms */ void MVM_string_compute_hash_code(MVMThreadContext *tc, MVMString *s) { - /* The hash algorithm works in bytes. Since we can represent strings in a - * number of ways, and we want consistent hashing, then we'll read the - * strings using the grapheme iterator in groups of 3, using 32-bit ints - * for the graphemes no matter what the string really holds them as. Then - * we'll use the bytes view of that in the hashing function. */ - - MVMStringIndex graphs_remaining, sgraphs; - - /* Initialize hash state. */ - MVMhashv hashv = tc->instance->hashSecret; - MVMuint32 hj_i, hj_j; - hj_i = hj_j = 0x9e3779b9; - graphs_remaining = sgraphs = MVM_string_graphs(tc, s); - +#if defined(MVM_HASH_FORCE_LITTLE_ENDIAN) + const MVMuint64 key[2] = { + MVM_MAYBE_TO_LITTLE_ENDIAN_64(tc->instance->hashSecrets[0]), + MVM_MAYBE_TO_LITTLE_ENDIAN_64(tc->instance->hashSecrets[1]) + }; +#else + const MVMuint64 *key = tc->instance->hashSecrets; +#endif + MVMuint64 hash = 0; + MVMStringIndex s_len = MVM_string_graphs_nocheck(tc, s); switch (s->body.storage_type) { - case MVM_STRING_GRAPHEME_ASCII: - case MVM_STRING_GRAPHEME_8: { - int i; - MVMJenHashGraphemeView hash_block; - for (i = 0; 3 <= sgraphs - i; i += 3) { - hash_block.graphs[0] = s->body.storage.blob_8[i]; - hash_block.graphs[1] = s->body.storage.blob_8[i+1]; - hash_block.graphs[2] = s->body.storage.blob_8[i+2]; - MVM_hash_add_three( - &hash_block, - &hj_i, &hj_j, &hashv); - } - graphs_remaining = sgraphs - i; - switch (graphs_remaining) { - case 1: - hash_block.graphs[0] = s->body.storage.blob_8[i]; - break; - case 2: - hash_block.graphs[0] = s->body.storage.blob_8[i]; - hash_block.graphs[1] = s->body.storage.blob_8[i+1]; - break; + case MVM_STRING_GRAPHEME_8: + case MVM_STRING_GRAPHEME_ASCII: { + size_t i; + MVMJenHashGraphemeView gv; + siphash sh; + siphashinit(&sh, s_len * sizeof(MVMGrapheme32), key); + for (i = 0; i + 1 < s_len;) { + gv.graphs[0] = MVM_MAYBE_TO_LITTLE_ENDIAN_32(s->body.storage.blob_8[i++]); + gv.graphs[1] = MVM_MAYBE_TO_LITTLE_ENDIAN_32(s->body.storage.blob_8[i++]); + siphashadd64bits(&sh, gv.u64); } - MVM_hash_finish(&hash_block, &hj_i, &hj_j, &hashv, sgraphs, graphs_remaining); + /* If there is a final 32 bit grapheme pass it through, otherwise + * pass through 0. */ + hash = siphashfinish_32bits(&sh, + i < s_len + ? MVM_MAYBE_TO_LITTLE_ENDIAN_32(s->body.storage.blob_8[i]) : 0); break; } +#if !defined(MVM_HASH_FORCE_LITTLE_ENDIAN) case MVM_STRING_GRAPHEME_32: { - int i; - for (i = 0; 3 <= sgraphs - i; i += 3) { - MVM_hash_add_three( - (MVMJenHashGraphemeView*)(s->body.storage.blob_32 + i), - &hj_i, &hj_j, &hashv); - } - graphs_remaining = sgraphs - i; - MVM_hash_finish((MVMJenHashGraphemeView*)(s->body.storage.blob_32 + i), &hj_i, &hj_j, &hashv, sgraphs, graphs_remaining); + hash = siphash24( + (MVMuint8*)s->body.storage.blob_32, + s_len * sizeof(MVMGrapheme32), + key); break; } +#endif default: { + siphash sh; MVMGraphemeIter gi; - MVMJenHashGraphemeView hash_block; - /* Work through the string 3 graphemes at a time. */ + MVMJenHashGraphemeView gv; + size_t i; + siphashinit(&sh, s_len * sizeof(MVMGrapheme32), key); MVM_string_gi_init(tc, &gi, s); - while (3 <= graphs_remaining) { - hash_block.graphs[0] = MVM_string_gi_get_grapheme(tc, &gi); - hash_block.graphs[1] = MVM_string_gi_get_grapheme(tc, &gi); - hash_block.graphs[2] = MVM_string_gi_get_grapheme(tc, &gi); - MVM_hash_add_three( - &hash_block, - &hj_i, &hj_j, &hashv); - graphs_remaining -= 3; + for (i = 0; i + 1 < s_len; i += 2) { + gv.graphs[0] = MVM_MAYBE_TO_LITTLE_ENDIAN_32(MVM_string_gi_get_grapheme(tc, &gi)); + gv.graphs[1] = MVM_MAYBE_TO_LITTLE_ENDIAN_32(MVM_string_gi_get_grapheme(tc, &gi)); + siphashadd64bits(&sh, gv.u64); } - /* Now handle trailing graphemes (must be 2, 1, or 0). */ - switch (graphs_remaining) { - case 1: - hash_block.graphs[0] = MVM_string_gi_get_grapheme(tc, &gi); - break; - case 2: - hash_block.graphs[0] = MVM_string_gi_get_grapheme(tc, &gi); - hash_block.graphs[1] = MVM_string_gi_get_grapheme(tc, &gi); - break; - - } - MVM_hash_finish(&hash_block, &hj_i, &hj_j, &hashv, sgraphs, graphs_remaining); + hash = siphashfinish_32bits(&sh, + i < s_len + ? MVM_MAYBE_TO_LITTLE_ENDIAN_32(MVM_string_gi_get_grapheme(tc, &gi)) + : 0); + break; } } - /* Store computed hash value. */ - s->body.cached_hash_code = hashv; + s->body.cached_hash_code = hash; } diff --git a/src/strings/siphash/Makefile b/src/strings/siphash/Makefile new file mode 100644 index 0000000000..1f2c1cc42b --- /dev/null +++ b/src/strings/siphash/Makefile @@ -0,0 +1,4 @@ + +all: + gcc -O3 test.c -g -Wall -Wextra -ggdb -o siphashtest -D MVM_CAN_UNALIGNED_INT64 && ./siphashtest + gcc -O3 test.c -g -Wall -Wextra -ggdb -o siphashtest && ./siphashtest diff --git a/src/strings/siphash/csiphash.h b/src/strings/siphash/csiphash.h new file mode 100644 index 0000000000..190816f533 --- /dev/null +++ b/src/strings/siphash/csiphash.h @@ -0,0 +1,178 @@ +#include +/* + Copyright (c) 2013 Marek Majkowski + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. + + + Original location: + https://github.com/majek/csiphash/ + + Solution inspired by code from: + Samuel Neves (supercop/crypto_auth/siphash24/little) + djb (supercop/crypto_auth/siphash24/little2) + Jean-Philippe Aumasson (https://131002.net/siphash/siphash24.c) + + Modifications for MoarVM by Samantha McVey +*/ +/* Define this for our test.c test */ +#ifndef MVM_STATIC_INLINE +#define MVM_STATIC_INLINE static +#endif +struct siphash { + uint64_t v0; + uint64_t v1; + uint64_t v2; + uint64_t v3; + uint64_t b; +}; +typedef struct siphash siphash; +#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \ + __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +# define MVM_TO_LITTLE_ENDIAN_64(x) ((uint64_t)(x)) +# define MVM_TO_LITTLE_ENDIAN_32(x) ((uint32_t)(x)) +#elif defined(_WIN32) +/* Windows is always little endian, unless you're on xbox360 + http://msdn.microsoft.com/en-us/library/b0084kay(v=vs.80).aspx */ +# define MVM_TO_LITTLE_ENDIAN_64(x) ((uint64_t)(x)) +# define MVM_TO_LITTLE_ENDIAN_32(x) ((uint32_t)(x)) +#elif defined(__APPLE__) +# include +# define MVM_TO_LITTLE_ENDIAN_64(x) OSSwapLittleToHostInt64(x) +# define MVM_TO_LITTLE_ENDIAN_32(x) OSSwapLittleToHostInt32(x) +#else + /* See: http://sourceforge.net/p/predef/wiki/Endianness/ */ + # if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) + # include + # else + # include + # endif + # if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \ + __BYTE_ORDER == __LITTLE_ENDIAN + # define MVM_TO_LITTLE_ENDIAN_64(x) ((uint64_t)(x)) + # define MVM_TO_LITTLE_ENDIAN_32(x) ((uint32_t)(x)) + # else + # define MVM_TO_LITTLE_ENDIAN_64(x) le64toh(x) + # define MVM_TO_LITTLE_ENDIAN_32(x) le32toh(x) + # endif +#endif +#if defined(MVM_HASH_FORCE_LITTLE_ENDIAN) + #define MVM_MAYBE_TO_LITTLE_ENDIAN_64(x) MVM_TO_LITTLE_ENDIAN_64(x) + #define MVM_MAYBE_TO_LITTLE_ENDIAN_32(x) MVM_TO_LITTLE_ENDIAN_32(x) +#else + #define MVM_MAYBE_TO_LITTLE_ENDIAN_64(x) ((uint64_t)(x)) + #define MVM_MAYBE_TO_LITTLE_ENDIAN_32(x) ((uint32_t)(x)) +#endif +#ifndef MVM_CAN_UNALIGNED_INT64 + #include +#endif +#define ROTATE(x, b) (uint64_t)( ((x) << (b)) | ( (x) >> (64 - (b))) ) + +#define HALF_ROUND(a,b,c,d,s,t) \ + a += b; c += d; \ + b = ROTATE(b, s) ^ a; \ + d = ROTATE(d, t) ^ c; \ + a = ROTATE(a, 32); + +#define DOUBLE_ROUND(v0,v1,v2,v3) \ + HALF_ROUND(v0,v1,v2,v3,13,16); \ + HALF_ROUND(v2,v1,v0,v3,17,21); \ + HALF_ROUND(v0,v1,v2,v3,13,16); \ + HALF_ROUND(v2,v1,v0,v3,17,21); + +MVM_STATIC_INLINE void siphashinit (siphash *sh, size_t src_sz, const uint64_t key[2]) { + const uint64_t k0 = MVM_MAYBE_TO_LITTLE_ENDIAN_64(key[0]); + const uint64_t k1 = MVM_MAYBE_TO_LITTLE_ENDIAN_64(key[1]); + sh->b = (uint64_t)src_sz << 56; + sh->v0 = k0 ^ 0x736f6d6570736575ULL; + sh->v1 = k1 ^ 0x646f72616e646f6dULL; + sh->v2 = k0 ^ 0x6c7967656e657261ULL; + sh->v3 = k1 ^ 0x7465646279746573ULL; +} +MVM_STATIC_INLINE void siphashadd64bits (siphash *sh, const uint64_t in) { + const uint64_t mi = MVM_MAYBE_TO_LITTLE_ENDIAN_64(in); + sh->v3 ^= mi; + DOUBLE_ROUND(sh->v0,sh->v1,sh->v2,sh->v3); + sh->v0 ^= mi; +} +MVM_STATIC_INLINE uint64_t siphashfinish_last_part (siphash *sh, uint64_t t) { + sh->b |= MVM_MAYBE_TO_LITTLE_ENDIAN_64(t); + sh->v3 ^= sh->b; + DOUBLE_ROUND(sh->v0,sh->v1,sh->v2,sh->v3); + sh->v0 ^= sh->b; + sh->v2 ^= 0xff; + DOUBLE_ROUND(sh->v0,sh->v1,sh->v2,sh->v3); + DOUBLE_ROUND(sh->v0,sh->v1,sh->v2,sh->v3); + return (sh->v0 ^ sh->v1) ^ (sh->v2 ^ sh->v3); +} +MVM_STATIC_INLINE uint64_t siphashfinish_32bits (siphash *sh, const uint32_t src) { + uint64_t t = 0; +#ifdef MVM_CAN_UNALIGNED_INT64 + uint32_t *pt = (uint32_t*)&t; + *((uint32_t*)pt) = src; +#else + memcpy(&t, &src, sizeof(uint32_t)); +#endif + return siphashfinish_last_part(sh, t); +} +MVM_STATIC_INLINE uint64_t siphashfinish (siphash *sh, const uint8_t *src, size_t src_sz) { + const uint64_t *in = (uint64_t*)src; + uint64_t t = 0; + uint8_t *pt = (uint8_t *)&t; + uint8_t *m = (uint8_t *)in; + switch (src_sz) { + /* Falls through */ + case 7: pt[6] = m[6]; + /* Falls through */ + case 6: pt[5] = m[5]; + /* Falls through */ + case 5: pt[4] = m[4]; + /* Falls through */ + case 4: + *((uint32_t*)&pt[0]) = *((uint32_t*)&m[0]); + break; + case 3: pt[2] = m[2]; + /* Falls through */ + case 2: pt[1] = m[1]; + /* Falls through */ + case 1: pt[0] = m[0]; + } + return siphashfinish_last_part(sh, t); +} +MVM_STATIC_INLINE uint64_t siphash24(const uint8_t *src, size_t src_sz, const uint64_t key[2]) { + siphash sh; +#ifdef MVM_CAN_UNALIGNED_INT64 + const uint64_t *in = (uint64_t*)src; + siphashinit(&sh, src_sz, key); + while (src_sz >= 8) { + siphashadd64bits(&sh, *in); + in += 1; src_sz -= 8; + } +#else + const uint8_t *in = src; + siphashinit(&sh, src_sz, key); + while (src_sz >= 8) { + uint64_t in_64; + memcpy(&in_64, in, sizeof(uint64_t)); + siphashadd64bits(&sh, in_64); + in += 8; src_sz -= 8; + } +#endif + return siphashfinish(&sh, (uint8_t *)in, src_sz); +} diff --git a/src/strings/siphash/test.c b/src/strings/siphash/test.c new file mode 100644 index 0000000000..6c4dc6455c --- /dev/null +++ b/src/strings/siphash/test.c @@ -0,0 +1,115 @@ +#include +#include +#include +#include +#define MVM_HASH_FORCE_LITTLE_ENDIAN 1 +#include "csiphash.h" +uint64_t gettime_ns() { + struct timeval tv; + gettimeofday(&tv, NULL); + return (uint64_t)tv.tv_sec * 1000000000ULL + tv.tv_usec * 1000ULL; +} + +#define REPEATS 20000 + +uint64_t vectors[64] = { + 0x726fdb47dd0e0e31LLU, 0x74f839c593dc67fdLLU, 0x0d6c8009d9a94f5aLLU, 0x85676696d7fb7e2dLLU, + 0xcf2794e0277187b7LLU, 0x18765564cd99a68dLLU, 0xcbc9466e58fee3ceLLU, 0xab0200f58b01d137LLU, + 0x93f5f5799a932462LLU, 0x9e0082df0ba9e4b0LLU, 0x7a5dbbc594ddb9f3LLU, 0xf4b32f46226bada7LLU, + 0x751e8fbc860ee5fbLLU, 0x14ea5627c0843d90LLU, 0xf723ca908e7af2eeLLU, 0xa129ca6149be45e5LLU, + 0x3f2acc7f57c29bdbLLU, 0x699ae9f52cbe4794LLU, 0x4bc1b3f0968dd39cLLU, 0xbb6dc91da77961bdLLU, + 0xbed65cf21aa2ee98LLU, 0xd0f2cbb02e3b67c7LLU, 0x93536795e3a33e88LLU, 0xa80c038ccd5ccec8LLU, + 0xb8ad50c6f649af94LLU, 0xbce192de8a85b8eaLLU, 0x17d835b85bbb15f3LLU, 0x2f2e6163076bcfadLLU, + 0xde4daaaca71dc9a5LLU, 0xa6a2506687956571LLU, 0xad87a3535c49ef28LLU, 0x32d892fad841c342LLU, + 0x7127512f72f27cceLLU, 0xa7f32346f95978e3LLU, 0x12e0b01abb051238LLU, 0x15e034d40fa197aeLLU, + 0x314dffbe0815a3b4LLU, 0x027990f029623981LLU, 0xcadcd4e59ef40c4dLLU, 0x9abfd8766a33735cLLU, + 0x0e3ea96b5304a7d0LLU, 0xad0c42d6fc585992LLU, 0x187306c89bc215a9LLU, 0xd4a60abcf3792b95LLU, + 0xf935451de4f21df2LLU, 0xa9538f0419755787LLU, 0xdb9acddff56ca510LLU, 0xd06c98cd5c0975ebLLU, + 0xe612a3cb9ecba951LLU, 0xc766e62cfcadaf96LLU, 0xee64435a9752fe72LLU, 0xa192d576b245165aLLU, + 0x0a8787bf8ecb74b2LLU, 0x81b3e73d20b49b6fLLU, 0x7fa8220ba3b2eceaLLU, 0x245731c13ca42499LLU, + 0xb78dbfaf3a8d83bdLLU, 0xea1ad565322a1a0bLLU, 0x60e61c23a3795013LLU, 0x6606d7e446282b93LLU, + 0x6ca4ecb15c5f91e1LLU, 0x9f626da15c9625f3LLU, 0xe51b38608ef25f57LLU, 0x958a324ceb064572LLU, +}; +#define MVMGrapheme32 int32_t +#define MVMint32 int32_t +#define MVMuint64 uint64_t +#define MVMuint8 uint8_t +typedef union { + MVMint32 graphs[2]; + MVMuint8 bytes[4]; + uint64_t u64; +} MVMJenHashGraphemeView; +int testmvm (void) { + size_t i; + int rep_count = 0; + char key[16] = {0,1,2,3,4,5,6,7,8,9,0xa,0xb,0xc,0xd,0xe,0xf}; + size_t s_len = 9; + int32_t Grapheme32[9] = { 171, -72, 69, 76, 76, 79, 9829, 9826, 187 }; + int32_t Grapheme32_LE[9] = { 171, -72, 69, 76, 76, 79, 9829, 9826, 187 }; + for (i = 0; i < 9; i++) { + Grapheme32[i] = MVM_TO_LITTLE_ENDIAN_32(Grapheme32[i]); + } + for (rep_count = 0; rep_count < REPEATS; rep_count++) { + /* Using siphashfinish */ + { + siphash sh; + MVMuint64 hash; + MVMJenHashGraphemeView gv; + siphashinit(&sh, s_len * sizeof(MVMGrapheme32), (uint64_t*)key); + for (i = 0; i + 1 < s_len;) { + gv.graphs[0] = MVM_TO_LITTLE_ENDIAN_32(Grapheme32[i++]); + gv.graphs[1] = MVM_TO_LITTLE_ENDIAN_32(Grapheme32[i++]); + siphashadd64bits(&sh, gv.u64); + } + if (i < s_len) { + //printf("some left"); + gv.graphs[0] = MVM_TO_LITTLE_ENDIAN_32(Grapheme32[i]); + hash = siphashfinish(&sh, gv.bytes, sizeof(MVMGrapheme32)); + } + else { + hash = siphashfinish(&sh, NULL, 0); + } + assert(hash == 4563223716124497198LLU); + } + /* Using siphashfinish_32bits */ + { + siphash sh; + MVMuint64 hash; + MVMJenHashGraphemeView gv; + siphashinit(&sh, s_len * sizeof(MVMGrapheme32), (uint64_t*)key); + for (i = 0; i + 1 < s_len;) { + gv.graphs[0] = MVM_TO_LITTLE_ENDIAN_32(Grapheme32[i++]); + gv.graphs[1] = MVM_TO_LITTLE_ENDIAN_32(Grapheme32[i++]); + siphashadd64bits(&sh, gv.u64); + } + hash = siphashfinish_32bits(&sh, i < s_len ? MVM_TO_LITTLE_ENDIAN_32(Grapheme32[i]) : 0); + assert(hash == 4563223716124497198LLU); + } + { + assert(siphash24((uint8_t*)Grapheme32_LE, 9 * sizeof(int32_t), (uint64_t*)key) == 4563223716124497198LLU); + } + } + return 0; +} +int main() { + int i; + char key[16] = {0,1,2,3,4,5,6,7,8,9,0xa,0xb,0xc,0xd,0xe,0xf}; + uint8_t plaintext[64]; + for (i=0; i<64; i++) plaintext[i] = i; + int j; + uint64_t t0, t1, t2, t3; + t0 = gettime_ns(); + for (j=0; j /* memcmp,strlen */ #include /* ptrdiff_t */ #include /* exit() */ - +#include "strings/siphash/csiphash.h" /* These macros use decltype or the earlier __typeof GNU extension. As decltype is only available in newer compilers (VS2010 or gcc 4.3+ when compiling c++ source) this code uses whatever method is needed @@ -117,7 +117,7 @@ do { } while (0) #define HASH_FIND(hh,head,keyptr,keylen,out) \ do { \ - MVMhashv _hf_hashv; \ + MVMHashv _hf_hashv; \ unsigned _hf_bkt; \ out=NULL; \ if (head) { \ @@ -128,7 +128,7 @@ do { } while (0) #define HASH_FIND_prev(hh,head,keyptr,keylen,out,prev) \ do { \ - MVMhashv _hf_hashv; \ + MVMHashv _hf_hashv; \ unsigned _hf_bkt; \ out=NULL; \ prev=NULL; \ @@ -146,21 +146,21 @@ do { * or % to get the bucket number because it uses the full bit width of the hash. * If the size of the hashv is changed we will need to change max_hashv_div_phi, * to be max_hashv / phi rounded to the nearest *odd* number. - * max_hashv / phi = 2654435769 */ -const static uint32_t max_hashv_div_phi = UINT32_C(2654435769); + * max_hashv / phi = 11400714819323198485 */ +#define max_hashv_div_phi UINT64_C(11400714819323198485) #define DETERMINE_BUCKET_FIB(hashv, offset) \ - (((hashv) * max_hashv_div_phi) >> ((sizeof(MVMhashv)*8) - offset)) + (((hashv) * max_hashv_div_phi) >> ((sizeof(MVMHashv)*8) - offset)) #define WHICH_BUCKET(hashv, num_bkts, offset)\ (DETERMINE_BUCKET_FIB((hashv), (offset))) #define HASH_FIND_VM_STR(tc,hh,head,key,out) \ do { \ - MVMhashv _hf_hashv; \ + MVMHashv _hf_hashv; \ unsigned _hf_bkt; \ out=NULL; \ if (head) { \ - MVMhashv cached_hash = (key)->body.cached_hash_code; \ + MVMHashv cached_hash = (key)->body.cached_hash_code; \ if (cached_hash) { \ _hf_hashv = cached_hash; \ _hf_bkt = WHICH_BUCKET((_hf_hashv), (head)->hh.tbl->num_buckets, (head)->hh.tbl->log2_num_buckets); \ @@ -175,12 +175,12 @@ do { #define HASH_FIND_VM_STR_prev(tc,hh,head,key,out, prev) \ do { \ - MVMhashv _hf_hashv; \ + MVMHashv _hf_hashv; \ unsigned _hf_bkt; \ out=NULL; \ prev=NULL; \ if (head) { \ - MVMhashv cached_hash = (key)->body.cached_hash_code; \ + MVMHashv cached_hash = (key)->body.cached_hash_code; \ if (cached_hash) { \ _hf_hashv = cached_hash; \ _hf_bkt = WHICH_BUCKET((_hf_hashv), (head)->hh.tbl->num_buckets, (head)->hh.tbl->log2_num_buckets); \ @@ -263,7 +263,7 @@ do { #define HASH_ADD_KEYPTR_VM_STR(tc,hh,head,key_in,add) \ do { \ unsigned _ha_bkt; \ - MVMhashv cached_hash = (key_in)->body.cached_hash_code; \ + MVMHashv cached_hash = (key_in)->body.cached_hash_code; \ (add)->hh.key = (key_in); \ if (!(head)) { \ head = (add); \ @@ -375,9 +375,9 @@ do { #define HASH_FSCK(hh,head) #endif -/* Use Jenkin's hash as the hash function. */ -#define HASH_FCN HASH_JEN -#define HASH_FCN_VM_STR HASH_JEN_VM_STR +/* Use Siphash as the hash function. */ +#define HASH_FCN HASH_SIP +#define HASH_FCN_VM_STR HASH_SIP_VM_STR #define HASH_JEN_MIX(a,b,c) \ do { \ @@ -391,12 +391,16 @@ do { b -= c; b -= a; b ^= ( a << 10 ); \ c -= a; c -= b; c ^= ( b >> 15 ); \ } while (0) - +#define HASH_SIP(key, keylen, num_bkts,hashv, bkt, offset) \ +do { \ + hashv = siphash24((MVMuint8*)key, keylen, tc->instance->hashSecrets); \ + bkt = WHICH_BUCKET(hashv, num_bkts, offset); \ +} while (0) #define HASH_JEN(key,keylen,num_bkts,hashv,bkt,offset) \ do { \ unsigned _hj_i,_hj_j,_hj_k; \ unsigned char *_hj_key=(unsigned char*)(key); \ - hashv = tc->instance->hashSecret; \ + hashv = tc->instance->hashSecrets[1]; \ _hj_i = _hj_j = 0x9e3779b9; \ _hj_k = (unsigned)(keylen); \ while (_hj_k >= 12) { \ @@ -436,7 +440,7 @@ do { bkt = WHICH_BUCKET(hashv, num_bkts, offset); \ } while(0) -#define HASH_JEN_VM_STR(tc,key,num_bkts,hashv,bkt,offset) \ +#define HASH_SIP_VM_STR(tc,key,num_bkts,hashv,bkt,offset) \ do { \ MVM_string_compute_hash_code(tc, key); \ hashv = (key)->body.cached_hash_code; \ diff --git a/src/strings/uthash_types.h b/src/strings/uthash_types.h index 9686b73c82..0cdc4f0d5c 100644 --- a/src/strings/uthash_types.h +++ b/src/strings/uthash_types.h @@ -67,6 +67,6 @@ typedef struct UT_hash_handle { * low-level hashes, MVMString * for high level * hashes) */ unsigned keylen; /* enclosing struct's key len */ - MVMhashv hashv; /* result of hash-fcn(key) */ + MVMHashv hashv; /* result of hash-fcn(key) */ } UT_hash_handle; #endif