From b2eb9e1e7134625c5a6963abea96241ee8ce8ecf Mon Sep 17 00:00:00 2001 From: Samantha McVey Date: Thu, 12 Apr 2018 01:20:30 -0700 Subject: [PATCH 1/4] Randomly seed a hash secret to mitigate ease of DOS attack Without a random hash seed it is easy for an attacker to generate strings which will result in the same hash. This devolves to O(n**2) time for the hash insertion and is increased even more by the fact that a string must be compared with all strings in the bucket until the right string is found. This attack is done by creating a function that essentially is our hashing function backward. We hash our target string, `t`. We then use random 3 character sequences (in our case graphemes) and plug them into our backward hashing function along with the hash for our target `t`. The backward hash and the random character sequence are stored in a dictionary and the process is repeated until we have a very large number of backward hashes and random 3 grapheme prefixes. We can then use this dictionary to construct successively longer strings (or shorter ones, if we so desire) which have the same hash as our target string `t`. This has been fixed in most programming languages (Python, Ruby, Perl), and several CVEs have been issued over the years for this exploit. It may also be a good idea to later implement a stronger hashing function. Many languages are now using SipHash which is meant to protect against an attacker discovering a hash secret remotely. This change decreases the ease of this attack and makes the hash secret unpredictable. Randomness source: We prefer function calls rather than reading from /dev/urandom. Reasons include: not having to open a file descriptor and that /dev/urandom may not exist if we are in a chroot. Since we don't want to stop startup of MoarVM, if we have to fall back to /dev/urandom because the OS doesn't support the function call, we continue starting MoarVM normally. 
Linux, FreeBSD, OpenBSD and MacOS all use system provided random calls to get the data rather than having to open /dev/urandom. All these OS's guarantee these to be non-blocking, though MacOS's documentation does not comment on it. Whether the calls block is primarily a concern during very early boot which is why Python 3 makes sure to use non-blocking calls when its hash secret is seeded at interpreter startup. If not available we fall back to using /dev/urandom on Unix like OS's. This change was tested on Linux both pre-addition of `getrandom()` and after, as well as Solaris, FreeBSD and OpenBSD. CI testing for Windows and MacOS looks good. All Unix-like systems are supported due to the /dev/urandom fallback, and Windows is supported back to Windows 95 with the API we use. --- 3rdparty/uthash.h | 2 +- build/Makefile.in | 2 + src/core/instance.h | 4 ++ src/moar.c | 9 ++- src/platform/random.c | 142 ++++++++++++++++++++++++++++++++++++++++++ src/platform/random.h | 1 + src/strings/ops.c | 2 +- 7 files changed, 159 insertions(+), 3 deletions(-) create mode 100644 src/platform/random.c create mode 100644 src/platform/random.h diff --git a/3rdparty/uthash.h b/3rdparty/uthash.h index 64b777aecb..6878b74fc2 100644 --- a/3rdparty/uthash.h +++ b/3rdparty/uthash.h @@ -295,7 +295,7 @@ do { do { \ unsigned _hj_i,_hj_j,_hj_k; \ unsigned char *_hj_key=(unsigned char*)(key); \ - hashv = 0xfeedbeef; \ + hashv = tc->instance->hashSecret; \ _hj_i = _hj_j = 0x9e3779b9; \ _hj_k = (unsigned)(keylen); \ while (_hj_k >= 12) { \ diff --git a/build/Makefile.in b/build/Makefile.in index 1915f88e39..2d18a90096 100644 --- a/build/Makefile.in +++ b/build/Makefile.in @@ -225,6 +225,7 @@ OBJECTS = src/core/callsite@obj@ \ src/instrument/crossthreadwrite@obj@ \ src/instrument/line_coverage@obj@ \ src/platform/sys@obj@ \ + src/platform/random@obj@ \ src/moar@obj@ \ @platform@ \ @jit_obj@ @@ -387,6 +388,7 @@ HEADERS = src/moar.h \ src/platform/sys.h \ src/platform/setjmp.h \ src/platform/memmem.h \ + 
src/platform/random.h \ src/jit/graph.h \ src/jit/label.h \ src/jit/expr.h \ diff --git a/src/core/instance.h b/src/core/instance.h index 2ece94eefb..afdd91e745 100644 --- a/src/core/instance.h +++ b/src/core/instance.h @@ -508,4 +508,8 @@ struct MVMInstance { /* Flag for if NFA debugging is enabled. */ MVMint8 nfa_debug_enabled; + + /* Hash Secret which is used as the hash seed. This is to avoid denial of + * service type attacks. */ + MVMuint32 hashSecret; }; diff --git a/src/moar.c b/src/moar.c index 447b0572e0..3cc9ddda15 100644 --- a/src/moar.c +++ b/src/moar.c @@ -1,6 +1,7 @@ #include "moar.h" #include - +#include "platform/random.h" +#include "platform/time.h" #if defined(_MSC_VER) #define snprintf _snprintf #endif @@ -84,12 +85,18 @@ MVMInstance * MVM_vm_create_instance(void) { char *jit_log, *jit_expr_disable, *jit_disable, *jit_bytecode_dir, *jit_last_frame, *jit_last_bb; char *dynvar_log; int init_stat; + MVMuint32 hashSecret; + MVMuint64 now = MVM_platform_now(); + /* Set up instance data structure. */ instance = MVM_calloc(1, sizeof(MVMInstance)); /* Create the main thread's ThreadContext and stash it. */ instance->main_thread = MVM_tc_create(NULL, instance); + MVM_getrandom(instance->main_thread, &hashSecret, sizeof(MVMuint32)); + instance->hashSecret ^= now; + instance->hashSecret ^= MVM_proc_getpid(instance->main_thread) * now; instance->main_thread->thread_id = 1; /* Next thread to be created gets ID 2 (the main thread got ID 1). */ diff --git a/src/platform/random.c b/src/platform/random.c new file mode 100644 index 0000000000..9016d8b869 --- /dev/null +++ b/src/platform/random.c @@ -0,0 +1,142 @@ +/* Get random numbers from OS. Returns 1 if it succeeded and otherwise 0 + * Does not block. Designed for getting small amounts of random data at a time */ +#include +/* Solaris has both getrandom and getentropy. We use getrandom since getentropy + * can block. 
Solaris has had getrandom() and getentropy() since 11.3 */ +#if defined(__sun) + #include + /* On solaris, _GRND_ENTROPY is defined if getentropy/getrandom are available */ + #if defined(_GRND_ENTROPY) + #define MVM_random_use_getrandom 1 + #endif +#endif +/* Linux added getrandom to kernel in 3.17 */ +#if defined(__linux__) + #include + #if defined(SYS_getrandom) + /* With glibc you are supposed to declare _GNU_SOURCE to use the + * syscall function */ + #define _GNU_SOURCE + #define GRND_NONBLOCK 0x01 + #include + #define MVM_random_use_getrandom_syscall 1 + #else + #define MVM_random_use_urandom 1 + #endif +#endif +/* FreeBSD added it with SVN revision 331279 Wed Mar 21, 2018 + * This coorasponds to __FreeBSD_version version identifier: 1200061. + * https://svnweb.freebsd.org/base?view=revision&revision=r331279 */ +#if defined(__FreeBSD__) + #include + #if __FreeBSD_version >= 1200061 + #include + #define MVM_random_use_getrandom + #endif +#endif +/* OpenBSD's getentropy never blocks and always succeeds. OpenBSD has had + * getentropy() since 5.6 */ +#if defined(__OpenBSD__) + #include + #if OpenBSD >= 201301 + #define MVM_random_use_getentropy + #endif +#endif +/* MacOS has had getentropy() since 10.12 */ +#if defined(__APPLE__) + #include + #include + #if !defined(MAC_OS_X_VERSION_10_12) + #define MAC_OS_X_VERSION_10_12 101200 + #endif + //#include + #if __MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_12 + #include + #define MVM_random_use_getentropy 1 + #endif +#endif +/* Other info: + * NetBSD: I have not found evidence it has getentropy() or getrandom() + * Note: Uses __NetBSD_Version__ included from file . + * All BSD's should support arc4random + * AIX is a unix but has no arc4random, does have /dev/urandom */ +#include "moar.h" + +#if defined(MVM_random_use_getrandom_syscall) +/* getrandom() was added to glibc much later than it was added to the kernel. 
Since + * we detect the presence of the system call to decide whether to use this, + * just use the syscall instead since the wrapper is not guaranteed to exist.*/ + MVMint32 MVM_getrandom (MVMThreadContext *tc, void *out, size_t size) { + return syscall(SYS_getrandom, out, size, GRND_NONBLOCK) <= 0 ? 0 : 1; + } +#elif defined(MVM_random_use_getrandom) + /* Call the getrandom() wrapper in Solaris and FreeBSD since they were + * added at the same time as getentropy() and this allows us to avoid blocking. */ + MVMint32 MVM_getrandom (MVMThreadContext *tc, void *out, size_t size) { + return getrandom(out, size, GRND_NONBLOCK) <= 0 ? 0 : 1; + } + +#elif defined(MVM_random_use_getentropy) + MVMint32 MVM_getrandom (MVMThreadContext *tc, void *out, size_t size) { + return getentropy(out, size) < 0 ? 0 : 1; + } + +#elif defined(_WIN32) + #include + #include + typedef BOOL (WINAPI *CRYPTACQUIRECONTEXTA)(HCRYPTPROV *phProv,\ + LPCSTR pszContainer, LPCSTR pszProvider, DWORD dwProvType,\ + DWORD dwFlags ); + typedef BOOL (WINAPI *CRYPTGENRANDOM)(HCRYPTPROV hProv, DWORD dwLen,\ + BYTE *pbBuffer ); + /* This is needed to so pCryptGenRandom() can be called. */ + static CRYPTGENRANDOM pCryptGenRandom = NULL; + static HCRYPTPROV hCryptContext = 0; + static int win32_urandom_init(void) { + HINSTANCE hAdvAPI32 = NULL; + /* This is needed to so pCryptAcquireContext() can be called. */ + CRYPTACQUIRECONTEXTA pCryptAcquireContext = NULL; + /* Get Module Handle to CryptoAPI */ + hAdvAPI32 = GetModuleHandle("advapi32.dll"); + if (hAdvAPI32 == NULL) return 0; + /* Check the pointers to the CryptoAPI functions. These shouldn't fail + * but makes sure we won't have problems getting the context or getting + * random. 
*/ + if (!GetProcAddress(hAdvAPI32, "CryptAcquireContextA") + || !GetProcAddress(hAdvAPI32, "CryptGenRandom")) { + return 0; + } + /* Get the pCrypt Context */ + if (!pCryptAcquireContext(&hCryptContext, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) + return 0; + + return 1; + } + MVMint32 MVM_getrandom (MVMThreadContext *tc, void *out, size_t size) { + if (!hCryptContext) { + int rtrn = win32_urandom_init(); + if (!rtrn) return 0; + } + if (!pCryptGenRandom(hCryptContext, (DWORD)size, (BYTE*)out)) { + return 0; + } + return 1; + } +#else + #include + MVMint32 MVM_getrandom (MVMThreadContext *tc, void *out, size_t size) { + int fd = open("/dev/urandom", O_RDONLY); + ssize_t num_read = 0; + if (fd < 0 || (num_read = read(fd, out, size) <= 0)) { + if (fd) close(fd); + #if defined(BSD) + #include + arc4random_buf(out, size); + return 1; + #else + return 0; + #endif + } + return 1; + } +#endif diff --git a/src/platform/random.h b/src/platform/random.h new file mode 100644 index 0000000000..95bbb5b1fa --- /dev/null +++ b/src/platform/random.h @@ -0,0 +1 @@ +MVMint32 MVM_getrandom (MVMThreadContext *tc, void *out, size_t size); diff --git a/src/strings/ops.c b/src/strings/ops.c index d827d5c4e5..57ec742268 100644 --- a/src/strings/ops.c +++ b/src/strings/ops.c @@ -2898,7 +2898,7 @@ void MVM_string_compute_hash_code(MVMThreadContext *tc, MVMString *s) { MVMuint32 graphs_remaining = MVM_string_graphs(tc, s); /* Initialize hash state. 
*/ - MVMuint32 hashv = 0xfeedbeef; + MVMuint32 hashv = tc->instance->hashSecret; MVMuint32 _hj_i, _hj_j; _hj_i = _hj_j = 0x9e3779b9; From b027eb2bda5dd0a806a6643e7910adaeed01791c Mon Sep 17 00:00:00 2001 From: Samantha McVey Date: Thu, 12 Apr 2018 02:29:01 -0700 Subject: [PATCH 2/4] On Linux and BSD's, fall back to /dev/urandom if call fails This is mostly important on Linux since if MoarVM was compiled on a kernel >= 3.17 and then the user runs MoarVM on a kernel without the getrandom call, the syscall wrapper will gently return a false return value. If this happens try /dev/urandom. On the BSD's, they seem to guarantee the call will succeed, though MacOS's documentation does not specify information about this. --- 3rdparty/uthash.h | 2 +- src/moar.c | 1 - src/platform/random.c | 53 +++++++++++++++++++++++++++---------------- 3 files changed, 35 insertions(+), 21 deletions(-) diff --git a/3rdparty/uthash.h b/3rdparty/uthash.h index 6878b74fc2..aa64bc1c76 100644 --- a/3rdparty/uthash.h +++ b/3rdparty/uthash.h @@ -295,7 +295,7 @@ do { do { \ unsigned _hj_i,_hj_j,_hj_k; \ unsigned char *_hj_key=(unsigned char*)(key); \ - hashv = tc->instance->hashSecret; \ + hashv = tc->instance->hashSecret; \ _hj_i = _hj_j = 0x9e3779b9; \ _hj_k = (unsigned)(keylen); \ while (_hj_k >= 12) { \ diff --git a/src/moar.c b/src/moar.c index 3cc9ddda15..dfa425d8fe 100644 --- a/src/moar.c +++ b/src/moar.c @@ -88,7 +88,6 @@ MVMInstance * MVM_vm_create_instance(void) { MVMuint32 hashSecret; MVMuint64 now = MVM_platform_now(); - /* Set up instance data structure. 
*/ instance = MVM_calloc(1, sizeof(MVMInstance)); diff --git a/src/platform/random.c b/src/platform/random.c index 9016d8b869..b793c92d53 100644 --- a/src/platform/random.c +++ b/src/platform/random.c @@ -49,7 +49,6 @@ #if !defined(MAC_OS_X_VERSION_10_12) #define MAC_OS_X_VERSION_10_12 101200 #endif - //#include #if __MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_12 #include #define MVM_random_use_getentropy 1 @@ -61,35 +60,64 @@ * All BSD's should support arc4random * AIX is a unix but has no arc4random, does have /dev/urandom */ #include "moar.h" +/* On on Unix like platforms that don't support getrandom() or getentropy() + * we use /dev/urandom. On platforms that do support them, we fall back to + * /dev/urandom. This is also important on Linux, since if MoarVM was compiled + * on a kernel >= 3.17 it will be set to use the syscall. If the syscall doesn't + * exist, we then fallback to /dev/urandom */ +#if !defined(_WIN32) + #include + MVMint32 MVM_getrandom_urandom (MVMThreadContext *tc, void *out, size_t size) { + int fd = open("/dev/urandom", O_RDONLY); + ssize_t num_read = 0; + if (fd < 0 || (num_read = read(fd, out, size) <= 0)) { + if (fd) close(fd); + /* If using /dev/urandom fails (maybe we're in a chroot), on BSD's + * use arc4random, which is likely seeded from the system's random + * number generator */ + #if defined(BSD) + #include + arc4random_buf(out, size); + return 1; + #else + return 0; + #endif + } + return 1; + } +#endif #if defined(MVM_random_use_getrandom_syscall) /* getrandom() was added to glibc much later than it was added to the kernel. Since * we detect the presence of the system call to decide whether to use this, * just use the syscall instead since the wrapper is not guaranteed to exist.*/ MVMint32 MVM_getrandom (MVMThreadContext *tc, void *out, size_t size) { - return syscall(SYS_getrandom, out, size, GRND_NONBLOCK) <= 0 ? 0 : 1; + long rtrn = syscall(SYS_getrandom +1000000, out, size, GRND_NONBLOCK); + return rtrn <= 0 ? 
MVM_getrandom_urandom(tc, out, size) : 1; } #elif defined(MVM_random_use_getrandom) /* Call the getrandom() wrapper in Solaris and FreeBSD since they were * added at the same time as getentropy() and this allows us to avoid blocking. */ MVMint32 MVM_getrandom (MVMThreadContext *tc, void *out, size_t size) { - return getrandom(out, size, GRND_NONBLOCK) <= 0 ? 0 : 1; + ssize_t rtrn = getrandom(out, size, GRND_NONBLOCK); + return rtrn <= 0 ? MVM_getrandom_urandom(tc, out, size) : 1; } #elif defined(MVM_random_use_getentropy) MVMint32 MVM_getrandom (MVMThreadContext *tc, void *out, size_t size) { - return getentropy(out, size) < 0 ? 0 : 1; + int rtrn = getentropy(out, size); + return rtrn <= 0 ? MVM_getrandom_urandom(tc, out, size) : 1; } #elif defined(_WIN32) #include #include + /* This is needed so pCryptGenRandom() can be called. */ typedef BOOL (WINAPI *CRYPTACQUIRECONTEXTA)(HCRYPTPROV *phProv,\ LPCSTR pszContainer, LPCSTR pszProvider, DWORD dwProvType,\ DWORD dwFlags ); typedef BOOL (WINAPI *CRYPTGENRANDOM)(HCRYPTPROV hProv, DWORD dwLen,\ BYTE *pbBuffer ); - /* This is needed to so pCryptGenRandom() can be called. */ static CRYPTGENRANDOM pCryptGenRandom = NULL; static HCRYPTPROV hCryptContext = 0; static int win32_urandom_init(void) { @@ -123,20 +151,7 @@ return 1; } #else - #include MVMint32 MVM_getrandom (MVMThreadContext *tc, void *out, size_t size) { - int fd = open("/dev/urandom", O_RDONLY); - ssize_t num_read = 0; - if (fd < 0 || (num_read = read(fd, out, size) <= 0)) { - if (fd) close(fd); - #if defined(BSD) - #include - arc4random_buf(out, size); - return 1; - #else - return 0; - #endif - } - return 1; + return MVM_getrandom_urandom(tc, out, size); } #endif From 9da2a007ba8ab371976ff90edb5c1a23b6c1573f Mon Sep 17 00:00:00 2001 From: Samantha McVey Date: Thu, 12 Apr 2018 03:06:10 -0700 Subject: [PATCH 3/4] Fix some of the Windows MVM_getrandom code and clean it up Had some missing variable assignments which did not seem required and were removed in error. 
Put them back since the functions we call are not defined in --- src/platform/random.c | 41 +++++++++++++++++------------------------ 1 file changed, 17 insertions(+), 24 deletions(-) diff --git a/src/platform/random.c b/src/platform/random.c index b793c92d53..2a54aaf208 100644 --- a/src/platform/random.c +++ b/src/platform/random.c @@ -112,43 +112,36 @@ #elif defined(_WIN32) #include #include - /* This is needed so pCryptGenRandom() can be called. */ + /* Signatures for pCryptAcquireContext() and pCryptGenRandom() */ typedef BOOL (WINAPI *CRYPTACQUIRECONTEXTA)(HCRYPTPROV *phProv,\ LPCSTR pszContainer, LPCSTR pszProvider, DWORD dwProvType,\ DWORD dwFlags ); typedef BOOL (WINAPI *CRYPTGENRANDOM)(HCRYPTPROV hProv, DWORD dwLen,\ BYTE *pbBuffer ); + /* The functions themselves */ static CRYPTGENRANDOM pCryptGenRandom = NULL; static HCRYPTPROV hCryptContext = 0; static int win32_urandom_init(void) { - HINSTANCE hAdvAPI32 = NULL; - /* This is needed to so pCryptAcquireContext() can be called. */ - CRYPTACQUIRECONTEXTA pCryptAcquireContext = NULL; /* Get Module Handle to CryptoAPI */ - hAdvAPI32 = GetModuleHandle("advapi32.dll"); - if (hAdvAPI32 == NULL) return 0; - /* Check the pointers to the CryptoAPI functions. These shouldn't fail - * but makes sure we won't have problems getting the context or getting - * random. */ - if (!GetProcAddress(hAdvAPI32, "CryptAcquireContextA") - || !GetProcAddress(hAdvAPI32, "CryptGenRandom")) { - return 0; + HINSTANCE hAdvAPI32 = GetModuleHandle("advapi32.dll"); + if (hAdvAPI32) { + CRYPTACQUIRECONTEXTA pCryptAcquireContext = + GetProcAddress(hAdvAPI32, "CryptAcquireContextA"); + pCryptGenRandom = GetProcAddress(hAdvAPI32, "CryptGenRandom"); + /* Check the pointers to the CryptoAPI functions. These shouldn't fail + * but makes sure we won't have problems getting the context or getting + * random. 
If those aren't NULL then get the pCrypt context */ + return pCryptAcquireContext && pCryptGenRandom && + pCryptAcquireContext(&hCryptContext, NULL, NULL, PROV_RSA_FULL, + CRYPT_VERIFYCONTEXT) ? 1 : 0; } - /* Get the pCrypt Context */ - if (!pCryptAcquireContext(&hCryptContext, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) - return 0; - - return 1; + return 0; } MVMint32 MVM_getrandom (MVMThreadContext *tc, void *out, size_t size) { - if (!hCryptContext) { - int rtrn = win32_urandom_init(); - if (!rtrn) return 0; - } - if (!pCryptGenRandom(hCryptContext, (DWORD)size, (BYTE*)out)) { + /* Return 0 if the context doesn't exist and we are unable to create it */ + if (!hCryptContext && !win32_urandom_init()) return 0; - } - return 1; + return pCryptGenRandom(hCryptContext, (DWORD)size, (BYTE*)out) ? 1 : 0; } #else MVMint32 MVM_getrandom (MVMThreadContext *tc, void *out, size_t size) { From 7d83f32bf5a39e4d970f93ef51ba899663841175 Mon Sep 17 00:00:00 2001 From: Samantha McVey Date: Thu, 12 Apr 2018 03:25:58 -0700 Subject: [PATCH 4/4] Make a few of the MVM_getrandom comments more clear --- src/platform/random.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/platform/random.c b/src/platform/random.c index 2a54aaf208..6e2e23a2a8 100644 --- a/src/platform/random.c +++ b/src/platform/random.c @@ -10,7 +10,7 @@ #define MVM_random_use_getrandom 1 #endif #endif -/* Linux added getrandom to kernel in 3.17 */ +/* Linux added getrandom to the kernel in 3.17 */ #if defined(__linux__) #include #if defined(SYS_getrandom) @@ -25,7 +25,7 @@ #endif #endif /* FreeBSD added it with SVN revision 331279 Wed Mar 21, 2018 - * This coorasponds to __FreeBSD_version version identifier: 1200061. + * This corresponds to __FreeBSD_version version identifier: 1200061. 
* https://svnweb.freebsd.org/base?view=revision&revision=r331279 */ #if defined(__FreeBSD__) #include @@ -55,16 +55,17 @@ #endif #endif /* Other info: - * NetBSD: I have not found evidence it has getentropy() or getrandom() - * Note: Uses __NetBSD_Version__ included from file . - * All BSD's should support arc4random - * AIX is a unix but has no arc4random, does have /dev/urandom */ + * - All BSD's should support arc4random + * - AIX is a Unix but has no arc4random, does have /dev/urandom. + * - NetBSD: I have not found evidence it has getentropy() or getrandom() + * Note: Uses __NetBSD_Version__ included from file . */ #include "moar.h" -/* On on Unix like platforms that don't support getrandom() or getentropy() - * we use /dev/urandom. On platforms that do support them, we fall back to - * /dev/urandom. This is also important on Linux, since if MoarVM was compiled - * on a kernel >= 3.17 it will be set to use the syscall. If the syscall doesn't - * exist, we then fallback to /dev/urandom */ +/* On Unix like platforms that don't support getrandom() or getentropy() + * we default to /dev/urandom. On platforms that do support these calls, we + * only use /dev/urandom if those calls fail. This is also important on Linux, + * since if MoarVM was compiled on a kernel >= 3.17 it will be set to use the + * syscall. If the syscall doesn't exist, the syscall wrapper will gracefully + * return a false return value and we will fallback to /dev/urandom */ #if !defined(_WIN32) #include MVMint32 MVM_getrandom_urandom (MVMThreadContext *tc, void *out, size_t size) {