From f1eb9cf6c302a10a939b76252e9704da176167e5 Mon Sep 17 00:00:00 2001 From: Samantha McVey Date: Fri, 7 Apr 2017 22:19:22 -0700 Subject: [PATCH] Use FreeBSD memmem function on MacOS and improve comments Use FreeBSD memmem on MacOS, because although MacOS comes with a memmem, it is much slower than the FBSD one which implements Crochemore+Perrin two-way string matching. Also improve the comments to be more descriptive. --- src/platform/memmem.h | 19 ++++++++++++++----- src/strings/ops.c | 6 ++++-- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/src/platform/memmem.h b/src/platform/memmem.h index 1e6c2dee37..b193e51528 100644 --- a/src/platform/memmem.h +++ b/src/platform/memmem.h @@ -1,13 +1,22 @@ -#if defined _WIN32 +/* On Linux we use glibc's memmem which uses the Knuth-Morris-Pratt algorithm. + * We use FreeBSD's libc memmem on Windows and MacOS, which uses + * Crochemore-Perrin two-way string matching. + * Reasoning: + * Windows, does not include any native memmem + * MacOS has a memmem but is slower and originates from FreeBSD dated to 2005 */ + +#if defined(_WIN32) || defined(__APPLE__) || defined(__Darwin__) #include "../3rdparty/freebsd/memmem.c" #else -/* On systems that use Glibc, you must defined _GNU_SOURCE before including string.h - * to get access to memmem. On BSD and MacOS this is not needed, though if they - * happen to be using glibc instead of libc, it shouldn't hurt have defined _GNU_SOURCE */ +/* On systems that use glibc, you must define _GNU_SOURCE before including string.h + * to get access to memmem. 
*/ #define _GNU_SOURCE -#endif #include <string.h> +#endif void* MVM_memmem(const void *haystack, size_t haystacklen, const void *needle, size_t needlelen) { return memmem(haystack, haystacklen, needle, needlelen); } + +/* Extended info: + * In glibc, the Knuth-Morris-Pratt algorithm was added as of git tag glibc-2.8-44-g0caca71ac9 */ diff --git a/src/strings/ops.c b/src/strings/ops.c index 7a12d34441..fb47d9a896 100644 --- a/src/strings/ops.c +++ b/src/strings/ops.c @@ -216,7 +216,8 @@ MVMint64 MVM_string_index(MVMThreadContext *tc, MVMString *haystack, MVMString * return -1; /* Fast paths when storage types are identical. Uses memmem function, which - * in glibc uses Knuth-Morris-Pratt algorithm as of glibc-2.8-44-g0caca71ac9 */ + * uses Knuth-Morris-Pratt algorithm on Linux and on others + * Crochemore+Perrin two-way string matching */ switch (haystack->body.storage_type) { case MVM_STRING_GRAPHEME_32: if (needle->body.storage_type == MVM_STRING_GRAPHEME_32) { @@ -245,7 +246,8 @@ MVMint64 MVM_string_index(MVMThreadContext *tc, MVMString *haystack, MVMString * haystack->body.storage.blob_8 + start, /* start position */ (hgraphs - start) * sizeof(MVMGrapheme8), /* length of haystack from start position to end */ needle->body.storage.blob_8, /* needle start */ - ngraphs * sizeof(MVMGrapheme8)); /* needle length */ + ngraphs * sizeof(MVMGrapheme8) /* needle length */ + ); if (mm_return_8 == NULL) return -1; else