Skip to content

Optional SIMD strlen #586

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Jun 30, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
@@ -110,6 +110,15 @@ jobs:
TARGET_TRIPLE: wasm32-wasip1-threads
THREAD_MODEL: posix

- name: Test wasm32-wasi-simd
os: ubuntu-24.04
clang_version: 16
test: true
upload: wasm32-wasi-simd
env:
MAKE_TARGETS: "no-check-symbols"
EXTRA_CFLAGS: "-O2 -DNDEBUG -msimd128 -mrelaxed-simd -mbulk-memory -D__wasilibc_simd_string"

steps:
- uses: actions/checkout@v4.1.7
with:
6 changes: 4 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
@@ -808,11 +808,13 @@ $(DUMMY_LIBS):
$(AR) crs "$$lib"; \
done

finish: $(STARTUP_FILES) libc $(DUMMY_LIBS)
no-check-symbols: $(STARTUP_FILES) libc $(DUMMY_LIBS)
#
# The build succeeded! The generated sysroot is in $(SYSROOT).
#

finish: no-check-symbols

ifeq ($(LTO),no)
# The check for defined and undefined symbols expects there to be a heap
# allocator (providing malloc, calloc, free, etc). Skip this step if the build
@@ -1033,4 +1035,4 @@ clean:
$(RM) -r "$(OBJDIR)"
$(RM) -r "$(SYSROOT)"

.PHONY: default libc libc_so finish install clean check-symbols bindings
.PHONY: default libc libc_so finish install clean check-symbols no-check-symbols bindings
34 changes: 34 additions & 0 deletions libc-top-half/musl/src/string/strlen.c
Original file line number Diff line number Diff line change
@@ -2,13 +2,47 @@
#include <stdint.h>
#include <limits.h>

#ifdef __wasm_simd128__
#include <wasm_simd128.h>
#endif

#define ALIGN (sizeof(size_t))
#define ONES ((size_t)-1/UCHAR_MAX)
#define HIGHS (ONES * (UCHAR_MAX/2+1))
#define HASZERO(x) ((x)-ONES & ~(x) & HIGHS)

size_t strlen(const char *s)
{
#if defined(__wasm_simd128__) && defined(__wasilibc_simd_string)
// strlen must stop as soon as it finds the terminator.
// Aligning ensures loads beyond the terminator are safe.
// Casting through uintptr_t makes this implementation-defined,
// rather than undefined behavior.
uintptr_t align = (uintptr_t)s % sizeof(v128_t);
const v128_t *v = (v128_t *)((uintptr_t)s - align);

for (;;) {
// Bitmask is slow on AArch64, all_true is much faster.
if (!wasm_i8x16_all_true(*v)) {
const v128_t cmp = wasm_i8x16_eq(*v, (v128_t){});
// Clear the bits corresponding to align (little-endian)
// so we can count trailing zeros.
int mask = wasm_i8x16_bitmask(cmp) >> align << align;
// At least one bit will be set, unless align cleared them.
// Knowing this helps the compiler if it unrolls the loop.
__builtin_assume(mask || align);
// If the mask became zero because of align,
// it's as if we didn't find anything.
if (mask) {
// Find the offset of the first one bit (little-endian).
return (char *)v - s + __builtin_ctz(mask);
}
}
align = 0;
v++;
}
#endif

const char *a = s;
#ifdef __GNUC__
typedef size_t __attribute__((__may_alias__)) word;
40 changes: 40 additions & 0 deletions test/src/misc/strlen.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
//! add-flags.py(LDFLAGS): -Wl,--stack-first -Wl,--initial-memory=327680

#include <__macro_PAGESIZE.h>
#include <stdio.h>
#include <string.h>

// Compares strlen(ptr) against the expected length `want`.
//
// ptr:  NUL-terminated string to measure.
// want: the length strlen is expected to report.
//
// On a mismatch a single diagnostic line is written to stdout; nothing is
// returned and the process is not aborted, so the caller keeps running the
// remaining cases.
void test(char *ptr, size_t want) {
  size_t got = strlen(ptr);
  if (got != want) {
    // %p requires a `void *` argument and %zu is the conversion for size_t;
    // using %lu is only correct where size_t happens to be unsigned long.
    printf("strlen(%p) = %zu, want %zu\n", (void *)ptr, got, want);
  }
}

// Drives strlen over strings of every length 0..63 placed at 24 consecutive
// start offsets near a page boundary, and at the very end of memory.
// Mismatches are reported by test(); the exit status is always 0.
int main(void) {
  // Address one past the end of memory: current size in pages * page size.
  char *const memory_end = (char *)(__builtin_wasm_memory_size(0) * PAGESIZE);
  // The last two pages are used as scratch space and re-zeroed per case.
  char *const scratch = memory_end - 2 * PAGESIZE;
  // Starting 8 bytes before the boundary between those two pages means many
  // of the strings below straddle a (Wasm, and likely OS) page boundary.
  char *const near_boundary = memory_end - PAGESIZE - 8;

  for (size_t len = 0; len < 64; ++len) {
    for (size_t offset = 0; offset < 24; ++offset) {
      // Build a string of `len` non-zero bytes at the chosen start offset;
      // the zeroed scratch area supplies the terminator.
      char *str = near_boundary + offset;
      memset(scratch, 0, 2 * PAGESIZE);
      memset(str, 5, len);
      test(str, len);

      // A non-zero byte just before the string must not affect the result.
      str[-1] = 5;
      test(str, len);
    }

    // Place the string so its terminator is the last byte of memory:
    // strlen must never read past the end of memory.
    char *tail = memory_end - len - 1;
    memset(scratch, 0, 2 * PAGESIZE);
    memset(tail, 5, len);
    test(tail, len);
  }

  return 0;
}