-
Notifications
You must be signed in to change notification settings - Fork 14k
[libc] Implemented wcspbrk #142040
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[libc] Implemented wcspbrk #142040
Conversation
@llvm/pr-subscribers-libc Author: Uzair Nawaz (uzairnawaz) ChangesImplemented wcspbrk and added tests Full diff: https://github.com/llvm/llvm-project/pull/142040.diff 7 Files Affected:
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 52e746e32a1cd..593117f94b64e 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -365,6 +365,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.wchar.wcslen
libc.src.wchar.wctob
libc.src.wchar.wcschr
+ libc.src.wchar.wcspbrk
# sys/uio.h entrypoints
libc.src.sys.uio.writev
diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml
index 987bdc0b806dc..52ebc6804d043 100644
--- a/libc/include/wchar.yaml
+++ b/libc/include/wchar.yaml
@@ -34,3 +34,10 @@ functions:
arguments:
- type: const wchar_t *
- type: wchar_t
+ - name: wcspbrk
+ standards:
+ - stdc
+ return_type: const wchar_t *
+ arguments:
+ - type: const wchar_t *
+ - type: const wchar_t *
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index 41034cab16d4d..4d923c5f14474 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -44,3 +44,14 @@ add_entrypoint_object(
libc.hdr.wchar_macros
libc.src.__support.wctype_utils
)
+
+add_entrypoint_object(
+ wcspbrk
+ SRCS
+ wcspbrk.cpp
+ HDRS
+ wcspbrk.h
+ DEPENDS
+ libc.hdr.wchar_macros
+ libc.src.__support.wctype_utils
+)
diff --git a/libc/src/wchar/wcspbrk.cpp b/libc/src/wchar/wcspbrk.cpp
new file mode 100644
index 0000000000000..e92be0853a5c0
--- /dev/null
+++ b/libc/src/wchar/wcspbrk.cpp
@@ -0,0 +1,27 @@
+//===-- Implementation of wcspbrk -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wcspbrk.h"
+
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(const wchar_t *, wcspbrk,
+ (const wchar_t *src, const wchar_t *breakset)) {
+ // currently O(n * m), can be further optimized to O(n + m) with a hash set
+ for (int src_idx = 0; src[src_idx] != 0; src_idx++)
+ for (int breakset_idx = 0; breakset[breakset_idx] != 0; breakset_idx++)
+ if (src[src_idx] == breakset[breakset_idx])
+ return src + src_idx;
+
+ return nullptr;
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wcspbrk.h b/libc/src/wchar/wcspbrk.h
new file mode 100644
index 0000000000000..531651b0b723a
--- /dev/null
+++ b/libc/src/wchar/wcspbrk.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for wcspbrk ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_WCSPBRK_H
+#define LLVM_LIBC_SRC_WCHAR_WCSPBRK_H
+
+#include "hdr/types/wchar_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+const wchar_t *wcspbrk(const wchar_t *src, const wchar_t *breakset);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_WCSPBRK_H
diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt
index 7d64dfeb13b6e..0e3d5c618ae1a 100644
--- a/libc/test/src/wchar/CMakeLists.txt
+++ b/libc/test/src/wchar/CMakeLists.txt
@@ -42,3 +42,13 @@ add_libc_test(
DEPENDS
libc.src.wchar.wcschr
)
+
+add_libc_test(
+ wcspbrk_test
+ SUITE
+ libc_wchar_unittests
+ SRCS
+ wcspbrk_test.cpp
+ DEPENDS
+ libc.src.wchar.wcspbrk
+)
diff --git a/libc/test/src/wchar/wcspbrk_test.cpp b/libc/test/src/wchar/wcspbrk_test.cpp
new file mode 100644
index 0000000000000..ac7ac473542d9
--- /dev/null
+++ b/libc/test/src/wchar/wcspbrk_test.cpp
@@ -0,0 +1,83 @@
+//===-- Unittests for wcspbrk
+//----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/types/wchar_t.h"
+#include "src/wchar/wcspbrk.h"
+#include "test/UnitTest/Test.h"
+
+TEST(LlvmLibcWCSPBrkTest, EmptyStringShouldReturnNullptr) {
+ // The search should not include the null terminator.
+ EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(L"", L""), nullptr);
+ EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(L"_", L""), nullptr);
+ EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(L"", L"_"), nullptr);
+}
+
+TEST(LlvmLibcWCSPBrkTest, ShouldNotFindAnythingAfterNullTerminator) {
+ const wchar_t src[4] = {'a', 'b', '\0', 'c'};
+ EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(src, L"c"), nullptr);
+}
+
+TEST(LlvmLibcWCSPBrkTest, ShouldReturnNullptrIfNoCharactersFound) {
+ EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(L"12345", L"abcdef"), nullptr);
+}
+
+TEST(LlvmLibcWCSPBrkTest, FindsFirstCharacter) {
+ const wchar_t *src = L"12345";
+ EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(src, L"1"), src);
+ EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(src, L"-1"), src);
+ EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(src, L"1_"), src);
+ EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(src, L"f1_"), src);
+
+ EXPECT_TRUE(src[0] == L'1');
+ EXPECT_TRUE(src[1] == L'2');
+ EXPECT_TRUE(src[2] == L'3');
+ EXPECT_TRUE(src[3] == L'4');
+ EXPECT_TRUE(src[4] == L'5');
+ EXPECT_TRUE(src[5] == 0);
+}
+
+TEST(LlvmLibcWCSPBrkTest, FindsMiddleCharacter) {
+ const wchar_t *src = L"12345";
+ EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(src, L"3"), src + 2);
+ EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(src, L"?3"), src + 2);
+ EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(src, L"3F"), src + 2);
+ EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(src, L"z3_"), src + 2);
+
+ EXPECT_TRUE(src[0] == L'1');
+ EXPECT_TRUE(src[1] == L'2');
+ EXPECT_TRUE(src[2] == L'3');
+ EXPECT_TRUE(src[3] == L'4');
+ EXPECT_TRUE(src[4] == L'5');
+ EXPECT_TRUE(src[5] == 0);
+}
+
+TEST(LlvmLibcWCSPBrkTest, FindsLastCharacter) {
+ const wchar_t *src = L"12345";
+ EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(src, L"5"), src + 4);
+ EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(src, L"r5"), src + 4);
+ EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(src, L"59"), src + 4);
+ EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(src, L"n5_"), src + 4);
+
+ EXPECT_TRUE(src[0] == L'1');
+ EXPECT_TRUE(src[1] == L'2');
+ EXPECT_TRUE(src[2] == L'3');
+ EXPECT_TRUE(src[3] == L'4');
+ EXPECT_TRUE(src[4] == L'5');
+ EXPECT_TRUE(src[5] == 0);
+}
+
+TEST(LlvmLibcWCSPBrkTest, FindsFirstOfRepeated) {
+ const wchar_t *src = L"A,B,C,D";
+ EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(src, L","), src + 1);
+}
+
+TEST(LlvmLibcWCSPBrkTest, FindsFirstInBreakset) {
+ const wchar_t *src = L"12345";
+ EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(src, L"34"), src + 2);
+}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
mostly LGTM with a few comments on the tests
libc/test/src/wchar/wcspbrk_test.cpp
Outdated
EXPECT_TRUE(src[0] == L'1'); | ||
EXPECT_TRUE(src[1] == L'2'); | ||
EXPECT_TRUE(src[2] == L'3'); | ||
EXPECT_TRUE(src[3] == L'4'); | ||
EXPECT_TRUE(src[4] == L'5'); | ||
EXPECT_TRUE(src[5] == 0); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
since src
isn't getting modified I don't think this check is necessary.
libc/test/src/wchar/wcspbrk_test.cpp
Outdated
TEST(LlvmLibcWCSPBrkTest, FindsFirstInBreakset) { | ||
const wchar_t *src = L"12345"; | ||
EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(src, L"34"), src + 2); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
it might be useful to also check that it works correctly when the breakset is not in the same order as the string being searched.
libc/test/src/wchar/wcspbrk_test.cpp
Outdated
@@ -0,0 +1,63 @@ | |||
//===-- Unittests for wcspbrk |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: fix formatting
libc/src/wchar/wcspbrk.cpp
Outdated
for (int breakset_idx = 0; breakset[breakset_idx] != 0; breakset_idx++) | ||
if (src[src_idx] == breakset[breakset_idx]) | ||
return src + src_idx; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if you break the check if src[src_idx]
is in breakset
out into a helper function this will be easier to read
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with one nit
libc/src/wchar/wcspbrk.cpp
Outdated
@@ -13,13 +13,20 @@ | |||
|
|||
namespace LIBC_NAMESPACE_DECL { | |||
|
|||
bool contains_char(const wchar_t *str, wchar_t target) { | |||
for (; *str != 0; str++) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
since this is a wide char null byte this should be L'\0'
instead of 0
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, we can add nullptr checks in a followup patch
Implemented wcspbrk and added tests