Skip to content

Commit df77ae1

Browse files
trflynn89 authored and gmta committed
AK: Implement creating a UTF-16 string from a repeated code point
1 parent a46e9b2 commit df77ae1

File tree

5 files changed

+110
-0
lines changed

5 files changed

+110
-0
lines changed

AK/StringBuilder.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,28 @@ ErrorOr<void> StringBuilder::try_append_repeated(StringView string, size_t n)
193193
return {};
194194
}
195195

196+
// Appends the UTF-16 view `string` to this builder `n` times.
//
// An empty `string` returns success immediately. Otherwise will_append() is
// called once with the size all `n` copies are expected to occupy in the
// builder's current mode, and then `string` is appended `n` times through
// try_append(). Any error from will_append() or try_append() is propagated to
// the caller by TRY().
ErrorOr<void> StringBuilder::try_append_repeated(Utf16View const& string, size_t n)
197+
{
198+
if (string.is_empty())
199+
return {};
200+
201+
// Compute the expected size in the encoding this builder is currently using.
if (m_mode == Mode::UTF8) {
202+
if (string.has_ascii_storage()) {
203+
// ASCII-backed view: the code-unit count is used directly as the per-copy size.
TRY(will_append(string.length_in_code_units() * n));
204+
} else {
205+
// Ask simdutf how many UTF-8 bytes one copy of the UTF-16 data will need.
auto utf8_length = simdutf::utf8_length_from_utf16(string.utf16_span().data(), string.length_in_code_units());
206+
TRY(will_append(utf8_length * n));
207+
}
208+
} else {
209+
// NOTE(review): the factor of 2 is presumably sizeof(char16_t), i.e. bytes per
// UTF-16 code unit in the non-UTF8 mode - confirm against will_append().
TRY(will_append(string.length_in_code_units() * n * 2));
210+
}
211+
212+
// The actual appends; each one can still fail independently of the call above.
for (size_t i = 0; i < n; ++i)
213+
TRY(try_append(string));
214+
215+
return {};
216+
}
217+
196218
void StringBuilder::append(StringView string)
197219
{
198220
MUST(try_append(string));
@@ -228,6 +250,11 @@ void StringBuilder::append_repeated(StringView string, size_t n)
228250
MUST(try_append_repeated(string, n));
229251
}
230252

253+
// Void-returning counterpart of try_append_repeated(Utf16View const&, size_t):
// forwards both arguments and hands the result to MUST() instead of returning it.
void StringBuilder::append_repeated(Utf16View const& string, size_t n)
254+
{
255+
MUST(try_append_repeated(string, n));
256+
}
257+
231258
ErrorOr<ByteBuffer> StringBuilder::to_byte_buffer() const
232259
{
233260
return ByteBuffer::copy(data(), length());

AK/StringBuilder.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ class StringBuilder {
4545
ErrorOr<void> try_append(char const*, size_t);
4646
ErrorOr<void> try_append_repeated(char, size_t);
4747
ErrorOr<void> try_append_repeated(StringView, size_t);
48+
// Appends the given UTF-16 view the requested number of times; errors are returned to the caller.
ErrorOr<void> try_append_repeated(Utf16View const&, size_t);
4849
ErrorOr<void> try_append_escaped_for_json(StringView);
4950

5051
template<typename... Parameters>
@@ -64,6 +65,7 @@ class StringBuilder {
6465
void appendvf(char const*, va_list);
6566
void append_repeated(char, size_t);
6667
void append_repeated(StringView, size_t);
68+
// Void-returning variant of try_append_repeated(Utf16View const&, size_t); its definition passes the result to MUST().
void append_repeated(Utf16View const&, size_t);
6769
void append_escaped_for_json(StringView);
6870
void append_as_lowercase(char);
6971

AK/Utf16String.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,30 @@ Utf16String Utf16String::from_string_builder_without_validation(StringBuilder& b
9090
return Utf16String { Detail::Utf16StringData::from_string_builder(builder) };
9191
}
9292

93+
// Creates a string consisting of `code_point` repeated `count` times.
//
// Two paths:
//  - An ASCII code point with `count` no larger than
//    Detail::MAX_SHORT_STRING_BYTE_COUNT is written straight into the
//    short-ASCII-string member, without going through a StringBuilder.
//  - Everything else is encoded to UTF-16 once and repeated through a
//    StringBuilder in UTF16 mode.
//
// NOTE(review): the accompanying test expects an invalid code point
// (0xffffffff) to crash; presumably that happens inside
// UnicodeUtils::code_point_to_utf16() - confirm.
Utf16String Utf16String::repeated(u32 code_point, size_t count)
94+
{
95+
if (count <= Detail::MAX_SHORT_STRING_BYTE_COUNT && AK::is_ascii(code_point)) {
96+
Utf16String string;
97+
string.m_value.short_ascii_string = Detail::ShortString::create_with_byte_count(count);
98+
99+
// Fill the first `count` bytes of the short string's storage with the (ASCII) code point.
Bytes bytes { string.m_value.short_ascii_string.storage, count };
100+
bytes.fill(static_cast<u8>(code_point));
101+
102+
return string;
103+
}
104+
105+
// Room for at most two UTF-16 code units; the callback below records how many were produced.
Array<char16_t, 2> code_units;
106+
size_t length_in_code_units = 0;
107+
108+
// The return value of code_point_to_utf16() is deliberately discarded.
(void)UnicodeUtils::code_point_to_utf16(code_point, [&](auto code_unit) {
109+
code_units[length_in_code_units++] = code_unit;
110+
});
111+
112+
StringBuilder builder(StringBuilder::Mode::UTF16);
113+
// Repeat only the code units that were actually written above.
builder.append_repeated({ code_units.data(), length_in_code_units }, count);
114+
return builder.to_utf16_string();
115+
}
116+
93117
ErrorOr<void> Formatter<Utf16String>::format(FormatBuilder& builder, Utf16String const& utf16_string)
94118
{
95119
if (utf16_string.has_long_utf16_storage())

AK/Utf16String.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,8 @@ class [[nodiscard]] Utf16String : public Detail::Utf16StringBase {
125125
return builder.to_utf16_string();
126126
}
127127

128+
// Creates a string made of `code_point` repeated `count` times.
static Utf16String repeated(u32 code_point, size_t count);
129+
128130
ALWAYS_INLINE static Utf16String from_string_builder(Badge<StringBuilder>, StringBuilder& builder)
129131
{
130132
VERIFY(builder.utf16_string_view().validate());

Tests/AK/TestUtf16String.cpp

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,61 @@ TEST_CASE(formatted)
356356
}
357357
}
358358

359+
// Exercises Utf16String::repeated() with counts 0, 1, 3 and 10 for three kinds
// of code point:
//  - 'a'      (ASCII; one code unit each)
//  - U+03C9   (non-ASCII; one code unit each)
//  - U+10300  (two code units each, per the asserted lengths 2, 6 and 20)
// Finally checks that an invalid code point makes the call die.
TEST_CASE(repeated)
360+
{
361+
// count == 0: every result is empty, whatever the code point.
{
362+
auto string1 = Utf16String::repeated('a', 0);
363+
EXPECT(string1.is_empty());
364+
365+
auto string2 = Utf16String::repeated(0x03C9U, 0);
366+
EXPECT(string2.is_empty());
367+
368+
auto string3 = Utf16String::repeated(0x10300, 0);
369+
EXPECT(string3.is_empty());
370+
}
371+
// count == 1
{
372+
auto string1 = Utf16String::repeated('a', 1);
373+
EXPECT_EQ(string1.length_in_code_units(), 1uz);
374+
EXPECT_EQ(string1, u"a"sv);
375+
376+
auto string2 = Utf16String::repeated(0x03C9U, 1);
377+
EXPECT_EQ(string2.length_in_code_units(), 1uz);
378+
EXPECT_EQ(string2, u"ω"sv);
379+
380+
auto string3 = Utf16String::repeated(0x10300, 1);
381+
EXPECT_EQ(string3.length_in_code_units(), 2uz);
382+
EXPECT_EQ(string3, u"𐌀"sv);
383+
}
384+
// count == 3
{
385+
auto string1 = Utf16String::repeated('a', 3);
386+
EXPECT_EQ(string1.length_in_code_units(), 3uz);
387+
EXPECT_EQ(string1, u"aaa"sv);
388+
389+
auto string2 = Utf16String::repeated(0x03C9U, 3);
390+
EXPECT_EQ(string2.length_in_code_units(), 3uz);
391+
EXPECT_EQ(string2, u"ωωω"sv);
392+
393+
auto string3 = Utf16String::repeated(0x10300, 3);
394+
EXPECT_EQ(string3.length_in_code_units(), 6uz);
395+
EXPECT_EQ(string3, u"𐌀𐌀𐌀"sv);
396+
}
397+
// count == 10
{
398+
auto string1 = Utf16String::repeated('a', 10);
399+
EXPECT_EQ(string1.length_in_code_units(), 10uz);
400+
EXPECT_EQ(string1, u"aaaaaaaaaa"sv);
401+
402+
auto string2 = Utf16String::repeated(0x03C9U, 10);
403+
EXPECT_EQ(string2.length_in_code_units(), 10uz);
404+
EXPECT_EQ(string2, u"ωωωωωωωωωω"sv);
405+
406+
auto string3 = Utf16String::repeated(0x10300, 10);
407+
EXPECT_EQ(string3.length_in_code_units(), 20uz);
408+
EXPECT_EQ(string3, u"𐌀𐌀𐌀𐌀𐌀𐌀𐌀𐌀𐌀𐌀"sv);
409+
}
410+
411+
// 0xffffffff is used here as an invalid code point; constructing a string from it must die.
EXPECT_DEATH("Creating a string from an invalid code point", (void)Utf16String::repeated(0xffffffff, 1));
412+
}
413+
359414
TEST_CASE(copy_operations)
360415
{
361416
auto test = [](Utf16String const& string1) {

0 commit comments

Comments
(0)