[libc] utf8 to 32 CharacterConverter #143973

sribee8 · 2025-06-12T20:59:23Z

Implemented push and pop for utf8 to 32 conversion and tests.

llvmbot · 2025-06-12T20:59:56Z

@llvm/pr-subscribers-libc

Author: None (sribee8)

Changes

Implemented push and pop for utf8 to 32 conversion and tests.

Full diff: https://github.com/llvm/llvm-project/pull/143973.diff

5 Files Affected:

(modified) libc/src/__support/wchar/character_converter.cpp (+69-5)
(modified) libc/src/__support/wchar/mbstate.h (+1-1)
(modified) libc/test/src/__support/CMakeLists.txt (+1)
(added) libc/test/src/__support/wchar/CMakeLists.txt (+11)
(added) libc/test/src/__support/wchar/utf8_to_32_test.cpp (+125)

diff --git a/libc/src/__support/wchar/character_converter.cpp b/libc/src/__support/wchar/character_converter.cpp
index 3cdb8ca83b7f0..9c2fde3134837 100644
--- a/libc/src/__support/wchar/character_converter.cpp
+++ b/libc/src/__support/wchar/character_converter.cpp
@@ -18,15 +18,79 @@ namespace internal {
 
 CharacterConverter::CharacterConverter(mbstate *mbstate) { state = mbstate; }
 
-bool CharacterConverter::isComplete() {}
+bool CharacterConverter::isComplete() {
+  // The conversion counts as complete once the number of bytes consumed equals
+  // the expected sequence length recorded in the state. Two zero counters also
+  // compare equal.
+  const auto consumed = state->bytes_processed;
+  const auto expected = state->total_bytes;
+  return expected == consumed;
+}
 
-int CharacterConverter::push(char8_t utf8_byte) {}
+int CharacterConverter::push(char8_t utf8_byte) {
+  // Feeds one UTF-8 byte into the conversion state.
+  //
+  // Returns 0 when the byte is accepted and -1 when it is rejected. The state
+  // is never reset here, so after a rejected leading byte every later push
+  // keeps failing until the caller clears the mbstate.
+
+  // Leading byte: only examined while the state is completely empty.
+  if (state->bytes_processed == 0 && state->total_bytes == 0) {
+    char32_t payload_mask = 0;
+    if ((utf8_byte & 0x80) == 0x00) { // 0xxxxxxx: single byte
+      state->total_bytes = 1;
+      payload_mask = 0x7F;
+    } else if ((utf8_byte & 0xE0) == 0xC0) { // 110xxxxx: two bytes
+      state->total_bytes = 2;
+      payload_mask = 0x1F;
+    } else if ((utf8_byte & 0xF0) == 0xE0) { // 1110xxxx: three bytes
+      state->total_bytes = 3;
+      payload_mask = 0x0F;
+    } else if ((utf8_byte & 0xF8) == 0xF0) { // 11110xxx: four bytes
+      state->total_bytes = 4;
+      payload_mask = 0x07;
+    } else {
+      // Not a valid leading byte (10xxxxxx or 11111xxx). One byte is recorded
+      // as consumed while no length is expected, which keeps isComplete()
+      // false and routes every later push to the rejection below.
+      state->bytes_processed = 1;
+      return -1;
+    }
+    state->bytes_processed = 1;
+    // Mask in char32_t so no narrowing back to char8_t is needed.
+    state->partial = static_cast<char32_t>(utf8_byte) & payload_mask;
+    return 0;
+  }
+
+  // No further byte is expected: either the character is already complete or
+  // the leading byte was rejected. Refuse the byte without touching the
+  // accumulated value so a finished code point cannot be corrupted.
+  if (state->bytes_processed >= state->total_bytes)
+    return -1;
+
+  // Inside a sequence every byte, accepted or not, uses up one position.
+  state->bytes_processed++;
+  if ((utf8_byte & 0xC0) != 0x80)
+    return -1;
+
+  // Continuation byte (10xxxxxx): append its six payload bits.
+  state->partial =
+      (state->partial << 6) | static_cast<char32_t>(utf8_byte & 0x3F);
+  return 0;
+}
 
-int CharacterConverter::push(char32_t utf32) {}
+int CharacterConverter::push(char32_t utf32) {
+  // NOTE(review): looks like a placeholder - the input is echoed back as the
+  // return value and the converter state is not updated.
+  const int echoed = static_cast<int>(utf32);
+  return echoed;
+}
 
-utf_ret<char8_t> CharacterConverter::pop_utf8() {}
+utf_ret<char8_t> CharacterConverter::pop_utf8() {
+  // Always produces a zero output byte together with a zero error code; the
+  // converter state is neither read nor modified.
+  utf_ret<char8_t> result;
+  result.out = 0;
+  result.error = 0;
+  return result;
+}
 
-utf_ret<char32_t> CharacterConverter::pop_utf32() {}
+utf_ret<char32_t> CharacterConverter::pop_utf32() {
+  // Hands back the code point assembled by the preceding push() calls.
+  //
+  // On success error is 0, out holds the code point, and the state is cleared
+  // so the next push() starts a new character. If nothing has been pushed or
+  // the sequence is still incomplete, error is -1, out is 0, and the state is
+  // left untouched so the caller can keep pushing bytes.
+  utf_ret<char32_t> result;
+  if (state->bytes_processed == 0 || !isComplete()) {
+    result.error = -1;
+    result.out = 0;
+    return result;
+  }
+
+  result.error = 0;
+  result.out = state->partial;
+
+  // Without this reset the leading-byte check in push() is never reached
+  // again and a second character cannot be decoded.
+  state->bytes_processed = 0;
+  state->total_bytes = 0;
+  state->partial = 0;
+  return result;
+}
 
 } // namespace internal
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/wchar/mbstate.h b/libc/src/__support/wchar/mbstate.h
index cb8950374de41..d33ee354a5443 100644
--- a/libc/src/__support/wchar/mbstate.h
+++ b/libc/src/__support/wchar/mbstate.h
@@ -18,7 +18,7 @@ namespace internal {
 
 struct mbstate {
   char32_t partial;
-  uint8_t bits_processed;
+  uint8_t bytes_processed;
   uint8_t total_bytes;
 };
 
diff --git a/libc/test/src/__support/CMakeLists.txt b/libc/test/src/__support/CMakeLists.txt
index 4fb0dae86e5ca..8905ac2127620 100644
--- a/libc/test/src/__support/CMakeLists.txt
+++ b/libc/test/src/__support/CMakeLists.txt
@@ -275,3 +275,4 @@ add_subdirectory(fixed_point)
 add_subdirectory(HashTable)
 add_subdirectory(time)
 add_subdirectory(threads)
+add_subdirectory(wchar)
diff --git a/libc/test/src/__support/wchar/CMakeLists.txt b/libc/test/src/__support/wchar/CMakeLists.txt
new file mode 100644
index 0000000000000..cf8e615a4fd59
--- /dev/null
+++ b/libc/test/src/__support/wchar/CMakeLists.txt
@@ -0,0 +1,11 @@
+# NOTE(review): this target is declared here, but the test below registers
+# itself with libc-support-tests instead - confirm which suite is intended.
+add_custom_target(libc-support-wchar-tests)
+
+# Unit test for the UTF-8 to UTF-32 path of CharacterConverter.
+add_libc_test(
+  utf8_to_32_test
+  SUITE
+    libc-support-tests
+  SRCS
+    utf8_to_32_test.cpp
+  DEPENDS
+    libc.src.__support.wchar.character_converter
+)
\ No newline at end of file
diff --git a/libc/test/src/__support/wchar/utf8_to_32_test.cpp b/libc/test/src/__support/wchar/utf8_to_32_test.cpp
new file mode 100644
index 0000000000000..aef9cfc557549
--- /dev/null
+++ b/libc/test/src/__support/wchar/utf8_to_32_test.cpp
@@ -0,0 +1,125 @@
+//===-- Unittests for character_converter utf8->32 ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/wchar/character_converter.h"
+#include "src/__support/wchar/mbstate.h"
+#include "src/__support/wchar/utf_ret.h"
+#include "test/UnitTest/Test.h"
+
+TEST(LlvmLibcCharacterConverterUTF8To32Test, OneByte) {
+  // A single ASCII byte is pushed and the decoded value is read back.
+  using LIBC_NAMESPACE::internal::CharacterConverter;
+  using LIBC_NAMESPACE::internal::mbstate;
+  using LIBC_NAMESPACE::internal::utf_ret;
+
+  mbstate state;
+  state.total_bytes = 0;
+  state.bytes_processed = 0;
+  CharacterConverter converter(&state);
+
+  char ascii = 'A';
+  int push_result = converter.push(static_cast<char8_t>(ascii));
+  utf_ret<char32_t> decoded = converter.pop_utf32();
+
+  EXPECT_EQ(push_result, 0);
+  EXPECT_EQ(decoded.error, 0);
+  EXPECT_EQ(static_cast<int>(decoded.out), 65);
+}
+
+TEST(LlvmLibcCharacterConverterUTF8To32Test, TwoBytes) {
+  // U+008E encoded as 0xC2 0x8E. The bytes are spelled out explicitly because
+  // a raw non-ASCII literal does not reliably survive editors and encodings.
+  LIBC_NAMESPACE::internal::mbstate state;
+  state.bytes_processed = 0;
+  state.total_bytes = 0;
+  const char ch[2] = {static_cast<char>(0xC2), static_cast<char>(0x8E)};
+
+  LIBC_NAMESPACE::internal::CharacterConverter char_conv(&state);
+  // Both bytes must be accepted before the result is inspected.
+  for (char byte : ch) {
+    ASSERT_EQ(char_conv.push(static_cast<char8_t>(byte)), 0);
+  }
+  LIBC_NAMESPACE::internal::utf_ret<char32_t> wch = char_conv.pop_utf32();
+
+  ASSERT_EQ(wch.error, 0);
+  ASSERT_EQ(static_cast<int>(wch.out), 142);
+}
+
+TEST(LlvmLibcCharacterConverterUTF8To32Test, ThreeBytes) {
+  // U+2211 (N-ary summation) encoded as 0xE2 0x88 0x91. Explicit bytes keep
+  // the test independent of the source and execution character sets.
+  LIBC_NAMESPACE::internal::mbstate state;
+  state.bytes_processed = 0;
+  state.total_bytes = 0;
+  const char ch[3] = {static_cast<char>(0xE2), static_cast<char>(0x88),
+                      static_cast<char>(0x91)};
+
+  LIBC_NAMESPACE::internal::CharacterConverter char_conv(&state);
+  // Every byte of the sequence must be accepted.
+  for (char byte : ch) {
+    ASSERT_EQ(char_conv.push(static_cast<char8_t>(byte)), 0);
+  }
+  LIBC_NAMESPACE::internal::utf_ret<char32_t> wch = char_conv.pop_utf32();
+
+  ASSERT_EQ(wch.error, 0);
+  ASSERT_EQ(static_cast<int>(wch.out), 8721);
+}
+
+TEST(LlvmLibcCharacterConverterUTF8To32Test, FourBytes) {
+  // U+1F921 encoded as 0xF0 0x9F 0xA4 0xA1. Explicit bytes keep the test
+  // independent of the source and execution character sets.
+  LIBC_NAMESPACE::internal::mbstate state;
+  state.bytes_processed = 0;
+  state.total_bytes = 0;
+  const char ch[4] = {static_cast<char>(0xF0), static_cast<char>(0x9F),
+                      static_cast<char>(0xA4), static_cast<char>(0xA1)};
+
+  LIBC_NAMESPACE::internal::CharacterConverter char_conv(&state);
+  // Every byte of the sequence must be accepted.
+  for (char byte : ch) {
+    ASSERT_EQ(char_conv.push(static_cast<char8_t>(byte)), 0);
+  }
+  LIBC_NAMESPACE::internal::utf_ret<char32_t> wch = char_conv.pop_utf32();
+
+  ASSERT_EQ(wch.error, 0);
+  ASSERT_EQ(static_cast<int>(wch.out), 129313);
+}
+
+TEST(LlvmLibcCharacterConverterUTF8To32Test, InvalidByte) {
+  // 0x80 has the continuation-byte pattern, so it cannot start a character.
+  using LIBC_NAMESPACE::internal::CharacterConverter;
+  using LIBC_NAMESPACE::internal::mbstate;
+
+  mbstate state;
+  state.total_bytes = 0;
+  state.bytes_processed = 0;
+  CharacterConverter converter(&state);
+
+  const char stray_continuation = static_cast<char>(0x80);
+  int push_result = converter.push(static_cast<char8_t>(stray_continuation));
+
+  ASSERT_EQ(push_result, -1);
+}
+
+TEST(LlvmLibcCharacterConverterUTF8To32Test, InvalidMultiByte) {
+  // The first byte (0x80) cannot start a character. The state is not reset
+  // after that failure, so the following 0x00 bytes are rejected as well.
+  using LIBC_NAMESPACE::internal::CharacterConverter;
+  using LIBC_NAMESPACE::internal::mbstate;
+
+  mbstate state;
+  state.total_bytes = 0;
+  state.bytes_processed = 0;
+  CharacterConverter converter(&state);
+
+  const char bytes[4] = {static_cast<char>(0x80), static_cast<char>(0x00),
+                         static_cast<char>(0x00), static_cast<char>(0x00)};
+  for (char byte : bytes) {
+    int push_result = converter.push(static_cast<char8_t>(byte));
+    ASSERT_EQ(push_result, -1);
+  }
+}
+
+TEST(LlvmLibcCharacterConverterUTF8To32Test, InvalidMiddleByte) {
+  // 0xF1 opens a four-byte sequence. The second byte (0xC0) is not a
+  // continuation byte and is rejected; the two continuation bytes that follow
+  // are still accepted.
+  using LIBC_NAMESPACE::internal::CharacterConverter;
+  using LIBC_NAMESPACE::internal::mbstate;
+
+  mbstate state;
+  state.total_bytes = 0;
+  state.bytes_processed = 0;
+  CharacterConverter converter(&state);
+
+  const char bytes[4] = {static_cast<char>(0xF1), static_cast<char>(0xC0),
+                         static_cast<char>(0x80), static_cast<char>(0x80)};
+  const int expected[4] = {0, -1, 0, 0};
+  for (int i = 0; i < 4; ++i) {
+    int push_result = converter.push(static_cast<char8_t>(bytes[i]));
+    ASSERT_EQ(push_result, expected[i]);
+  }
+}

github-actions · 2025-06-12T21:01:45Z

✅ With the latest revision this PR passed the C/C++ code formatter.

Implemented push and pop for utf8 to 32 conversion and tests.

libc/src/__support/wchar/character_converter.cpp

libc/test/src/__support/wchar/CMakeLists.txt

libc/test/src/__support/wchar/utf8_to_32_test.cpp

libc/src/__support/wchar/character_converter.cpp

libc/test/src/__support/wchar/utf8_to_32_test.cpp

libc/src/__support/wchar/character_converter.cpp

libc/test/src/__support/wchar/utf8_to_32_test.cpp

libc/src/__support/wchar/character_converter.cpp

uzairnawaz

Looks mostly good to me!

libc/src/__support/wchar/character_converter.cpp

libc/test/src/__support/wchar/utf8_to_32_test.cpp

libc/src/__support/wchar/character_converter.cpp

michaelrj-google

for constants that are just simple numbers that can be known at compile time, it's better to use constexpr instead of const.

libc/src/__support/wchar/character_converter.cpp

michaelrj-google

pretty much done, just a couple style things

libc/src/__support/wchar/character_converter.cpp

libc/test/src/__support/wchar/utf8_to_32_test.cpp

brooksmoses

This generally looks good, though I have a number of detail-level comments....

libc/src/__support/wchar/character_converter.cpp

libc/src/__support/wchar/character_converter.h

brooksmoses · 2025-06-13T22:44:15Z

libc/test/src/__support/wchar/utf8_to_32_test.cpp

+  LIBC_NAMESPACE::internal::mbstate state;
+  state.bytes_processed = 0;
+  state.total_bytes = 0;
+  const char ch[2] = {static_cast<char>(0xC2),


Dumb question: Are these static casts required? I would have expected them to be implicit.

It is unfortunately not :( it wouldn't build without the casting

libc/test/src/__support/wchar/utf8_to_32_test.cpp

michaelrj-google

Minor nit, but I think this PR is basically done. After both this and the other half of the conversions land I think we should plan to have a cleanup patch to unify their implementations a bit (deduplicate shared constants, match on table vs bitshifts, etc.) but that can be done later.

libc/src/__support/wchar/character_converter.cpp

michaelrj-google

LGTM, you can merge once the presubmits are done

llvm-ci · 2025-06-16T22:22:04Z

LLVM Buildbot has detected a new failure on builder libc-x86_64-debian-gcc-fullbuild-dbg running on libc-x86_64-debian-fullbuild while building libc at step 4 "annotate".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/131/builds/24134

Here is the relevant piece of the build log for the reference

Step 4 (annotate) failure: 'python ../llvm-zorg/zorg/buildbot/builders/annotated/libc-linux.py ...' (failure)
...
-- Build files have been written to: /home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/build
@@@BUILD_STEP build libc@@@
Running: ninja libc
ninja: no work to do.
@@@BUILD_STEP build libc-startup@@@
Running: ninja libc-startup
ninja: no work to do.
@@@BUILD_STEP libc-unit-tests@@@
Running: ninja libc-unit-tests
[1/1192] Building CXX object libc/src/__support/wchar/CMakeFiles/libc.src.__support.wchar.character_converter.dir/character_converter.cpp.o
FAILED: libc/src/__support/wchar/CMakeFiles/libc.src.__support.wchar.character_converter.dir/character_converter.cpp.o 
/usr/bin/g++ -DLIBC_NAMESPACE=__llvm_libc_20_0_0_git -D_DEBUG -I/home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc -isystem /home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/build/libc/include -fvisibility-inlines-hidden -Werror=date-time -Wall -Wextra -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -Wimplicit-fallthrough -Wno-nonnull -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wdelete-non-virtual-dtor -Wsuggest-override -Wno-comment -Wno-misleading-indentation -Wctad-maybe-unsupported -fdiagnostics-color -g -DLIBC_QSORT_IMPL=LIBC_QSORT_QUICK_SORT -DLIBC_ADD_NULL_CHECKS -fpie -ffreestanding -DLIBC_FULL_BUILD -isystem/usr/lib/gcc/x86_64-linux-gnu/12//include -nostdinc -idirafter/usr/include -fno-builtin -fno-exceptions -fno-lax-vector-conversions -fno-unwind-tables -fno-asynchronous-unwind-tables -fno-rtti -ftrivial-auto-var-init=pattern -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Wall -Wextra -Werror -Wconversion -Wno-sign-conversion -Wdeprecated -fext-numeric-literals -Wno-pedantic -Wimplicit-fallthrough -Wwrite-strings -Wextra-semi -std=gnu++17 -MD -MT libc/src/__support/wchar/CMakeFiles/libc.src.__support.wchar.character_converter.dir/character_converter.cpp.o -MF libc/src/__support/wchar/CMakeFiles/libc.src.__support.wchar.character_converter.dir/character_converter.cpp.o.d -o libc/src/__support/wchar/CMakeFiles/libc.src.__support.wchar.character_converter.dir/character_converter.cpp.o -c /home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc/src/__support/wchar/character_converter.cpp
/home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc/src/__support/wchar/character_converter.cpp: In member function ‘int __llvm_libc_20_0_0_git::internal::CharacterConverter::push(char8_t)’:
/home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc/src/__support/wchar/character_converter.cpp:49:17: error: conversion from ‘uint32_t’ {aka ‘unsigned int’} to ‘char8_t’ {aka ‘unsigned char’} may change value [-Werror=conversion]
   49 |       utf8_byte &= (base_mask >> num_ones);
      |       ~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~
cc1plus: all warnings being treated as errors
[2/1192] Running unit test libc.test.src.__support.block_test.__unit__
[==========] Running 23 tests from 1 test suite.
[ RUN      ] LlvmLibcBlockTest.CanCreateSingleAlignedBlock
[       OK ] LlvmLibcBlockTest.CanCreateSingleAlignedBlock (9 us)
[ RUN      ] LlvmLibcBlockTest.CanCreateUnalignedSingleBlock
[       OK ] LlvmLibcBlockTest.CanCreateUnalignedSingleBlock (2 us)
[ RUN      ] LlvmLibcBlockTest.CannotCreateTooSmallBlock
[       OK ] LlvmLibcBlockTest.CannotCreateTooSmallBlock (2 us)
[ RUN      ] LlvmLibcBlockTest.CanSplitBlock
[       OK ] LlvmLibcBlockTest.CanSplitBlock (4 us)
[ RUN      ] LlvmLibcBlockTest.CanSplitBlockUnaligned
[       OK ] LlvmLibcBlockTest.CanSplitBlockUnaligned (15 us)
[ RUN      ] LlvmLibcBlockTest.CanSplitMidBlock
[       OK ] LlvmLibcBlockTest.CanSplitMidBlock (8 us)
[ RUN      ] LlvmLibcBlockTest.CannotSplitTooSmallBlock
[       OK ] LlvmLibcBlockTest.CannotSplitTooSmallBlock (2 us)
[ RUN      ] LlvmLibcBlockTest.CannotSplitBlockWithoutHeaderSpace
[       OK ] LlvmLibcBlockTest.CannotSplitBlockWithoutHeaderSpace (2 us)
[ RUN      ] LlvmLibcBlockTest.CannotMakeBlockLargerInSplit
[       OK ] LlvmLibcBlockTest.CannotMakeBlockLargerInSplit (2 us)
[ RUN      ] LlvmLibcBlockTest.CanMakeMinimalSizeFirstBlock
[       OK ] LlvmLibcBlockTest.CanMakeMinimalSizeFirstBlock (4 us)
[ RUN      ] LlvmLibcBlockTest.CanMakeMinimalSizeSecondBlock
[       OK ] LlvmLibcBlockTest.CanMakeMinimalSizeSecondBlock (2 us)
[ RUN      ] LlvmLibcBlockTest.CanMarkBlockUsed
[       OK ] LlvmLibcBlockTest.CanMarkBlockUsed (3 us)
[ RUN      ] LlvmLibcBlockTest.CannotSplitUsedBlock
[       OK ] LlvmLibcBlockTest.CannotSplitUsedBlock (2 us)
[ RUN      ] LlvmLibcBlockTest.CanMergeWithNextBlock
[       OK ] LlvmLibcBlockTest.CanMergeWithNextBlock (3 us)
[ RUN      ] LlvmLibcBlockTest.CannotMergeWithFirstOrLastBlock
[       OK ] LlvmLibcBlockTest.CannotMergeWithFirstOrLastBlock (3 us)
Step 8 (libc-unit-tests) failure: libc-unit-tests (failure)
@@@BUILD_STEP libc-unit-tests@@@
Running: ninja libc-unit-tests
[1/1192] Building CXX object libc/src/__support/wchar/CMakeFiles/libc.src.__support.wchar.character_converter.dir/character_converter.cpp.o
FAILED: libc/src/__support/wchar/CMakeFiles/libc.src.__support.wchar.character_converter.dir/character_converter.cpp.o 
/usr/bin/g++ -DLIBC_NAMESPACE=__llvm_libc_20_0_0_git -D_DEBUG -I/home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc -isystem /home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/build/libc/include -fvisibility-inlines-hidden -Werror=date-time -Wall -Wextra -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -Wimplicit-fallthrough -Wno-nonnull -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wdelete-non-virtual-dtor -Wsuggest-override -Wno-comment -Wno-misleading-indentation -Wctad-maybe-unsupported -fdiagnostics-color -g -DLIBC_QSORT_IMPL=LIBC_QSORT_QUICK_SORT -DLIBC_ADD_NULL_CHECKS -fpie -ffreestanding -DLIBC_FULL_BUILD -isystem/usr/lib/gcc/x86_64-linux-gnu/12//include -nostdinc -idirafter/usr/include -fno-builtin -fno-exceptions -fno-lax-vector-conversions -fno-unwind-tables -fno-asynchronous-unwind-tables -fno-rtti -ftrivial-auto-var-init=pattern -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Wall -Wextra -Werror -Wconversion -Wno-sign-conversion -Wdeprecated -fext-numeric-literals -Wno-pedantic -Wimplicit-fallthrough -Wwrite-strings -Wextra-semi -std=gnu++17 -MD -MT libc/src/__support/wchar/CMakeFiles/libc.src.__support.wchar.character_converter.dir/character_converter.cpp.o -MF libc/src/__support/wchar/CMakeFiles/libc.src.__support.wchar.character_converter.dir/character_converter.cpp.o.d -o libc/src/__support/wchar/CMakeFiles/libc.src.__support.wchar.character_converter.dir/character_converter.cpp.o -c /home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc/src/__support/wchar/character_converter.cpp
/home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc/src/__support/wchar/character_converter.cpp: In member function ‘int __llvm_libc_20_0_0_git::internal::CharacterConverter::push(char8_t)’:
/home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc/src/__support/wchar/character_converter.cpp:49:17: error: conversion from ‘uint32_t’ {aka ‘unsigned int’} to ‘char8_t’ {aka ‘unsigned char’} may change value [-Werror=conversion]
   49 |       utf8_byte &= (base_mask >> num_ones);
      |       ~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~
cc1plus: all warnings being treated as errors
[2/1192] Running unit test libc.test.src.__support.block_test.__unit__
[==========] Running 23 tests from 1 test suite.
[ RUN      ] LlvmLibcBlockTest.CanCreateSingleAlignedBlock
[       OK ] LlvmLibcBlockTest.CanCreateSingleAlignedBlock (9 us)
[ RUN      ] LlvmLibcBlockTest.CanCreateUnalignedSingleBlock
[       OK ] LlvmLibcBlockTest.CanCreateUnalignedSingleBlock (2 us)
[ RUN      ] LlvmLibcBlockTest.CannotCreateTooSmallBlock
[       OK ] LlvmLibcBlockTest.CannotCreateTooSmallBlock (2 us)
[ RUN      ] LlvmLibcBlockTest.CanSplitBlock
[       OK ] LlvmLibcBlockTest.CanSplitBlock (4 us)
[ RUN      ] LlvmLibcBlockTest.CanSplitBlockUnaligned
[       OK ] LlvmLibcBlockTest.CanSplitBlockUnaligned (15 us)
[ RUN      ] LlvmLibcBlockTest.CanSplitMidBlock
[       OK ] LlvmLibcBlockTest.CanSplitMidBlock (8 us)
[ RUN      ] LlvmLibcBlockTest.CannotSplitTooSmallBlock
[       OK ] LlvmLibcBlockTest.CannotSplitTooSmallBlock (2 us)
[ RUN      ] LlvmLibcBlockTest.CannotSplitBlockWithoutHeaderSpace
[       OK ] LlvmLibcBlockTest.CannotSplitBlockWithoutHeaderSpace (2 us)
[ RUN      ] LlvmLibcBlockTest.CannotMakeBlockLargerInSplit
[       OK ] LlvmLibcBlockTest.CannotMakeBlockLargerInSplit (2 us)
[ RUN      ] LlvmLibcBlockTest.CanMakeMinimalSizeFirstBlock
[       OK ] LlvmLibcBlockTest.CanMakeMinimalSizeFirstBlock (4 us)
[ RUN      ] LlvmLibcBlockTest.CanMakeMinimalSizeSecondBlock
[       OK ] LlvmLibcBlockTest.CanMakeMinimalSizeSecondBlock (2 us)
[ RUN      ] LlvmLibcBlockTest.CanMarkBlockUsed
[       OK ] LlvmLibcBlockTest.CanMarkBlockUsed (3 us)
[ RUN      ] LlvmLibcBlockTest.CannotSplitUsedBlock
[       OK ] LlvmLibcBlockTest.CannotSplitUsedBlock (2 us)
[ RUN      ] LlvmLibcBlockTest.CanMergeWithNextBlock
[       OK ] LlvmLibcBlockTest.CanMergeWithNextBlock (3 us)
[ RUN      ] LlvmLibcBlockTest.CannotMergeWithFirstOrLastBlock
[       OK ] LlvmLibcBlockTest.CannotMergeWithFirstOrLastBlock (3 us)
[ RUN      ] LlvmLibcBlockTest.CannotMergeUsedBlock
[       OK ] LlvmLibcBlockTest.CannotMergeUsedBlock (3 us)
[ RUN      ] LlvmLibcBlockTest.CanGetBlockFromUsableSpace
[       OK ] LlvmLibcBlockTest.CanGetBlockFromUsableSpace (2 us)
[ RUN      ] LlvmLibcBlockTest.CanGetConstBlockFromUsableSpace
[       OK ] LlvmLibcBlockTest.CanGetConstBlockFromUsableSpace (2 us)
[ RUN      ] LlvmLibcBlockTest.Allocate
[       OK ] LlvmLibcBlockTest.Allocate (1 ms)

This reverts commit 98eee4b.

Reverts #143973 This merge broke the build and I'm currently looking into the issue to fix it.

Reverts llvm/llvm-project#143973 This merge broke the build and I'm currently looking into the issue to fix it.

llvmbot added the libc label Jun 12, 2025

[libc] CharacterConverter utf8 to 32 push and pop

9561ab5

Implemented push and pop for utf8 to 32 conversion and tests.

sribee8 force-pushed the utf8-32-character-converter branch from 3e18ffb to 9561ab5 Compare June 12, 2025 21:03

sribee8 changed the title ~~Utf8-32-character-converter~~ [libc]Utf8-32-character-converter Jun 12, 2025

Sriya Pratipati added 2 commits June 12, 2025 21:05

fixed formatting

54c64e0

Changed cmakelists for tests since MacOS does not have uchar header

a82f9ee

sribee8 changed the title ~~[libc]Utf8-32-character-converter~~ [libc] utf8 to 32 CharacterConverter Jun 12, 2025

Sriya Pratipati and others added 2 commits June 12, 2025 21:42

Deleted unused functions

4087711

Merge branch 'main' into utf8-32-character-converter

84913d8

sribee8 requested a review from uzairnawaz June 12, 2025 22:08

michaelrj-google reviewed Jun 12, 2025

View reviewed changes

uzairnawaz reviewed Jun 12, 2025

View reviewed changes

Sriya Pratipati added 2 commits June 12, 2025 23:09

Cleaned up code, added edge cases, added new test cases for edge cases

20cd8e5

Fixed invalid pop and added test

a188c4b

uzairnawaz reviewed Jun 12, 2025

View reviewed changes

libc/src/__support/wchar/character_converter.cpp Outdated Show resolved Hide resolved

libc/test/src/__support/wchar/utf8_to_32_test.cpp Outdated Show resolved Hide resolved

cleaned up code

9535c5b

michaelrj-google reviewed Jun 12, 2025

View reviewed changes

libc/src/__support/wchar/character_converter.cpp Outdated Show resolved Hide resolved

Sriya Pratipati added 2 commits June 12, 2025 23:56

Changed return type to error_or for pop

82bff49

fixed formatting

b1e17ad

uzairnawaz reviewed Jun 13, 2025

View reviewed changes

libc/src/__support/wchar/character_converter.cpp Outdated Show resolved Hide resolved

libc/src/__support/wchar/character_converter.cpp Outdated Show resolved Hide resolved

Sriya Pratipati added 2 commits June 13, 2025 16:30

code cleanup

3ad6866

fixed formatting

3323d0d

michaelrj-google reviewed Jun 13, 2025

View reviewed changes

libc/src/__support/wchar/character_converter.cpp Outdated Show resolved Hide resolved

libc/src/__support/wchar/character_converter.cpp Outdated Show resolved Hide resolved

libc/test/src/__support/wchar/utf8_to_32_test.cpp Outdated Show resolved Hide resolved

Cleaned up pushing first byte and some readability changes

19fabd3

michaelrj-google reviewed Jun 13, 2025

View reviewed changes

libc/src/__support/wchar/character_converter.cpp Outdated Show resolved Hide resolved

changed variable name

b46a878

michaelrj-google reviewed Jun 13, 2025

View reviewed changes

libc/src/__support/wchar/character_converter.cpp Outdated Show resolved Hide resolved

removed magic numbers

077525a

michaelrj-google reviewed Jun 13, 2025

View reviewed changes

libc/src/__support/wchar/character_converter.cpp Outdated Show resolved Hide resolved

Sriya Pratipati added 2 commits June 13, 2025 18:08

renamed variable and const -> constexpr

80d3a83

fixed formatting

4f59bc7

michaelrj-google reviewed Jun 13, 2025

View reviewed changes

libc/src/__support/wchar/character_converter.cpp Outdated Show resolved Hide resolved

libc/test/src/__support/wchar/utf8_to_32_test.cpp Outdated Show resolved Hide resolved

libc/test/src/__support/wchar/utf8_to_32_test.cpp Outdated Show resolved Hide resolved

style fixes

60db657

brooksmoses reviewed Jun 13, 2025

View reviewed changes

brooksmoses mentioned this pull request Jun 13, 2025

[libc] Implemented CharacterConverter push/pop for utf32->utf8 conversions #143971

Merged

style and detail fixes

25ab213

michaelrj-google reviewed Jun 16, 2025

View reviewed changes

libc/src/__support/wchar/character_converter.cpp Outdated Show resolved Hide resolved

Sriya Pratipati added 2 commits June 16, 2025 18:28

cleaned up comment

72db39d

removed unnecessary check

0a5f434

michaelrj-google approved these changes Jun 16, 2025

View reviewed changes

sribee8 and others added 3 commits June 16, 2025 13:16

Merge branch 'main' into utf8-32-character-converter

039429d

formatting fix

8b2e2a9

removed unused include

50d690b

sribee8 merged commit 98eee4b into llvm:main Jun 16, 2025
12 of 13 checks passed

sribee8 added a commit that referenced this pull request Jun 16, 2025

Revert "[libc] utf8 to 32 CharacterConverter (#143973)"

d7fecc0

This reverts commit 98eee4b.

sribee8 mentioned this pull request Jun 16, 2025

Revert "[libc] utf8 to 32 CharacterConverter" #144446

Merged

sribee8 added a commit that referenced this pull request Jun 16, 2025

Revert "[libc] utf8 to 32 CharacterConverter" (#144446)

6e12442

Reverts #143973 This merge broke the build and I'm currently looking into the issue to fix it.

llvm-sync bot pushed a commit to arm/arm-toolchain that referenced this pull request Jun 16, 2025

Automerge: Revert "[libc] utf8 to 32 CharacterConverter" (#144446)

1004b70

Reverts llvm/llvm-project#143973 This merge broke the build and I'm currently looking into the issue to fix it.

[libc] utf8 to 32 CharacterConverter #143973

[libc] utf8 to 32 CharacterConverter #143973

Uh oh!

Conversation

sribee8 commented Jun 12, 2025

Uh oh!

llvmbot commented Jun 12, 2025

Uh oh!

github-actions bot commented Jun 12, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

uzairnawaz left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

michaelrj-google left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

michaelrj-google left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

brooksmoses left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

brooksmoses Jun 13, 2025

Choose a reason for hiding this comment

Uh oh!

sribee8 Jun 13, 2025

Choose a reason for hiding this comment

Uh oh!

Uh oh!

michaelrj-google left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

michaelrj-google left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

llvm-ci commented Jun 16, 2025

Uh oh!

Uh oh!

github-actions bot commented Jun 12, 2025 •

edited

Loading