Skip to content

[libc] utf8 to 32 CharacterConverter #143973

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 24 commits into llvm:main from utf8-32-character-converter
Jun 16, 2025

Conversation

sribee8
Copy link
Contributor

@sribee8 sribee8 commented Jun 12, 2025

Implemented push and pop for UTF-8 to UTF-32 conversion, and added tests.

@llvmbot llvmbot added the libc label Jun 12, 2025
@llvmbot
Copy link
Member

llvmbot commented Jun 12, 2025

@llvm/pr-subscribers-libc

Author: None (sribee8)

Changes

Implemented push and pop for utf8 to 32 conversion and tests.


Full diff: https://github.com/llvm/llvm-project/pull/143973.diff

5 Files Affected:

  • (modified) libc/src/__support/wchar/character_converter.cpp (+69-5)
  • (modified) libc/src/__support/wchar/mbstate.h (+1-1)
  • (modified) libc/test/src/__support/CMakeLists.txt (+1)
  • (added) libc/test/src/__support/wchar/CMakeLists.txt (+11)
  • (added) libc/test/src/__support/wchar/utf8_to_32_test.cpp (+125)
diff --git a/libc/src/__support/wchar/character_converter.cpp b/libc/src/__support/wchar/character_converter.cpp
index 3cdb8ca83b7f0..9c2fde3134837 100644
--- a/libc/src/__support/wchar/character_converter.cpp
+++ b/libc/src/__support/wchar/character_converter.cpp
@@ -18,15 +18,79 @@ namespace internal {
 
 CharacterConverter::CharacterConverter(mbstate *mbstate) { state = mbstate; }
 
-bool CharacterConverter::isComplete() {}
+bool CharacterConverter::isComplete() {
+  return state->bytes_processed == state->total_bytes;
+}
 
-int CharacterConverter::push(char8_t utf8_byte) {}
+int CharacterConverter::push(char8_t utf8_byte) {
+  // Checking the first byte if first push
+  if (state->bytes_processed == 0 && state->total_bytes == 0) {
+    // 1 byte total
+    if ((utf8_byte & 128) == 0) {
+      state->total_bytes = 1;
+      state->bytes_processed = 1;
+      state->partial = static_cast<char32_t>(utf8_byte);
+      return 0;
+    }
+    // 2 bytes total
+    else if ((utf8_byte & 0xE0) == 0xC0) {
+      state->total_bytes = 2;
+      state->bytes_processed = 1;
+      utf8_byte &= 0x1F;
+      state->partial = static_cast<char32_t>(utf8_byte);
+      return 0;
+    }
+    // 3 bytes total
+    else if ((utf8_byte & 0xF0) == 0xE0) {
+      state->total_bytes = 3;
+      state->bytes_processed = 1;
+      utf8_byte &= 0x0F;
+      state->partial = static_cast<char32_t>(utf8_byte);
+      return 0;
+    }
+    // 4 bytes total
+    else if ((utf8_byte & 0xF8) == 0xF0) {
+      state->total_bytes = 4;
+      state->bytes_processed = 1;
+      utf8_byte &= 0x07;
+      state->partial = static_cast<char32_t>(utf8_byte);
+      return 0;
+    }
+    // Invalid
+    else {
+        state->bytes_processed++;
+        return -1;
+    }
+  }
+  // Any subsequent push
+  if ((utf8_byte & 0xC0) == 0x80) {
+    state->partial = state->partial << 6;
+    char32_t byte = utf8_byte & 0x3F;
+    state->partial |= byte;
+    state->bytes_processed++;
+    return 0;
+  }
+  state->bytes_processed++;
+  return -1;
+}
 
-int CharacterConverter::push(char32_t utf32) {}
+int CharacterConverter::push(char32_t utf32) { 
+    return utf32; 
+}
 
-utf_ret<char8_t> CharacterConverter::pop_utf8() {}
+utf_ret<char8_t> CharacterConverter::pop_utf8() {
+  utf_ret<char8_t> utf8;
+  utf8.error = 0;
+  utf8.out = 0;
+  return utf8;
+}
 
-utf_ret<char32_t> CharacterConverter::pop_utf32() {}
+utf_ret<char32_t> CharacterConverter::pop_utf32() {
+  utf_ret<char32_t> utf32;
+  utf32.error = 0;
+  utf32.out = state->partial;
+  return utf32;
+}
 
 } // namespace internal
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/wchar/mbstate.h b/libc/src/__support/wchar/mbstate.h
index cb8950374de41..d33ee354a5443 100644
--- a/libc/src/__support/wchar/mbstate.h
+++ b/libc/src/__support/wchar/mbstate.h
@@ -18,7 +18,7 @@ namespace internal {
 
 struct mbstate {
   char32_t partial;
-  uint8_t bits_processed;
+  uint8_t bytes_processed;
   uint8_t total_bytes;
 };
 
diff --git a/libc/test/src/__support/CMakeLists.txt b/libc/test/src/__support/CMakeLists.txt
index 4fb0dae86e5ca..8905ac2127620 100644
--- a/libc/test/src/__support/CMakeLists.txt
+++ b/libc/test/src/__support/CMakeLists.txt
@@ -275,3 +275,4 @@ add_subdirectory(fixed_point)
 add_subdirectory(HashTable)
 add_subdirectory(time)
 add_subdirectory(threads)
+add_subdirectory(wchar)
diff --git a/libc/test/src/__support/wchar/CMakeLists.txt b/libc/test/src/__support/wchar/CMakeLists.txt
new file mode 100644
index 0000000000000..cf8e615a4fd59
--- /dev/null
+++ b/libc/test/src/__support/wchar/CMakeLists.txt
@@ -0,0 +1,11 @@
+add_custom_target(libc-support-wchar-tests)
+
+add_libc_test(
+  utf8_to_32_test 
+  SUITE
+    libc-support-tests
+  SRCS
+    utf8_to_32_test.cpp 
+  DEPENDS
+    libc.src.__support.wchar.character_converter
+)
\ No newline at end of file
diff --git a/libc/test/src/__support/wchar/utf8_to_32_test.cpp b/libc/test/src/__support/wchar/utf8_to_32_test.cpp
new file mode 100644
index 0000000000000..aef9cfc557549
--- /dev/null
+++ b/libc/test/src/__support/wchar/utf8_to_32_test.cpp
@@ -0,0 +1,125 @@
+//===-- Unittests for character_converter utf8->32 ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/wchar/character_converter.h"
+#include "src/__support/wchar/mbstate.h"
+#include "src/__support/wchar/utf_ret.h"
+#include "test/UnitTest/Test.h"
+
+TEST(LlvmLibcCharacterConverterUTF8To32Test, OneByte) {
+  LIBC_NAMESPACE::internal::mbstate state;
+  state.bytes_processed = 0;
+  state.total_bytes = 0;
+  char ch = 'A';
+
+  LIBC_NAMESPACE::internal::CharacterConverter char_conv(&state);
+  int err = char_conv.push(static_cast<char8_t>(ch));
+  LIBC_NAMESPACE::internal::utf_ret<char32_t> wch = char_conv.pop_utf32();
+
+  EXPECT_EQ(err, 0);
+  EXPECT_EQ(wch.error, 0);
+  EXPECT_EQ(static_cast<int>(wch.out), 65);
+}
+
+TEST(LlvmLibcCharacterConverterUTF8To32Test, TwoBytes) {
+  LIBC_NAMESPACE::internal::mbstate state;
+  state.bytes_processed = 0;
+  state.total_bytes = 0;
+  const char *ch = "\xC2\x8E"; // hex 0xC2, 0x8E
+
+  LIBC_NAMESPACE::internal::CharacterConverter char_conv(&state);
+  char_conv.push(static_cast<char8_t>(ch[0]));
+  char_conv.push(static_cast<char8_t>(ch[1]));
+  LIBC_NAMESPACE::internal::utf_ret<char32_t> wch = char_conv.pop_utf32();
+
+  ASSERT_EQ(wch.error, 0);
+  ASSERT_EQ(static_cast<int>(wch.out), 142);
+}
+
+TEST(LlvmLibcCharacterConverterUTF8To32Test, ThreeBytes) {
+  LIBC_NAMESPACE::internal::mbstate state;
+  state.bytes_processed = 0;
+  state.total_bytes = 0;
+  const char *ch = "∑"; // hex 0xE2, 0x88, 0x91
+
+  LIBC_NAMESPACE::internal::CharacterConverter char_conv(&state);
+  char_conv.push(static_cast<char8_t>(ch[0]));
+  char_conv.push(static_cast<char8_t>(ch[1]));
+  char_conv.push(static_cast<char8_t>(ch[2]));
+  LIBC_NAMESPACE::internal::utf_ret<char32_t> wch = char_conv.pop_utf32();
+
+  ASSERT_EQ(wch.error, 0);
+  ASSERT_EQ(static_cast<int>(wch.out), 8721);
+}
+
+TEST(LlvmLibcCharacterConverterUTF8To32Test, FourBytes) {
+  LIBC_NAMESPACE::internal::mbstate state;
+  state.bytes_processed = 0;
+  state.total_bytes = 0;
+  const char *ch = "🤡"; // hex 0xF0, 0x9F, 0xA4, 0xA1
+
+  LIBC_NAMESPACE::internal::CharacterConverter char_conv(&state);
+  char_conv.push(static_cast<char8_t>(ch[0]));
+  char_conv.push(static_cast<char8_t>(ch[1]));
+  char_conv.push(static_cast<char8_t>(ch[2]));
+  char_conv.push(static_cast<char8_t>(ch[3]));
+  LIBC_NAMESPACE::internal::utf_ret<char32_t> wch = char_conv.pop_utf32();
+
+  ASSERT_EQ(wch.error, 0);
+  ASSERT_EQ(static_cast<int>(wch.out), 129313);
+}
+
+TEST(LlvmLibcCharacterConverterUTF8To32Test, InvalidByte) {
+  LIBC_NAMESPACE::internal::mbstate state;
+  state.bytes_processed = 0;
+  state.total_bytes = 0;
+  const char ch = static_cast<char>(0x80); // invalid starting bit sequence
+
+  LIBC_NAMESPACE::internal::CharacterConverter char_conv(&state);
+  int err = char_conv.push(static_cast<char8_t>(ch));
+
+  ASSERT_EQ(err, -1);
+}
+
+TEST(LlvmLibcCharacterConverterUTF8To32Test, InvalidMultiByte) {
+  LIBC_NAMESPACE::internal::mbstate state;
+  state.bytes_processed = 0;
+  state.total_bytes = 0;
+  const char ch[4] = {static_cast<char>(0x80), static_cast<char>(0x00),
+                      static_cast<char>(0x00),
+                      static_cast<char>(0x00)}; // All bytes are invalid
+
+  LIBC_NAMESPACE::internal::CharacterConverter char_conv(&state);
+  int err = char_conv.push(static_cast<char8_t>(ch[0]));
+  ASSERT_EQ(err, -1);
+  err = char_conv.push(static_cast<char8_t>(ch[1]));
+  ASSERT_EQ(err, -1);
+  err = char_conv.push(static_cast<char8_t>(ch[2]));
+  ASSERT_EQ(err, -1);
+  err = char_conv.push(static_cast<char8_t>(ch[3]));
+  ASSERT_EQ(err, -1);
+}
+
+TEST(LlvmLibcCharacterConverterUTF8To32Test, InvalidMiddleByte) {
+  LIBC_NAMESPACE::internal::mbstate state;
+  state.bytes_processed = 0;
+  state.total_bytes = 0;
+  const char ch[4] = {static_cast<char>(0xF1), static_cast<char>(0xC0),
+                      static_cast<char>(0x80),
+                      static_cast<char>(0x80)}; // invalid second byte
+
+  LIBC_NAMESPACE::internal::CharacterConverter char_conv(&state);
+  int err = char_conv.push(static_cast<char8_t>(ch[0]));
+  ASSERT_EQ(err, 0);
+  err = char_conv.push(static_cast<char8_t>(ch[1]));
+  ASSERT_EQ(err, -1);
+  err = char_conv.push(static_cast<char8_t>(ch[2]));
+  ASSERT_EQ(err, 0);
+  err = char_conv.push(static_cast<char8_t>(ch[3]));
+  ASSERT_EQ(err, 0);
+}

Copy link

github-actions bot commented Jun 12, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

Implemented push and pop for utf8 to 32 conversion and tests.
@sribee8 sribee8 force-pushed the utf8-32-character-converter branch from 3e18ffb to 9561ab5 Compare June 12, 2025 21:03
@sribee8 sribee8 changed the title Utf8-32-character-converter [libc]Utf8-32-character-converter Jun 12, 2025
@sribee8 sribee8 changed the title [libc]Utf8-32-character-converter [libc] utf8 to 32 CharacterConverter Jun 12, 2025
@sribee8 sribee8 requested a review from uzairnawaz June 12, 2025 22:08
Copy link
Contributor

@uzairnawaz uzairnawaz left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks mostly good to me!

Copy link
Contributor

@michaelrj-google michaelrj-google left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For constants that are just simple numbers known at compile time, it's better to use constexpr instead of const.

Copy link
Contributor

@michaelrj-google michaelrj-google left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pretty much done, just a couple style things

Copy link

@brooksmoses brooksmoses left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This generally looks good, though I have a number of detail-level comments....

LIBC_NAMESPACE::internal::mbstate state;
state.bytes_processed = 0;
state.total_bytes = 0;
const char ch[2] = {static_cast<char>(0xC2),

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Dumb question: Are these static casts required? I would have expected them to be implicit.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately they are not implicit :( It wouldn't build without the casts.

Copy link
Contributor

@michaelrj-google michaelrj-google left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor nit, but I think this PR is basically done. After both this and the other half of the conversions land I think we should plan to have a cleanup patch to unify their implementations a bit (deduplicate shared constants, match on table vs bitshifts, etc.) but that can be done later.

Copy link
Contributor

@michaelrj-google michaelrj-google left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM, you can merge once the presubmits are done

@sribee8 sribee8 merged commit 98eee4b into llvm:main Jun 16, 2025
12 of 13 checks passed
@llvm-ci
Copy link
Collaborator

llvm-ci commented Jun 16, 2025

LLVM Buildbot has detected a new failure on builder libc-x86_64-debian-gcc-fullbuild-dbg running on libc-x86_64-debian-fullbuild while building libc at step 4 "annotate".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/131/builds/24134

Here is the relevant piece of the build log for the reference
Step 4 (annotate) failure: 'python ../llvm-zorg/zorg/buildbot/builders/annotated/libc-linux.py ...' (failure)
...
-- Build files have been written to: /home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/build
@@@BUILD_STEP build libc@@@
Running: ninja libc
ninja: no work to do.
@@@BUILD_STEP build libc-startup@@@
Running: ninja libc-startup
ninja: no work to do.
@@@BUILD_STEP libc-unit-tests@@@
Running: ninja libc-unit-tests
[1/1192] Building CXX object libc/src/__support/wchar/CMakeFiles/libc.src.__support.wchar.character_converter.dir/character_converter.cpp.o
FAILED: libc/src/__support/wchar/CMakeFiles/libc.src.__support.wchar.character_converter.dir/character_converter.cpp.o 
/usr/bin/g++ -DLIBC_NAMESPACE=__llvm_libc_20_0_0_git -D_DEBUG -I/home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc -isystem /home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/build/libc/include -fvisibility-inlines-hidden -Werror=date-time -Wall -Wextra -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -Wimplicit-fallthrough -Wno-nonnull -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wdelete-non-virtual-dtor -Wsuggest-override -Wno-comment -Wno-misleading-indentation -Wctad-maybe-unsupported -fdiagnostics-color -g -DLIBC_QSORT_IMPL=LIBC_QSORT_QUICK_SORT -DLIBC_ADD_NULL_CHECKS -fpie -ffreestanding -DLIBC_FULL_BUILD -isystem/usr/lib/gcc/x86_64-linux-gnu/12//include -nostdinc -idirafter/usr/include -fno-builtin -fno-exceptions -fno-lax-vector-conversions -fno-unwind-tables -fno-asynchronous-unwind-tables -fno-rtti -ftrivial-auto-var-init=pattern -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Wall -Wextra -Werror -Wconversion -Wno-sign-conversion -Wdeprecated -fext-numeric-literals -Wno-pedantic -Wimplicit-fallthrough -Wwrite-strings -Wextra-semi -std=gnu++17 -MD -MT libc/src/__support/wchar/CMakeFiles/libc.src.__support.wchar.character_converter.dir/character_converter.cpp.o -MF libc/src/__support/wchar/CMakeFiles/libc.src.__support.wchar.character_converter.dir/character_converter.cpp.o.d -o libc/src/__support/wchar/CMakeFiles/libc.src.__support.wchar.character_converter.dir/character_converter.cpp.o -c /home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc/src/__support/wchar/character_converter.cpp
/home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc/src/__support/wchar/character_converter.cpp: In member function ‘int __llvm_libc_20_0_0_git::internal::CharacterConverter::push(char8_t)’:
/home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc/src/__support/wchar/character_converter.cpp:49:17: error: conversion from ‘uint32_t’ {aka ‘unsigned int’} to ‘char8_t’ {aka ‘unsigned char’} may change value [-Werror=conversion]
   49 |       utf8_byte &= (base_mask >> num_ones);
      |       ~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~
cc1plus: all warnings being treated as errors
[2/1192] Running unit test libc.test.src.__support.block_test.__unit__
[==========] Running 23 tests from 1 test suite.
[ RUN      ] LlvmLibcBlockTest.CanCreateSingleAlignedBlock
[       OK ] LlvmLibcBlockTest.CanCreateSingleAlignedBlock (9 us)
[ RUN      ] LlvmLibcBlockTest.CanCreateUnalignedSingleBlock
[       OK ] LlvmLibcBlockTest.CanCreateUnalignedSingleBlock (2 us)
[ RUN      ] LlvmLibcBlockTest.CannotCreateTooSmallBlock
[       OK ] LlvmLibcBlockTest.CannotCreateTooSmallBlock (2 us)
[ RUN      ] LlvmLibcBlockTest.CanSplitBlock
[       OK ] LlvmLibcBlockTest.CanSplitBlock (4 us)
[ RUN      ] LlvmLibcBlockTest.CanSplitBlockUnaligned
[       OK ] LlvmLibcBlockTest.CanSplitBlockUnaligned (15 us)
[ RUN      ] LlvmLibcBlockTest.CanSplitMidBlock
[       OK ] LlvmLibcBlockTest.CanSplitMidBlock (8 us)
[ RUN      ] LlvmLibcBlockTest.CannotSplitTooSmallBlock
[       OK ] LlvmLibcBlockTest.CannotSplitTooSmallBlock (2 us)
[ RUN      ] LlvmLibcBlockTest.CannotSplitBlockWithoutHeaderSpace
[       OK ] LlvmLibcBlockTest.CannotSplitBlockWithoutHeaderSpace (2 us)
[ RUN      ] LlvmLibcBlockTest.CannotMakeBlockLargerInSplit
[       OK ] LlvmLibcBlockTest.CannotMakeBlockLargerInSplit (2 us)
[ RUN      ] LlvmLibcBlockTest.CanMakeMinimalSizeFirstBlock
[       OK ] LlvmLibcBlockTest.CanMakeMinimalSizeFirstBlock (4 us)
[ RUN      ] LlvmLibcBlockTest.CanMakeMinimalSizeSecondBlock
[       OK ] LlvmLibcBlockTest.CanMakeMinimalSizeSecondBlock (2 us)
[ RUN      ] LlvmLibcBlockTest.CanMarkBlockUsed
[       OK ] LlvmLibcBlockTest.CanMarkBlockUsed (3 us)
[ RUN      ] LlvmLibcBlockTest.CannotSplitUsedBlock
[       OK ] LlvmLibcBlockTest.CannotSplitUsedBlock (2 us)
[ RUN      ] LlvmLibcBlockTest.CanMergeWithNextBlock
[       OK ] LlvmLibcBlockTest.CanMergeWithNextBlock (3 us)
[ RUN      ] LlvmLibcBlockTest.CannotMergeWithFirstOrLastBlock
[       OK ] LlvmLibcBlockTest.CannotMergeWithFirstOrLastBlock (3 us)
Step 8 (libc-unit-tests) failure: libc-unit-tests (failure)
@@@BUILD_STEP libc-unit-tests@@@
Running: ninja libc-unit-tests
[1/1192] Building CXX object libc/src/__support/wchar/CMakeFiles/libc.src.__support.wchar.character_converter.dir/character_converter.cpp.o
FAILED: libc/src/__support/wchar/CMakeFiles/libc.src.__support.wchar.character_converter.dir/character_converter.cpp.o 
/usr/bin/g++ -DLIBC_NAMESPACE=__llvm_libc_20_0_0_git -D_DEBUG -I/home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc -isystem /home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/build/libc/include -fvisibility-inlines-hidden -Werror=date-time -Wall -Wextra -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -Wimplicit-fallthrough -Wno-nonnull -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wdelete-non-virtual-dtor -Wsuggest-override -Wno-comment -Wno-misleading-indentation -Wctad-maybe-unsupported -fdiagnostics-color -g -DLIBC_QSORT_IMPL=LIBC_QSORT_QUICK_SORT -DLIBC_ADD_NULL_CHECKS -fpie -ffreestanding -DLIBC_FULL_BUILD -isystem/usr/lib/gcc/x86_64-linux-gnu/12//include -nostdinc -idirafter/usr/include -fno-builtin -fno-exceptions -fno-lax-vector-conversions -fno-unwind-tables -fno-asynchronous-unwind-tables -fno-rtti -ftrivial-auto-var-init=pattern -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Wall -Wextra -Werror -Wconversion -Wno-sign-conversion -Wdeprecated -fext-numeric-literals -Wno-pedantic -Wimplicit-fallthrough -Wwrite-strings -Wextra-semi -std=gnu++17 -MD -MT libc/src/__support/wchar/CMakeFiles/libc.src.__support.wchar.character_converter.dir/character_converter.cpp.o -MF libc/src/__support/wchar/CMakeFiles/libc.src.__support.wchar.character_converter.dir/character_converter.cpp.o.d -o libc/src/__support/wchar/CMakeFiles/libc.src.__support.wchar.character_converter.dir/character_converter.cpp.o -c /home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc/src/__support/wchar/character_converter.cpp
/home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc/src/__support/wchar/character_converter.cpp: In member function ‘int __llvm_libc_20_0_0_git::internal::CharacterConverter::push(char8_t)’:
/home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc/src/__support/wchar/character_converter.cpp:49:17: error: conversion from ‘uint32_t’ {aka ‘unsigned int’} to ‘char8_t’ {aka ‘unsigned char’} may change value [-Werror=conversion]
   49 |       utf8_byte &= (base_mask >> num_ones);
      |       ~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~
cc1plus: all warnings being treated as errors
[2/1192] Running unit test libc.test.src.__support.block_test.__unit__
[==========] Running 23 tests from 1 test suite.
[ RUN      ] LlvmLibcBlockTest.CanCreateSingleAlignedBlock
[       OK ] LlvmLibcBlockTest.CanCreateSingleAlignedBlock (9 us)
[ RUN      ] LlvmLibcBlockTest.CanCreateUnalignedSingleBlock
[       OK ] LlvmLibcBlockTest.CanCreateUnalignedSingleBlock (2 us)
[ RUN      ] LlvmLibcBlockTest.CannotCreateTooSmallBlock
[       OK ] LlvmLibcBlockTest.CannotCreateTooSmallBlock (2 us)
[ RUN      ] LlvmLibcBlockTest.CanSplitBlock
[       OK ] LlvmLibcBlockTest.CanSplitBlock (4 us)
[ RUN      ] LlvmLibcBlockTest.CanSplitBlockUnaligned
[       OK ] LlvmLibcBlockTest.CanSplitBlockUnaligned (15 us)
[ RUN      ] LlvmLibcBlockTest.CanSplitMidBlock
[       OK ] LlvmLibcBlockTest.CanSplitMidBlock (8 us)
[ RUN      ] LlvmLibcBlockTest.CannotSplitTooSmallBlock
[       OK ] LlvmLibcBlockTest.CannotSplitTooSmallBlock (2 us)
[ RUN      ] LlvmLibcBlockTest.CannotSplitBlockWithoutHeaderSpace
[       OK ] LlvmLibcBlockTest.CannotSplitBlockWithoutHeaderSpace (2 us)
[ RUN      ] LlvmLibcBlockTest.CannotMakeBlockLargerInSplit
[       OK ] LlvmLibcBlockTest.CannotMakeBlockLargerInSplit (2 us)
[ RUN      ] LlvmLibcBlockTest.CanMakeMinimalSizeFirstBlock
[       OK ] LlvmLibcBlockTest.CanMakeMinimalSizeFirstBlock (4 us)
[ RUN      ] LlvmLibcBlockTest.CanMakeMinimalSizeSecondBlock
[       OK ] LlvmLibcBlockTest.CanMakeMinimalSizeSecondBlock (2 us)
[ RUN      ] LlvmLibcBlockTest.CanMarkBlockUsed
[       OK ] LlvmLibcBlockTest.CanMarkBlockUsed (3 us)
[ RUN      ] LlvmLibcBlockTest.CannotSplitUsedBlock
[       OK ] LlvmLibcBlockTest.CannotSplitUsedBlock (2 us)
[ RUN      ] LlvmLibcBlockTest.CanMergeWithNextBlock
[       OK ] LlvmLibcBlockTest.CanMergeWithNextBlock (3 us)
[ RUN      ] LlvmLibcBlockTest.CannotMergeWithFirstOrLastBlock
[       OK ] LlvmLibcBlockTest.CannotMergeWithFirstOrLastBlock (3 us)
[ RUN      ] LlvmLibcBlockTest.CannotMergeUsedBlock
[       OK ] LlvmLibcBlockTest.CannotMergeUsedBlock (3 us)
[ RUN      ] LlvmLibcBlockTest.CanGetBlockFromUsableSpace
[       OK ] LlvmLibcBlockTest.CanGetBlockFromUsableSpace (2 us)
[ RUN      ] LlvmLibcBlockTest.CanGetConstBlockFromUsableSpace
[       OK ] LlvmLibcBlockTest.CanGetConstBlockFromUsableSpace (2 us)
[ RUN      ] LlvmLibcBlockTest.Allocate
[       OK ] LlvmLibcBlockTest.Allocate (1 ms)

sribee8 added a commit that referenced this pull request Jun 16, 2025
sribee8 added a commit that referenced this pull request Jun 16, 2025
Reverts #143973
This merge broke the build and I'm currently looking into the issue to
fix it.
llvm-sync bot pushed a commit to arm/arm-toolchain that referenced this pull request Jun 16, 2025
Reverts llvm/llvm-project#143973
This merge broke the build and I'm currently looking into the issue to
fix it.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

Successfully merging this pull request may close these issues.

6 participants