From ac5368636d421b7b1a88cb659c9339cfdced75c3 Mon Sep 17 00:00:00 2001 From: Annie Caron Date: Wed, 29 Apr 2026 19:03:38 -0400 Subject: [PATCH] Scrub invalid UTF-8 in FailureFormatter#to_s `String#encode!(Encoding::UTF_8, invalid: :replace, ...)` is a no-op when the source encoding is already UTF-8. When `body` interpolation produces a UTF-8-tagged string with invalid byte sequences (e.g. binary protobuf bodies surfaced through WebMock failure messages), the existing `encode!` call lets those bytes through and `JSON.dump` later crashes in `BuildStatusReporter#write_failure_file` with `Encoding::UndefinedConversionError`, taking down the entire report step. Keep the `encode!` call, which still transcodes non-UTF-8 sources (the original ASCII-8BIT-source case), and follow it with `scrub!`, which replaces the invalid bytes in the new UTF-8-tagged-with-invalid-bytes case. Adds a regression test for the second case, plus coverage for valid UTF-8, ISO-8859-1 and `Minitest::UnexpectedError` failures, alongside the existing one. --- ruby/lib/minitest/queue/failure_formatter.rb | 1 + .../minitest/queue/failure_formatter_test.rb | 52 ++++++++++++++++++- 2 files changed, 51 insertions(+), 2 deletions(-) diff --git a/ruby/lib/minitest/queue/failure_formatter.rb b/ruby/lib/minitest/queue/failure_formatter.rb index a77384a0..87179f09 100644 --- a/ruby/lib/minitest/queue/failure_formatter.rb +++ b/ruby/lib/minitest/queue/failure_formatter.rb @@ -15,6 +15,7 @@ def initialize(test) def to_s s = +"#{header}\n#{body}\n\n" s.encode!(Encoding::UTF_8, invalid: :replace, undef: :replace) + s.scrub! 
s end diff --git a/ruby/test/minitest/queue/failure_formatter_test.rb b/ruby/test/minitest/queue/failure_formatter_test.rb index 2d5ca2e8..5b12b3b9 100644 --- a/ruby/test/minitest/queue/failure_formatter_test.rb +++ b/ruby/test/minitest/queue/failure_formatter_test.rb @@ -6,12 +6,60 @@ module Minitest::Queue class FailureFormatterTest < Minitest::Test include ReporterTestHelper - def test_failure_formatter_to_h_can_be_json_dumped + def test_to_s_and_to_h_with_valid_utf8 + test = result('test_valid', failure: "assertion failed: expected \u2603") + formatter = FailureFormatter.new(test) + + assert_predicate(formatter.to_s, :valid_encoding?) + assert_includes(formatter.to_s, "assertion failed: expected \u2603") + assert(JSON.dump(formatter.to_h)) + end + + def test_to_s_and_to_h_with_ascii_8bit_failure test = result('test_json', failure: "\xD6".b) + formatter = FailureFormatter.new(test) + + assert_predicate(formatter.to_s, :valid_encoding?) + assert(JSON.dump(formatter.to_h)) + end + + def test_to_s_and_to_h_with_utf8_tagged_invalid_bytes + # Mirror the production trigger: a UTF-8-tagged string containing invalid byte + # sequences. encode!(UTF_8, ...) is a no-op when source == dest encoding, so + # without scrub! these bytes pass through and crash JSON.dump. + invalid_utf8 = "boom \xE1\x02\xFF tail".dup.force_encoding(Encoding::UTF_8) + refute_predicate(invalid_utf8, :valid_encoding?) + + test = result('test_json_utf8_tagged', failure: invalid_utf8) + formatter = FailureFormatter.new(test) + + assert_predicate(formatter.to_s, :valid_encoding?) + assert_includes(formatter.to_s, "tail") + assert(JSON.dump(formatter.to_h)) + end + + def test_to_s_and_to_h_with_iso_8859_1_failure + # encode! transcodes non-UTF-8 sources; verify the character is preserved. 
+ iso_message = "expected \xD6sterreich".dup.force_encoding(Encoding::ISO_8859_1) + test = result('test_iso', failure: iso_message) + formatter = FailureFormatter.new(test) + + assert_predicate(formatter.to_s, :valid_encoding?) + assert_includes(formatter.to_s, "\u00D6sterreich") + assert(JSON.dump(formatter.to_h)) + end + + def test_to_s_and_to_h_with_unexpected_error_containing_invalid_bytes + error = StandardError.new("binary payload \xC0\xFF".b) + error.set_backtrace(["test.rb:1:in `test'"]) + unexpected = Minitest::UnexpectedError.new(error) + test = result('test_unexpected', failure: unexpected) formatter = FailureFormatter.new(test) - assert JSON.dump(formatter.to_h) + assert_predicate(formatter.to_s, :valid_encoding?) + assert_includes(formatter.to_s, "StandardError") + assert(JSON.dump(formatter.to_h)) end end end