Skip to content

Commit

Permalink
If the field offset of UTF8Encoding.emitUtf8Identifier is not known b…
Browse files Browse the repository at this point in the history
…ased on the runtime and process word size, scan for it automatically. Determine runtime using reflection instead of compile-time preprocessor directives, which had prevented this library from targeting .NET Standard and thus more runtimes than just .NET Core and Framework. Added workaround to fix crash in Utf8Json due to dependency on the original Encoding.UTF8.GetPreamble() on static type initialization.
  • Loading branch information
Aldaviva committed Nov 8, 2023
1 parent 98a3bc8 commit 1448b36
Show file tree
Hide file tree
Showing 11 changed files with 204 additions and 52 deletions.
24 changes: 10 additions & 14 deletions .github/workflows/dotnetpackage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,17 @@ jobs:
- name: Build
run: dotnet build ${{ env.ProjectName }} --no-restore --configuration Release --verbosity normal

- name: Test x64
- name: Test
run: |
dotnet test --arch x64 --verbosity normal --configuration Release --collect:"XPlat Code Coverage" --settings Test/Test.runsettings --logger trx --logger "console;verbosity=detailed"
Out-File -InputObject "TEST_EXIT_CODE_X64=$LASTEXITCODE" -FilePath $env:GITHUB_ENV -Append -Encoding UTF8
dotnet test --arch x64 --configuration Release --collect:"XPlat Code Coverage" --settings Test/Test.runsettings --logger trx --verbosity normal
$TEST_EXIT_CODE_X64 = $LASTEXITCODE
Get-ChildItem Test\TestResults\*\coverage.info | Rename-Item -NewName lcov.info
exit 0
- name: Test x86
run: |
dotnet test --arch x86 --verbosity normal --configuration Release --collect:"XPlat Code Coverage" --settings Test/Test.runsettings --logger trx --logger "console;verbosity=detailed"
Out-File -InputObject "TEST_EXIT_CODE_X86=$LASTEXITCODE" -FilePath $env:GITHUB_ENV -Append -Encoding UTF8
dotnet test --arch x86 --configuration Release --collect:"XPlat Code Coverage" --settings Test/Test.runsettings --logger trx --verbosity normal
$TEST_EXIT_CODE_X86 = $LASTEXITCODE
Get-ChildItem Test\TestResults\*\coverage.info | Rename-Item -NewName lcov.info
Out-File -InputObject "TEST_EXIT_CODE=$($TEST_EXIT_CODE_X64 + $TEST_EXIT_CODE_X86)" -FilePath $env:GITHUB_ENV -Append -Encoding UTF8
exit 0
- name: Upload test report
Expand All @@ -50,11 +49,8 @@ jobs:
with:
github-token: ${{ secrets.GITHUB_TOKEN }}

- name: Stop if x64 tests failed
run: exit $env:TEST_EXIT_CODE_X64

- name: Stop if x86 tests failed
run: exit $env:TEST_EXIT_CODE_X86
- name: Stop if tests failed
run: exit $env:TEST_EXIT_CODE

- name: Pack
run: dotnet pack ${{ env.ProjectName }} --no-build --configuration Release --verbosity normal
Expand Down
4 changes: 2 additions & 2 deletions Bom.Squad.sln
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
VisualStudioVersion = 17.7.34221.43
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Test", "Test\Test.csproj", "{FDC49FE7-C032-4C40-87FE-DA52078465FE}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Test", "Test\Test.csproj", "{FDC49FE7-C032-4C40-87FE-DA52078465FE}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Bom.Squad", "Bom.Squad\Bom.Squad.csproj", "{06F9E300-025A-479D-AE1D-53CBB811E9FA}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Bom.Squad", "Bom.Squad\Bom.Squad.csproj", "{06F9E300-025A-479D-AE1D-53CBB811E9FA}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Expand Down
10 changes: 5 additions & 5 deletions Bom.Squad/Bom.Squad.csproj
Original file line number Diff line number Diff line change
@@ -1,24 +1,23 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFrameworks>net452;netcoreapp2.0</TargetFrameworks>
<TargetFrameworks>net452;netstandard2.0</TargetFrameworks>
<Nullable>enable</Nullable>
<LangVersion>latest</LangVersion>

<Version>0.1.0</Version>
<Version>0.2.0</Version>
<Authors>Ben Hutchison</Authors>
<Copyright>© 2023 $(Authors)</Copyright>
<Company>$(Authors)</Company>
<NoWarn>CS8524</NoWarn>
<CheckEolTargetFramework>false</CheckEolTargetFramework> <!-- It's just a library, I'm not actually using .NET Core 2 -->

<PackageProjectUrl>https://github.com/Aldaviva/Bom.Squad</PackageProjectUrl>
<RepositoryUrl>https://github.com/Aldaviva/Bom.Squad.git</RepositoryUrl>
<PublishRepositoryUrl>true</PublishRepositoryUrl>
<RepositoryType>git</RepositoryType>
<PackageLicenseExpression>MIT</PackageLicenseExpression>
<Description>Disable writing UTF-8 byte order marks (BOMs) in .NET</Description>
<PackageTags>utf utf8 unicode bom byte-order-mark</PackageTags>
<Description>Disable writing UTF-8 byte order marks (BOMs)</Description>
<PackageTags>utf utf8 unicode bom byte-order-mark encoding charset codepage</PackageTags>
<PackageIcon>icon.png</PackageIcon>
<GeneratePackageOnBuild>false</GeneratePackageOnBuild>
<GenerateDocumentationFile>true</GenerateDocumentationFile>
Expand All @@ -32,6 +31,7 @@
<ItemGroup>
<None Include="icon.png" Pack="true" PackagePath="\" />
<None Include="..\Readme.md" Pack="true" PackagePath="\" />
<AdditionalFiles Include="ExceptionAdjustments.txt" />
<InternalsVisibleTo Include="Test" />
</ItemGroup>

Expand Down
89 changes: 72 additions & 17 deletions Bom.Squad/BomSquad.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,30 +12,85 @@ namespace Bom.Squad;
/// </summary>
public static class BomSquad {

private const int MinScanOffset = 5; // if we write to offset 4, it causes a segfault on .NET 7 MacOS (ARM64 or x86_64)
private const int MaxScanOffset = 76;

private static readonly Encoding Utf8 = Encoding.UTF8;

private static bool IsBomDefused => Utf8.GetPreamble().Length == 0;

private static int? _emitUtf8IdentifierFieldOffset;

/// <summary>
/// <para>Modify the base class library <see cref="Encoding.UTF8"/> to not output byte order marks when encoding, although it will still parse them correctly.</para>
/// <para>After calling this method, the <see cref="Encoding.UTF8"/> instance will behave as if it was constructed with the <c>encoderShouldEmitUTF8Identifier</c> constructor parameter set to false, so it will not write BOMs.</para>
/// <para>Other encodings, such as <see cref="Encoding.Unicode"/> and <c>new UTF8Encoding(true, true)</c>, will not be affected.</para>
/// </summary>
/// <exception cref="AccessViolationException">if dereferencing one of the pointers fails</exception>
/// <exception cref="PlatformNotSupportedException">If this library does not support the current operating system, runtime, and CPU architecture, either explicitly or automatically.</exception>
/// <exception cref="AccessViolationException">If this process' memory cannot be read or written.</exception>
public static void DefuseUtf8Bom() {
Encoding utf8 = Encoding.UTF8;
if (utf8.GetPreamble().LongLength > 0) {
GCHandle gcHandle = GCHandle.Alloc(utf8, GCHandleType.WeakTrackResurrection);
IntPtr pointer1 = GCHandle.ToIntPtr(gcHandle);
IntPtr pointer2 = Marshal.ReadIntPtr(pointer1);

int emitUtf8IdentifierFieldOffset = PlatformInfo.IsNetCore switch {
true when PlatformInfo.Is64Bit => 37,
true => 21,
false when PlatformInfo.Is64Bit => 38,
false => 22
};

Marshal.WriteByte(pointer2, emitUtf8IdentifierFieldOffset, 0); // set private readonly bool UTF8Encoding._emitUTF8Identifier to false

gcHandle.Free();
if (!IsBomDefused) {
Workarounds.ApplyWorkarounds();

GCHandle gcHandle = GCHandle.Alloc(Utf8, GCHandleType.WeakTrackResurrection);
try {
IntPtr gcHandlePointer = GCHandle.ToIntPtr(gcHandle);
IntPtr utf8Pointer = Marshal.ReadIntPtr(gcHandlePointer);

bool is64Bit = PlatformInfo.IsProcess64Bit;
int? emitUtf8IdentifierFieldOffset = _emitUtf8IdentifierFieldOffset ?? PlatformInfo.ProcessRuntime switch {
PlatformInfo.Runtime.NetCore when is64Bit => 37,
PlatformInfo.Runtime.NetCore => 21,
PlatformInfo.Runtime.NetFramework when is64Bit => 38,
PlatformInfo.Runtime.NetFramework => 22,
_ => null
};

if (emitUtf8IdentifierFieldOffset != null) {
// set private readonly bool UTF8Encoding._emitUTF8Identifier to false
Marshal.WriteByte(utf8Pointer, emitUtf8IdentifierFieldOffset.Value, 0);
} else if ((emitUtf8IdentifierFieldOffset = ScanForBomOffset(utf8Pointer)) == null) {
throw new PlatformNotSupportedException(
"Bom.Squad does not yet have the ability to disable Encoding.UTF8 BOM on this operating system, .NET runtime, and CPU architecture combination. Please file an issue at https://github.com/Aldaviva/Bom.Squad/issues/new with this information.");
}

_emitUtf8IdentifierFieldOffset = emitUtf8IdentifierFieldOffset;
} finally {
gcHandle.Free();
}
}
}

/// <summary>
/// <para>Write 1 back to the byte at the recorded <c>_emitUtf8IdentifierFieldOffset</c> inside the shared UTF-8 encoding object.</para>
/// <para>Does nothing unless the preamble is currently empty and an offset has been recorded.</para>
/// </summary>
/// <exception cref="AccessViolationException">If this process' memory cannot be read or written.</exception>
internal static void RearmUtf8Bom() {
    // Guard clause: nothing to restore if the BOM is still armed or no offset was ever recorded.
    if (!IsBomDefused || _emitUtf8IdentifierFieldOffset is not { } fieldOffset) {
        return;
    }

    GCHandle handle = GCHandle.Alloc(Utf8, GCHandleType.WeakTrackResurrection);
    try {
        // The handle's slot holds the object's address; the field offset is relative to that address.
        IntPtr handlePointer = GCHandle.ToIntPtr(handle);
        IntPtr utf8Pointer   = Marshal.ReadIntPtr(handlePointer);
        Marshal.WriteByte(utf8Pointer, fieldOffset, 1);
    } finally {
        handle.Free();
    }
}

/// <summary>
/// <para>Search for the byte, relative to <paramref name="utf8Pointer2"/>, that makes the shared UTF-8 encoding stop returning a preamble when it is cleared.</para>
/// <para>Offsets from <c>MinScanOffset</c> through <c>MaxScanOffset</c> (inclusive) are tried in ascending order. Only bytes whose current value is 1 are touched: the byte is set to 0, and if the preamble is then empty the offset is returned with the byte left at 0. Otherwise the previous value is written back and the scan continues.</para>
/// </summary>
/// <param name="utf8Pointer2">Base address that every probed offset is added to.</param>
/// <returns>The first offset whose byte disabled the preamble, or <c>null</c> if none in the range did.</returns>
internal static int? ScanForBomOffset(IntPtr utf8Pointer2) {
    for (int candidate = MinScanOffset; candidate <= MaxScanOffset; candidate++) {
        try {
            byte previous = Marshal.ReadByte(utf8Pointer2, candidate);
            if (previous != 1) {
                // not a byte that is currently 1, so it is never modified
                continue;
            }

            Marshal.WriteByte(utf8Pointer2, candidate, 0);
            if (IsBomDefused) {
                return candidate;
            }

            // wrong byte: put it back before trying the next offset
            Marshal.WriteByte(utf8Pointer2, candidate, previous);
        } catch {
            //continue to next offset
        }
    }

    return null;
}

}
8 changes: 8 additions & 0 deletions Bom.Squad/ExceptionAdjustments.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Due to [1], you may have to manually change the "Build Action" of this file to "C# analyzer additional file".
# [1] https://github.com/dotnet/roslyn/issues/4655

# This file adjusts exception information used by Tetractic.CodeAnalysis.ExceptionAnalyzers.
# Usage: <memberId>[ <accessor>] (-/+)<exceptionTypeId>
# See ECMA-334, 5th Ed. § D.4.2 "ID string format" for a description of the ID format.

P:System.Array.Length get -T:System.OverflowException
22 changes: 14 additions & 8 deletions Bom.Squad/PlatformInfo.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,19 @@

internal readonly struct PlatformInfo {

public static bool Is64Bit { get; } = IntPtr.Size == 8;

public static bool IsNetCore { get; } =
#if NETCOREAPP
true;
#else
false;
#endif
public static bool IsProcess64Bit { get; } = IntPtr.Size == 8;

/// <summary>
/// <para>Runtime family of the current process, decided once by the property initializer from the simple name of the assembly that defines <see cref="object"/>.</para>
/// <para><c>"mscorlib"</c> maps to <see cref="Runtime.NetFramework"/>, <c>"System.Private.CoreLib"</c> maps to <see cref="Runtime.NetCore"/>, and any other name yields <c>null</c>.</para>
/// </summary>
// NOTE(review): any other runtime whose core library is also named "mscorlib" (presumably Mono) would be reported as NetFramework here — confirm that is intended.
public static Runtime? ProcessRuntime { get; } = typeof(object).Assembly.GetName().Name switch {
"mscorlib" => Runtime.NetFramework,
"System.Private.CoreLib" => Runtime.NetCore,
_ => null
};

public enum Runtime {

NetFramework,
NetCore

}

}
22 changes: 22 additions & 0 deletions Bom.Squad/Workarounds.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
namespace Bom.Squad;

/// <summary>
/// Compatibility fixes for third-party libraries, run through <see cref="ApplyWorkarounds"/>.
/// </summary>
internal static class Workarounds {

    /// <summary>
    /// Run every workaround in this class. At present that is only the Utf8Json fix.
    /// </summary>
    public static void ApplyWorkarounds() => FixUtf8Json();

    /// <summary>
    /// <para>Utf8Json needs to read the UTF-8 BOM once when it first loads.</para>
    /// <para>To prevent it from crashing with an <see cref="IndexOutOfRangeException"/> inside <c>JsonSerializer.DeserializeAsync</c> and related methods, construct a throwaway <c>JsonReader</c> instance before disabling the BOM.</para>
    /// <para>The type is looked up by name through reflection, so nothing happens when Utf8Json cannot be found or has no <c>byte[]</c> constructor.</para>
    /// <para>https://www.nuget.org/packages/ZCS.Utf8Json</para>
    /// </summary>
    private static void FixUtf8Json() {
        try {
            var jsonReaderType = Type.GetType("Utf8Json.JsonReader, Utf8Json");
            if (jsonReaderType is null) {
                return;
            }

            var byteArrayConstructor = jsonReaderType.GetConstructor(new[] { typeof(byte[]) });
            if (byteArrayConstructor is null) {
                return;
            }

            // The instance is discarded; only the constructor call matters.
            byte[] throwawayInput = { (byte) '1', (byte) '1', (byte) '1' };
            byteArrayConstructor.Invoke(new object[] { throwawayInput });
        } catch (Exception e) when (e is not OutOfMemoryException) {
            // Best effort: every failure except running out of memory is ignored.
        }
    }

}
6 changes: 4 additions & 2 deletions Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

[![Nuget](https://img.shields.io/nuget/v/Bom.Squad?logo=nuget&color=blue)](https://www.nuget.org/packages/Bom.Squad/) [![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/Aldaviva/Bom.Squad/dotnetpackage.yml?branch=master&logo=github)](https://github.com/Aldaviva/Bom.Squad/actions/workflows/dotnetpackage.yml) [![Testspace](https://img.shields.io/testspace/tests/Aldaviva/Aldaviva:Bom.Squad/master?passed_label=passing&failed_label=failing&logo=data%3Aimage%2Fsvg%2Bxml%3Bbase64%2CPHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCA4NTkgODYxIj48cGF0aCBkPSJtNTk4IDUxMy05NCA5NCAyOCAyNyA5NC05NC0yOC0yN3pNMzA2IDIyNmwtOTQgOTQgMjggMjggOTQtOTQtMjgtMjh6bS00NiAyODctMjcgMjcgOTQgOTQgMjctMjctOTQtOTR6bTI5My0yODctMjcgMjggOTQgOTQgMjctMjgtOTQtOTR6TTQzMiA4NjFjNDEuMzMgMCA3Ni44My0xNC42NyAxMDYuNS00NFM1ODMgNzUyIDU4MyA3MTBjMC00MS4zMy0xNC44My03Ni44My00NC41LTEwNi41UzQ3My4zMyA1NTkgNDMyIDU1OWMtNDIgMC03Ny42NyAxNC44My0xMDcgNDQuNXMtNDQgNjUuMTctNDQgMTA2LjVjMCA0MiAxNC42NyA3Ny42NyA0NCAxMDdzNjUgNDQgMTA3IDQ0em0wLTU1OWM0MS4zMyAwIDc2LjgzLTE0LjgzIDEwNi41LTQ0LjVTNTgzIDE5Mi4zMyA1ODMgMTUxYzAtNDItMTQuODMtNzcuNjctNDQuNS0xMDdTNDczLjMzIDAgNDMyIDBjLTQyIDAtNzcuNjcgMTQuNjctMTA3IDQ0cy00NCA2NS00NCAxMDdjMCA0MS4zMyAxNC42NyA3Ni44MyA0NCAxMDYuNVMzOTAgMzAyIDQzMiAzMDJ6bTI3NiAyODJjNDIgMCA3Ny42Ny0xNC44MyAxMDctNDQuNXM0NC02NS4xNyA0NC0xMDYuNWMwLTQyLTE0LjY3LTc3LjY3LTQ0LTEwN3MtNjUtNDQtMTA3LTQ0Yy00MS4zMyAwLTc2LjY3IDE0LjY3LTEwNiA0NHMtNDQgNjUtNDQgMTA3YzAgNDEuMzMgMTQuNjcgNzYuODMgNDQgMTA2LjVTNjY2LjY3IDU4NCA3MDggNTg0em0tNTU3IDBjNDIgMCA3Ny42Ny0xNC44MyAxMDctNDQuNXM0NC02NS4xNyA0NC0xMDYuNWMwLTQyLTE0LjY3LTc3LjY3LTQ0LTEwN3MtNjUtNDQtMTA3LTQ0Yy00MS4zMyAwLTc2LjgzIDE0LjY3LTEwNi41IDQ0UzAgMzkxIDAgNDMzYzAgNDEuMzMgMTQuODMgNzYuODMgNDQuNSAxMDYuNVMxMDkuNjcgNTg0IDE1MSA1ODR6IiBmaWxsPSIjZmZmIi8%2BPC9zdmc%2B)](https://aldaviva.testspace.com/spaces/245919) [![Coveralls](https://img.shields.io/coveralls/github/Aldaviva/Bom.Squad?logo=coveralls)](https://coveralls.io/github/Aldaviva/Bom.Squad?branch=master)

*Disable writing UTF-8 byte order marks (BOMs) in .NET*
*Disable writing UTF-8 byte order marks (BOMs)*

<!-- MarkdownTOC autolink="true" bracket="round" levels="1,2,3" bullets="1." -->

Expand Down Expand Up @@ -32,8 +32,10 @@ BomSquad.DefuseUtf8Bom();
- .NET Core 2.0 or later
- .NET Framework 4.5.2 or later
- CPU architecture
- x86_64
- x86_64/x64
- x86
- ARM32/Aarch32
- ARM64/AArch64

## Problem

Expand Down
33 changes: 30 additions & 3 deletions Test/BomSquadTest.cs
Original file line number Diff line number Diff line change
@@ -1,16 +1,23 @@
using Bom.Squad;
using FluentAssertions;
using System;
using System.Collections.Generic;
using System.IO;
using System.Runtime.InteropServices;
using System.Text;
using Xunit.Abstractions;

namespace Test;

public class BomSquadTest {
public class BomSquadTest: IDisposable {

/// <summary>
/// Teardown: calls <see cref="BomSquad.RearmUtf8Bom"/> when the test class instance is disposed.
/// </summary>
public void Dispose() => BomSquad.RearmUtf8Bom();

public BomSquadTest(ITestOutputHelper outputHelper) {
outputHelper.WriteLine($".NET {(PlatformInfo.IsNetCore ? "Core" : "Framework")} {(PlatformInfo.Is64Bit ? "64-bit" : "32-bit")}");
outputHelper.WriteLine(
$".NET {PlatformInfo.ProcessRuntime switch { PlatformInfo.Runtime.NetFramework => "Framework", PlatformInfo.Runtime.NetCore => "Core", _ => "unknown runtime" }} {(PlatformInfo.IsProcess64Bit ? "64-bit" : "32-bit")}");
}

[Fact]
Expand Down Expand Up @@ -40,19 +47,39 @@ public class BomSquadTest {
const string input = "hi";

Encode(input, new UTF8Encoding(false, true)).Should().Equal(expected);
Encoding.UTF8.GetBytes(input).Should().Equal(expected);

BomSquad.DefuseUtf8Bom();
Encode(input, Encoding.UTF8).Should().Equal(expected);
Encoding.UTF8.GetBytes(input).Should().Equal(expected);
}

/// <summary>
/// This test never calls <c>DefuseUtf8Bom</c>. It checks that the <c>Encode</c> helper yields EF BB BF followed by the text bytes, both for <see cref="Encoding.UTF8"/> and for a <see cref="UTF8Encoding"/> constructed with <c>(true, true)</c>.
/// </summary>
[Fact]
public void WriteUtf8Bom() {
    const string input = "hi";
    byte[] bomThenText = { 0xEF, 0xBB, 0xBF, 0x68, 0x69 };

    IEnumerable<byte> viaSharedInstance = Encode(input, Encoding.UTF8);
    viaSharedInstance.Should().Equal(bomThenText);

    IEnumerable<byte> viaExplicitInstance = Encode(input, new UTF8Encoding(true, true));
    viaExplicitInstance.Should().Equal(bomThenText);
}

/// <summary>
/// <para>Calls <see cref="BomSquad.ScanForBomOffset"/> directly on the address of <see cref="Encoding.UTF8"/> and checks that it returns an offset and leaves the preamble empty, then that writing 1 back to that offset restores the preamble.</para>
/// <para>The restoring write and the handle release sit in <c>finally</c> blocks, so a failed assertion neither leaks the <see cref="GCHandle"/> nor leaves <see cref="Encoding.UTF8"/> defused.</para>
/// </summary>
[Fact]
public void ScanForBomOffset() {
    Encoding.UTF8.GetPreamble().Length.Should().NotBe(0);

    GCHandle gcHandle = GCHandle.Alloc(Encoding.UTF8, GCHandleType.WeakTrackResurrection);
    try {
        IntPtr utf8Pointer = Marshal.ReadIntPtr(GCHandle.ToIntPtr(gcHandle));

        int? bomOffset = BomSquad.ScanForBomOffset(utf8Pointer);
        try {
            Encoding.UTF8.GetPreamble().Length.Should().Be(0);
            bomOffset.Should().NotBeNull();
        } finally {
            // Undo the scan's write even if an assertion above threw.
            if (bomOffset is { } foundOffset) {
                Marshal.WriteByte(utf8Pointer, foundOffset, 1);
            }
        }

        Encoding.UTF8.GetPreamble().Length.Should().NotBe(0);
    } finally {
        gcHandle.Free();
    }
}

private static IEnumerable<byte> Encode(string input, Encoding encoding) {
using MemoryStream memoryStream = new();
using (StreamWriter writer = new(memoryStream, encoding)) {
Expand Down
5 changes: 4 additions & 1 deletion Test/Test.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

<IsPackable>false</IsPackable>
<IsTestProject>true</IsTestProject>
<PlatformTargets>x86;x64</PlatformTargets>
</PropertyGroup>

<ItemGroup>
Expand All @@ -24,6 +23,10 @@
</PackageReference>
</ItemGroup>

<ItemGroup Condition="'$(TargetFramework)' != 'net452'">
<PackageReference Include="ZCS.Utf8Json" Version="1.4.4" />
</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\Bom.Squad\Bom.Squad.csproj" />
</ItemGroup>
Expand Down
33 changes: 33 additions & 0 deletions Test/WorkaroundsTest.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
using Bom.Squad;
using FluentAssertions;
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Threading.Tasks;
#if !NET452
using Utf8Json;
#endif

namespace Test;

/// <summary>
/// Tests for the third-party library workarounds that run as part of <c>BomSquad.DefuseUtf8Bom</c>.
/// </summary>
public class WorkaroundsTest: IDisposable {

    // Separate encoding instance (constructed with (false, true)) used only to build the test input bytes.
    private static readonly Encoding Utf8 = new UTF8Encoding(false, true);

    /// <summary>
    /// Teardown: calls <c>BomSquad.RearmUtf8Bom</c> when the test class instance is disposed.
    /// </summary>
    public void Dispose() => BomSquad.RearmUtf8Bom();

#if !NET452
    /// <summary>
    /// After defusing the BOM, Utf8Json must still deserialize a small JSON object from a stream.
    /// </summary>
    [Fact]
    public async Task ZcsUtf8Json() {
        BomSquad.DefuseUtf8Bom();

        byte[] jsonBytes = Utf8.GetBytes("""{"hello":"world"}""");
        MemoryStream stream = new(jsonBytes);

        var deserialized = await JsonSerializer.DeserializeAsync<Dictionary<string, string>>(stream);

        deserialized["hello"].Should().Be("world");
    }
#endif

}

0 comments on commit 1448b36

Please sign in to comment.