Skip to content
Permalink
Browse files

Strip BOM when converting From UTF8 or deserializing C# string

  • Loading branch information...
mythz committed Jun 4, 2019
1 parent f6ff873 commit 22969480339cdfc365d9c135bedc6e797d4cc1cd
@@ -177,18 +177,19 @@ public override ReadOnlyMemory<byte> ToUtf8(ReadOnlySpan<char> source)

/// <summary>
/// Decodes UTF-8 bytes into a freshly allocated character buffer, after passing
/// the input through <c>WithoutBom()</c>.
/// </summary>
/// <param name="source">The UTF-8 encoded bytes to decode.</param>
/// <returns>The portion of the new buffer that was written by the decoder.</returns>
public override ReadOnlyMemory<char> FromUtf8(ReadOnlySpan<byte> source)
{
    var payload = source.WithoutBom();

    // Size the buffer from the decoder's own count so the decode below always fits.
    var buffer = new char[Encoding.UTF8.GetCharCount(payload)];
    var decoded = Encoding.UTF8.GetChars(payload, new Span<char>(buffer));

    return new ReadOnlyMemory<char>(buffer, 0, decoded);
}

/// <summary>
/// Encodes <paramref name="source"/> as UTF-8 directly into <paramref name="destination"/>.
/// </summary>
/// <returns>The value returned by <c>Encoding.UTF8.GetBytes</c> for this call.</returns>
public override int ToUtf8(ReadOnlySpan<char> source, Span<byte> destination)
{
    return Encoding.UTF8.GetBytes(source, destination);
}

// Decodes the UTF-8 bytes in source into destination via Encoding.UTF8.GetChars and returns its result.
// This variant hands source to the decoder unchanged.
public override int FromUtf8(ReadOnlySpan<byte> source, Span<char> destination) => Encoding.UTF8.GetChars(source, destination);
// Same signature and decode call, but source is passed through WithoutBom() first.
// NOTE(review): this and the line above declare the same override; presumably they are the
// before/after sides of the diff hunk and only one exists in the real file — confirm.
public override int FromUtf8(ReadOnlySpan<byte> source, Span<char> destination) => Encoding.UTF8.GetChars(source.WithoutBom(), destination);

/// <summary>
/// Encodes <paramref name="source"/> with the single-argument <c>ToUtf8</c> overload
/// and copies the result into a new array.
/// </summary>
public override byte[] ToUtf8Bytes(ReadOnlySpan<char> source)
{
    var encoded = ToUtf8(source);
    return encoded.ToArray();
}

// Decodes source with the single-argument FromUtf8 overload and materialises the result as a string.
public override string FromUtf8Bytes(ReadOnlySpan<byte> source) => FromUtf8(source).ToString();
// Same as above, but source is passed through WithoutBom() before being handed to FromUtf8.
// NOTE(review): the single-argument FromUtf8 shown earlier in this hunk already calls WithoutBom(),
// so this extra strip looks redundant — confirm. Also presumably only one of these two
// identical signatures exists in the real file (before/after sides of the diff).
public override string FromUtf8Bytes(ReadOnlySpan<byte> source) => FromUtf8(source.WithoutBom()).ToString();

public override MemoryStream ToMemoryStream(ReadOnlySpan<byte> source)
{
@@ -532,7 +532,7 @@ public override ReadOnlyMemory<byte> ToUtf8(ReadOnlySpan<char> source)

// Array-based decode of UTF-8 bytes into a new char[]; returns the written portion of that array.
// The hunk shows two alternative declarations of `bytes` (with and without BOM stripping) and
// ends before the method's closing brace.
public override ReadOnlyMemory<char> FromUtf8(ReadOnlySpan<byte> source)
{
// Variant 1: copy source as-is.
var bytes = source.ToArray();
// Variant 2: pass source through WithoutBom() first, then copy.
var bytes = source.WithoutBom().ToArray();
// Output buffer sized from the decoder's character count for `bytes`.
var chars = new char[Encoding.UTF8.GetCharCount(bytes)];
// NOTE(review): the byte count passed here is source.Length, not bytes.Length. With variant 2,
// whenever WithoutBom() actually removed a BOM, `bytes` is shorter than `source`, so this count
// exceeds the array — looks like it should be bytes.Length; confirm.
var charsWritten = Encoding.UTF8.GetChars(bytes, 0, source.Length, chars, 0);
return new ReadOnlyMemory<char>(chars, 0, charsWritten);
@@ -549,7 +549,7 @@ public override int ToUtf8(ReadOnlySpan<char> source, Span<byte> destination)

// Array-based decode of UTF-8 bytes into the caller's destination span, going through temporary
// arrays. The hunk shows two alternative declarations of `bytes` and ends before the method does.
public override int FromUtf8(ReadOnlySpan<byte> source, Span<char> destination)
{
// Variant 1: copy source as-is.
var bytes = source.ToArray();
// Variant 2: pass source through WithoutBom() first, then copy.
var bytes = source.WithoutBom().ToArray();
// Temporary char[] copy of destination; the decoder writes into this array.
var chars = destination.ToArray();
// NOTE(review): the byte count passed here is source.Length, not bytes.Length. With variant 2,
// whenever WithoutBom() actually removed a BOM, `bytes` is shorter than `source`, so this count
// exceeds the array — looks like it should be bytes.Length; confirm.
var charsWritten = Encoding.UTF8.GetChars(bytes, 0, source.Length, chars, 0);
// Copy only the decoded characters back into the caller's span.
new ReadOnlySpan<char>(chars, 0, charsWritten).CopyTo(destination);
@@ -558,7 +558,7 @@ public override int FromUtf8(ReadOnlySpan<byte> source, Span<char> destination)

/// <summary>
/// Copies <paramref name="source"/> into a char array and encodes that array as UTF-8.
/// </summary>
public override byte[] ToUtf8Bytes(ReadOnlySpan<char> source)
{
    char[] characters = source.ToArray();
    return Encoding.UTF8.GetBytes(characters);
}

// Copies source into a byte[] and decodes it with Encoding.UTF8.GetString; the input is used unchanged.
public override string FromUtf8Bytes(ReadOnlySpan<byte> source) => Encoding.UTF8.GetString(source.ToArray());
// Same decode, but source is passed through WithoutBom() before being copied.
// NOTE(review): presumably only one of these two identical signatures exists in the real file
// (before/after sides of the diff) — confirm.
public override string FromUtf8Bytes(ReadOnlySpan<byte> source) => Encoding.UTF8.GetString(source.WithoutBom().ToArray());

/// <summary>
/// Copies <paramref name="source"/> into a new byte array and hands that array to
/// <c>MemoryStreamFactory.GetStream</c>.
/// </summary>
public override MemoryStream ToMemoryStream(ReadOnlySpan<byte> source)
{
    byte[] copy = source.ToArray();
    return MemoryStreamFactory.GetStream(copy);
}
@@ -88,6 +88,8 @@ public static object Parse(ReadOnlySpan<char> value)
{
TypeConfig<T>.Init();

value = value.WithoutBom();

if (ReadFn == null)
{
if (typeof(T).IsAbstract || typeof(T).IsInterface)
@@ -85,6 +85,8 @@ public static object Parse(ReadOnlySpan<char> value)
{
TypeConfig<T>.Init();

value = value.WithoutBom();

if (ReadFn == null)
{
if (typeof(T).IsInterface)
@@ -177,18 +177,19 @@ public override ReadOnlyMemory<byte> ToUtf8(ReadOnlySpan<char> source)

/// <summary>
/// Decodes UTF-8 bytes into a freshly allocated character buffer, after passing
/// the input through <c>WithoutBom()</c>.
/// </summary>
/// <param name="source">The UTF-8 encoded bytes to decode.</param>
/// <returns>The portion of the new buffer that was written by the decoder.</returns>
public override ReadOnlyMemory<char> FromUtf8(ReadOnlySpan<byte> source)
{
    var payload = source.WithoutBom();

    // Size the buffer from the decoder's own count so the decode below always fits.
    var buffer = new char[Encoding.UTF8.GetCharCount(payload)];
    var decoded = Encoding.UTF8.GetChars(payload, new Span<char>(buffer));

    return new ReadOnlyMemory<char>(buffer, 0, decoded);
}

/// <summary>
/// Encodes <paramref name="source"/> as UTF-8 directly into <paramref name="destination"/>.
/// </summary>
/// <returns>The value returned by <c>Encoding.UTF8.GetBytes</c> for this call.</returns>
public override int ToUtf8(ReadOnlySpan<char> source, Span<byte> destination)
{
    return Encoding.UTF8.GetBytes(source, destination);
}

// Decodes the UTF-8 bytes in source into destination via Encoding.UTF8.GetChars and returns its result.
// This variant hands source to the decoder unchanged.
public override int FromUtf8(ReadOnlySpan<byte> source, Span<char> destination) => Encoding.UTF8.GetChars(source, destination);
// Same signature and decode call, but source is passed through WithoutBom() first.
// NOTE(review): this and the line above declare the same override; presumably they are the
// before/after sides of the diff hunk and only one exists in the real file — confirm.
public override int FromUtf8(ReadOnlySpan<byte> source, Span<char> destination) => Encoding.UTF8.GetChars(source.WithoutBom(), destination);

/// <summary>
/// Encodes <paramref name="source"/> with the single-argument <c>ToUtf8</c> overload
/// and copies the result into a new array.
/// </summary>
public override byte[] ToUtf8Bytes(ReadOnlySpan<char> source)
{
    var encoded = ToUtf8(source);
    return encoded.ToArray();
}

// Decodes source with the single-argument FromUtf8 overload and materialises the result as a string.
public override string FromUtf8Bytes(ReadOnlySpan<byte> source) => FromUtf8(source).ToString();
// Same as above, but source is passed through WithoutBom() before being handed to FromUtf8.
// NOTE(review): the single-argument FromUtf8 shown earlier in this hunk already calls WithoutBom(),
// so this extra strip looks redundant — confirm. Also presumably only one of these two
// identical signatures exists in the real file (before/after sides of the diff).
public override string FromUtf8Bytes(ReadOnlySpan<byte> source) => FromUtf8(source.WithoutBom()).ToString();

public override MemoryStream ToMemoryStream(ReadOnlySpan<byte> source)
{
@@ -270,7 +270,9 @@ public static string AppendUrlPathsRaw(this string uri, params string[] uriCompo
/// <summary>
/// Decodes a UTF-8 byte array to a string, skipping a leading UTF-8 byte order mark
/// (EF BB BF) when one is present.
/// </summary>
/// <param name="bytes">The UTF-8 encoded bytes; may be <c>null</c>.</param>
/// <returns>
/// <c>null</c> when <paramref name="bytes"/> is <c>null</c>; otherwise the decoded text
/// without the BOM character.
/// </returns>
public static string FromUtf8Bytes(this byte[] bytes)
{
    if (bytes == null)
    {
        return null;
    }

    // Use >= 3 (not > 3) so input consisting of nothing but the BOM decodes to an
    // empty string instead of a lone U+FEFF character.
    var hasBom = bytes.Length >= 3
        && bytes[0] == 0xEF
        && bytes[1] == 0xBB
        && bytes[2] == 0xBF;

    var offset = hasBom ? 3 : 0;
    return Encoding.UTF8.GetString(bytes, offset, bytes.Length - offset);
}

public static byte[] ToUtf8Bytes(this string value)
@@ -661,5 +661,20 @@ public static int CountOccurrencesOf(this ReadOnlySpan<char> value, char needle)
}
return count;
}

/// <summary>
/// Returns <paramref name="value"/> without its first character when that character
/// is U+FEFF (decimal 65279); otherwise returns the span unchanged.
/// </summary>
/// <param name="value">The text to inspect.</param>
/// <returns>A slice starting after the U+FEFF character, or the original span.</returns>
public static ReadOnlySpan<char> WithoutBom(this ReadOnlySpan<char> value)
{
    const char ByteOrderMark = '\uFEFF';

    if (value.Length == 0 || value[0] != ByteOrderMark)
    {
        return value;
    }

    return value.Slice(1);
}

/// <summary>
/// Returns <paramref name="value"/> without its first three bytes when they are the
/// UTF-8 byte order mark (EF BB BF); otherwise returns the span unchanged.
/// </summary>
/// <param name="value">The bytes to inspect.</param>
/// <returns>A slice starting after the BOM, or the original span.</returns>
public static ReadOnlySpan<byte> WithoutBom(this ReadOnlySpan<byte> value)
{
    // The BOM is exactly three bytes, so a span of length 3 can consist of the BOM alone;
    // the check must be >= 3 (not > 3) or BOM-only input is returned unstripped.
    return value.Length >= 3 && value[0] == 0xEF && value[1] == 0xBB && value[2] == 0xBF
        ? value.Slice(3)
        : value;
}

}
}
@@ -1,5 +1,6 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Threading.Tasks;
using NUnit.Framework;
using ServiceStack.Text.Pools;
@@ -149,6 +150,25 @@ public async Task Can_deserialize_from_MemoryStream_using_Memory()

Assert.That(to, Is.EqualTo(from));
}

// Round-trips a Person through JSON whose UTF-8 bytes are prefixed with the
// three-byte BOM (EF BB BF) and expects the deserialized value to equal the original.
[Test]
public void Can_deserialize_JSON_with_UTF8_BOM()
{
    // Arrange: encode the DTO's JSON and put the BOM in front of it.
    var expected = new Person { Id = 1, Name = "Foo" };
    var payload = expected.ToJson().ToUtf8Bytes();

    var bomPrefixed = new List<byte>(new byte[] { 0xEF, 0xBB, 0xBF });
    bomPrefixed.AddRange(payload);
    var rawBytes = bomPrefixed.ToArray();

    // Act: decode the bytes back to text, then deserialize.
    var decodedJson = rawBytes.FromUtf8Bytes();
    var actual = decodedJson.FromJson<Person>();

    // Assert
    Assert.That(actual, Is.EqualTo(expected));
}
}

public class Utf8Case

0 comments on commit 2296948

Please sign in to comment.
You can’t perform that action at this time.