Skip to content

Commit

Permalink
Allow direct addition of byte sequences
Browse files Browse the repository at this point in the history
  • Loading branch information
rickardp committed Feb 16, 2023
1 parent fe27a05 commit af6b8ae
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 34 deletions.
11 changes: 9 additions & 2 deletions src/Combination.StringPools/IUtf8DeduplicatedStringPool.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,14 @@ public interface IUtf8DeduplicatedStringPool : IUtf8StringPool
/// <summary>
/// Returns a pooled string if it exists in the pool already, otherwise returns null.
/// </summary>
/// <param name="value">The .NET string</param>
/// <param name="value">The .NET string or char sequence</param>
/// <returns>The string pool reference, if it exists, otherwise null.</returns>
PooledUtf8String? TryGet(string value);
PooledUtf8String? TryGet(ReadOnlySpan<char> value);

/// <summary>
/// Returns a pooled string if it exists in the pool already, otherwise returns null.
/// </summary>
/// <param name="value">The sequence of UTF-8 bytes</param>
/// <returns>The string pool reference, if it exists, otherwise null.</returns>
PooledUtf8String? TryGet(ReadOnlySpan<byte> value);
}
12 changes: 10 additions & 2 deletions src/Combination.StringPools/IUtf8StringPool.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,16 @@ public interface IUtf8StringPool : IStringPool
/// <summary>
/// Adds a string to the pool.
/// </summary>
/// <param name="value">The .NET string to add.</param>
/// <param name="value">The sequence of chars to add.</param>
/// <returns>A reference to the pooled string. Note that the same reference may be returned for several calls to Add if
/// deduplication is used.</returns>
PooledUtf8String Add(string value);
PooledUtf8String Add(ReadOnlySpan<char> value);

/// <summary>
/// Adds a sequence of UTF-8 bytes to the pool. The bytes are not validated as well-formed UTF-8.
/// </summary>
/// <param name="value">The sequence of bytes to add.</param>
/// <returns>A reference to the pooled string. Note that the same reference may be returned for several calls to Add if
/// deduplication is used.</returns>
PooledUtf8String Add(ReadOnlySpan<byte> value);
}
83 changes: 53 additions & 30 deletions src/Combination.StringPools/Utf8StringPool.cs
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,35 @@ public Utf8StringPool(int pageSize, int initialPageCount, bool deduplicateString
}
}

PooledUtf8String IUtf8StringPool.Add(string value)
PooledUtf8String IUtf8StringPool.Add(ReadOnlySpan<char> value)
{
if (string.IsNullOrEmpty(value))
var utf8ByteCount = Encoding.UTF8.GetByteCount(value);
if (utf8ByteCount < 16384)
{
// Use the stack for small strings
Span<byte> utf8 = stackalloc byte[utf8ByteCount];
Encoding.UTF8.GetBytes(value, utf8);
return AddInternal(utf8);
}

var buffer = new byte[utf8ByteCount];
Encoding.UTF8.GetBytes(value, buffer);
return AddInternal(buffer);
}

// Adds an already-encoded byte sequence to the pool.
// The bytes are handed to AddInternal unchanged; no transcoding happens here.
PooledUtf8String IUtf8StringPool.Add(ReadOnlySpan<byte> value)
{
    return AddInternal(value);
}


[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
private PooledUtf8String AddInternal(ReadOnlySpan<byte> value)
{
if (value.Length == 0)
{
return PooledUtf8String.Empty;
}

var length = Encoding.UTF8.GetByteCount(value);
var length = value.Length;
var structLength = length + 2;
if (structLength > 0xffff || structLength > pageSize)
{
Expand All @@ -95,7 +116,7 @@ PooledUtf8String IUtf8StringPool.Add(string value)
if (deduplicationTable is not null)
{
stringHash = unchecked((int)StringHash.Compute(value));
if (TryDeduplicate(stringHash, length, value, out var result))
if (TryDeduplicate(stringHash, value, out var result))
{
return new PooledUtf8String(result);
}
Expand All @@ -108,7 +129,7 @@ PooledUtf8String IUtf8StringPool.Add(string value)
throw new ObjectDisposedException("String pool is already disposed");
}

if (oldSize != usedBytes && TryDeduplicate(stringHash, length, value, out var result))
if (oldSize != usedBytes && TryDeduplicate(stringHash, value, out var result))
{
return new PooledUtf8String(result);
}
Expand Down Expand Up @@ -144,7 +165,7 @@ PooledUtf8String IUtf8StringPool.Add(string value)
{
*(ushort*)(writePtr + pageStartOffset) = checked((ushort)length);
var stringWritePtr = new Span<byte>((byte*)(writePtr + pageStartOffset + 2), length);
Encoding.UTF8.GetBytes(value, stringWritePtr);
value.CopyTo(stringWritePtr);
}

var handle = ((ulong)index << (64 - PoolIndexBits)) | (ulong)(writePosition - structLength);
Expand All @@ -161,22 +182,7 @@ PooledUtf8String IUtf8StringPool.Add(string value)

StringAdded?.Invoke(this, EventArgs.Empty);

var ret = new PooledUtf8String(handle);
#if DEBUG
var rVal = ret.ToString();

if (usedBytes > pages.Count * pageSize)
{
throw new InvalidOperationException("Internal error: Pooled beyond allocation");
}

if (!rVal.Equals(value, StringComparison.Ordinal))
{
throw new InvalidOperationException($"Internal error: Incorrect string pooled: '{value}' != '{rVal}'");
}
#endif

return ret;
return new PooledUtf8String(handle);
}
}

Expand All @@ -186,9 +192,29 @@ PooledUtf8String IUtf8StringPool.Add(string value)

long IStringPool.AllocatedBytes => pages.Count * pageSize;

PooledUtf8String? IUtf8DeduplicatedStringPool.TryGet(string value)
// Looks up a char sequence in the pool by first encoding it as UTF-8 and then
// delegating to TryGetInternal with the encoded bytes.
PooledUtf8String? IUtf8DeduplicatedStringPool.TryGet(ReadOnlySpan<char> value)
{
    var encodedLength = Encoding.UTF8.GetByteCount(value);

    // Encodings of 16384 bytes or more go through a heap array rather than the stack.
    if (encodedLength >= 16384)
    {
        var heapBytes = new byte[encodedLength];
        Encoding.UTF8.GetBytes(value, heapBytes);
        return TryGetInternal(heapBytes);
    }

    // Small strings are encoded into a stack buffer to avoid a heap allocation.
    Span<byte> stackBytes = stackalloc byte[encodedLength];
    Encoding.UTF8.GetBytes(value, stackBytes);
    return TryGetInternal(stackBytes);
}

// Looks up an already-encoded byte sequence in the pool.
// The bytes are handed to TryGetInternal unchanged; no transcoding happens here.
PooledUtf8String? IUtf8DeduplicatedStringPool.TryGet(ReadOnlySpan<byte> value)
{
    return TryGetInternal(value);
}

[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
private PooledUtf8String? TryGetInternal(ReadOnlySpan<byte> value)
{
if (string.IsNullOrEmpty(value))
if (value.Length == 0)
{
return PooledUtf8String.Empty;
}
Expand All @@ -198,16 +224,15 @@ PooledUtf8String IUtf8StringPool.Add(string value)
throw new InvalidOperationException("Deduplication is not enabled for this pool");
}

var length = Encoding.UTF8.GetByteCount(value);
if (!TryDeduplicate(unchecked((int)StringHash.Compute(value)), length, value, out var result))
if (!TryDeduplicate(unchecked((int)StringHash.Compute(value)), value, out var result))
{
return null;
}

return new PooledUtf8String(result);
}

private bool TryDeduplicate(int stringHash, int utf8ByteCount, string value, out ulong offset)
private bool TryDeduplicate(int stringHash, ReadOnlySpan<byte> value, out ulong offset)
{
using (disposeLock.PreventDispose())
{
Expand All @@ -225,15 +250,13 @@ private bool TryDeduplicate(int stringHash, int utf8ByteCount, string value, out
return false;
}

Span<byte> utf8 = stackalloc byte[utf8ByteCount];
Encoding.UTF8.GetBytes(value, utf8);
var ct = table.Count;
for (var i = 0; i < ct; ++i)
{
var handle = table[i];
var poolOffset = handle & ((1UL << (64 - PoolIndexBits)) - 1);
var poolBytes = GetStringBytes(poolOffset);
if (poolBytes.Length != utf8ByteCount || !utf8.SequenceEqual(poolBytes))
if (poolBytes.Length != value.Length || !value.SequenceEqual(poolBytes))
{
continue;
}
Expand Down

0 comments on commit af6b8ae

Please sign in to comment.