Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Further Improve JumpDestAnalysis (x20 improvement) #6554

Merged
merged 13 commits into from
Jan 18, 2024
77 changes: 77 additions & 0 deletions src/Nethermind/Nethermind.Evm.Test/CodeAnalysis/CodeInfoTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

using System.Linq;
using System.Reflection;
using System.Runtime.Intrinsics;

using FluentAssertions;
using Nethermind.Evm.CodeAnalysis;
using NUnit.Framework;
Expand Down Expand Up @@ -165,5 +167,80 @@ public void Push1Jumpdest_Over10k()
codeInfo.ValidateJump(10, false).Should().BeFalse();
codeInfo.ValidateJump(11, false).Should().BeFalse(); // 0x5b but not JUMPDEST but data
}

/// <summary>
/// Verifies that 0x5b bytes inside PUSHn immediate data are never accepted as jump destinations,
/// for every push width, on code longer than 10k bytes. The code starts with a run of real
/// JUMPDESTs that ends half a <see cref="Vector256{T}"/> past a vector boundary.
/// </summary>
/// <param name="n">Number of immediate bytes of the PUSHn opcode under test (1 to 32).</param>
[Test]
public void PushNJumpdest_Over10k([Range(1, 32)] int n)
{
    const byte JumpDest = 0x5b;
    const byte Push1 = 0x60;

    // One PUSHn opcode followed by its n immediate bytes.
    int stride = n + 1;
    byte pushN = (byte)(Push1 + n - 1);

    // Two full vectors plus half a vector of real JUMPDESTs, so the run ends unaligned.
    int jumpDestEnd = Vector256<byte>.Count * 2 + Vector128<byte>.Count;
    // The remainder of the third vector is left as zero bytes.
    int paddingEnd = Vector256<byte>.Count * 3;

    byte[] code = new byte[10_001];
    for (int i = 0; i < jumpDestEnd; i++)
    {
        code[i] = JumpDest;
    }

    // Pushes begin at the first multiple of the stride at or after the padding;
    // any bytes between the padding and that position also stay zero.
    int firstPush = (paddingEnd + stride - 1) / stride * stride;
    for (int i = firstPush; i < code.Length; i++)
    {
        code[i] = i % stride == 0 ? pushN : JumpDest;
    }

    CodeInfo codeInfo = new(code);

    // The leading run consists of genuine JUMPDEST instructions.
    for (int i = 0; i < jumpDestEnd; i++)
    {
        codeInfo.ValidateJump(i, false).Should().BeTrue();
    }

    // Everything afterwards is a zero byte, a PUSHn opcode, or PUSHn data
    // (0x5b as data, not as a JUMPDEST), so no position may validate.
    for (int i = jumpDestEnd; i < code.Length; i++)
    {
        codeInfo.ValidateJump(i, false).Should().BeFalse();
    }
}
}
}
10 changes: 10 additions & 0 deletions src/Nethermind/Nethermind.Evm/ByteArrayExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,16 @@ public static ZeroPaddedSpan SliceWithZeroPadding(this Span<byte> span, scoped i
return SliceWithZeroPadding(span, (int)startIndex, length, padDirection);
}

/// <summary>
/// Slices a read-only span from a 256-bit start index, zero-padding up to <paramref name="length"/>.
/// </summary>
/// <param name="span">Source bytes.</param>
/// <param name="startIndex">Start offset; may be larger than the span or than <see cref="int.MaxValue"/>.</param>
/// <param name="length">Requested length of the padded result.</param>
/// <param name="padDirection">Padding side, forwarded to the int-indexed overload when the start index is usable.</param>
/// <returns>
/// The result of the int-indexed overload when <paramref name="startIndex"/> lies inside the span;
/// otherwise a <see cref="ZeroPaddedSpan"/> over an empty span with the requested length and right padding.
/// </returns>
public static ZeroPaddedSpan SliceWithZeroPadding(this ReadOnlySpan<byte> span, scoped in UInt256 startIndex, int length, PadDirection padDirection = PadDirection.Right)
{
    bool startsInsideSpan = !(startIndex >= span.Length || startIndex > int.MaxValue);
    if (startsInsideSpan)
    {
        return SliceWithZeroPadding(span, (int)startIndex, length, padDirection);
    }

    // Nothing to copy: the whole result is padding, built with PadDirection.Right.
    return new ZeroPaddedSpan(default, length, PadDirection.Right);
}
LukaszRozmej marked this conversation as resolved.
Show resolved Hide resolved

public static ZeroPaddedSpan SliceWithZeroPadding(this ReadOnlyMemory<byte> bytes, scoped in UInt256 startIndex, int length, PadDirection padDirection = PadDirection.Right)
{
if (startIndex >= bytes.Length || startIndex > int.MaxValue)
Expand Down
32 changes: 17 additions & 15 deletions src/Nethermind/Nethermind.Evm/CodeAnalysis/CodeInfo.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,30 @@
// SPDX-License-Identifier: LGPL-3.0-only

using System;
using System.Runtime.CompilerServices;
using System.Threading;

using Nethermind.Evm.Precompiles;

namespace Nethermind.Evm.CodeAnalysis
{
public class CodeInfo
public class CodeInfo : IThreadPoolWorkItem
{
public byte[] MachineCode { get; set; }
public ReadOnlyMemory<byte> MachineCode { get; }
public IPrecompile? Precompile { get; set; }
private JumpDestinationAnalyzer? _analyzer;
private readonly JumpDestinationAnalyzer _analyzer;
private static readonly JumpDestinationAnalyzer _emptyAnalyzer = new(Array.Empty<byte>());
public static CodeInfo Empty { get; } = new CodeInfo(Array.Empty<byte>());

/// <summary>
/// Creates code info over a bytecode array.
/// </summary>
/// <param name="code">Bytecode; zero-length code reuses the shared empty analyzer instead of allocating one.</param>
public CodeInfo(byte[] code)
{
    MachineCode = code;

    if (code.Length == 0)
    {
        _analyzer = _emptyAnalyzer;
    }
    else
    {
        _analyzer = new JumpDestinationAnalyzer(code);
    }
}

/// <summary>
/// Creates code info over a read-only memory region holding bytecode.
/// </summary>
/// <param name="code">Bytecode; zero-length code reuses the shared empty analyzer instead of allocating one.</param>
public CodeInfo(ReadOnlyMemory<byte> code)
{
    MachineCode = code;

    if (code.Length == 0)
    {
        _analyzer = _emptyAnalyzer;
    }
    else
    {
        _analyzer = new JumpDestinationAnalyzer(code);
    }
}

public bool IsPrecompile => Precompile is not null;
Expand All @@ -25,24 +34,17 @@ public CodeInfo(IPrecompile precompile)
{
Precompile = precompile;
MachineCode = Array.Empty<byte>();
_analyzer = _emptyAnalyzer;
}

/// <summary>
/// Validates a jump target by forwarding to the jump destination analyzer held by this instance.
/// </summary>
/// <param name="destination">Position in the code that the jump targets.</param>
/// <param name="isSubroutine">Passed through unchanged to the analyzer.</param>
/// <returns>The analyzer's verdict for the destination.</returns>
public bool ValidateJump(int destination, bool isSubroutine)
{
JumpDestinationAnalyzer analyzer = _analyzer;
analyzer ??= CreateAnalyzer();

return analyzer.ValidateJump(destination, isSubroutine);
return _analyzer.ValidateJump(destination, isSubroutine);
}

/// <summary>
/// Do sampling to choose an algo when the code is big enough.
/// When the code size is small we can use the default analyzer.
/// </summary>
[MethodImpl(MethodImplOptions.NoInlining)]
private JumpDestinationAnalyzer CreateAnalyzer()
void IThreadPoolWorkItem.Execute()
{
return _analyzer = new JumpDestinationAnalyzer(MachineCode);
_analyzer.Execute();
}
}
}
136 changes: 77 additions & 59 deletions src/Nethermind/Nethermind.Evm/CodeAnalysis/JumpDestinationAnalyzer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,53 +4,49 @@
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Threading;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

namespace Nethermind.Evm.CodeAnalysis
{
public sealed class JumpDestinationAnalyzer : IThreadPoolWorkItem
public sealed class JumpDestinationAnalyzer(ReadOnlyMemory<byte> code)
{
private const int PUSH1 = 0x60;
private const int PUSHx = PUSH1 - 1;
private const int PUSH32 = 0x7f;
private const int JUMPDEST = 0x5b;
private const int BEGINSUB = 0x5c;
private const int BitShiftPerInt32 = 5;
private const int BitShiftPerInt64 = 6;

private int[]? _jumpDestBitmap;
public byte[] MachineCode { get; set; }
private readonly static long[]? _emptyJumpDestBitmap = new long[1];
private long[]? _jumpDestBitmap = code.Length == 0 ? _emptyJumpDestBitmap : null;

/// <summary>
/// Stores the code and queues this analyzer on the thread pool.
/// </summary>
/// <param name="code">Machine code to analyze.</param>
public JumpDestinationAnalyzer(byte[] code)
{
// Store the code reference, as the JumpDest analysis is lazy
// and not performed until the first jump.
MachineCode = code;

// Start generating the JumpDestinationBitmap in the background.
ThreadPool.UnsafeQueueUserWorkItem(this, preferLocal: false);
}
public ReadOnlyMemory<byte> MachineCode { get; } = code;

/// <summary>
/// Determines whether <paramref name="destination"/> is an acceptable jump target in the machine code.
/// The result is only set to true when the destination lies inside the code, its bit is set in the
/// jump destination bitmap, and the byte at that position equals BEGINSUB (when
/// <paramref name="isSubroutine"/> is true) or JUMPDEST (otherwise).
/// The bitmap is created here on first use if it has not been built yet.
/// </summary>
/// <param name="destination">Position in the code that the jump targets; negative values are rejected by the unsigned length check.</param>
/// <param name="isSubroutine">Selects which opcode the destination byte must equal: BEGINSUB when true, JUMPDEST when false.</param>
public bool ValidateJump(int destination, bool isSubroutine)
{
// Take array ref to local so Jit knows its size won't change in the method.
byte[] machineCode = MachineCode;
ReadOnlySpan<byte> machineCode = MachineCode.Span;
_jumpDestBitmap ??= CreateJumpDestinationBitmap(machineCode);

var result = false;
// Cast to uint so that negative numbers become very high numbers.
// Then do the length check; this both reduces the checks by one and eliminates the bounds
// check from accessing the array.
if ((uint)destination < (uint)machineCode.Length &&
IsJumpDestination(_jumpDestBitmap, destination))
if ((uint)destination < (uint)machineCode.Length)
{
// Store byte to int, as less expensive operations at word size
int codeByte = machineCode[destination];
if (isSubroutine)
if (IsJumpDestination(_jumpDestBitmap, destination))
{
result = codeByte == BEGINSUB;
}
else
{
result = codeByte == JUMPDEST;
if (isSubroutine)
{
result = codeByte == BEGINSUB;
}
else
{
result = codeByte == JUMPDEST;
}
}
}

Expand All @@ -59,58 +55,87 @@ public bool ValidateJump(int destination, bool isSubroutine)

/// <summary>
/// Used for conversion between different representations of bit array.
/// Returns (n + (32 - 1)) / 32, rearranged to avoid arithmetic overflow.
/// Returns (n + (64 - 1)) / 64, rearranged to avoid arithmetic overflow.
/// For example, in the bit to int case, the straightforward calc would
/// be (n + 31) / 32, but that would cause overflow. So instead it's
/// rearranged to ((n - 1) / 32) + 1.
/// be (n + 63) / 64, but that would cause overflow. So instead it's
/// rearranged to ((n - 1) / 64) + 1.
/// Due to sign extension, we don't need to special case for n == 0, if we use
/// bitwise operations (since ((n - 1) >> 5) + 1 = 0).
/// This doesn't hold true for ((n - 1) / 32) + 1, which equals 1.
/// bitwise operations (since ((n - 1) >> 6) + 1 = 0).
/// This doesn't hold true for ((n - 1) / 64) + 1, which equals 1.
///
/// Usage:
/// GetInt64ArrayLengthFromBitLength(77): returns how many longs must be
/// allocated to store 77 bits (2).
/// </summary>
/// <param name="n">Number of bits that need to be stored.</param>
/// <returns>How many 64-bit elements are required to store n bits.</returns>
private static int GetInt32ArrayLengthFromBitLength(int n)
private static int GetInt64ArrayLengthFromBitLength(int n)
{
return (int)((uint)(n - 1 + (1 << BitShiftPerInt32)) >> BitShiftPerInt32);
return (int)((uint)(n - 1 + (1 << BitShiftPerInt64)) >> BitShiftPerInt64);
}

/// <summary>
/// Collects jump destination locations in code, one bit per code byte.
/// A set bit means the byte at that position is a JUMPDEST or BEGINSUB opcode that was reached
/// as an instruction; bytes skipped as PUSH data, and all other opcodes, leave their bit unset.
/// Bits are accumulated in a local 64-bit value and written to the array once per 64-byte window.
/// </summary>
/// <param name="code">Machine code to scan.</param>
/// <returns>Bitmap with one 64-bit element per 64 bytes of code.</returns>
private static int[] CreateJumpDestinationBitmap(byte[] code)
private static long[] CreateJumpDestinationBitmap(ReadOnlySpan<byte> code)
{
int[] jumpDestBitmap = new int[GetInt32ArrayLengthFromBitLength(code.Length)];
long[] jumpDestBitmap = new long[GetInt64ArrayLengthFromBitLength(code.Length)];

int pc = 0;
long flags = 0;
while (true)
{
// Since we are using a non-standard for loop here;
// changing to while(true) plus below if check elides
// the bounds check from the following code array access.
if ((uint)pc >= (uint)code.Length) break;
int move = 1;
// Vector path: only at a 32-byte aligned instruction start with a full vector left to read.
if (Avx2.IsSupported && (pc & 0x1f) == 0 && pc < code.Length - Vector256<sbyte>.Count)
benaadams marked this conversation as resolved.
Show resolved Hide resolved
{
Vector256<sbyte> data = Unsafe.As<byte, Vector256<sbyte>>(ref Unsafe.AddByteOffset(ref MemoryMarshal.GetReference(code), pc));
// Signed compare against PUSH1 - 1: any byte in 0x60..0x7f (a push) compares greater;
// bytes of 0x80 and above are negative as sbyte and never match.
Vector256<sbyte> compare = Avx2.CompareGreaterThan(data, Vector256.Create((sbyte)PUSHx));
if (compare == default)
{
// No pushes in these 32 bytes, so every byte is an instruction and can be tested directly.
Vector256<sbyte> dest = Avx2.CompareEqual(data, Vector256.Create((sbyte)JUMPDEST));
Vector256<sbyte> sub = Avx2.CompareEqual(data, Vector256.Create((sbyte)BEGINSUB));
benaadams marked this conversation as resolved.
Show resolved Hide resolved
Vector256<sbyte> combined = Avx2.Or(dest, sub);
// MoveMask returns an int; widen through uint so a set bit 31 is not sign-extended
// into bits 32..63, which would mark the following 32 positions as jump destinations.
flags |= (long)(uint)Avx2.MoveMask(combined) << (pc & 0x20);
move = Vector256<sbyte>.Count;
goto Next;
}
benaadams marked this conversation as resolved.
Show resolved Hide resolved
}

// Grab the instruction from the code.
int op = code[pc];
int op = Unsafe.Add(ref MemoryMarshal.GetReference(code), pc);

if (op >= PUSH1 && op <= PUSH32)
if ((uint)op - JUMPDEST <= BEGINSUB - JUMPDEST)
{
// Accumulate JumpDest to register
flags |= 1L << pc;
}
else if ((uint)op - PUSH1 <= PUSH32 - PUSH1)
{
// Skip forward amount of data the push represents
// don't need to analyse data for JumpDests
pc += op - PUSH1 + 1;
move = op - PUSH1 + 2;
}
else if (op == JUMPDEST || op == BEGINSUB)
Next:
int next = pc + move;
bool exit = next >= code.Length;
// Flush when the step leaves the current 64-byte window or the scan is finished.
if ((pc & 0x3F) + move > 0x3f || exit)
benaadams marked this conversation as resolved.
Show resolved Hide resolved
{
// Exact type will be checked again by ValidateJump
MarkAsJumpDestination(jumpDestBitmap, pc);
if (flags != 0)
{
// Moving to next array element (or finishing) assign to array.
MarkJumpDestinations(jumpDestBitmap, pc, flags);
flags = 0;
}
}

if (exit)
{
break;
}

// Next instruction
pc++;
pc = next;
}

return jumpDestBitmap;
Expand All @@ -119,33 +144,26 @@ private static int[] CreateJumpDestinationBitmap(byte[] code)
/// <summary>
/// Checks whether the bit for the given position is set in the jump destination bitmap.
/// Positions whose element index falls outside the bitmap are reported as not set.
/// </summary>
/// <param name="bitvec">Jump destination bitmap.</param>
/// <param name="pos">Position in the code; its upper bits select the array element and its low bits select the bit within it.</param>
/// <returns>True when the bit for <paramref name="pos"/> is set.</returns>
private static bool IsJumpDestination(int[] bitvec, int pos)
private static bool IsJumpDestination(long[] bitvec, int pos)
{
int vecIndex = pos >> BitShiftPerInt32;
int vecIndex = pos >> BitShiftPerInt64;
// Check if in bounds, Jit will add slightly more expensive exception throwing check if we don't
if ((uint)vecIndex >= (uint)bitvec.Length) return false;

return (bitvec[vecIndex] & (1 << pos)) != 0;
return (bitvec[vecIndex] & (1L << pos)) != 0;
}

/// <summary>
/// ORs the accumulated flags into the bitmap element that covers <paramref name="pos"/>.
/// The element is addressed through Unsafe.Add without a bounds check, so the caller must pass a
/// position whose element index lies inside <paramref name="bitvec"/>.
/// </summary>
/// <param name="bitvec">Jump destination bitmap to update.</param>
/// <param name="pos">Any position inside the 64-byte window the flags belong to.</param>
/// <param name="flags">Bits to merge into the element.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void MarkAsJumpDestination(int[] bitvec, int pos)
private static void MarkJumpDestinations(long[] bitvec, int pos, long flags)
{
int vecIndex = pos >> BitShiftPerInt32;
Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(bitvec), vecIndex)
|= 1 << pos;
uint offset = (uint)pos >> BitShiftPerInt64;
Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(bitvec), offset)
|= flags;
}

/// <summary>
/// Builds the jump destination bitmap for the machine code if it has not been built yet.
/// </summary>
void IThreadPoolWorkItem.Execute()
public void Execute()
{
if (_jumpDestBitmap is null)
{
var jumpDestBitmap = CreateJumpDestinationBitmap(MachineCode);
// Atomically assign if still null. There aren't really any thread safety issues here,
// as the result will be the same. Just keep the first one we created, since the Evm will have
// started using it if already created, and let this one be Gen0 GC'd.
Interlocked.CompareExchange(ref _jumpDestBitmap, jumpDestBitmap, null);
}
// Assign only when no bitmap has been stored yet.
_jumpDestBitmap ??= CreateJumpDestinationBitmap(MachineCode.Span);
}
}
}
Loading