Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 132 additions & 2 deletions src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,43 @@ internal static class LossyUtils

// Note: this method is named VP8SSE16x16 in the libwebp reference implementation.
[MethodImpl(InliningOptions.ShortMethod)]
public static int Vp8_Sse16X16(Span<byte> a, Span<byte> b) => Vp8_SseNxN(a, b, 16, 16);
public static int Vp8_Sse16X16(Span<byte> a, Span<byte> b)
{
    // Computes the 16x16 result for blocks a and b. The vectorized helpers are
    // tried first (widest instruction set first); when neither is available, or
    // when intrinsics are compiled out, the generic Vp8_SseNxN routine is used.
#if SUPPORTS_RUNTIME_INTRINSICS
    if (Avx2.IsSupported)
    {
        // 4 iterations: the AVX2 helper advances four rows per iteration.
        return Vp8_Sse16xN_Avx2(a, b, 4);
    }
    else if (Sse2.IsSupported)
    {
        // 8 iterations: the SSE2 helper advances two rows per iteration.
        return Vp8_Sse16xN_Sse2(a, b, 8);
    }
#endif

    return Vp8_SseNxN(a, b, 16, 16);
}

// Note: this method is named VP8SSE16x8 in the libwebp reference implementation.
[MethodImpl(InliningOptions.ShortMethod)]
public static int Vp8_Sse16X8(Span<byte> a, Span<byte> b) => Vp8_SseNxN(a, b, 16, 8);
public static int Vp8_Sse16X8(Span<byte> a, Span<byte> b)
{
    // Computes the 16x8 result for blocks a and b. The vectorized helpers are
    // tried first (widest instruction set first); when neither is available, or
    // when intrinsics are compiled out, the generic Vp8_SseNxN routine is used.
#if SUPPORTS_RUNTIME_INTRINSICS
    if (Avx2.IsSupported)
    {
        // 2 iterations: the AVX2 helper advances four rows per iteration.
        return Vp8_Sse16xN_Avx2(a, b, 2);
    }
    else if (Sse2.IsSupported)
    {
        // 4 iterations: the SSE2 helper advances two rows per iteration.
        return Vp8_Sse16xN_Sse2(a, b, 4);
    }
#endif

    return Vp8_SseNxN(a, b, 16, 8);
}

// Note: this method is named VP8SSE4x4 in the libwebp reference implementation.
[MethodImpl(InliningOptions.ShortMethod)]
Expand Down Expand Up @@ -146,6 +178,104 @@ public static int Vp8_SseNxN(Span<byte> a, Span<byte> b, int w, int h)
return count;
}

#if SUPPORTS_RUNTIME_INTRINSICS
[MethodImpl(InliningOptions.ShortMethod)]
private static int Vp8_Sse16xN_Sse2(Span<byte> a, Span<byte> b, int numPairs)
{
    // Sums the squared byte differences between a and b, 16 bytes per row and
    // two rows per loop iteration. Consecutive rows are addressed
    // WebpConstants.Bps bytes apart; numPairs is the number of iterations.
    // NOTE(review): the loads below go through Unsafe and are not bounds-checked,
    // so both spans have to cover every row that is read - confirm at call sites.
    ref byte aBase = ref MemoryMarshal.GetReference(a);
    ref byte bBase = ref MemoryMarshal.GetReference(b);
    Vector128<int> total = Vector128<int>.Zero;
    nint rowStart = 0;

    for (int pair = 0; pair < numPairs; pair++)
    {
        nint nextRowStart = rowStart + WebpConstants.Bps;

        // First row of the pair.
        Vector128<byte> aFirst = Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref aBase, rowStart));
        Vector128<byte> bFirst = Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref bBase, rowStart));
        Vector128<int> firstRowSum = SubtractAndAccumulate(aFirst, bFirst);

        // Second row of the pair.
        Vector128<byte> aSecond = Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref aBase, nextRowStart));
        Vector128<byte> bSecond = Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref bBase, nextRowStart));
        Vector128<int> secondRowSum = SubtractAndAccumulate(aSecond, bSecond);

        total = Sse2.Add(total, Sse2.Add(firstRowSum, secondRowSum));

        // Step over the two rows just processed.
        rowStart += 2 * WebpConstants.Bps;
    }

    // Collapse the four 32-bit partial sums into a single value.
    return Numerics.ReduceSum(total);
}

[MethodImpl(InliningOptions.ShortMethod)]
private static int Vp8_Sse16xN_Avx2(Span<byte> a, Span<byte> b, int numPairs)
{
    // Sums the squared byte differences between a and b, 16 bytes per row.
    // Each 256-bit vector packs two rows, and every loop iteration handles two
    // such vectors per input, i.e. four rows. Despite its name, numPairs is
    // therefore the number of four-row iterations. Consecutive rows are
    // addressed WebpConstants.Bps bytes apart.
    // NOTE(review): the loads below go through Unsafe and are not bounds-checked,
    // so both spans have to cover every row that is read - confirm at call sites.
    ref byte aBase = ref MemoryMarshal.GetReference(a);
    ref byte bBase = ref MemoryMarshal.GetReference(b);
    Vector256<int> total = Vector256<int>.Zero;
    nint row0 = 0;

    for (int step = 0; step < numPairs; step++)
    {
        nint row1 = row0 + WebpConstants.Bps;
        nint row2 = row0 + (2 * WebpConstants.Bps);
        nint row3 = row0 + (3 * WebpConstants.Bps);

        // Rows 0 and 1 share one vector (row 0 in the lower half, row 1 in the upper half).
        Vector256<byte> aRows01 = Vector256.Create(
            Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref aBase, row0)),
            Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref aBase, row1)));
        Vector256<byte> bRows01 = Vector256.Create(
            Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref bBase, row0)),
            Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref bBase, row1)));

        // Rows 2 and 3 share the other vector.
        Vector256<byte> aRows23 = Vector256.Create(
            Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref aBase, row2)),
            Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref aBase, row3)));
        Vector256<byte> bRows23 = Vector256.Create(
            Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref bBase, row2)),
            Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref bBase, row3)));

        Vector256<int> sumRows01 = SubtractAndAccumulate(aRows01, bRows01);
        Vector256<int> sumRows23 = SubtractAndAccumulate(aRows23, bRows23);
        total = Avx2.Add(total, Avx2.Add(sumRows01, sumRows23));

        // Step over the four rows just processed.
        row0 += 4 * WebpConstants.Bps;
    }

    // Collapse the eight 32-bit partial sums into a single value.
    return Numerics.ReduceSum(total);
}

[MethodImpl(InliningOptions.ShortMethod)]
private static Vector128<int> SubtractAndAccumulate(Vector128<byte> a, Vector128<byte> b)
{
    // Per-byte |a - b|: a saturating subtraction clamps negative results to zero,
    // so for every byte one of the two directions is zero and OR-ing them keeps
    // the non-zero (absolute) difference.
    Vector128<byte> absDiff = Sse2.Or(Sse2.SubtractSaturate(a, b), Sse2.SubtractSaturate(b, a));

    // Interleave with zero bytes to widen each difference to 16 bits.
    Vector128<byte> zero = Vector128<byte>.Zero;
    Vector128<short> lowHalf = Sse2.UnpackLow(absDiff, zero).AsInt16();
    Vector128<short> highHalf = Sse2.UnpackHigh(absDiff, zero).AsInt16();

    // Multiplying each vector by itself squares the differences; adjacent
    // products are added together into 32-bit lanes.
    Vector128<int> lowSquares = Sse2.MultiplyAddAdjacent(lowHalf, lowHalf);
    Vector128<int> highSquares = Sse2.MultiplyAddAdjacent(highHalf, highHalf);

    return Sse2.Add(lowSquares, highSquares);
}

[MethodImpl(InliningOptions.ShortMethod)]
private static Vector256<int> SubtractAndAccumulate(Vector256<byte> a, Vector256<byte> b)
{
    // Per-byte |a - b|: a saturating subtraction clamps negative results to zero,
    // so for every byte one of the two directions is zero and OR-ing them keeps
    // the non-zero (absolute) difference.
    Vector256<byte> absDiff = Avx2.Or(Avx2.SubtractSaturate(a, b), Avx2.SubtractSaturate(b, a));

    // Interleave with zero bytes to widen each difference to 16 bits. The AVX2
    // unpack instructions work within each 128-bit half separately; every
    // difference still ends up in exactly one of the two widened vectors.
    Vector256<byte> zero = Vector256<byte>.Zero;
    Vector256<short> lowHalf = Avx2.UnpackLow(absDiff, zero).AsInt16();
    Vector256<short> highHalf = Avx2.UnpackHigh(absDiff, zero).AsInt16();

    // Multiplying each vector by itself squares the differences; adjacent
    // products are added together into 32-bit lanes.
    Vector256<int> lowSquares = Avx2.MultiplyAddAdjacent(lowHalf, lowHalf);
    Vector256<int> highSquares = Avx2.MultiplyAddAdjacent(highHalf, highHalf);

    return Avx2.Add(lowSquares, highSquares);
}
#endif

[MethodImpl(InliningOptions.ShortMethod)]
public static void Vp8Copy4X4(Span<byte> src, Span<byte> dst)
{
    // Forwards to the general Copy helper with both size arguments fixed at 4.
    Copy(src, dst, 4, 4);
}

Expand Down
142 changes: 142 additions & 0 deletions tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,124 @@ private static void RunTransformOneTest()
Assert.True(expected.SequenceEqual(dst));
}

private static void RunVp8Sse16X16Test()
{
    // Shared test body: feeds two fixed byte buffers to LossyUtils.Vp8_Sse16X16
    // and checks the result against a fixed expected value.

    // arrange
    const int expectedSse = 2063;

    byte[] blockA =
    {
        154, 154, 151, 151, 149, 148, 151, 157, 163, 163, 154, 132, 102, 98, 104, 108, 107, 104, 104, 103,
        101, 106, 123, 119, 170, 171, 172, 171, 168, 175, 171, 173, 151, 151, 149, 150, 147, 147, 146, 159,
        164, 165, 154, 129, 92, 90, 101, 105, 104, 103, 104, 101, 100, 105, 123, 117, 172, 172, 172, 168,
        170, 177, 170, 175, 151, 149, 150, 150, 147, 147, 156, 161, 161, 161, 151, 126, 93, 90, 102, 107,
        104, 103, 104, 101, 104, 104, 122, 117, 172, 172, 170, 168, 170, 177, 172, 175, 150, 149, 152, 151,
        148, 151, 160, 159, 157, 157, 148, 133, 96, 90, 103, 107, 104, 104, 101, 100, 102, 102, 121, 117,
        170, 170, 169, 171, 171, 179, 173, 175, 149, 151, 152, 151, 148, 154, 162, 157, 154, 154, 151, 132,
        92, 89, 101, 108, 104, 102, 101, 101, 103, 103, 123, 118, 171, 168, 177, 173, 171, 178, 172, 176,
        152, 152, 152, 151, 154, 162, 161, 155, 149, 157, 156, 129, 92, 87, 101, 107, 102, 100, 107, 100,
        101, 102, 123, 118, 170, 175, 182, 172, 171, 179, 173, 175, 152, 151, 154, 155, 160, 162, 161, 153,
        150, 156, 153, 129, 92, 91, 102, 106, 100, 109, 115, 99, 101, 102, 124, 120, 171, 179, 178, 172,
        171, 181, 171, 173, 154, 154, 154, 162, 160, 158, 156, 152, 153, 157, 151, 128, 86, 86, 102, 105,
        102, 122, 114, 99, 101, 102, 125, 120, 178, 173, 177, 172, 171, 180, 172, 173, 154, 152, 158, 163,
        150, 148, 148, 156, 151, 158, 152, 129, 87, 87, 101, 105, 204, 204, 204, 204, 204, 204, 204, 204,
        204, 204, 204, 204, 204, 204, 204, 204, 154, 151, 165, 156, 141, 137, 146, 158, 152, 159, 152, 133,
        90, 88, 99, 106, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204,
        154, 160, 164, 150, 126, 127, 149, 159, 155, 161, 153, 131, 84, 86, 97, 103, 204, 204, 204, 204,
        204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 157, 167, 157, 137, 102, 128, 155, 161,
        157, 159, 154, 134, 84, 82, 97, 102, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204,
        204, 204, 204, 204, 163, 163, 150, 113, 78, 132, 156, 162, 159, 160, 154, 132, 83, 78, 91, 97, 204,
        204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 163, 157, 137, 80, 78,
        131, 154, 163, 157, 159, 149, 131, 82, 77, 94, 100, 204, 204, 204, 204, 204, 204, 204, 204, 204,
        204, 204, 204, 204, 204, 204, 204, 159, 151, 108, 72, 88, 132, 156, 162, 159, 157, 151, 130, 79, 78,
        95, 102, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 151, 130,
        82, 82, 89, 134, 154, 161, 161, 157, 152, 129, 81, 77, 95, 102, 204, 204, 204, 204, 204, 204, 204,
        204, 204, 204, 204, 204, 204, 204, 204, 204
    };

    byte[] blockB =
    {
        150, 150, 150, 150, 146, 149, 152, 154, 164, 166, 154, 132, 99, 92, 106, 112, 204, 204, 204, 204,
        204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 150, 150, 150, 150, 146, 149, 152, 154,
        161, 164, 151, 130, 93, 86, 100, 106, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204,
        204, 204, 204, 204, 150, 150, 150, 150, 146, 149, 152, 154, 158, 161, 148, 127, 93, 86, 100, 106,
        204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 150, 150, 150, 150,
        146, 149, 152, 154, 156, 159, 146, 125, 99, 92, 106, 112, 204, 204, 204, 204, 204, 204, 204, 204,
        204, 204, 204, 204, 204, 204, 204, 204, 148, 148, 148, 148, 149, 158, 162, 159, 155, 155, 153, 129,
        94, 87, 101, 106, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204,
        151, 151, 151, 151, 152, 159, 161, 156, 155, 155, 153, 129, 94, 87, 101, 106, 204, 204, 204, 204,
        204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 154, 154, 154, 154, 156, 161, 159, 152,
        155, 155, 153, 129, 94, 87, 101, 106, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204,
        204, 204, 204, 204, 156, 156, 156, 156, 159, 162, 158, 149, 155, 155, 153, 129, 94, 87, 101, 106,
        204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 152, 153, 157, 162,
        150, 149, 149, 151, 155, 160, 150, 131, 91, 90, 104, 104, 204, 204, 204, 204, 204, 204, 204, 204,
        204, 204, 204, 204, 204, 204, 204, 204, 152, 156, 158, 157, 140, 137, 145, 159, 155, 160, 150, 131,
        89, 88, 102, 101, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204,
        153, 161, 160, 149, 118, 128, 147, 162, 155, 160, 150, 131, 86, 85, 99, 98, 204, 204, 204, 204, 204,
        204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 154, 165, 161, 144, 96, 128, 154, 159, 155,
        160, 150, 131, 83, 82, 97, 96, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204,
        204, 204, 161, 160, 149, 105, 78, 127, 156, 170, 156, 156, 154, 130, 81, 77, 95, 102, 204, 204, 204,
        204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 160, 160, 133, 85, 81, 129, 155,
        167, 156, 156, 154, 130, 81, 77, 95, 102, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204,
        204, 204, 204, 204, 204, 156, 147, 109, 76, 85, 130, 153, 163, 156, 156, 154, 130, 81, 77, 95, 102,
        204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 152, 128, 87, 83,
        88, 132, 152, 159, 156, 156, 154, 130, 81, 77, 95, 102, 204, 204, 204, 204, 204, 204, 204, 204, 204,
        204, 204, 204, 204, 204, 204, 204
    };

    // act
    int actualSse = LossyUtils.Vp8_Sse16X16(blockA, blockB);

    // assert
    Assert.Equal(expectedSse, actualSse);
}

private static void RunVp8Sse16X8Test()
{
    // Shared test body: feeds two fixed byte buffers to LossyUtils.Vp8_Sse16X8
    // and checks the result against a fixed expected value.

    // arrange
    const int expectedSse = 749;

    byte[] blockA =
    {
        107, 104, 104, 103, 101, 106, 123, 119, 170, 171, 172, 171, 168, 175, 171, 173, 151, 151, 149, 150,
        147, 147, 146, 159, 164, 165, 154, 129, 92, 90, 101, 105, 104, 103, 104, 101, 100, 105, 123, 117,
        172, 172, 172, 168, 170, 177, 170, 175, 151, 149, 150, 150, 147, 147, 156, 161, 161, 161, 151, 126,
        93, 90, 102, 107, 104, 103, 104, 101, 104, 104, 122, 117, 172, 172, 170, 168, 170, 177, 172, 175,
        150, 149, 152, 151, 148, 151, 160, 159, 157, 157, 148, 133, 96, 90, 103, 107, 104, 104, 101, 100,
        102, 102, 121, 117, 170, 170, 169, 171, 171, 179, 173, 175, 149, 151, 152, 151, 148, 154, 162, 157,
        154, 154, 151, 132, 92, 89, 101, 108, 104, 102, 101, 101, 103, 103, 123, 118, 171, 168, 177, 173,
        171, 178, 172, 176, 152, 152, 152, 151, 154, 162, 161, 155, 149, 157, 156, 129, 92, 87, 101, 107,
        102, 100, 107, 100, 101, 102, 123, 118, 170, 175, 182, 172, 171, 179, 173, 175, 152, 151, 154, 155,
        160, 162, 161, 153, 150, 156, 153, 129, 92, 91, 102, 106, 100, 109, 115, 99, 101, 102, 124, 120,
        171, 179, 178, 172, 171, 181, 171, 173, 154, 154, 154, 162, 160, 158, 156, 152, 153, 157, 151, 128,
        86, 86, 102, 105, 102, 122, 114, 99, 101, 102, 125, 120, 178, 173, 177, 172, 171, 180, 172, 173,
        154, 152, 158, 163, 150, 148, 148, 156, 151, 158, 152, 129, 87, 87, 101, 105
    };

    byte[] blockB =
    {
        103, 103, 103, 103, 101, 106, 122, 114, 171, 171, 171, 171, 171, 177, 169, 175, 150, 150, 150, 150,
        146, 149, 152, 154, 161, 164, 151, 130, 93, 86, 100, 106, 103, 103, 103, 103, 101, 106, 122, 114,
        171, 171, 171, 171, 171, 177, 169, 175, 150, 150, 150, 150, 146, 149, 152, 154, 158, 161, 148, 127,
        93, 86, 100, 106, 103, 103, 103, 103, 101, 106, 122, 114, 171, 171, 171, 171, 171, 177, 169, 175,
        150, 150, 150, 150, 146, 149, 152, 154, 156, 159, 146, 125, 99, 92, 106, 112, 103, 103, 103, 103,
        101, 106, 122, 114, 171, 171, 171, 171, 171, 177, 169, 175, 148, 148, 148, 148, 149, 158, 162, 159,
        155, 155, 153, 129, 94, 87, 101, 106, 102, 100, 100, 102, 100, 101, 120, 122, 170, 176, 176, 170,
        174, 180, 171, 177, 151, 151, 151, 151, 152, 159, 161, 156, 155, 155, 153, 129, 94, 87, 101, 106,
        102, 105, 105, 102, 100, 101, 120, 122, 170, 176, 176, 170, 174, 180, 171, 177, 154, 154, 154, 154,
        156, 161, 159, 152, 155, 155, 153, 129, 94, 87, 101, 106, 102, 112, 112, 102, 100, 101, 120, 122,
        170, 176, 176, 170, 174, 180, 171, 177, 156, 156, 156, 156, 159, 162, 158, 149, 155, 155, 153, 129,
        94, 87, 101, 106, 102, 117, 117, 102, 100, 101, 120, 122, 170, 176, 176, 170, 174, 180, 171, 177,
        152, 153, 157, 162, 150, 149, 149, 151, 155, 160, 150, 131, 91, 90, 104, 104
    };

    // act
    int actualSse = LossyUtils.Vp8_Sse16X8(blockA, blockB);

    // assert
    Assert.Equal(expectedSse, actualSse);
}

private static void RunVp8Sse4X4Test()
{
// arrange
Expand Down Expand Up @@ -168,6 +286,12 @@ private static void RunHadamardTransformTest()
[Fact]
public void RunTransformOne_Works()
{
    // Calls the shared test body directly, without going through FeatureTestRunner.
    RunTransformOneTest();
}

[Fact]
public void Vp8Sse16X16_Works()
{
    // Calls the shared test body directly, without going through FeatureTestRunner.
    RunVp8Sse16X16Test();
}

[Fact]
public void Vp8Sse16X8_Works()
{
    // Calls the shared test body directly, without going through FeatureTestRunner.
    RunVp8Sse16X8Test();
}

[Fact]
public void Vp8Sse4X4_Works()
{
    // Calls the shared test body directly, without going through FeatureTestRunner.
    RunVp8Sse4X4Test();
}

Expand All @@ -190,6 +314,24 @@ private static void RunHadamardTransformTest()
[Fact]
public void TransformOne_WithoutHardwareIntrinsics_Works()
{
    // Runs the shared test body through FeatureTestRunner with the DisableHWIntrinsic setting.
    FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTransformOneTest, HwIntrinsics.DisableHWIntrinsic);
}

[Fact]
public void Vp8Sse16X16_WithHardwareIntrinsics_Works()
{
    // Runs the shared test body through FeatureTestRunner with the AllowAll setting.
    FeatureTestRunner.RunWithHwIntrinsicsFeature(RunVp8Sse16X16Test, HwIntrinsics.AllowAll);
}

[Fact]
public void Vp8Sse16X16_WithoutSSE2_Works()
{
    // Runs the shared test body through FeatureTestRunner with the DisableSSE2 setting.
    FeatureTestRunner.RunWithHwIntrinsicsFeature(RunVp8Sse16X16Test, HwIntrinsics.DisableSSE2);
}

[Fact]
public void Vp8Sse16X16_WithoutAVX2_Works()
{
    // Runs the shared test body through FeatureTestRunner with the DisableAVX2 setting.
    FeatureTestRunner.RunWithHwIntrinsicsFeature(RunVp8Sse16X16Test, HwIntrinsics.DisableAVX2);
}

[Fact]
public void Vp8Sse16X8_WithHardwareIntrinsics_Works()
{
    // Runs the shared test body through FeatureTestRunner with the AllowAll setting.
    FeatureTestRunner.RunWithHwIntrinsicsFeature(RunVp8Sse16X8Test, HwIntrinsics.AllowAll);
}

[Fact]
public void Vp8Sse16X8_WithoutSSE2_Works()
{
    // Runs the shared test body through FeatureTestRunner with the DisableSSE2 setting.
    FeatureTestRunner.RunWithHwIntrinsicsFeature(RunVp8Sse16X8Test, HwIntrinsics.DisableSSE2);
}

[Fact]
public void Vp8Sse16X8_WithoutAVX2_Works()
{
    // Runs the shared test body through FeatureTestRunner with the DisableAVX2 setting.
    FeatureTestRunner.RunWithHwIntrinsicsFeature(RunVp8Sse16X8Test, HwIntrinsics.DisableAVX2);
}

[Fact]
public void Vp8Sse4X4_WithHardwareIntrinsics_Works()
{
    // Runs the shared test body through FeatureTestRunner with the AllowAll setting.
    FeatureTestRunner.RunWithHwIntrinsicsFeature(RunVp8Sse4X4Test, HwIntrinsics.AllowAll);
}

Expand Down