diff --git a/src/ImageSharp/Common/Helpers/Numerics.cs b/src/ImageSharp/Common/Helpers/Numerics.cs index ba5c588ca5..fa0af823d5 100644 --- a/src/ImageSharp/Common/Helpers/Numerics.cs +++ b/src/ImageSharp/Common/Helpers/Numerics.cs @@ -820,6 +820,26 @@ public static int ReduceSum(Vector128 accumulator) } } + /// + /// Reduces elements of the vector into one sum. + /// + /// The accumulator to reduce. + /// The sum of all elements. + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int ReduceSum(Vector256 accumulator) + { + // Add upper lane to lower lane. + Vector128 vsum = Sse2.Add(accumulator.GetLower(), accumulator.GetUpper()); + + // Add odd to even. + vsum = Sse2.Add(vsum, Sse2.Shuffle(vsum, 0b_11_11_01_01)); + + // Add high to low. + vsum = Sse2.Add(vsum, Sse2.Shuffle(vsum, 0b_11_10_11_10)); + + return Sse2.ConvertToInt32(vsum); + } + /// /// Reduces even elements of the vector into one sum. /// diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs index 0ed180a184..ebb198a2d8 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs @@ -2,6 +2,7 @@ // Licensed under the Apache License, Version 2.0. using System; +using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using SixLabors.ImageSharp.Memory; @@ -761,28 +762,184 @@ public static void BundleColorMap(Span row, int width, int xBits, SpanShanon entropy. public static float CombinedShannonEntropy(Span x, Span y) { - double retVal = 0.0d; - uint sumX = 0, sumXY = 0; - for (int i = 0; i < 256; i++) +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx2.IsSupported) { - uint xi = (uint)x[i]; - if (xi != 0) + double retVal = 0.0d; + Vector256 tmp = Vector256.Zero; // has the size of the scratch space of sizeof(int) * 8 + ref int xRef = ref MemoryMarshal.GetReference(x); + ref int yRef = ref MemoryMarshal.GetReference(y); + Vector256 sumXY256 = Vector256.Zero; + Vector256 sumX256 = Vector256.Zero; + ref int tmpRef = ref Unsafe.As, int>(ref tmp); + for (nint i = 0; i < 256; i += 8) { - uint xy = xi + (uint)y[i]; - sumX += xi; - retVal -= FastSLog2(xi); - sumXY += xy; - retVal -= FastSLog2(xy); + Vector256 xVec = Unsafe.As>(ref Unsafe.Add(ref xRef, i)); + Vector256 yVec = Unsafe.As>(ref Unsafe.Add(ref yRef, i)); + + // Check if any X is non-zero: this actually provides a speedup as X is usually sparse. + int mask = Avx2.MoveMask(Avx2.CompareEqual(xVec, Vector256.Zero).AsByte()); + if (mask != -1) + { + Vector256 xy256 = Avx2.Add(xVec, yVec); + sumXY256 = Avx2.Add(sumXY256, xy256); + sumX256 = Avx2.Add(sumX256, xVec); + + // Analyze the different X + Y. + Unsafe.As>(ref tmpRef) = xy256; + if (tmpRef != 0) + { + retVal -= FastSLog2((uint)tmpRef); + if (Unsafe.Add(ref xRef, i) != 0) + { + retVal -= FastSLog2((uint)Unsafe.Add(ref xRef, i)); + } + } + + if (Unsafe.Add(ref tmpRef, 1) != 0) + { + retVal -= FastSLog2((uint)Unsafe.Add(ref tmpRef, 1)); + if (Unsafe.Add(ref xRef, i + 1) != 0) + { + retVal -= FastSLog2((uint)Unsafe.Add(ref xRef, i + 1)); + } + } + + if (Unsafe.Add(ref tmpRef, 2) != 0) + { + retVal -= FastSLog2((uint)Unsafe.Add(ref tmpRef, 2)); + if (Unsafe.Add(ref xRef, i + 2) != 0) + { + retVal -= FastSLog2((uint)Unsafe.Add(ref xRef, i + 2)); + } + } + + if (Unsafe.Add(ref tmpRef, 3) != 0) + { + retVal -= FastSLog2((uint)Unsafe.Add(ref tmpRef, 3)); + if (Unsafe.Add(ref xRef, i + 3) != 0) + { + retVal -= FastSLog2((uint)Unsafe.Add(ref xRef, i + 3)); + } + } + + if (Unsafe.Add(ref tmpRef, 4) != 0) + { + retVal -= FastSLog2((uint)Unsafe.Add(ref tmpRef, 4)); + if (Unsafe.Add(ref xRef, i + 4) != 0) + { + retVal -= FastSLog2((uint)Unsafe.Add(ref xRef, i + 4)); + } + } + + if (Unsafe.Add(ref tmpRef, 5) != 0) + { + retVal -= FastSLog2((uint)Unsafe.Add(ref tmpRef, 5)); + if (Unsafe.Add(ref xRef, i + 5) != 0) + { + retVal -= FastSLog2((uint)Unsafe.Add(ref xRef, i + 5)); + } + } + + if (Unsafe.Add(ref tmpRef, 6) != 0) + { + retVal -= FastSLog2((uint)Unsafe.Add(ref tmpRef, 6)); + if (Unsafe.Add(ref xRef, i + 6) != 0) + { + retVal -= FastSLog2((uint)Unsafe.Add(ref xRef, i + 6)); + } + } + + if (Unsafe.Add(ref tmpRef, 7) != 0) + { + retVal -= FastSLog2((uint)Unsafe.Add(ref tmpRef, 7)); + if (Unsafe.Add(ref xRef, i + 7) != 0) + { + retVal -= FastSLog2((uint)Unsafe.Add(ref xRef, i + 7)); + } + } + } + else + { + // X is fully 0, so only deal with Y. + sumXY256 = Avx2.Add(sumXY256, yVec); + + if (Unsafe.Add(ref yRef, i) != 0) + { + retVal -= FastSLog2((uint)Unsafe.Add(ref yRef, i)); + } + + if (Unsafe.Add(ref yRef, i + 1) != 0) + { + retVal -= FastSLog2((uint)Unsafe.Add(ref yRef, i + 1)); + } + + if (Unsafe.Add(ref yRef, i + 2) != 0) + { + retVal -= FastSLog2((uint)Unsafe.Add(ref yRef, i + 2)); + } + + if (Unsafe.Add(ref yRef, i + 3) != 0) + { + retVal -= FastSLog2((uint)Unsafe.Add(ref yRef, i + 3)); + } + + if (Unsafe.Add(ref yRef, i + 4) != 0) + { + retVal -= FastSLog2((uint)Unsafe.Add(ref yRef, i + 4)); + } + + if (Unsafe.Add(ref yRef, i + 5) != 0) + { + retVal -= FastSLog2((uint)Unsafe.Add(ref yRef, i + 5)); + } + + if (Unsafe.Add(ref yRef, i + 6) != 0) + { + retVal -= FastSLog2((uint)Unsafe.Add(ref yRef, i + 6)); + } + + if (Unsafe.Add(ref yRef, i + 7) != 0) + { + retVal -= FastSLog2((uint)Unsafe.Add(ref yRef, i + 7)); + } + } } - else if (y[i] != 0) + + // Sum up sumX256 to get sumX and sum up sumXY256 to get sumXY. + int sumX = Numerics.ReduceSum(sumX256); + int sumXY = Numerics.ReduceSum(sumXY256); + + retVal += FastSLog2((uint)sumX) + FastSLog2((uint)sumXY); + + return (float)retVal; + } + else +#endif + { + double retVal = 0.0d; + uint sumX = 0, sumXY = 0; + for (int i = 0; i < 256; i++) { - sumXY += (uint)y[i]; - retVal -= FastSLog2((uint)y[i]); + uint xi = (uint)x[i]; + if (xi != 0) + { + uint xy = xi + (uint)y[i]; + sumX += xi; + retVal -= FastSLog2(xi); + sumXY += xy; + retVal -= FastSLog2(xy); + } + else if (y[i] != 0) + { + sumXY += (uint)y[i]; + retVal -= FastSLog2((uint)y[i]); + } } - } - retVal += FastSLog2(sumX) + FastSLog2(sumXY); - return (float)retVal; + retVal += FastSLog2(sumX) + FastSLog2(sumXY); + return (float)retVal; + } } [MethodImpl(InliningOptions.ShortMethod)] @@ -838,6 +995,7 @@ public static void ColorCodeToMultipliers(uint colorCode, ref Vp8LMultipliers m) private static float FastSLog2Slow(uint v) { DebugGuard.MustBeGreaterThanOrEqualTo(v, LogLookupIdxMax, nameof(v)); + if (v < ApproxLogWithCorrectionMax) { int logCnt = 0; @@ -867,7 +1025,7 @@ private static float FastSLog2Slow(uint v) private static float FastLog2Slow(uint v) { - Guard.MustBeGreaterThanOrEqualTo(v, LogLookupIdxMax, nameof(v)); + DebugGuard.MustBeGreaterThanOrEqualTo(v, LogLookupIdxMax, nameof(v)); if (v < ApproxLogWithCorrectionMax) { diff --git a/tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs b/tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs index 62e23c1cdf..684d7791bf 100644 --- a/tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs +++ b/tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs @@ -10,6 +10,17 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp [Trait("Format", "Webp")] public class LosslessUtilsTests { + private static void RunCombinedShannonEntropyTest() + { + int[] x = { 3, 5, 2, 5, 3, 1, 2, 2, 3, 3, 1, 2, 1, 2, 1, 1, 0, 0, 0, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 1, 1, 0, 0, 2, 1, 1, 0, 3, 1, 2, 3, 2, 3 }; + int[] y = { 11, 12, 8, 3, 4, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 2, 1, 1, 2, 4, 6, 4 }; + float expected = 884.7585f; + + float actual = LosslessUtils.CombinedShannonEntropy(x, y); + + Assert.Equal(expected, actual, 5); + } + private static void RunSubtractGreenTest() { uint[] pixelData = @@ -193,6 +204,9 @@ private static void RunPredictor13Test() } } + [Fact] + public void CombinedShannonEntropy_Works() => RunCombinedShannonEntropyTest(); + [Fact] public void Predictor11_Works() => RunPredictor11Test(); @@ -216,6 +230,12 @@ private static void RunPredictor13Test() #if SUPPORTS_RUNTIME_INTRINSICS + [Fact] + public void CombinedShannonEntropy_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunCombinedShannonEntropyTest, HwIntrinsics.AllowAll); + + [Fact] + public void CombinedShannonEntropy_WithoutAVX2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunCombinedShannonEntropyTest, HwIntrinsics.DisableAVX2); + [Fact] public void Predictor11_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor11Test, HwIntrinsics.AllowAll); @@ -238,19 +258,19 @@ private static void RunPredictor13Test() public void SubtractGreen_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunSubtractGreenTest, HwIntrinsics.AllowAll); [Fact] - public void SubtractGreen_WithoutAvx_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunSubtractGreenTest, HwIntrinsics.DisableAVX); + public void SubtractGreen_WithoutAVX2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunSubtractGreenTest, HwIntrinsics.DisableAVX2); [Fact] - public void SubtractGreen_WithoutAvxOrSSSE3_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunSubtractGreenTest, HwIntrinsics.DisableAVX | HwIntrinsics.DisableSSSE3); + public void SubtractGreen_WithoutAvxOrSSSE3_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunSubtractGreenTest, HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSSE3); [Fact] public void AddGreenToBlueAndRed_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunAddGreenToBlueAndRedTest, HwIntrinsics.AllowAll); [Fact] - public void AddGreenToBlueAndRed_WithoutAvx_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunAddGreenToBlueAndRedTest, HwIntrinsics.DisableAVX); + public void AddGreenToBlueAndRed_WithoutAVX2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunAddGreenToBlueAndRedTest, HwIntrinsics.DisableAVX2); [Fact] - public void AddGreenToBlueAndRed_WithoutAvxOrSSSE3_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunAddGreenToBlueAndRedTest, HwIntrinsics.DisableAVX | HwIntrinsics.DisableSSE2 | HwIntrinsics.DisableSSSE3); + public void AddGreenToBlueAndRed_WithoutAVX2OrSSSE3_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunAddGreenToBlueAndRedTest, HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE2 | HwIntrinsics.DisableSSSE3); [Fact] public void TransformColor_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTransformColorTest, HwIntrinsics.AllowAll);