Skip to content

[API Proposal]: Add non-overflowing widening sum to Vector types #114832

Closed as not planned
@poizan42

Description

@poizan42

Background and motivation

The various Vector types have Sum methods available for efficiently summing elements. However, they all return the same type as the element type, which means they can't be used if the sum of the elements might overflow.

Writing non-overflowing sum methods that are efficient across diverse platforms with different SIMD instruction availability is a lot of work - the obvious solution of Widen + Sum is generally not the most performant choice.

API Proposal

namespace System.Numerics;

// Proposed API surface (declarations only, no bodies): SumWidening overloads to be
// added to the static Vector class. Each overload is intended to add up all elements
// of `value` and return the total in the next wider integer type, so that the result
// does not overflow the way a sum returned in the element type can.
public static class Vector
{
    // Unsigned element types: byte -> ushort, ushort -> uint, uint -> ulong, ulong -> UInt128.
    public static ushort SumWidening(Vector<byte> value);
    public static uint SumWidening(Vector<ushort> value);
    public static ulong SumWidening(Vector<uint> value);
    public static UInt128 SumWidening(Vector<ulong> value);

    // Signed element types: sbyte -> short, short -> int, int -> long, long -> Int128.
    public static short SumWidening(Vector<sbyte> value);
    public static int SumWidening(Vector<short> value);
    public static long SumWidening(Vector<int> value);
    public static Int128 SumWidening(Vector<long> value);
}
namespace System.Runtime.Intrinsics;

// Proposed API surface (declarations only, no bodies): SumWidening overloads to be
// added to the static Vector64 class. Each overload is intended to add up all elements
// of `value` and return the total in the next wider integer type, so that the result
// does not overflow the way a sum returned in the element type can.
public static class Vector64
{
    // Unsigned element types: byte -> ushort, ushort -> uint, uint -> ulong, ulong -> UInt128.
    public static ushort SumWidening(Vector64<byte> value);
    public static uint SumWidening(Vector64<ushort> value);
    public static ulong SumWidening(Vector64<uint> value);
    public static UInt128 SumWidening(Vector64<ulong> value);

    // Signed element types: sbyte -> short, short -> int, int -> long, long -> Int128.
    public static short SumWidening(Vector64<sbyte> value);
    public static int SumWidening(Vector64<short> value);
    public static long SumWidening(Vector64<int> value);
    public static Int128 SumWidening(Vector64<long> value);
}
namespace System.Runtime.Intrinsics;

// Proposed API surface (declarations only, no bodies): SumWidening overloads to be
// added to the static Vector128 class. Each overload is intended to add up all elements
// of `value` and return the total in the next wider integer type, so that the result
// does not overflow the way a sum returned in the element type can.
public static class Vector128
{
    // Unsigned element types: byte -> ushort, ushort -> uint, uint -> ulong, ulong -> UInt128.
    public static ushort SumWidening(Vector128<byte> value);
    public static uint SumWidening(Vector128<ushort> value);
    public static ulong SumWidening(Vector128<uint> value);
    public static UInt128 SumWidening(Vector128<ulong> value);

    // Signed element types: sbyte -> short, short -> int, int -> long, long -> Int128.
    public static short SumWidening(Vector128<sbyte> value);
    public static int SumWidening(Vector128<short> value);
    public static long SumWidening(Vector128<int> value);
    public static Int128 SumWidening(Vector128<long> value);
}
namespace System.Runtime.Intrinsics;

// Proposed API surface (declarations only, no bodies): SumWidening overloads to be
// added to the static Vector256 class. Each overload is intended to add up all elements
// of `value` and return the total in the next wider integer type, so that the result
// does not overflow the way a sum returned in the element type can.
public static class Vector256
{
    // Unsigned element types: byte -> ushort, ushort -> uint, uint -> ulong, ulong -> UInt128.
    public static ushort SumWidening(Vector256<byte> value);
    public static uint SumWidening(Vector256<ushort> value);
    public static ulong SumWidening(Vector256<uint> value);
    public static UInt128 SumWidening(Vector256<ulong> value);

    // Signed element types: sbyte -> short, short -> int, int -> long, long -> Int128.
    public static short SumWidening(Vector256<sbyte> value);
    public static int SumWidening(Vector256<short> value);
    public static long SumWidening(Vector256<int> value);
    public static Int128 SumWidening(Vector256<long> value);
}
namespace System.Runtime.Intrinsics;

// Proposed API surface (declarations only, no bodies): SumWidening overloads to be
// added to the static Vector512 class. Each overload is intended to add up all elements
// of `value` and return the total in the next wider integer type, so that the result
// does not overflow the way a sum returned in the element type can.
public static class Vector512
{
    // Unsigned element types: byte -> ushort, ushort -> uint, uint -> ulong, ulong -> UInt128.
    public static ushort SumWidening(Vector512<byte> value);
    public static uint SumWidening(Vector512<ushort> value);
    public static ulong SumWidening(Vector512<uint> value);
    public static UInt128 SumWidening(Vector512<ulong> value);

    // Signed element types: sbyte -> short, short -> int, int -> long, long -> Int128.
    public static short SumWidening(Vector512<sbyte> value);
    public static int SumWidening(Vector512<short> value);
    public static long SumWidening(Vector512<int> value);
    public static Int128 SumWidening(Vector512<long> value);
}

API Usage

    /// <summary>
    /// Usage example for the proposed <c>SumWidening</c> API: zeroes every byte of
    /// <paramref name="items"/> whose corresponding bit in <paramref name="bitset"/> is
    /// clear, then returns the widened sum of what remains.
    /// </summary>
    /// <param name="bitset">32-bit selection mask; each byte lane of <paramref name="items"/> is tested against one bit of it.</param>
    /// <param name="items">The 32 byte values to sum conditionally.</param>
    /// <returns>The result of <c>Vector256.SumWidening</c> over the selected bytes.</returns>
    public static ushort ConditionalSumByBitSet(uint bitset, Vector256<byte> items)
    {
        // Shuffle indices: each byte lane picks the bitset byte that contains its bit.
        // Lanes 0-7 read byte 0, lanes 8-15 read byte 1; indices 0x1E and 0x1F address
        // bytes 30 and 31 of the broadcast, which hold bytes 2 and 3 of the 4-byte pattern.
        Vector256<byte> byteSelectors = Vector256.Create(
            0x0000000000000000UL,
            0x0101010101010101UL,
            0x1E1E1E1E1E1E1E1EUL,
            0x1F1F1F1F1F1F1F1FUL).AsByte();

        // One distinct single-bit mask per byte within every group of eight lanes.
        Vector256<byte> bitOfLane = Vector256.Create(0x8040201008040201UL).AsByte();

        // Replicate the 32-bit mask across the whole vector, then isolate each lane's bit.
        Vector256<byte> replicated = Vector256.Create(bitset).AsByte();
        Vector256<byte> laneBit = Vector256.Shuffle(replicated, byteSelectors) & bitOfLane;

        // Lanes whose bit is clear compare equal to zero and are replaced by zero below.
        Vector256<byte> cleared = Vector256.Equals(laneBit, Vector256<byte>.Zero);
        Vector256<byte> kept = Vector256.ConditionalSelect(cleared, Vector256<byte>.Zero, items);

        return Vector256.SumWidening(kept); // <-- Used here
    }

Alternative Designs

No response

Risks

None known

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions