Skip to content

JIT: wrong result with Avx512BW.BlendVariable #114922

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
BruceForstall opened this issue Apr 22, 2025 · 4 comments
Closed

JIT: wrong result with Avx512BW.BlendVariable #114922

BruceForstall opened this issue Apr 22, 2025 · 4 comments
Assignees
Labels
area-CodeGen-coreclr CLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMI avx512 Related to the AVX-512 architecture
Milestone

Comments

@BruceForstall
Copy link
Contributor

// Generated by Fuzzlyn v2.5 on 2025-04-22 17:33:23
// Run on X64 Windows
// Seed: 5065841368374928084-vectort,vector128,vector256,vector512,x86aes,x86avx,x86avx2,x86avx512bw,x86avx512bwvl,x86avx512cd,x86avx512cdvl,x86avx512dq,x86avx512dqvl,x86avx512f,x86avx512fvl,x86avx512fx64,x86avx512vbmi,x86avx512vbmivl,x86bmi1,x86bmi1x64,x86bmi2,x86bmi2x64,x86fma,x86lzcnt,x86lzcntx64,x86pclmulqdq,x86popcnt,x86popcntx64,x86sse,x86ssex64,x86sse2,x86sse2x64,x86sse3,x86sse41,x86sse41x64,x86sse42,x86sse42x64,x86ssse3,x86x86base
// Reduced from 351.2 KiB to 1.3 KiB in 00:03:48
// Debug: Outputs <0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>
// Release: Outputs <0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>
using System;
using System.Numerics;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

// Fuzzlyn-reduced reproduction whose printed Vector512<ushort> differs between
// Debug and Release builds (see the expected outputs in the header comment above).
public class Program
{
    // Output sink; assigned at the start of Main.
    public static IRuntime s_rt;
    // Never assigned in this program, so it is read as its default value 0.
    public static ushort s_10;
    public static void Main()
    {
        s_rt = new Runtime();
        // Jagged array wrapping a single vector created from the value 1.
        Vector512<ushort>[][] vr24 = new Vector512<ushort>[][]
        {
            new Vector512<ushort>[]
            {
                Vector512.Create<ushort>(1)
            }
        };
        // First BlendVariable argument: a scalar sbyte 0 converted to a 512-bit ushort vector.
        var vr25 = (sbyte)0;
        var vr26 = Vector256.CreateScalar(vr25);
        var vr27 = Avx512BW.ConvertToVector512UInt16(vr26);
        // Second argument: vr28 with a 128-bit vector built from s_10 inserted at index 0.
        var vr28 = Vector512.Create<ushort>(1);
        var vr29 = Vector128.Create<ushort>(s_10);
        var vr30 = Avx512F.InsertVector128(vr28, vr29, 0);
        // Third argument: the vector loaded from vr24 combined with a constant vector via MultiplyLow.
        var vr31 = vr24[0][0];
        var vr32 = Vector512.Create(1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 63696, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0);
        var vr33 = Avx512BW.MultiplyLow(vr31, vr32);
        // The call named in the issue title; its result is the value that gets printed.
        Vector512<ushort> vr23 = Avx512BW.BlendVariable(vr27, vr30, vr33);
        s_rt.WriteLine(vr23);
    }
}

// Output abstraction used by the reproduction; implemented by Runtime, which
// forwards to System.Console.WriteLine.
public interface IRuntime
{
    // Accepts a single value of any type T to be written out.
    void WriteLine<T>(T value);
}

// Console-backed implementation of IRuntime.
public class Runtime : IRuntime
{
    // Passes the value straight through to System.Console.WriteLine.
    public void WriteLine<T>(T value)
    {
        System.Console.WriteLine(value);
    }
}

Another case:

// Generated by Fuzzlyn v2.5 on 2025-04-22 17:40:04
// Run on X64 Windows
// Seed: 541182799536624154-vectort,vector128,vector256,vector512,x86aes,x86avx,x86avx2,x86avx512bw,x86avx512bwvl,x86avx512cd,x86avx512cdvl,x86avx512dq,x86avx512dqvl,x86avx512f,x86avx512fvl,x86avx512fx64,x86avx512vbmi,x86avx512vbmivl,x86bmi1,x86bmi1x64,x86bmi2,x86bmi2x64,x86fma,x86lzcnt,x86lzcntx64,x86pclmulqdq,x86popcnt,x86popcntx64,x86sse,x86ssex64,x86sse2,x86sse2x64,x86sse3,x86sse41,x86sse41x64,x86sse42,x86sse42x64,x86ssse3,x86x86base
// Reduced from 69.1 KiB to 1.0 KiB in 00:00:41
// Debug: Outputs <1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1>
// Release: Outputs <1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1>
using System;
using System.Numerics;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

// Fuzzlyn-reduced reproduction whose printed Vector512<sbyte> differs between
// Debug and Release builds (see the expected outputs in the header comment above).
public class Program
{
    // Output sink; assigned at the start of Main.
    public static IRuntime s_rt;
    public static void Main()
    {
        s_rt = new Runtime();
        // First BlendVariable argument: vector created from the value 1.
        var vr5 = Vector512.Create<sbyte>(1);
        // Second argument: vr6 (created from 0) with the 128-bit constant vr7 inserted at index 0.
        var vr6 = Vector512.Create<sbyte>(0);
        var vr7 = Vector128.Create(0, 1, 0, 1, 0, -1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1);
        var vr8 = Avx512F.InsertVector128(vr6, vr7, 0);
        // Third argument: a constant vector containing a mix of 0, 1 and -1 elements.
        var vr9 = Vector512.Create(0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        // The call named in the issue title; its result is the value that gets printed.
        Vector512<sbyte> vr10 = Avx512BW.BlendVariable(vr5, vr8, vr9);
        s_rt.WriteLine(vr10);
    }
}

// Output abstraction used by the reproduction; implemented by Runtime, which
// forwards to System.Console.WriteLine.
public interface IRuntime
{
    // Accepts a single value of any type T to be written out.
    void WriteLine<T>(T value);
}

// Console-backed implementation of IRuntime.
public class Runtime : IRuntime
{
    // Passes the value straight through to System.Console.WriteLine.
    public void WriteLine<T>(T value)
    {
        System.Console.WriteLine(value);
    }
}

Another case:

// Generated by Fuzzlyn v2.5 on 2025-04-22 17:40:46
// Run on X64 Windows
// Seed: 5475524594276290114-vectort,vector128,vector256,vector512,x86aes,x86avx,x86avx2,x86avx512bw,x86avx512bwvl,x86avx512cd,x86avx512cdvl,x86avx512dq,x86avx512dqvl,x86avx512f,x86avx512fvl,x86avx512fx64,x86avx512vbmi,x86avx512vbmivl,x86bmi1,x86bmi1x64,x86bmi2,x86bmi2x64,x86fma,x86lzcnt,x86lzcntx64,x86pclmulqdq,x86popcnt,x86popcntx64,x86sse,x86ssex64,x86sse2,x86sse2x64,x86sse3,x86sse41,x86sse41x64,x86sse42,x86sse42x64,x86ssse3,x86x86base
// Reduced from 34.9 KiB to 0.9 KiB in 00:00:32
// Debug: Outputs <-1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1>
// Release: Outputs <-1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1>
using System;
using System.Numerics;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

// Two-field value type that supplies the sbyte inputs read by Program.Main.
public struct S1
{
    public sbyte F5;
    public sbyte F8;
    // Chains to the parameterless constructor, then stores both arguments in their fields.
    public S1(sbyte f5, sbyte f8) : this()
    {
        F5 = f5;
        F8 = f8;
    }
}

// Fuzzlyn-reduced reproduction whose printed Vector512<sbyte> differs between
// Debug and Release builds (see the expected outputs in the header comment above
// this program's using directives).
public class Program
{
    // 1x1x1 three-dimensional array of S1[] whose only element is S1(F5: -1, F8: 1).
    public static S1[,, ][] s_1 = new S1[,, ][]
    {
        {
            {
                new S1[]
                {
                    new S1(-1, 1)
                }
            }
        }
    };
    public static void Main()
    {
        // vr6 is built from F5 with CreateScalar.
        var vr5 = s_1[0, 0, 0][0].F5;
        Vector512<sbyte> vr6 = Vector512.CreateScalar(vr5);
        // vr8 (512-bit) and vr10 (128-bit) are both created from F8.
        var vr7 = s_1[0, 0, 0][0].F8;
        var vr8 = Vector512.Create<sbyte>(vr7);
        var vr9 = s_1[0, 0, 0][0].F8;
        var vr10 = Vector128.Create<sbyte>(vr9);
        // vr6 is used twice here: as the base of the InsertVector128 (index 1) that forms the
        // second BlendVariable argument, and directly as the third argument. The result
        // overwrites vr6.
        vr6 = Avx512BW.BlendVariable(vr8, Avx512F.InsertVector128(vr6, vr10, 1), vr6);
        System.Console.WriteLine(vr6);
    }
}

Possibly a duplicate of #114921.

cc @dotnet/jit-contrib @dotnet/intel

@BruceForstall BruceForstall added area-CodeGen-coreclr CLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMI avx512 Related to the AVX-512 architecture labels Apr 22, 2025
@BruceForstall BruceForstall added this to the 10.0.0 milestone Apr 22, 2025
Copy link
Contributor

Tagging subscribers to this area: @JulieLeeMSFT, @jakobbotsch
See info in area-owners.md if you want to be subscribed.

@BruceForstall BruceForstall self-assigned this Apr 22, 2025
@BruceForstall
Copy link
Contributor Author

Another case:

// Generated by Fuzzlyn v2.7 on 2025-04-23 21:46:30
// Run on X64 Windows
// Seed: 8647346386771555139-vectort,vector128,vector256,vector512,x86aes,x86avx,x86avx2,x86avx512bw,x86avx512bwvl,x86avx512cd,x86avx512cdvl,x86avx512dq,x86avx512dqvl,x86avx512f,x86avx512fvl,x86avx512fx64,x86avx512vbmi,x86avx512vbmivl,x86bmi1,x86bmi1x64,x86bmi2,x86bmi2x64,x86fma,x86lzcnt,x86lzcntx64,x86pclmulqdq,x86popcnt,x86popcntx64,x86sse,x86ssex64,x86sse2,x86sse2x64,x86sse3,x86sse41,x86sse41x64,x86sse42,x86sse42x64,x86ssse3,x86x86base
// Reduced from 273.0 KiB to 1.1 KiB in 00:00:52
// Debug: Outputs <0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1>
// Release: Outputs <0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1>
using System;
using System.Numerics;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

// Fuzzlyn-reduced reproduction whose printed Vector512<short> differs between
// Debug and Release builds (see the expected outputs in the header comment above).
public class Program
{
    // Output sink; assigned at the start of Main.
    public static IRuntime s_rt;
    public static void Main()
    {
        s_rt = new Runtime();
        // First BlendVariable argument: element 0 of a byte vector created from 1,
        // extracted with Sse41.Extract and used to create a 512-bit short vector.
        var vr13 = Vector128.Create<byte>(1);
        var vr14 = Sse41.Extract(vr13, 0);
        var vr15 = Vector512.Create<short>(vr14);
        // Second argument: CreateScalar(0) with the result of an Sse41.Blend of two
        // all-zero-valued 128-bit vectors inserted at index 0.
        var vr16 = (short)0;
        var vr17 = Vector512.CreateScalar(vr16);
        var vr18 = Vector128.Create<short>(0);
        var vr19 = Vector128.Create(0, 0, 0, 0, 0, 0, 0, 0);
        var vr20 = Sse41.Blend(vr18, vr19, 0);
        var vr21 = Avx512F.InsertVector128(vr17, vr20, 0);
        // Third argument: CreateScalar(-1) passed through AddSaturate with a vector created from 0.
        var vr22 = (short)-1;
        var vr23 = Vector512.CreateScalar(vr22);
        var vr24 = Vector512.Create<short>(0);
        var vr25 = Avx512BW.AddSaturate(vr23, vr24);
        // The call named in the issue title; its result is the value that gets printed.
        Vector512<short> vr26 = Avx512BW.BlendVariable(vr15, vr21, vr25);
        s_rt.WriteLine(vr26);
    }
}

// Output abstraction used by the reproduction; implemented by Runtime, which
// forwards to System.Console.WriteLine.
public interface IRuntime
{
    // Accepts a single value of any type T to be written out.
    void WriteLine<T>(T value);
}

// Console-backed implementation of IRuntime.
public class Runtime : IRuntime
{
    // Passes the value straight through to System.Console.WriteLine.
    public void WriteLine<T>(T value)
    {
        System.Console.WriteLine(value);
    }
}

@BruceForstall
Copy link
Contributor Author

More cases:

// Generated by Fuzzlyn v2.7 on 2025-04-23 22:44:42
// Run on X64 Windows
// Seed: 1593219699638934932-vectort,vector128,vector256,vector512,x86aes,x86avx,x86avx2,x86avx512bw,x86avx512bwvl,x86avx512cd,x86avx512cdvl,x86avx512dq,x86avx512dqvl,x86avx512f,x86avx512fvl,x86avx512fx64,x86avx512vbmi,x86avx512vbmivl,x86bmi1,x86bmi1x64,x86bmi2,x86bmi2x64,x86fma,x86lzcnt,x86lzcntx64,x86pclmulqdq,x86popcnt,x86popcntx64,x86sse,x86ssex64,x86sse2,x86sse2x64,x86sse3,x86sse41,x86sse41x64,x86sse42,x86sse42x64,x86ssse3,x86x86base
// Reduced from 121.4 KiB to 1.0 KiB in 00:00:50
// Debug: Outputs <7434824043319439063, 0, 0, 0, 0, 0, 0, 0>
// Release: Outputs <7434824043319439063, 0, 0, 0, 1, 1, 1, 1>
using System;
using System.Numerics;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

// Fuzzlyn-reduced reproduction whose printed Vector512<long> differs between
// Debug and Release builds (see the expected outputs in the header comment above).
// Unlike the earlier cases, this one calls Avx512F.BlendVariable.
public class Program
{
    // Output sink; assigned at the start of Main.
    public static IRuntime s_rt;
    // Never assigned in this program, so it is read as its default value 0.
    public static sbyte s_4;
    // Jagged array whose only element is 1.
    public static int[][] s_6 = new int[][]
    {
        new int[]
        {
            1
        }
    };
    public static void Main()
    {
        s_rt = new Runtime();
        int vr9 = s_6[0][0];
        // First BlendVariable argument: vector created from the value 1.
        var vr10 = Vector512.Create<long>(1);
        // Second argument: vr11 (created from 0) with a 128-bit scalar constant inserted at index 0.
        var vr11 = Vector512.Create<long>(0);
        var vr12 = Vector128.CreateScalar(7434824043319439063L);
        var vr13 = Avx512F.InsertVector128(vr11, vr12, 0);
        // Third argument: vector created from s_4 - vr9, which is 0 - 1 = -1 with the values above.
        var vr14 = s_4 - vr9;
        var vr15 = Vector512.Create<long>(vr14);
        // The blend under test; its result is the value that gets printed.
        Vector512<long> vr16 = Avx512F.BlendVariable(vr10, vr13, vr15);
        s_rt.WriteLine(vr16);
    }
}

// Output abstraction used by the reproduction; implemented by Runtime, which
// forwards to System.Console.WriteLine.
public interface IRuntime
{
    // Accepts a single value of any type T to be written out.
    void WriteLine<T>(T value);
}

// Console-backed implementation of IRuntime.
public class Runtime : IRuntime
{
    // Passes the value straight through to System.Console.WriteLine.
    public void WriteLine<T>(T value)
    {
        System.Console.WriteLine(value);
    }
}
// Generated by Fuzzlyn v2.7 on 2025-04-23 22:45:33
// Run on X64 Windows
// Seed: 1584331799433842652-vectort,vector128,vector256,vector512,x86aes,x86avx,x86avx2,x86avx512bw,x86avx512bwvl,x86avx512cd,x86avx512cdvl,x86avx512dq,x86avx512dqvl,x86avx512f,x86avx512fvl,x86avx512fx64,x86avx512vbmi,x86avx512vbmivl,x86bmi1,x86bmi1x64,x86bmi2,x86bmi2x64,x86fma,x86lzcnt,x86lzcntx64,x86pclmulqdq,x86popcnt,x86popcntx64,x86sse,x86ssex64,x86sse2,x86sse2x64,x86sse3,x86sse41,x86sse41x64,x86sse42,x86sse42x64,x86ssse3,x86x86base
// Reduced from 23.9 KiB to 0.8 KiB in 00:00:24
// Debug: Outputs <-9223372036854775808, 0, 0, 0, 0, 1, 0, 0>
// Release: Outputs <-9223372036854775808, 0, 0, 0, 0, 0, 0, 0>
using System;
using System.Numerics;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

// Fuzzlyn-reduced reproduction whose printed Vector512<long> differs between
// Debug and Release builds (see the expected outputs in the header comment above).
// This case calls Avx512F.BlendVariable.
public class Program
{
    // Output sink; assigned at the start of Main.
    public static IRuntime s_rt;
    public static void Main()
    {
        s_rt = new Runtime();
        // First BlendVariable argument: CreateScalar of the literal -9223372036854775808.
        var vr7 = Vector512.CreateScalar(-9223372036854775808L);
        // Second argument: vr8 (created from 1) with a 128-bit (0, 0) vector inserted at index 0.
        var vr8 = Vector512.Create<long>(1);
        var vr9 = Vector128.Create(0, 0);
        var vr10 = Avx512F.InsertVector128(vr8, vr9, 0);
        // Third argument: constant vector that is 0 everywhere except -1 at element index 5.
        var vr11 = Vector512.Create(0, 0, 0, 0, 0, -1, 0, 0);
        // The blend under test; its result is the value that gets printed.
        Vector512<long> vr12 = Avx512F.BlendVariable(vr7, vr10, vr11);
        s_rt.WriteLine(vr12);
    }
}

// Output abstraction used by the reproduction; implemented by Runtime, which
// forwards to System.Console.WriteLine.
public interface IRuntime
{
    // Accepts a single value of any type T to be written out.
    void WriteLine<T>(T value);
}

// Console-backed implementation of IRuntime.
public class Runtime : IRuntime
{
    // Passes the value straight through to System.Console.WriteLine.
    public void WriteLine<T>(T value)
    {
        System.Console.WriteLine(value);
    }
}
// Generated by Fuzzlyn v2.7 on 2025-04-23 22:45:58
// Run on X64 Windows
// Seed: 2041690579167985867-vectort,vector128,vector256,vector512,x86aes,x86avx,x86avx2,x86avx512bw,x86avx512bwvl,x86avx512cd,x86avx512cdvl,x86avx512dq,x86avx512dqvl,x86avx512f,x86avx512fvl,x86avx512fx64,x86avx512vbmi,x86avx512vbmivl,x86bmi1,x86bmi1x64,x86bmi2,x86bmi2x64,x86fma,x86lzcnt,x86lzcntx64,x86pclmulqdq,x86popcnt,x86popcntx64,x86sse,x86ssex64,x86sse2,x86sse2x64,x86sse3,x86sse41,x86sse41x64,x86sse42,x86sse42x64,x86ssse3,x86x86base
// Reduced from 41.2 KiB to 1.0 KiB in 00:00:31
// Debug: Outputs <0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1>
// Release: Outputs <0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1>
using System;
using System.Numerics;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

// Single-field reference type that supplies the ushort input read by Program.Main.
public class C0
{
    public ushort F9;
    // Stores the argument in F9.
    public C0(ushort f9)
    {
        F9 = f9;
    }
}

// Fuzzlyn-reduced reproduction whose printed Vector512<ushort> differs between
// Debug and Release builds (see the expected outputs in the header comment above
// this program's using directives). Here the BlendVariable result is not printed
// directly; it is passed as the second argument of ShiftRightLogicalVariable.
public class Program
{
    // Array whose only element is a C0 with F9 == 1.
    public static C0[] s_3 = new C0[]
    {
        new C0(1)
    };
    public static void Main()
    {
        // Vector that gets shifted: created from s_3[0].F9.
        var vr13 = s_3[0].F9;
        var vr14 = Vector512.Create<ushort>(vr13);
        // First BlendVariable argument: CreateScalar(1).
        var vr15 = (ushort)1;
        var vr16 = Vector512.CreateScalar(vr15);
        // Second argument: CompareNotEqual of two vectors both created from 0,
        // with a 128-bit vector created from 1 inserted at index 0.
        var vr17 = Vector512.Create<ushort>(0);
        var vr18 = Vector512.Create<ushort>(0);
        var vr19 = Avx512BW.CompareNotEqual(vr17, vr18);
        var vr20 = Vector128.Create<ushort>(1);
        var vr21 = Avx512F.InsertVector128(vr19, vr20, 0);
        // Third argument: CreateScalar(52046).
        var vr22 = (ushort)52046;
        var vr23 = Vector512.CreateScalar(vr22);
        var vr24 = Avx512BW.BlendVariable(vr16, vr21, vr23);
        // The blend result feeds the variable shift of vr14; the shifted vector is printed.
        var vr25 = Avx512BW.ShiftRightLogicalVariable(vr14, vr24);
        System.Console.WriteLine(vr25);
    }
}
// Generated by Fuzzlyn v2.7 on 2025-04-23 22:46:30
// Run on X64 Windows
// Seed: 2502091404831381095-vectort,vector128,vector256,vector512,x86aes,x86avx,x86avx2,x86avx512bw,x86avx512bwvl,x86avx512cd,x86avx512cdvl,x86avx512dq,x86avx512dqvl,x86avx512f,x86avx512fvl,x86avx512fx64,x86avx512vbmi,x86avx512vbmivl,x86bmi1,x86bmi1x64,x86bmi2,x86bmi2x64,x86fma,x86lzcnt,x86lzcntx64,x86pclmulqdq,x86popcnt,x86popcntx64,x86sse,x86ssex64,x86sse2,x86sse2x64,x86sse3,x86sse41,x86sse41x64,x86sse42,x86sse42x64,x86ssse3,x86x86base
// Reduced from 89.8 KiB to 0.8 KiB in 00:00:44
// Debug: Outputs <0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 226, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>
// Release: Outputs <0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>
using System;
using System.Numerics;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

// Fuzzlyn-reduced reproduction whose printed Vector512<byte> differs between
// Debug and Release builds (see the expected outputs in the header comment above).
public class Program
{
    public static void Main()
    {
        // Constant vector; used both as the base of the InsertVector128 below and as the
        // third BlendVariable argument, then overwritten with the blend result.
        Vector512<byte> vr7 = Vector512.Create(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 226, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0);
        // First argument: CompareNotEqual of a vector created from 0 and a vector
        // created from default(byte).
        var vr8 = Vector512.Create<byte>(0);
        byte vr15 = default(byte);
        var vr10 = Vector512.Create<byte>(vr15);
        var vr11 = Avx512BW.CompareNotEqual(vr8, vr10);
        // Second argument: vr7 with CreateScalar(1) inserted at index 0.
        var vr12 = (byte)1;
        var vr13 = Vector128.CreateScalar(vr12);
        var vr14 = Avx512F.InsertVector128(vr7, vr13, 0);
        // The call named in the issue title; its result replaces vr7 and is printed.
        vr7 = Avx512BW.BlendVariable(vr11, vr14, vr7);
        System.Console.WriteLine(vr7);
    }
}

@BruceForstall
Copy link
Contributor Author

I verified that these are all duplicates of #114921.

@github-actions github-actions bot locked and limited conversation to collaborators May 27, 2025
Sign up for free to subscribe to this conversation on GitHub. Already have an account? Sign in.
Labels
area-CodeGen-coreclr CLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMI avx512 Related to the AVX-512 architecture
Projects
None yet
Development

No branches or pull requests

1 participant