Skip to content

Commit e72f9bd

Browse files
committed
Special-case vinserti* when considering embedded mask optimization
The instructions `vinserti32x4`, `vinserti32x8`, and `vinserti64x2` each assume a specific mask size, which does not match the SIMD base type size of every intrinsic that generates them. So, special-case these instructions when considering embedded masking. Fixes #114921
1 parent 9df77ee commit e72f9bd

File tree

3 files changed

+134
-4
lines changed

3 files changed

+134
-4
lines changed

src/coreclr/jit/lowerxarch.cpp

Lines changed: 43 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10628,14 +10628,53 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
1062810628
// TODO-AVX512-CQ: Ensure we can support embedded operations on RMW intrinsics
1062910629
isEmbeddedMask = false;
1063010630
}
10631+
else
10632+
{
10633+
uint32_t maskSize = genTypeSize(simdBaseType);
10634+
uint32_t operSize = genTypeSize(op2->AsHWIntrinsic()->GetSimdBaseType());
10635+
10636+
if (maskSize != operSize)
10637+
{
10638+
isEmbeddedMask = false;
10639+
}
10640+
else
10641+
{
10642+
// Special cases:
10643+
// 1. vinserti128 => vinserti32x4 assumes a mask size of 512/32 = 16 bits
10644+
// (type int)
10645+
// 2. vinserti32x8 assumes a mask size of 512/32 = 16 bits (type int)
10646+
// 3. vinserti64x2 assumes a mask size of 512/64 = 8 bits (type long)
10647+
10648+
NamedIntrinsic op2IntrinsicId = op2->AsHWIntrinsic()->GetHWIntrinsicId();
10649+
var_types op2SimdBaseType = op2->AsHWIntrinsic()->GetSimdBaseType();
10650+
instruction ins =
10651+
HWIntrinsicInfo::lookupIns(op2IntrinsicId, op2SimdBaseType);
10652+
assert(ins != INS_invalid);
10653+
switch (ins)
10654+
{
10655+
case INS_vinserti128:
10656+
case INS_vinserti32x8:
10657+
if (operSize != 4)
10658+
{
10659+
isEmbeddedMask = false;
10660+
}
10661+
break;
10662+
case INS_vinserti64x2:
10663+
if (operSize != 8)
10664+
{
10665+
isEmbeddedMask = false;
10666+
}
10667+
break;
10668+
default:
10669+
break;
10670+
}
10671+
}
10672+
}
1063110673
}
1063210674

1063310675
if (isEmbeddedMask)
1063410676
{
10635-
uint32_t maskSize = genTypeSize(simdBaseType);
10636-
uint32_t operSize = genTypeSize(op2->AsHWIntrinsic()->GetSimdBaseType());
10637-
10638-
if ((maskSize == operSize) && IsInvariantInRange(op2, node))
10677+
if (IsInvariantInRange(op2, node))
1063910678
{
1064010679
MakeSrcContained(node, op2);
1064110680
op2->MakeEmbMaskOp();
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
//
4+
// Generated by Fuzzlyn v2.5 on 2025-04-22 17:32:36
5+
// Run on X64 Windows
6+
// Seed: 7915602115310323123-vectort,vector128,vector256,vector512,x86aes,x86avx,x86avx2,x86avx512bw,x86avx512bwvl,x86avx512cd,x86avx512cdvl,x86avx512dq,x86avx512dqvl,x86avx512f,x86avx512fvl,x86avx512fx64,x86avx512vbmi,x86avx512vbmivl,x86bmi1,x86bmi1x64,x86bmi2,x86bmi2x64,x86fma,x86lzcnt,x86lzcntx64,x86pclmulqdq,x86popcnt,x86popcntx64,x86sse,x86ssex64,x86sse2,x86sse2x64,x86sse3,x86sse41,x86sse41x64,x86sse42,x86sse42x64,x86ssse3,x86x86base
7+
// Reduced from 123.1 KiB to 0.5 KiB in 00:00:46
8+
// Debug: Outputs <0, 0, 0, 0, 0, 0, 0, 0>
9+
// Release: Outputs <0, 0, 0, 0, -1, -1, -1, -1>
10+
11+
using System;
12+
using System.Numerics;
13+
using System.Runtime.Intrinsics;
14+
using System.Runtime.Intrinsics.X86;
15+
using Xunit;
16+
17+
// Regression test for issue 114921 (first Fuzzlyn repro): a 64-bit-element
// BlendVariable whose second operand is an InsertVector128 result.
// Keep the statements exactly as reduced; the test exists to exercise this
// specific expression shape in the JIT.
public class Runtime_114921
18+
{
// All eight long elements start as -1, so every bit of every element is set.
19+
public static Vector512<long> s_4 = Vector512.Create<long>(-1);
// Never assigned in this file, so it holds the default (all-zero) vector.
20+
public static Vector128<long> s_8;
21+
22+
[Fact]
23+
public static void Problem1()
24+
{
// The body only runs when AVX-512F is available; otherwise the test does nothing.
25+
if (Avx512F.IsSupported)
26+
{
27+
var vr1 = Vector512.Create<long>(0);
// s_4 is passed both as the first blend operand and as the mask, while the
// second operand is the zero vector vr1 with the zero vector s_8 inserted at index 0.
28+
s_4 = Avx512F.BlendVariable(s_4, Avx512F.InsertVector128(vr1, s_8, 0), s_4);
29+
System.Console.WriteLine(s_4);
// Expected result is all zeros; the repro header records that the miscompiled
// Release build left -1 in the upper four elements.
30+
Assert.Equal(Vector512.Create(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), s_4);
31+
}
32+
}
33+
}
34+
35+
// Generated by Fuzzlyn v2.5 on 2025-04-22 17:37:13
36+
// Run on X64 Windows
37+
// Seed: 14731447107126414231-vectort,vector128,vector256,vector512,x86aes,x86avx,x86avx2,x86avx512bw,x86avx512bwvl,x86avx512cd,x86avx512cdvl,x86avx512dq,x86avx512dqvl,x86avx512f,x86avx512fvl,x86avx512fx64,x86avx512vbmi,x86avx512vbmivl,x86bmi1,x86bmi1x64,x86bmi2,x86bmi2x64,x86fma,x86lzcnt,x86lzcntx64,x86pclmulqdq,x86popcnt,x86popcntx64,x86sse,x86ssex64,x86sse2,x86sse2x64,x86sse3,x86sse41,x86sse41x64,x86sse42,x86sse42x64,x86ssse3,x86x86base
38+
// Reduced from 217.7 KiB to 1.0 KiB in 00:02:50
39+
// Debug: Outputs <9223372036854775807, 0, 0, 0, 0, 0, 0, 0>
40+
// Release: Outputs <4294967295, 0, 0, 0, 0, 0, 0, 0>
41+
42+
// Struct wrapper around a single Vector128<long>. Problem2 builds one and then
// reads the vector back out of F0 before passing it to InsertVector128.
public struct S2
43+
{
44+
public Vector128<long> F0;
// Chains to the parameterless constructor first, then stores f0 in F0.
45+
public S2(Vector128<long> f0) : this()
46+
{
47+
F0 = f0;
48+
}
49+
}
50+
51+
// Regression test for issue 114921 (second Fuzzlyn repro): a 64-bit-element
// BlendVariable over an InsertVector128 result, using a mask in which only
// element 0 is non-zero.
// Keep the statements exactly as reduced; the test exists to exercise this
// specific expression shape in the JIT.
public class Runtime_114921_2
52+
{
// Output sink used by Problem2; assigned inside the test before use.
53+
public static IRuntime s_rt;
54+
55+
[Fact]
56+
public static void Problem2()
57+
{
// The body only runs when AVX-512F is available; otherwise the test does nothing.
58+
if (Avx512F.IsSupported)
59+
{
60+
s_rt = new Runtime();
61+
long vr6 = default(long);
// Scalar vector carrying long.MaxValue, wrapped in S2 and read back via F0 below.
62+
S2 vr7 = new S2(Vector128.CreateScalar(9223372036854775807L));
63+
Vector512<long> vr14 = default(Vector512<long>);
64+
var vr9 = Vector512.Create<long>(vr6);
65+
var vr10 = vr7.F0;
// Inserts vr10 at index 0 of the all-zero vector vr9.
66+
var vr11 = Avx512F.InsertVector128(vr9, vr10, 0);
// Mask vector: built with CreateScalar from long.MinValue (only the sign bit set).
67+
var vr12 = Vector512.CreateScalar(-9223372036854775808L);
68+
var vr13 = Avx512F.BlendVariable(vr14, vr11, vr12);
69+
s_rt.WriteLine(vr13);
// Expected: element 0 is long.MaxValue and the rest are zero. The repro header
// records that the miscompiled Release build produced 4294967295 in element 0.
70+
Assert.Equal(Vector512.Create(9223372036854775807L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), vr13);
71+
}
72+
}
73+
}
74+
75+
// Output abstraction used by the second repro: Problem2 prints its result
// through a static field of this interface type.
public interface IRuntime
76+
{
// Writes a single value of any type T.
77+
void WriteLine<T>(T value);
78+
}
79+
80+
// IRuntime implementation that forwards each value to the console.
public class Runtime : IRuntime
81+
{
// Delegates directly to System.Console.WriteLine.
82+
public void WriteLine<T>(T value) => System.Console.WriteLine(value);
83+
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
<!-- Optimize is enabled presumably because the repro comments in the test source record the wrong output only for Release; confirm before changing. -->
2+
<PropertyGroup>
3+
<Optimize>True</Optimize>
4+
</PropertyGroup>
<!-- Compiles the single source file that shares this project's name. -->
5+
<ItemGroup>
6+
<Compile Include="$(MSBuildProjectName).cs" />
7+
</ItemGroup>
8+
</Project>

0 commit comments

Comments
 (0)