Skip to content

Commit 2e44b07

Browse files
authored
[X86] Do not directly fold for VINSERTPS (llvm#65718)
Folding for VINSERTPS was already customized in 7e6606f, which does the folding only when the alignment is >= 4 bytes. We cannot fold it arbitrarily like other instructions, because we need to calculate the source offset.
1 parent 9426416 commit 2e44b07

File tree

3 files changed

+6
-4
lines changed

3 files changed

+6
-4
lines changed

llvm/test/CodeGen/X86/avx.ll

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -184,12 +184,14 @@ define <4 x float> @nofold_insertps(ptr %a, <4 x float> %b) {
184184
; X86-LABEL: nofold_insertps:
185185
; X86: ## %bb.0:
186186
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
187-
; X86-NEXT: vinsertps $176, (%eax), %xmm0, %xmm0 ## xmm0 = xmm0[0,1,2],mem[2]
187+
; X86-NEXT: vmovups (%eax), %xmm1
188+
; X86-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[2]
188189
; X86-NEXT: retl
189190
;
190191
; X64-LABEL: nofold_insertps:
191192
; X64: ## %bb.0:
192-
; X64-NEXT: vinsertps $176, (%rdi), %xmm0, %xmm0 ## xmm0 = xmm0[0,1,2],mem[2]
193+
; X64-NEXT: vmovups (%rdi), %xmm1
194+
; X64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[2]
193195
; X64-NEXT: retq
194196
%1 = load <4 x float>, ptr %a, align 1
195197
%2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 6>

llvm/test/TableGen/x86-fold-tables.inc

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2105,8 +2105,6 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
21052105
{X86::VINSERTI64x2Z256rr, X86::VINSERTI64x2Z256rm, 0},
21062106
{X86::VINSERTI64x2Zrr, X86::VINSERTI64x2Zrm, 0},
21072107
{X86::VINSERTI64x4Zrr, X86::VINSERTI64x4Zrm, 0},
2108-
{X86::VINSERTPSZrr, X86::VINSERTPSZrm, TB_NO_REVERSE},
2109-
{X86::VINSERTPSrr, X86::VINSERTPSrm, TB_NO_REVERSE},
21102108
{X86::VMAXCPDYrr, X86::VMAXCPDYrm, 0},
21112109
{X86::VMAXCPDZ128rr, X86::VMAXCPDZ128rm, 0},
21122110
{X86::VMAXCPDZ256rr, X86::VMAXCPDZ256rm, 0},

llvm/utils/TableGen/X86ManualFoldTables.def

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -225,6 +225,8 @@ NOFOLD(MMX_MOVQ64rr_REV)
225225
// =>
226226
// insertpsrm xmm1, m32, imm
227227
NOFOLD(INSERTPSrr)
228+
NOFOLD(VINSERTPSZrr)
229+
NOFOLD(VINSERTPSrr)
228230
#undef NOFOLD
229231

230232
#ifndef ENTRY

0 commit comments

Comments
 (0)