From 2ea27a1b4500aa975ccb4dfe1e28fe6367c8dd2a Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 17 Aug 2023 11:55:38 +0200 Subject: [PATCH] [X86] Fix i128 argument passing under SysV ABI The x86_64 SysV ABI specifies that __int128 is passed either in two registers (if available) or in a 16 byte aligned stack slot. GCC implements this behavior. However, if only one free register is available, LLVM will instead pass one half of the i128 in a register, and the other on the stack. Make sure that either both are passed in registers or both on the stack. Fixes https://github.com/llvm/llvm-project/issues/41784. The patch is basically what craig.topper proposed to do there. Differential Revision: https://reviews.llvm.org/D158169 --- clang/docs/ReleaseNotes.rst | 2 ++ llvm/lib/Target/X86/X86CallingConv.td | 8 ++++++ llvm/test/CodeGen/X86/addcarry.ll | 2 +- llvm/test/CodeGen/X86/i128-abi.ll | 19 ++++++++------ llvm/test/CodeGen/X86/sadd_sat_vec.ll | 36 +++++++++++++-------------- llvm/test/CodeGen/X86/ssub_sat_vec.ll | 36 +++++++++++++-------------- llvm/test/CodeGen/X86/subcarry.ll | 2 +- llvm/test/CodeGen/X86/uadd_sat_vec.ll | 8 +++--- llvm/test/CodeGen/X86/usub_sat_vec.ll | 8 +++--- 9 files changed, 68 insertions(+), 53 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 94418a08671d7..57f945f266821 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -46,6 +46,8 @@ C++ Specific Potentially Breaking Changes ABI Changes in This Version --------------------------- +- Following the SystemV ABI for x86-64, ``__int128`` arguments will no longer + be split between a register and a stack slot. What's New in Clang |release|? 
============================== diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td index 3ce59dc4aa61b..19a295cd10962 100644 --- a/llvm/lib/Target/X86/X86CallingConv.td +++ b/llvm/lib/Target/X86/X86CallingConv.td @@ -572,6 +572,14 @@ def CC_X86_64_C : CallingConv<[ // The first 6 integer arguments are passed in integer registers. CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D, R9D]>>, + + // i128 can be either passed in two i64 registers, or on the stack, but + // not split across register and stack. As such, do not allow using R9 + // for a split i64. + CCIfType<[i64], + CCIfSplit<CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>>, + CCIfType<[i64], CCIfSplit<CCAssignToStackWithShadow<8, 16, [R9]>>>, + CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8 , R9 ]>>, // The first 8 MMX vector arguments are passed in XMM registers on Darwin. diff --git a/llvm/test/CodeGen/X86/addcarry.ll b/llvm/test/CodeGen/X86/addcarry.ll index af8f921ef9773..231645f641591 100644 --- a/llvm/test/CodeGen/X86/addcarry.ll +++ b/llvm/test/CodeGen/X86/addcarry.ll @@ -48,7 +48,7 @@ define i256 @add256(i256 %a, i256 %b) nounwind { ; CHECK-LABEL: add256: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: addq %r9, %rsi +; CHECK-NEXT: addq {{[0-9]+}}(%rsp), %rsi ; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %rdx ; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %rcx ; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %r8 diff --git a/llvm/test/CodeGen/X86/i128-abi.ll b/llvm/test/CodeGen/X86/i128-abi.ll index 1d1df9d592d72..d1d6f86e08fb8 100644 --- a/llvm/test/CodeGen/X86/i128-abi.ll +++ b/llvm/test/CodeGen/X86/i128-abi.ll @@ -13,8 +13,8 @@ define i128 @in_reg(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i128 %a4) { define i128 @on_stack(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i128 %a5) { ; CHECK-LABEL: on_stack: ; CHECK: # %bb.0: -; CHECK-NEXT: movq %r9, %rax -; CHECK-NEXT: movq 8(%rsp), %rdx +; CHECK-NEXT: movq 8(%rsp), %rax +; CHECK-NEXT: movq 16(%rsp), %rdx ; CHECK-NEXT: retq ret i128 %a5 } @@ -22,7 +22,7 @@ define i128 @on_stack(i64 %a0, i64 %a1, 
i64 %a2, i64 %a3, i64 %a4, i128 %a5) { define i64 @trailing_arg_on_stack(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i128 %a5, i64 %a6) { ; CHECK-LABEL: trailing_arg_on_stack: ; CHECK: # %bb.0: -; CHECK-NEXT: movq 16(%rsp), %rax +; CHECK-NEXT: movq 24(%rsp), %rax ; CHECK-NEXT: retq ret i64 %a6 } @@ -48,14 +48,17 @@ define void @call_on_stack(i128 %x) nounwind { ; CHECK-LABEL: call_on_stack: ; CHECK: # %bb.0: ; CHECK-NEXT: pushq %rax +; CHECK-NEXT: movq %rsi, %rax ; CHECK-NEXT: movq %rdi, %r9 -; CHECK-NEXT: movq %rsi, (%rsp) ; CHECK-NEXT: movl $1, %esi ; CHECK-NEXT: movl $2, %edx ; CHECK-NEXT: movl $3, %ecx ; CHECK-NEXT: movl $4, %r8d ; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: pushq %r9 ; CHECK-NEXT: callq on_stack@PLT +; CHECK-NEXT: addq $16, %rsp ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq call i128 @on_stack(i64 0, i64 1, i64 2, i64 3, i64 4, i128 %x) @@ -67,17 +70,19 @@ define void @call_trailing_arg_on_stack(i128 %x, i64 %y) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: movq %rdx, %rax -; CHECK-NEXT: movq %rsi, %r10 -; CHECK-NEXT: movq %rdi, %r9 +; CHECK-NEXT: movq %rsi, %r9 +; CHECK-NEXT: movq %rdi, %r10 +; CHECK-NEXT: subq $8, %rsp ; CHECK-NEXT: movl $1, %esi ; CHECK-NEXT: movl $2, %edx ; CHECK-NEXT: movl $3, %ecx ; CHECK-NEXT: movl $4, %r8d ; CHECK-NEXT: xorl %edi, %edi ; CHECK-NEXT: pushq %rax +; CHECK-NEXT: pushq %r9 ; CHECK-NEXT: pushq %r10 ; CHECK-NEXT: callq trailing_arg_on_stack@PLT -; CHECK-NEXT: addq $16, %rsp +; CHECK-NEXT: addq $32, %rsp ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq call i128 @trailing_arg_on_stack(i64 0, i64 1, i64 2, i64 3, i64 4, i128 %x, i64 %y) diff --git a/llvm/test/CodeGen/X86/sadd_sat_vec.ll b/llvm/test/CodeGen/X86/sadd_sat_vec.ll index 8d914ba81a096..45a8a6fd5449a 100644 --- a/llvm/test/CodeGen/X86/sadd_sat_vec.ll +++ b/llvm/test/CodeGen/X86/sadd_sat_vec.ll @@ -1795,27 +1795,27 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind { ; SSE-NEXT: addq 
{{[0-9]+}}(%rsp), %rcx ; SSE-NEXT: adcq {{[0-9]+}}(%rsp), %r8 ; SSE-NEXT: seto %dil -; SSE-NEXT: movq %r8, %r10 -; SSE-NEXT: sarq $63, %r10 +; SSE-NEXT: movq %r8, %r9 +; SSE-NEXT: sarq $63, %r9 ; SSE-NEXT: testb %dil, %dil -; SSE-NEXT: cmovneq %r10, %rcx -; SSE-NEXT: movabsq $-9223372036854775808, %r11 # imm = 0x8000000000000000 -; SSE-NEXT: xorq %r11, %r10 +; SSE-NEXT: cmovneq %r9, %rcx +; SSE-NEXT: movabsq $-9223372036854775808, %r10 # imm = 0x8000000000000000 +; SSE-NEXT: xorq %r10, %r9 ; SSE-NEXT: testb %dil, %dil -; SSE-NEXT: cmoveq %r8, %r10 -; SSE-NEXT: addq %r9, %rsi +; SSE-NEXT: cmoveq %r8, %r9 +; SSE-NEXT: addq {{[0-9]+}}(%rsp), %rsi ; SSE-NEXT: adcq {{[0-9]+}}(%rsp), %rdx ; SSE-NEXT: seto %dil ; SSE-NEXT: movq %rdx, %r8 ; SSE-NEXT: sarq $63, %r8 ; SSE-NEXT: testb %dil, %dil ; SSE-NEXT: cmovneq %r8, %rsi -; SSE-NEXT: xorq %r11, %r8 +; SSE-NEXT: xorq %r10, %r8 ; SSE-NEXT: testb %dil, %dil ; SSE-NEXT: cmoveq %rdx, %r8 ; SSE-NEXT: movq %rcx, 16(%rax) ; SSE-NEXT: movq %rsi, (%rax) -; SSE-NEXT: movq %r10, 24(%rax) +; SSE-NEXT: movq %r9, 24(%rax) ; SSE-NEXT: movq %r8, 8(%rax) ; SSE-NEXT: retq ; @@ -1825,27 +1825,27 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind { ; AVX-NEXT: addq {{[0-9]+}}(%rsp), %rcx ; AVX-NEXT: adcq {{[0-9]+}}(%rsp), %r8 ; AVX-NEXT: seto %dil -; AVX-NEXT: movq %r8, %r10 -; AVX-NEXT: sarq $63, %r10 +; AVX-NEXT: movq %r8, %r9 +; AVX-NEXT: sarq $63, %r9 ; AVX-NEXT: testb %dil, %dil -; AVX-NEXT: cmovneq %r10, %rcx -; AVX-NEXT: movabsq $-9223372036854775808, %r11 # imm = 0x8000000000000000 -; AVX-NEXT: xorq %r11, %r10 +; AVX-NEXT: cmovneq %r9, %rcx +; AVX-NEXT: movabsq $-9223372036854775808, %r10 # imm = 0x8000000000000000 +; AVX-NEXT: xorq %r10, %r9 ; AVX-NEXT: testb %dil, %dil -; AVX-NEXT: cmoveq %r8, %r10 -; AVX-NEXT: addq %r9, %rsi +; AVX-NEXT: cmoveq %r8, %r9 +; AVX-NEXT: addq {{[0-9]+}}(%rsp), %rsi ; AVX-NEXT: adcq {{[0-9]+}}(%rsp), %rdx ; AVX-NEXT: seto %dil ; AVX-NEXT: movq %rdx, %r8 ; AVX-NEXT: sarq $63, %r8 ; 
AVX-NEXT: testb %dil, %dil ; AVX-NEXT: cmovneq %r8, %rsi -; AVX-NEXT: xorq %r11, %r8 +; AVX-NEXT: xorq %r10, %r8 ; AVX-NEXT: testb %dil, %dil ; AVX-NEXT: cmoveq %rdx, %r8 ; AVX-NEXT: movq %rcx, 16(%rax) ; AVX-NEXT: movq %rsi, (%rax) -; AVX-NEXT: movq %r10, 24(%rax) +; AVX-NEXT: movq %r9, 24(%rax) ; AVX-NEXT: movq %r8, 8(%rax) ; AVX-NEXT: retq %z = call <2 x i128> @llvm.sadd.sat.v2i128(<2 x i128> %x, <2 x i128> %y) diff --git a/llvm/test/CodeGen/X86/ssub_sat_vec.ll b/llvm/test/CodeGen/X86/ssub_sat_vec.ll index 14f1985c60ff6..d99d5aaa87536 100644 --- a/llvm/test/CodeGen/X86/ssub_sat_vec.ll +++ b/llvm/test/CodeGen/X86/ssub_sat_vec.ll @@ -2026,27 +2026,27 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind { ; SSE-NEXT: subq {{[0-9]+}}(%rsp), %rcx ; SSE-NEXT: sbbq {{[0-9]+}}(%rsp), %r8 ; SSE-NEXT: seto %dil -; SSE-NEXT: movq %r8, %r10 -; SSE-NEXT: sarq $63, %r10 +; SSE-NEXT: movq %r8, %r9 +; SSE-NEXT: sarq $63, %r9 ; SSE-NEXT: testb %dil, %dil -; SSE-NEXT: cmovneq %r10, %rcx -; SSE-NEXT: movabsq $-9223372036854775808, %r11 # imm = 0x8000000000000000 -; SSE-NEXT: xorq %r11, %r10 +; SSE-NEXT: cmovneq %r9, %rcx +; SSE-NEXT: movabsq $-9223372036854775808, %r10 # imm = 0x8000000000000000 +; SSE-NEXT: xorq %r10, %r9 ; SSE-NEXT: testb %dil, %dil -; SSE-NEXT: cmoveq %r8, %r10 -; SSE-NEXT: subq %r9, %rsi +; SSE-NEXT: cmoveq %r8, %r9 +; SSE-NEXT: subq {{[0-9]+}}(%rsp), %rsi ; SSE-NEXT: sbbq {{[0-9]+}}(%rsp), %rdx ; SSE-NEXT: seto %dil ; SSE-NEXT: movq %rdx, %r8 ; SSE-NEXT: sarq $63, %r8 ; SSE-NEXT: testb %dil, %dil ; SSE-NEXT: cmovneq %r8, %rsi -; SSE-NEXT: xorq %r11, %r8 +; SSE-NEXT: xorq %r10, %r8 ; SSE-NEXT: testb %dil, %dil ; SSE-NEXT: cmoveq %rdx, %r8 ; SSE-NEXT: movq %rcx, 16(%rax) ; SSE-NEXT: movq %rsi, (%rax) -; SSE-NEXT: movq %r10, 24(%rax) +; SSE-NEXT: movq %r9, 24(%rax) ; SSE-NEXT: movq %r8, 8(%rax) ; SSE-NEXT: retq ; @@ -2056,27 +2056,27 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind { ; AVX-NEXT: subq {{[0-9]+}}(%rsp), %rcx ; 
AVX-NEXT: sbbq {{[0-9]+}}(%rsp), %r8 ; AVX-NEXT: seto %dil -; AVX-NEXT: movq %r8, %r10 -; AVX-NEXT: sarq $63, %r10 +; AVX-NEXT: movq %r8, %r9 +; AVX-NEXT: sarq $63, %r9 ; AVX-NEXT: testb %dil, %dil -; AVX-NEXT: cmovneq %r10, %rcx -; AVX-NEXT: movabsq $-9223372036854775808, %r11 # imm = 0x8000000000000000 -; AVX-NEXT: xorq %r11, %r10 +; AVX-NEXT: cmovneq %r9, %rcx +; AVX-NEXT: movabsq $-9223372036854775808, %r10 # imm = 0x8000000000000000 +; AVX-NEXT: xorq %r10, %r9 ; AVX-NEXT: testb %dil, %dil -; AVX-NEXT: cmoveq %r8, %r10 -; AVX-NEXT: subq %r9, %rsi +; AVX-NEXT: cmoveq %r8, %r9 +; AVX-NEXT: subq {{[0-9]+}}(%rsp), %rsi ; AVX-NEXT: sbbq {{[0-9]+}}(%rsp), %rdx ; AVX-NEXT: seto %dil ; AVX-NEXT: movq %rdx, %r8 ; AVX-NEXT: sarq $63, %r8 ; AVX-NEXT: testb %dil, %dil ; AVX-NEXT: cmovneq %r8, %rsi -; AVX-NEXT: xorq %r11, %r8 +; AVX-NEXT: xorq %r10, %r8 ; AVX-NEXT: testb %dil, %dil ; AVX-NEXT: cmoveq %rdx, %r8 ; AVX-NEXT: movq %rcx, 16(%rax) ; AVX-NEXT: movq %rsi, (%rax) -; AVX-NEXT: movq %r10, 24(%rax) +; AVX-NEXT: movq %r9, 24(%rax) ; AVX-NEXT: movq %r8, 8(%rax) ; AVX-NEXT: retq %z = call <2 x i128> @llvm.ssub.sat.v2i128(<2 x i128> %x, <2 x i128> %y) diff --git a/llvm/test/CodeGen/X86/subcarry.ll b/llvm/test/CodeGen/X86/subcarry.ll index 9538ea1061cd1..1e9db9f55a8d5 100644 --- a/llvm/test/CodeGen/X86/subcarry.ll +++ b/llvm/test/CodeGen/X86/subcarry.ll @@ -21,7 +21,7 @@ define i256 @sub256(i256 %a, i256 %b) nounwind { ; CHECK-LABEL: sub256: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: subq %r9, %rsi +; CHECK-NEXT: subq {{[0-9]+}}(%rsp), %rsi ; CHECK-NEXT: sbbq {{[0-9]+}}(%rsp), %rdx ; CHECK-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx ; CHECK-NEXT: sbbq {{[0-9]+}}(%rsp), %r8 diff --git a/llvm/test/CodeGen/X86/uadd_sat_vec.ll b/llvm/test/CodeGen/X86/uadd_sat_vec.ll index f97603ebea92b..50c73009314a9 100644 --- a/llvm/test/CodeGen/X86/uadd_sat_vec.ll +++ b/llvm/test/CodeGen/X86/uadd_sat_vec.ll @@ -1161,11 +1161,11 @@ define <2 x i128> @v2i128(<2 x i128> %x, 
<2 x i128> %y) nounwind { ; SSE-LABEL: v2i128: ; SSE: # %bb.0: ; SSE-NEXT: movq %rdi, %rax -; SSE-NEXT: addq %r9, %rsi +; SSE-NEXT: addq {{[0-9]+}}(%rsp), %rsi ; SSE-NEXT: adcq {{[0-9]+}}(%rsp), %rdx ; SSE-NEXT: movq $-1, %rdi -; SSE-NEXT: cmovbq %rdi, %rsi ; SSE-NEXT: cmovbq %rdi, %rdx +; SSE-NEXT: cmovbq %rdi, %rsi ; SSE-NEXT: addq {{[0-9]+}}(%rsp), %rcx ; SSE-NEXT: adcq {{[0-9]+}}(%rsp), %r8 ; SSE-NEXT: cmovbq %rdi, %r8 @@ -1179,11 +1179,11 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind { ; AVX-LABEL: v2i128: ; AVX: # %bb.0: ; AVX-NEXT: movq %rdi, %rax -; AVX-NEXT: addq %r9, %rsi +; AVX-NEXT: addq {{[0-9]+}}(%rsp), %rsi ; AVX-NEXT: adcq {{[0-9]+}}(%rsp), %rdx ; AVX-NEXT: movq $-1, %rdi -; AVX-NEXT: cmovbq %rdi, %rsi ; AVX-NEXT: cmovbq %rdi, %rdx +; AVX-NEXT: cmovbq %rdi, %rsi ; AVX-NEXT: addq {{[0-9]+}}(%rsp), %rcx ; AVX-NEXT: adcq {{[0-9]+}}(%rsp), %r8 ; AVX-NEXT: cmovbq %rdi, %r8 diff --git a/llvm/test/CodeGen/X86/usub_sat_vec.ll b/llvm/test/CodeGen/X86/usub_sat_vec.ll index a9cf02991d428..8823b98c4ff8a 100644 --- a/llvm/test/CodeGen/X86/usub_sat_vec.ll +++ b/llvm/test/CodeGen/X86/usub_sat_vec.ll @@ -1057,10 +1057,10 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind { ; SSE: # %bb.0: ; SSE-NEXT: movq %rdi, %rax ; SSE-NEXT: xorl %edi, %edi -; SSE-NEXT: subq %r9, %rsi +; SSE-NEXT: subq {{[0-9]+}}(%rsp), %rsi ; SSE-NEXT: sbbq {{[0-9]+}}(%rsp), %rdx -; SSE-NEXT: cmovbq %rdi, %rsi ; SSE-NEXT: cmovbq %rdi, %rdx +; SSE-NEXT: cmovbq %rdi, %rsi ; SSE-NEXT: subq {{[0-9]+}}(%rsp), %rcx ; SSE-NEXT: sbbq {{[0-9]+}}(%rsp), %r8 ; SSE-NEXT: cmovbq %rdi, %r8 @@ -1075,10 +1075,10 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind { ; AVX: # %bb.0: ; AVX-NEXT: movq %rdi, %rax ; AVX-NEXT: xorl %edi, %edi -; AVX-NEXT: subq %r9, %rsi +; AVX-NEXT: subq {{[0-9]+}}(%rsp), %rsi ; AVX-NEXT: sbbq {{[0-9]+}}(%rsp), %rdx -; AVX-NEXT: cmovbq %rdi, %rsi ; AVX-NEXT: cmovbq %rdi, %rdx +; AVX-NEXT: cmovbq %rdi, %rsi ; AVX-NEXT: subq 
{{[0-9]+}}(%rsp), %rcx ; AVX-NEXT: sbbq {{[0-9]+}}(%rsp), %r8 ; AVX-NEXT: cmovbq %rdi, %r8