Skip to content

Commit

Permalink
[AArch64][SME] Fix generating incorrect TBZ when lowering lazy save. (llvm#68429)
Browse files Browse the repository at this point in the history

After calling __arm_sme_state, the -S assembly would show clang generating
a “tbz xN, #0, Lbb”. However, disassembling the object code showed that the
instruction was actually encoded as “tbz xN, #32, Lbb”. The issue is that
for TBZ, if you want a bit offset <32 you need to use the W variant, since
the instruction overloads the top bit of the immediate as the register-width
selector.
  • Loading branch information
aemerson committed Oct 6, 2023
1 parent 7050ff4 commit f045f2c
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 25 deletions.
6 changes: 4 additions & 2 deletions llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1005,10 +1005,12 @@ AArch64ExpandPseudo::expandCondSMToggle(MachineBasicBlock &MBB,
// expected value for the callee (0 for a normal callee and 1 for a streaming
// callee).
auto PStateSM = MI.getOperand(2).getReg();
auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
unsigned SMReg32 = TRI->getSubReg(PStateSM, AArch64::sub_32);
bool IsStreamingCallee = MI.getOperand(3).getImm();
unsigned Opc = IsStreamingCallee ? AArch64::TBZX : AArch64::TBNZX;
unsigned Opc = IsStreamingCallee ? AArch64::TBZW : AArch64::TBNZW;
MachineInstrBuilder Tbx =
BuildMI(MBB, MBBI, DL, TII->get(Opc)).addReg(PStateSM).addImm(0);
BuildMI(MBB, MBBI, DL, TII->get(Opc)).addReg(SMReg32).addImm(0);

// Split MBB and create two new blocks:
// - MBB now contains all instructions before MSRcond_pstatesvcrImm1.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@ define void @streaming_compatible() #0 {
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
; CHECK-NEXT: tbz x19, #0, .LBB0_2
; CHECK-NEXT: tbz w19, #0, .LBB0_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: bl non_streaming
; CHECK-NEXT: tbz x19, #0, .LBB0_4
; CHECK-NEXT: tbz w19, #0, .LBB0_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB0_4:
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
Original file line number Diff line number Diff line change
Expand Up @@ -413,14 +413,14 @@ define float @frem_call_sm_compat(float %a, float %b) "aarch64_pstate_sm_compati
; CHECK-COMMON-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: bl __arm_sme_state
; CHECK-COMMON-NEXT: and x19, x0, #0x1
; CHECK-COMMON-NEXT: tbz x19, #0, .LBB12_2
; CHECK-COMMON-NEXT: tbz w19, #0, .LBB12_2
; CHECK-COMMON-NEXT: // %bb.1:
; CHECK-COMMON-NEXT: smstop sm
; CHECK-COMMON-NEXT: .LBB12_2:
; CHECK-COMMON-NEXT: ldp s0, s1, [sp, #8] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: bl fmodf
; CHECK-COMMON-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
; CHECK-COMMON-NEXT: tbz x19, #0, .LBB12_4
; CHECK-COMMON-NEXT: tbz w19, #0, .LBB12_4
; CHECK-COMMON-NEXT: // %bb.3:
; CHECK-COMMON-NEXT: smstart sm
; CHECK-COMMON-NEXT: .LBB12_4:
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
Original file line number Diff line number Diff line change
Expand Up @@ -134,12 +134,12 @@ define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_pstate_z
; CHECK-NEXT: msr TPIDR2_EL0, x9
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
; CHECK-NEXT: tbz x19, #0, .LBB3_2
; CHECK-NEXT: tbz w19, #0, .LBB3_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB3_2:
; CHECK-NEXT: bl private_za_callee
; CHECK-NEXT: tbz x19, #0, .LBB3_4
; CHECK-NEXT: tbz w19, #0, .LBB3_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB3_4:
Expand Down Expand Up @@ -187,12 +187,12 @@ define void @za_shared_caller_za_preserved_callee() nounwind "aarch64_pstate_za_
; CHECK-NEXT: msr TPIDR2_EL0, x8
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
; CHECK-NEXT: tbz x19, #0, .LBB4_2
; CHECK-NEXT: tbz w19, #0, .LBB4_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB4_2:
; CHECK-NEXT: bl private_za_preserved_callee
; CHECK-NEXT: tbz x19, #0, .LBB4_4
; CHECK-NEXT: tbz w19, #0, .LBB4_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB4_4:
Expand Down
30 changes: 15 additions & 15 deletions llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,12 @@ define void @streaming_compatible_caller_normal_callee() "aarch64_pstate_sm_comp
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
; CHECK-NEXT: tbz x19, #0, .LBB1_2
; CHECK-NEXT: tbz w19, #0, .LBB1_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: bl normal_callee
; CHECK-NEXT: tbz x19, #0, .LBB1_4
; CHECK-NEXT: tbz w19, #0, .LBB1_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB1_4:
Expand Down Expand Up @@ -79,12 +79,12 @@ define void @streaming_compatible_caller_streaming_callee() "aarch64_pstate_sm_c
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
; CHECK-NEXT: tbnz x19, #0, .LBB2_2
; CHECK-NEXT: tbnz w19, #0, .LBB2_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB2_2:
; CHECK-NEXT: bl streaming_callee
; CHECK-NEXT: tbnz x19, #0, .LBB2_4
; CHECK-NEXT: tbnz w19, #0, .LBB2_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB2_4:
Expand Down Expand Up @@ -134,7 +134,7 @@ define <2 x double> @streaming_compatible_with_neon_vectors(<2 x double> %arg) "
; CHECK-NEXT: str z0, [sp, #1, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
; CHECK-NEXT: tbz x19, #0, .LBB4_2
; CHECK-NEXT: tbz w19, #0, .LBB4_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB4_2:
Expand All @@ -143,7 +143,7 @@ define <2 x double> @streaming_compatible_with_neon_vectors(<2 x double> %arg) "
; CHECK-NEXT: bl normal_callee_vec_arg
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: tbz x19, #0, .LBB4_4
; CHECK-NEXT: tbz w19, #0, .LBB4_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB4_4:
Expand Down Expand Up @@ -204,14 +204,14 @@ define <vscale x 2 x double> @streaming_compatible_with_scalable_vectors(<vscale
; CHECK-NEXT: str z0, [sp, #1, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
; CHECK-NEXT: tbz x19, #0, .LBB5_2
; CHECK-NEXT: tbz w19, #0, .LBB5_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB5_2:
; CHECK-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: bl normal_callee_scalable_vec_arg
; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: tbz x19, #0, .LBB5_4
; CHECK-NEXT: tbz w19, #0, .LBB5_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB5_4:
Expand Down Expand Up @@ -296,14 +296,14 @@ define <vscale x 2 x i1> @streaming_compatible_with_predicate_vectors(<vscale x
; CHECK-NEXT: str p0, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
; CHECK-NEXT: tbz x19, #0, .LBB6_2
; CHECK-NEXT: tbz w19, #0, .LBB6_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB6_2:
; CHECK-NEXT: ldr p0, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: bl normal_callee_predicate_vec_arg
; CHECK-NEXT: str p0, [sp, #6, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: tbz x19, #0, .LBB6_4
; CHECK-NEXT: tbz w19, #0, .LBB6_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB6_4:
Expand Down Expand Up @@ -360,7 +360,7 @@ define i32 @conditional_smstart_unreachable_block() "aarch64_pstate_sm_compatibl
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
; CHECK-NEXT: tbnz x19, #0, .LBB7_2
; CHECK-NEXT: tbnz w19, #0, .LBB7_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB7_2:
Expand All @@ -381,12 +381,12 @@ define void @conditional_smstart_no_successor_block(i1 %p) "aarch64_pstate_sm_co
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
; CHECK-NEXT: tbnz x19, #0, .LBB8_3
; CHECK-NEXT: tbnz w19, #0, .LBB8_3
; CHECK-NEXT: // %bb.2: // %if.then
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB8_3: // %if.then
; CHECK-NEXT: bl streaming_callee
; CHECK-NEXT: tbnz x19, #0, .LBB8_5
; CHECK-NEXT: tbnz w19, #0, .LBB8_5
; CHECK-NEXT: // %bb.4: // %if.then
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB8_5: // %if.then
Expand Down Expand Up @@ -417,12 +417,12 @@ define void @disable_tailcallopt() "aarch64_pstate_sm_compatible" nounwind {
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
; CHECK-NEXT: tbz x19, #0, .LBB9_2
; CHECK-NEXT: tbz w19, #0, .LBB9_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB9_2:
; CHECK-NEXT: bl normal_callee
; CHECK-NEXT: tbz x19, #0, .LBB9_4
; CHECK-NEXT: tbz w19, #0, .LBB9_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB9_4:
Expand Down

0 comments on commit f045f2c

Please sign in to comment.