Skip to content

Commit 943db8f

Browse files
committed
[AArch64] Handle XAR with v1i64 operand types
When converting ROTR(XOR(a, b)) to XAR(a, b), or ROTR(a, a) to XAR(a, zero), we were not handling v1i64 types, which meant illegal copies were generated. This commit addresses that by generating insert_subreg and extract_subreg nodes for v1i64 types so that the values keep the correct types. Fixes #141746.
1 parent 3a42cbd commit 943db8f

File tree

2 files changed

+65
-2
lines changed

2 files changed

+65
-2
lines changed

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4637,15 +4637,39 @@ bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
46374637

46384638
if (!IsXOROperand) {
46394639
SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4640-
SDNode *MOV = CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, VT, Zero);
4640+
SDNode *MOV =
4641+
CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, MVT::v2i64, Zero);
46414642
SDValue MOVIV = SDValue(MOV, 0);
46424643
R1 = N1->getOperand(0);
46434644
R2 = MOVIV;
46444645
}
46454646

4647+
// If the input is a v1i64, widen to a v2i64 to use XAR.
4648+
assert((VT == MVT::v1i64 || VT == MVT::v2i64) && "Unexpected XAR type!");
4649+
if (VT == MVT::v1i64) {
4650+
EVT SVT = MVT::v2i64;
4651+
SDValue Undef =
4652+
SDValue(CurDAG->getMachineNode(AArch64::IMPLICIT_DEF, DL, SVT), 0);
4653+
SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4654+
R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT, Undef,
4655+
R1, DSub),
4656+
0);
4657+
if (R2.getValueType() == MVT::v1i64)
4658+
R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT,
4659+
Undef, R2, DSub),
4660+
0);
4661+
}
4662+
46464663
SDValue Ops[] = {R1, R2, Imm};
4647-
CurDAG->SelectNodeTo(N, AArch64::XAR, N0.getValueType(), Ops);
4664+
SDNode *XAR =
4665+
CurDAG->getMachineNode(AArch64::XAR, DL, R1.getValueType(), Ops);
46484666

4667+
if (VT == MVT::v1i64) {
4668+
SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4669+
XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
4670+
SDValue(XAR, 0), DSub);
4671+
}
4672+
ReplaceNode(N, XAR);
46494673
return true;
46504674
}
46514675

llvm/test/CodeGen/AArch64/xar.ll

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,26 @@ define <2 x i64> @xar(<2 x i64> %x, <2 x i64> %y) {
1919
ret <2 x i64> %b
2020
}
2121

22+
define <1 x i64> @xar_v1i64(<1 x i64> %a, <1 x i64> %b) {
23+
; SHA3-LABEL: xar_v1i64:
24+
; SHA3: // %bb.0:
25+
; SHA3-NEXT: // kill: def $d0 killed $d0 def $q0
26+
; SHA3-NEXT: // kill: def $d1 killed $d1 def $q1
27+
; SHA3-NEXT: xar v0.2d, v0.2d, v1.2d, #63
28+
; SHA3-NEXT: // kill: def $d0 killed $d0 killed $q0
29+
; SHA3-NEXT: ret
30+
;
31+
; NOSHA3-LABEL: xar_v1i64:
32+
; NOSHA3: // %bb.0:
33+
; NOSHA3-NEXT: eor v1.8b, v0.8b, v1.8b
34+
; NOSHA3-NEXT: shl d0, d1, #1
35+
; NOSHA3-NEXT: usra d0, d1, #63
36+
; NOSHA3-NEXT: ret
37+
%v.val = xor <1 x i64> %a, %b
38+
%fshl = tail call <1 x i64> @llvm.fshl.v1i64(<1 x i64> %v.val, <1 x i64> %v.val, <1 x i64> splat (i64 1))
39+
ret <1 x i64> %fshl
40+
}
41+
2242
define <2 x i64> @xar_instead_of_or1(<2 x i64> %r) {
2343
; SHA3-LABEL: xar_instead_of_or1:
2444
; SHA3: // %bb.0: // %entry
@@ -37,6 +57,25 @@ entry:
3757
ret <2 x i64> %or
3858
}
3959

60+
define <1 x i64> @xar_instead_of_or_v1i64(<1 x i64> %v.val) {
61+
; SHA3-LABEL: xar_instead_of_or_v1i64:
62+
; SHA3: // %bb.0:
63+
; SHA3-NEXT: movi v1.2d, #0000000000000000
64+
; SHA3-NEXT: // kill: def $d0 killed $d0 def $q0
65+
; SHA3-NEXT: xar v0.2d, v0.2d, v1.2d, #63
66+
; SHA3-NEXT: // kill: def $d0 killed $d0 killed $q0
67+
; SHA3-NEXT: ret
68+
;
69+
; NOSHA3-LABEL: xar_instead_of_or_v1i64:
70+
; NOSHA3: // %bb.0:
71+
; NOSHA3-NEXT: shl d1, d0, #1
72+
; NOSHA3-NEXT: usra d1, d0, #63
73+
; NOSHA3-NEXT: fmov d0, d1
74+
; NOSHA3-NEXT: ret
75+
%fshl = tail call <1 x i64> @llvm.fshl.v1i64(<1 x i64> %v.val, <1 x i64> %v.val, <1 x i64> splat (i64 1))
76+
ret <1 x i64> %fshl
77+
}
78+
4079
define <4 x i32> @xar_instead_of_or2(<4 x i32> %r) {
4180
; SHA3-LABEL: xar_instead_of_or2:
4281
; SHA3: // %bb.0: // %entry

0 commit comments

Comments
 (0)