Skip to content

Commit 32837f3

Browse files
authored
[AArch64] Handle XAR with v1i64 operand types (#141754)
When converting ROTR(XOR(a, b)) to XAR(a, b), or ROTR(a, a) to XAR(a, zero), we were not handling v1i64 types, so illegal copies were generated. This patch fixes that by emitting INSERT_SUBREG and EXTRACT_SUBREG around the XAR for v1i64 operands, widening them to v2i64 so that every value keeps the correct type. Fixes #141746.
1 parent ebe25d8 commit 32837f3

File tree

2 files changed

+64
-2
lines changed

2 files changed

+64
-2
lines changed

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4637,15 +4637,38 @@ bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
46374637

46384638
if (!IsXOROperand) {
46394639
SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4640-
SDNode *MOV = CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, VT, Zero);
4640+
SDNode *MOV =
4641+
CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, MVT::v2i64, Zero);
46414642
SDValue MOVIV = SDValue(MOV, 0);
46424643
R1 = N1->getOperand(0);
46434644
R2 = MOVIV;
46444645
}
46454646

4647+
// If the input is a v1i64, widen to a v2i64 to use XAR.
4648+
assert((VT == MVT::v1i64 || VT == MVT::v2i64) && "Unexpected XAR type!");
4649+
if (VT == MVT::v1i64) {
4650+
EVT SVT = MVT::v2i64;
4651+
SDValue Undef =
4652+
SDValue(CurDAG->getMachineNode(AArch64::IMPLICIT_DEF, DL, SVT), 0);
4653+
SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4654+
R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT, Undef,
4655+
R1, DSub),
4656+
0);
4657+
if (R2.getValueType() == MVT::v1i64)
4658+
R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT,
4659+
Undef, R2, DSub),
4660+
0);
4661+
}
4662+
46464663
SDValue Ops[] = {R1, R2, Imm};
4647-
CurDAG->SelectNodeTo(N, AArch64::XAR, N0.getValueType(), Ops);
4664+
SDNode *XAR = CurDAG->getMachineNode(AArch64::XAR, DL, MVT::v2i64, Ops);
46484665

4666+
if (VT == MVT::v1i64) {
4667+
SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4668+
XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
4669+
SDValue(XAR, 0), DSub);
4670+
}
4671+
ReplaceNode(N, XAR);
46494672
return true;
46504673
}
46514674

llvm/test/CodeGen/AArch64/xar.ll

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,26 @@ define <2 x i64> @xar(<2 x i64> %x, <2 x i64> %y) {
1919
ret <2 x i64> %b
2020
}
2121

22+
define <1 x i64> @xar_v1i64(<1 x i64> %a, <1 x i64> %b) {
23+
; SHA3-LABEL: xar_v1i64:
24+
; SHA3: // %bb.0:
25+
; SHA3-NEXT: // kill: def $d0 killed $d0 def $q0
26+
; SHA3-NEXT: // kill: def $d1 killed $d1 def $q1
27+
; SHA3-NEXT: xar v0.2d, v0.2d, v1.2d, #63
28+
; SHA3-NEXT: // kill: def $d0 killed $d0 killed $q0
29+
; SHA3-NEXT: ret
30+
;
31+
; NOSHA3-LABEL: xar_v1i64:
32+
; NOSHA3: // %bb.0:
33+
; NOSHA3-NEXT: eor v1.8b, v0.8b, v1.8b
34+
; NOSHA3-NEXT: shl d0, d1, #1
35+
; NOSHA3-NEXT: usra d0, d1, #63
36+
; NOSHA3-NEXT: ret
37+
%v.val = xor <1 x i64> %a, %b
38+
%fshl = tail call <1 x i64> @llvm.fshl.v1i64(<1 x i64> %v.val, <1 x i64> %v.val, <1 x i64> splat (i64 1))
39+
ret <1 x i64> %fshl
40+
}
41+
2242
define <2 x i64> @xar_instead_of_or1(<2 x i64> %r) {
2343
; SHA3-LABEL: xar_instead_of_or1:
2444
; SHA3: // %bb.0: // %entry
@@ -37,6 +57,25 @@ entry:
3757
ret <2 x i64> %or
3858
}
3959

60+
define <1 x i64> @xar_instead_of_or_v1i64(<1 x i64> %v.val) {
61+
; SHA3-LABEL: xar_instead_of_or_v1i64:
62+
; SHA3: // %bb.0:
63+
; SHA3-NEXT: movi v1.2d, #0000000000000000
64+
; SHA3-NEXT: // kill: def $d0 killed $d0 def $q0
65+
; SHA3-NEXT: xar v0.2d, v0.2d, v1.2d, #63
66+
; SHA3-NEXT: // kill: def $d0 killed $d0 killed $q0
67+
; SHA3-NEXT: ret
68+
;
69+
; NOSHA3-LABEL: xar_instead_of_or_v1i64:
70+
; NOSHA3: // %bb.0:
71+
; NOSHA3-NEXT: shl d1, d0, #1
72+
; NOSHA3-NEXT: usra d1, d0, #63
73+
; NOSHA3-NEXT: fmov d0, d1
74+
; NOSHA3-NEXT: ret
75+
%fshl = tail call <1 x i64> @llvm.fshl.v1i64(<1 x i64> %v.val, <1 x i64> %v.val, <1 x i64> splat (i64 1))
76+
ret <1 x i64> %fshl
77+
}
78+
4079
define <4 x i32> @xar_instead_of_or2(<4 x i32> %r) {
4180
; SHA3-LABEL: xar_instead_of_or2:
4281
; SHA3: // %bb.0: // %entry

0 commit comments

Comments
 (0)