Skip to content

Commit d5684f7

Browse files
committed
GlobalISel: Allow bitcount ops to have different result type
For AMDGPU the result is always 32-bit for 64-bit inputs. llvm-svn: 352717
1 parent 8db2001 commit d5684f7

File tree

12 files changed

+551
-56
lines changed

12 files changed

+551
-56
lines changed

llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,12 @@ class LegalizerHelper {
114114
void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx = 0,
115115
unsigned TruncOpcode = TargetOpcode::G_TRUNC);
116116

117+
/// Legalize a single operand \p OpIdx of the machine instruction \p MI as a
118+
/// Def by narrowing the operand's type to \p NarrowTy: the vreg of the operand
119+
/// is replaced in place and extended back to its original type with \p ExtOpcode.
120+
void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx,
121+
unsigned ExtOpcode);
122+
117123
/// Helper function to split a wide generic register into bitwise blocks with
118124
/// the given Type (which implies the number of blocks needed). The generic
119125
/// registers created are appended to Ops, starting at bit 0 of Reg.

llvm/include/llvm/Target/GenericOpcodes.td

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -123,31 +123,31 @@ def G_VAARG : GenericInstruction {
123123

124124
def G_CTLZ : GenericInstruction {
125125
let OutOperandList = (outs type0:$dst);
126-
let InOperandList = (ins type0:$src);
126+
let InOperandList = (ins type1:$src);
127127
let hasSideEffects = 0;
128128
}
129129

130130
def G_CTLZ_ZERO_UNDEF : GenericInstruction {
131131
let OutOperandList = (outs type0:$dst);
132-
let InOperandList = (ins type0:$src);
132+
let InOperandList = (ins type1:$src);
133133
let hasSideEffects = 0;
134134
}
135135

136136
def G_CTTZ : GenericInstruction {
137137
let OutOperandList = (outs type0:$dst);
138-
let InOperandList = (ins type0:$src);
138+
let InOperandList = (ins type1:$src);
139139
let hasSideEffects = 0;
140140
}
141141

142142
def G_CTTZ_ZERO_UNDEF : GenericInstruction {
143143
let OutOperandList = (outs type0:$dst);
144-
let InOperandList = (ins type0:$src);
144+
let InOperandList = (ins type1:$src);
145145
let hasSideEffects = 0;
146146
}
147147

148148
def G_CTPOP : GenericInstruction {
149149
let OutOperandList = (outs type0:$dst);
150-
let InOperandList = (ins type0:$src);
150+
let InOperandList = (ins type1:$src);
151151
let hasSideEffects = 0;
152152
}
153153

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -786,6 +786,18 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
786786
Observer.changedInstr(MI);
787787
return Legalized;
788788
}
789+
case TargetOpcode::G_CTLZ:
790+
case TargetOpcode::G_CTLZ_ZERO_UNDEF:
791+
case TargetOpcode::G_CTTZ:
792+
case TargetOpcode::G_CTTZ_ZERO_UNDEF:
793+
case TargetOpcode::G_CTPOP:
794+
if (TypeIdx != 0)
795+
return UnableToLegalize; // TODO
796+
797+
Observer.changingInstr(MI);
798+
narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
799+
Observer.changedInstr(MI);
800+
return Legalized;
789801
}
790802
}
791803

@@ -813,6 +825,15 @@ void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
813825
MO.setReg(DstExt);
814826
}
815827

828+
// Narrow the def operand \p OpIdx of \p MI to \p NarrowTy. A fresh \p NarrowTy
// vreg takes the operand's place, and a new \p ExtOpcode instruction defines
// the operand's original register from that narrow vreg.
void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
829+
unsigned OpIdx, unsigned ExtOpcode) {
830+
MachineOperand &MO = MI.getOperand(OpIdx);
831+
unsigned DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
832+
// Step the insert point forward by one so the extension is emitted after the
// current position (presumably \p MI itself; confirm against the callers).
MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
833+
// Must be built before MO is retargeted: MO.getReg() is still the original
// register here, and it becomes the result of the extension.
MIRBuilder.buildInstr(ExtOpcode, {MO.getReg()}, {DstTrunc});
834+
MO.setReg(DstTrunc);
835+
}
836+
816837
LegalizerHelper::LegalizeResult
817838
LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
818839
MIRBuilder.setInstr(MI);
@@ -890,6 +911,11 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
890911
case TargetOpcode::G_CTLZ:
891912
case TargetOpcode::G_CTLZ_ZERO_UNDEF:
892913
case TargetOpcode::G_CTPOP: {
914+
if (TypeIdx == 0) {
915+
widenScalarDst(MI, WideTy, 0);
916+
return Legalized;
917+
}
918+
893919
// First ZEXT the input.
894920
auto MIBSrc = MIRBuilder.buildZExt(WideTy, MI.getOperand(1).getReg());
895921
LLT CurTy = MRI.getType(MI.getOperand(0).getReg());
@@ -1943,7 +1969,7 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
19431969
case TargetOpcode::G_CTLZ: {
19441970
unsigned SrcReg = MI.getOperand(1).getReg();
19451971
unsigned Len = Ty.getSizeInBits();
1946-
if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty}})) {
1972+
if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty, Ty}})) {
19471973
// If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
19481974
auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF,
19491975
{Ty}, {SrcReg});
@@ -1993,7 +2019,7 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
19932019
case TargetOpcode::G_CTTZ: {
19942020
unsigned SrcReg = MI.getOperand(1).getReg();
19952021
unsigned Len = Ty.getSizeInBits();
1996-
if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty}})) {
2022+
if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty, Ty}})) {
19972023
// If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
19982024
// zero.
19992025
auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF,
@@ -2018,8 +2044,8 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
20182044
TargetOpcode::G_AND, {Ty},
20192045
{MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty},
20202046
{SrcReg, MIBCstNeg1})});
2021-
if (!isSupported({TargetOpcode::G_CTPOP, {Ty}}) &&
2022-
isSupported({TargetOpcode::G_CTLZ, {Ty}})) {
2047+
if (!isSupported({TargetOpcode::G_CTPOP, {Ty, Ty}}) &&
2048+
isSupported({TargetOpcode::G_CTLZ, {Ty, Ty}})) {
20232049
auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
20242050
MIRBuilder.buildInstr(
20252051
TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -211,12 +211,17 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
211211
.legalFor({S32})
212212
.scalarize(0);
213213

214-
setAction({G_CTLZ, S32}, Legal);
215-
setAction({G_CTLZ_ZERO_UNDEF, S32}, Legal);
216-
setAction({G_CTTZ, S32}, Legal);
217-
setAction({G_CTTZ_ZERO_UNDEF, S32}, Legal);
214+
// The 64-bit versions produce 32-bit results, but only on the SALU.
215+
getActionDefinitionsBuilder({G_CTLZ, G_CTLZ_ZERO_UNDEF,
216+
G_CTTZ, G_CTTZ_ZERO_UNDEF,
217+
G_CTPOP})
218+
.legalFor({{S32, S32}, {S32, S64}})
219+
.clampScalar(0, S32, S32)
220+
.clampScalar(1, S32, S64);
221+
// TODO: Scalarize
222+
223+
218224
setAction({G_BSWAP, S32}, Legal);
219-
setAction({G_CTPOP, S32}, Legal);
220225

221226
getActionDefinitionsBuilder(G_INTTOPTR)
222227
.legalIf([](const LegalityQuery &Query) {

llvm/lib/Target/ARM/ARMLegalizerInfo.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -143,17 +143,21 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
143143

144144
if (ST.hasV5TOps()) {
145145
getActionDefinitionsBuilder(G_CTLZ)
146-
.legalFor({s32})
146+
.legalFor({s32, s32})
147+
.clampScalar(1, s32, s32)
147148
.clampScalar(0, s32, s32);
148149
getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF)
149-
.lowerFor({s32})
150+
.lowerFor({s32, s32})
151+
.clampScalar(1, s32, s32)
150152
.clampScalar(0, s32, s32);
151153
} else {
152154
getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF)
153-
.libcallFor({s32})
155+
.libcallFor({s32, s32})
156+
.clampScalar(1, s32, s32)
154157
.clampScalar(0, s32, s32);
155158
getActionDefinitionsBuilder(G_CTLZ)
156-
.lowerFor({s32})
159+
.lowerFor({s32, s32})
160+
.clampScalar(1, s32, s32)
157161
.clampScalar(0, s32, s32);
158162
}
159163

llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -312,19 +312,19 @@
312312
# DEBUG-NEXT: G_SHUFFLE_VECTOR (opcode {{[0-9]+}}): 3 type indices
313313
# DEBUG: .. type index coverage check SKIPPED: no rules defined
314314
#
315-
# DEBUG-NEXT: G_CTTZ (opcode {{[0-9]+}}): 1 type index
315+
# DEBUG-NEXT: G_CTTZ (opcode {{[0-9]+}}): 2 type indices
316316
# DEBUG: .. type index coverage check SKIPPED: no rules defined
317317
#
318-
# DEBUG-NEXT: G_CTTZ_ZERO_UNDEF (opcode {{[0-9]+}}): 1 type index
318+
# DEBUG-NEXT: G_CTTZ_ZERO_UNDEF (opcode {{[0-9]+}}): 2 type indices
319319
# DEBUG: .. type index coverage check SKIPPED: no rules defined
320320
#
321-
# DEBUG-NEXT: G_CTLZ (opcode {{[0-9]+}}): 1 type index
321+
# DEBUG-NEXT: G_CTLZ (opcode {{[0-9]+}}): 2 type indices
322322
# DEBUG: .. type index coverage check SKIPPED: no rules defined
323323
#
324-
# DEBUG-NEXT: G_CTLZ_ZERO_UNDEF (opcode {{[0-9]+}}): 1 type index
324+
# DEBUG-NEXT: G_CTLZ_ZERO_UNDEF (opcode {{[0-9]+}}): 2 type indices
325325
# DEBUG: .. type index coverage check SKIPPED: no rules defined
326326
#
327-
# DEBUG-NEXT: G_CTPOP (opcode {{[0-9]+}}): 1 type index
327+
# DEBUG-NEXT: G_CTPOP (opcode {{[0-9]+}}): 2 type indices
328328
# DEBUG: .. type index coverage check SKIPPED: no rules defined
329329
#
330330
# DEBUG-NEXT: G_BSWAP (opcode {{[0-9]+}}): 1 type index
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s
3+
4+
---
5+
name: ctlz_zero_undef_s32_s32
6+
7+
body: |
8+
bb.0:
9+
liveins: $vgpr0
10+
; CHECK-LABEL: name: ctlz_zero_undef_s32_s32
11+
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
12+
; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[COPY]](s32)
13+
; CHECK: $vgpr0 = COPY [[CTLZ_ZERO_UNDEF]](s32)
14+
%0:_(s32) = COPY $vgpr0
15+
%1:_(s32) = G_CTLZ_ZERO_UNDEF %0
16+
$vgpr0 = COPY %1
17+
...
18+
19+
---
20+
name: ctlz_zero_undef_s32_s64
21+
22+
body: |
23+
bb.0:
24+
liveins: $vgpr0_vgpr1
25+
; CHECK-LABEL: name: ctlz_zero_undef_s32_s64
26+
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
27+
; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[COPY]](s64)
28+
; CHECK: $vgpr0 = COPY [[CTLZ_ZERO_UNDEF]](s32)
29+
%0:_(s64) = COPY $vgpr0_vgpr1
30+
%1:_(s32) = G_CTLZ_ZERO_UNDEF %0
31+
$vgpr0 = COPY %1
32+
...
33+
34+
---
35+
name: ctlz_zero_undef_s64_s64
36+
37+
body: |
38+
bb.0:
39+
liveins: $vgpr0_vgpr1
40+
; CHECK-LABEL: name: ctlz_zero_undef_s64_s64
41+
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
42+
; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[COPY]](s64)
43+
; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[CTLZ_ZERO_UNDEF]](s32)
44+
; CHECK: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
45+
%0:_(s64) = COPY $vgpr0_vgpr1
46+
%1:_(s64) = G_CTLZ_ZERO_UNDEF %0
47+
$vgpr0_vgpr1 = COPY %1
48+
...
49+
50+
---
51+
name: ctlz_zero_undef_s16_s32
52+
53+
body: |
54+
bb.0:
55+
liveins: $vgpr0
56+
; CHECK-LABEL: name: ctlz_zero_undef_s16_s32
57+
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
58+
; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[COPY]](s32)
59+
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
60+
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTLZ_ZERO_UNDEF]](s32)
61+
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
62+
; CHECK: $vgpr0 = COPY [[AND]](s32)
63+
%0:_(s32) = COPY $vgpr0
64+
%1:_(s16) = G_CTLZ_ZERO_UNDEF %0
65+
%2:_(s32) = G_ZEXT %1
66+
$vgpr0 = COPY %2
67+
...
68+
69+
---
70+
name: ctlz_zero_undef_s16_s16
71+
72+
body: |
73+
bb.0:
74+
liveins: $vgpr0
75+
; CHECK-LABEL: name: ctlz_zero_undef_s16_s16
76+
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
77+
; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
78+
; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[TRUNC]](s16)
79+
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
80+
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTLZ_ZERO_UNDEF]](s32)
81+
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
82+
; CHECK: $vgpr0 = COPY [[AND]](s32)
83+
%0:_(s32) = COPY $vgpr0
84+
%1:_(s16) = G_TRUNC %0
85+
%2:_(s16) = G_CTLZ_ZERO_UNDEF %1
86+
%3:_(s32) = G_ZEXT %2
87+
$vgpr0 = COPY %3
88+
...
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s
3+
4+
---
5+
name: ctlz_s32_s32
6+
7+
body: |
8+
bb.0:
9+
liveins: $vgpr0
10+
; CHECK-LABEL: name: ctlz_s32_s32
11+
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
12+
; CHECK: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[COPY]](s32)
13+
; CHECK: $vgpr0 = COPY [[CTLZ]](s32)
14+
%0:_(s32) = COPY $vgpr0
15+
%1:_(s32) = G_CTLZ %0
16+
$vgpr0 = COPY %1
17+
...
18+
19+
---
20+
name: ctlz_s32_s64
21+
22+
body: |
23+
bb.0:
24+
liveins: $vgpr0_vgpr1
25+
; CHECK-LABEL: name: ctlz_s32_s64
26+
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
27+
; CHECK: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[COPY]](s64)
28+
; CHECK: $vgpr0 = COPY [[CTLZ]](s32)
29+
%0:_(s64) = COPY $vgpr0_vgpr1
30+
%1:_(s32) = G_CTLZ %0
31+
$vgpr0 = COPY %1
32+
...
33+
34+
---
35+
name: ctlz_s64_s64
36+
37+
body: |
38+
bb.0:
39+
liveins: $vgpr0_vgpr1
40+
; CHECK-LABEL: name: ctlz_s64_s64
41+
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
42+
; CHECK: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[COPY]](s64)
43+
; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[CTLZ]](s32)
44+
; CHECK: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
45+
%0:_(s64) = COPY $vgpr0_vgpr1
46+
%1:_(s64) = G_CTLZ %0
47+
$vgpr0_vgpr1 = COPY %1
48+
...
49+
50+
---
51+
name: ctlz_s16_s32
52+
53+
body: |
54+
bb.0:
55+
liveins: $vgpr0
56+
; CHECK-LABEL: name: ctlz_s16_s32
57+
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
58+
; CHECK: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[COPY]](s32)
59+
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
60+
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTLZ]](s32)
61+
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
62+
; CHECK: $vgpr0 = COPY [[AND]](s32)
63+
%0:_(s32) = COPY $vgpr0
64+
%1:_(s16) = G_CTLZ %0
65+
%2:_(s32) = G_ZEXT %1
66+
$vgpr0 = COPY %2
67+
...
68+
69+
---
70+
name: ctlz_s16_s16
71+
72+
body: |
73+
bb.0:
74+
liveins: $vgpr0
75+
; CHECK-LABEL: name: ctlz_s16_s16
76+
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
77+
; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
78+
; CHECK: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[TRUNC]](s16)
79+
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
80+
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTLZ]](s32)
81+
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
82+
; CHECK: $vgpr0 = COPY [[AND]](s32)
83+
%0:_(s32) = COPY $vgpr0
84+
%1:_(s16) = G_TRUNC %0
85+
%2:_(s16) = G_CTLZ %1
86+
%3:_(s32) = G_ZEXT %2
87+
$vgpr0 = COPY %3
88+
...

0 commit comments

Comments
 (0)