Skip to content

Commit 7217b01

Browse files
committed
[AMDGPU] Add globalisel checks for ctlz_zero_undef/cttz_zero_undef
1 parent e78bf49 commit 7217b01

File tree

2 files changed

+596
-0
lines changed

2 files changed

+596
-0
lines changed

llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll

Lines changed: 261 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=SI %s
33
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=VI %s
44
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefixes=EG %s
5+
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9-GISEL %s
56

67
declare i8 @llvm.ctlz.i8(i8, i1) nounwind readnone
78

@@ -50,6 +51,17 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out,
5051
; EG-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
5152
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
5253
; EG-NEXT: FFBH_UINT * T1.X, KC0[2].Z,
54+
;
55+
; GFX9-GISEL-LABEL: s_ctlz_zero_undef_i32:
56+
; GFX9-GISEL: ; %bb.0:
57+
; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
58+
; GFX9-GISEL-NEXT: s_load_dword s4, s[0:1], 0x2c
59+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
60+
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
61+
; GFX9-GISEL-NEXT: s_flbit_i32_b32 s0, s4
62+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
63+
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[2:3]
64+
; GFX9-GISEL-NEXT: s_endpgm
5365
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
5466
store i32 %ctlz, i32 addrspace(1)* %out, align 4
5567
ret void
@@ -109,6 +121,19 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out,
109121
; EG-NEXT: FFBH_UINT T0.X, T0.X,
110122
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
111123
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
124+
;
125+
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_i32:
126+
; GFX9-GISEL: ; %bb.0:
127+
; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
128+
; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c
129+
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
130+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
131+
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
132+
; GFX9-GISEL-NEXT: global_load_dword v0, v0, s[4:5]
133+
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
134+
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
135+
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[2:3]
136+
; GFX9-GISEL-NEXT: s_endpgm
112137
%tid = call i32 @llvm.amdgcn.workitem.id.x()
113138
%in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
114139
%val = load i32, i32 addrspace(1)* %in.gep, align 4
@@ -174,6 +199,20 @@ define amdgpu_kernel void @v_ctlz_zero_undef_v2i32(<2 x i32> addrspace(1)* noali
174199
; EG-NEXT: FFBH_UINT T0.X, T0.X,
175200
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
176201
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
202+
;
203+
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_v2i32:
204+
; GFX9-GISEL: ; %bb.0:
205+
; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
206+
; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c
207+
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 3, v0
208+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
209+
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
210+
; GFX9-GISEL-NEXT: global_load_dwordx2 v[0:1], v0, s[4:5]
211+
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
212+
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
213+
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
214+
; GFX9-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
215+
; GFX9-GISEL-NEXT: s_endpgm
177216
%tid = call i32 @llvm.amdgcn.workitem.id.x()
178217
%in.gep = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %valptr, i32 %tid
179218
%val = load <2 x i32>, <2 x i32> addrspace(1)* %in.gep, align 8
@@ -245,6 +284,22 @@ define amdgpu_kernel void @v_ctlz_zero_undef_v4i32(<4 x i32> addrspace(1)* noali
245284
; EG-NEXT: FFBH_UINT T0.X, T0.X,
246285
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
247286
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
287+
;
288+
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_v4i32:
289+
; GFX9-GISEL: ; %bb.0:
290+
; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
291+
; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c
292+
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 4, v0
293+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0
294+
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
295+
; GFX9-GISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
296+
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
297+
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
298+
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
299+
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v2, v2
300+
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v3, v3
301+
; GFX9-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3]
302+
; GFX9-GISEL-NEXT: s_endpgm
248303
%tid = call i32 @llvm.amdgcn.workitem.id.x()
249304
%in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %valptr, i32 %tid
250305
%val = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep, align 16
@@ -318,6 +373,24 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i8(i8 addrspace(1)* noalias %out, i
318373
; EG-NEXT: MOV * T0.Z, 0.0,
319374
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
320375
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
376+
;
377+
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_i8:
378+
; GFX9-GISEL: ; %bb.0:
379+
; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
380+
; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c
381+
; GFX9-GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v0
382+
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
383+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s4
384+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s5
385+
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v1, v0
386+
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v3, vcc
387+
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
388+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
389+
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
390+
; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0
391+
; GFX9-GISEL-NEXT: v_subrev_u32_e32 v0, 24, v0
392+
; GFX9-GISEL-NEXT: global_store_byte v1, v0, s[2:3]
393+
; GFX9-GISEL-NEXT: s_endpgm
321394
%tid = call i32 @llvm.amdgcn.workitem.id.x()
322395
%in.gep = getelementptr i8, i8 addrspace(1)* %valptr, i32 %tid
323396
%val = load i8, i8 addrspace(1)* %in.gep
@@ -377,6 +450,19 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i64(i64 addrspace(1)* noalias %out,
377450
; EG-NEXT: MOV T0.Y, 0.0,
378451
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
379452
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
453+
;
454+
; GFX9-GISEL-LABEL: s_ctlz_zero_undef_i64:
455+
; GFX9-GISEL: ; %bb.0:
456+
; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
457+
; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x4c
458+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
459+
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
460+
; GFX9-GISEL-NEXT: s_flbit_i32_b64 s0, s[4:5]
461+
; GFX9-GISEL-NEXT: s_bfe_u64 s[0:1], s[0:1], 0x200000
462+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
463+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
464+
; GFX9-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
465+
; GFX9-GISEL-NEXT: s_endpgm
380466
%ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 true)
381467
store i64 %ctlz, i64 addrspace(1)* %out
382468
ret void
@@ -430,6 +516,17 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias
430516
; EG-NEXT: CNDE_INT T0.X, KC0[3].X, PS, PV.W,
431517
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
432518
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
519+
;
520+
; GFX9-GISEL-LABEL: s_ctlz_zero_undef_i64_trunc:
521+
; GFX9-GISEL: ; %bb.0:
522+
; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
523+
; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c
524+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
525+
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
526+
; GFX9-GISEL-NEXT: s_flbit_i32_b64 s0, s[4:5]
527+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
528+
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[2:3]
529+
; GFX9-GISEL-NEXT: s_endpgm
433530
%ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 true)
434531
%trunc = trunc i64 %ctlz to i32
435532
store i32 %trunc, i32 addrspace(1)* %out
@@ -506,6 +603,23 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i64(i64 addrspace(1)* noalias %out,
506603
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, T0.W,
507604
; EG-NEXT: LSHR * T1.X, PV.W, literal.x,
508605
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
606+
;
607+
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_i64:
608+
; GFX9-GISEL: ; %bb.0:
609+
; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
610+
; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c
611+
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v3, 3, v0
612+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
613+
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
614+
; GFX9-GISEL-NEXT: global_load_dwordx2 v[0:1], v3, s[4:5]
615+
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
616+
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
617+
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v4, v1
618+
; GFX9-GISEL-NEXT: v_add_u32_e32 v0, 32, v0
619+
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
620+
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v0, vcc
621+
; GFX9-GISEL-NEXT: global_store_dwordx2 v3, v[1:2], s[2:3]
622+
; GFX9-GISEL-NEXT: s_endpgm
509623
%tid = call i32 @llvm.amdgcn.workitem.id.x()
510624
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
511625
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
@@ -585,6 +699,23 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias
585699
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, PV.Z,
586700
; EG-NEXT: LSHR * T1.X, PV.W, literal.x,
587701
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
702+
;
703+
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_i64_trunc:
704+
; GFX9-GISEL: ; %bb.0:
705+
; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
706+
; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c
707+
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v1, 3, v0
708+
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
709+
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
710+
; GFX9-GISEL-NEXT: global_load_dwordx2 v[1:2], v1, s[4:5]
711+
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
712+
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
713+
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v3, v2
714+
; GFX9-GISEL-NEXT: v_add_u32_e32 v1, 32, v1
715+
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
716+
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
717+
; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[2:3]
718+
; GFX9-GISEL-NEXT: s_endpgm
588719
%tid = call i32 @llvm.amdgcn.workitem.id.x()
589720
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
590721
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@@ -650,6 +781,21 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_neg1(i32 addrspace(1)* n
650781
; EG-NEXT: CNDE_INT T0.X, T0.X, literal.x, PV.W,
651782
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
652783
; EG-NEXT: -1(nan), 2(2.802597e-45)
784+
;
785+
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_i32_sel_eq_neg1:
786+
; GFX9-GISEL: ; %bb.0:
787+
; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
788+
; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c
789+
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
790+
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
791+
; GFX9-GISEL-NEXT: global_load_dword v0, v0, s[4:5]
792+
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
793+
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v0
794+
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
795+
; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, -1, vcc
796+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
797+
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[2:3]
798+
; GFX9-GISEL-NEXT: s_endpgm
653799
%tid = call i32 @llvm.amdgcn.workitem.id.x()
654800
%in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
655801
%val = load i32, i32 addrspace(1)* %in.gep
@@ -715,6 +861,21 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_neg1(i32 addrspace(1)* n
715861
; EG-NEXT: CNDE_INT T0.X, T0.X, literal.x, PV.W,
716862
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
717863
; EG-NEXT: -1(nan), 2(2.802597e-45)
864+
;
865+
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_i32_sel_ne_neg1:
866+
; GFX9-GISEL: ; %bb.0:
867+
; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
868+
; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c
869+
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
870+
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
871+
; GFX9-GISEL-NEXT: global_load_dword v0, v0, s[4:5]
872+
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
873+
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v0
874+
; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
875+
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, -1, v1, vcc
876+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
877+
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[2:3]
878+
; GFX9-GISEL-NEXT: s_endpgm
718879
%tid = call i32 @llvm.amdgcn.workitem.id.x()
719880
%in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
720881
%val = load i32, i32 addrspace(1)* %in.gep
@@ -785,6 +946,27 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i8_sel_eq_neg1(i8 addrspace(1)* noa
785946
; EG-NEXT: MOV * T0.Z, 0.0,
786947
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
787948
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
949+
;
950+
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_i8_sel_eq_neg1:
951+
; GFX9-GISEL: ; %bb.0:
952+
; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
953+
; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c
954+
; GFX9-GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v0
955+
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
956+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s4
957+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s5
958+
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v1, v0
959+
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v3, vcc
960+
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
961+
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
962+
; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
963+
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v0
964+
; GFX9-GISEL-NEXT: v_subrev_u32_e32 v1, 24, v1
965+
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
966+
; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, -1, vcc
967+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
968+
; GFX9-GISEL-NEXT: global_store_byte v1, v0, s[2:3]
969+
; GFX9-GISEL-NEXT: s_endpgm
788970
%tid = call i32 @llvm.amdgcn.workitem.id.x()
789971
%valptr.gep = getelementptr i8, i8 addrspace(1)* %valptr, i32 %tid
790972
%val = load i8, i8 addrspace(1)* %valptr.gep
@@ -868,6 +1050,25 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_neg1_two_use(i32 addrspa
8681050
; EG-NEXT: CNDE_INT T0.X, T0.X, literal.x, PV.W,
8691051
; EG-NEXT: LSHR * T3.X, KC0[2].Y, literal.y,
8701052
; EG-NEXT: -1(nan), 2(2.802597e-45)
1053+
;
1054+
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_i32_sel_eq_neg1_two_use:
1055+
; GFX9-GISEL: ; %bb.0:
1056+
; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
1057+
; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c
1058+
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1059+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
1060+
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1061+
; GFX9-GISEL-NEXT: global_load_dword v0, v0, s[4:5]
1062+
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
1063+
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v2, v0
1064+
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1065+
; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, -1, vcc
1066+
; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
1067+
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[2:3]
1068+
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
1069+
; GFX9-GISEL-NEXT: global_store_byte v[0:1], v2, off
1070+
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
1071+
; GFX9-GISEL-NEXT: s_endpgm
8711072
%tid = call i32 @llvm.amdgcn.workitem.id.x()
8721073
%in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
8731074
%val = load i32, i32 addrspace(1)* %in.gep
@@ -939,6 +1140,21 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_0(i32 addrspace(1)* noal
9391140
; EG-NEXT: CNDE_INT T0.X, T0.X, 0.0, PV.W,
9401141
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
9411142
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
1143+
;
1144+
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_i32_sel_eq_0:
1145+
; GFX9-GISEL: ; %bb.0:
1146+
; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
1147+
; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c
1148+
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1149+
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1150+
; GFX9-GISEL-NEXT: global_load_dword v0, v0, s[4:5]
1151+
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
1152+
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v0
1153+
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1154+
; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
1155+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
1156+
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[2:3]
1157+
; GFX9-GISEL-NEXT: s_endpgm
9421158
%tid = call i32 @llvm.amdgcn.workitem.id.x()
9431159
%in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
9441160
%val = load i32, i32 addrspace(1)* %in.gep
@@ -1009,6 +1225,21 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_0(i32 addrspace(1)* noal
10091225
; EG-NEXT: CNDE_INT T0.X, T0.X, 0.0, PV.W,
10101226
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
10111227
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
1228+
;
1229+
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_i32_sel_ne_0:
1230+
; GFX9-GISEL: ; %bb.0:
1231+
; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
1232+
; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c
1233+
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1234+
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1235+
; GFX9-GISEL-NEXT: global_load_dword v0, v0, s[4:5]
1236+
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
1237+
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v0
1238+
; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
1239+
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
1240+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
1241+
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[2:3]
1242+
; GFX9-GISEL-NEXT: s_endpgm
10121243
%tid = call i32 @llvm.amdgcn.workitem.id.x()
10131244
%in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
10141245
%val = load i32, i32 addrspace(1)* %in.gep
@@ -1080,6 +1311,21 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_cmp_non0(i32 addrspace(1
10801311
; EG-NEXT: CNDE_INT T0.X, PS, PV.W, 0.0,
10811312
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
10821313
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
1314+
;
1315+
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_i32_sel_eq_cmp_non0:
1316+
; GFX9-GISEL: ; %bb.0:
1317+
; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
1318+
; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c
1319+
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1320+
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1321+
; GFX9-GISEL-NEXT: global_load_dword v0, v0, s[4:5]
1322+
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
1323+
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v0
1324+
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
1325+
; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
1326+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
1327+
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[2:3]
1328+
; GFX9-GISEL-NEXT: s_endpgm
10831329
%tid = call i32 @llvm.amdgcn.workitem.id.x()
10841330
%in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
10851331
%val = load i32, i32 addrspace(1)* %in.gep
@@ -1151,6 +1397,21 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_cmp_non0(i32 addrspace(1
11511397
; EG-NEXT: CNDE_INT T0.X, PS, 0.0, PV.W,
11521398
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
11531399
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
1400+
;
1401+
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_i32_sel_ne_cmp_non0:
1402+
; GFX9-GISEL: ; %bb.0:
1403+
; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
1404+
; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c
1405+
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1406+
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1407+
; GFX9-GISEL-NEXT: global_load_dword v0, v0, s[4:5]
1408+
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
1409+
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v0
1410+
; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0
1411+
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
1412+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
1413+
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[2:3]
1414+
; GFX9-GISEL-NEXT: s_endpgm
11541415
%tid = call i32 @llvm.amdgcn.workitem.id.x()
11551416
%in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
11561417
%val = load i32, i32 addrspace(1)* %in.gep

0 commit comments

Comments
 (0)