|
2 | 2 | ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=SI %s
|
3 | 3 | ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=VI %s
|
4 | 4 | ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefixes=EG %s
|
| 5 | +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9-GISEL %s |
5 | 6 |
|
6 | 7 | declare i8 @llvm.ctlz.i8(i8, i1) nounwind readnone
|
7 | 8 |
|
@@ -50,6 +51,17 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out,
|
50 | 51 | ; EG-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
|
51 | 52 | ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
52 | 53 | ; EG-NEXT: FFBH_UINT * T1.X, KC0[2].Z,
|
| 54 | +; |
| 55 | +; GFX9-GISEL-LABEL: s_ctlz_zero_undef_i32: |
| 56 | +; GFX9-GISEL: ; %bb.0: |
| 57 | +; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 |
| 58 | +; GFX9-GISEL-NEXT: s_load_dword s4, s[0:1], 0x2c |
| 59 | +; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0 |
| 60 | +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| 61 | +; GFX9-GISEL-NEXT: s_flbit_i32_b32 s0, s4 |
| 62 | +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0 |
| 63 | +; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[2:3] |
| 64 | +; GFX9-GISEL-NEXT: s_endpgm |
53 | 65 | %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
|
54 | 66 | store i32 %ctlz, i32 addrspace(1)* %out, align 4
|
55 | 67 | ret void
|
@@ -109,6 +121,19 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out,
|
109 | 121 | ; EG-NEXT: FFBH_UINT T0.X, T0.X,
|
110 | 122 | ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
|
111 | 123 | ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
| 124 | +; |
| 125 | +; GFX9-GISEL-LABEL: v_ctlz_zero_undef_i32: |
| 126 | +; GFX9-GISEL: ; %bb.0: |
| 127 | +; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 |
| 128 | +; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c |
| 129 | +; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 130 | +; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0 |
| 131 | +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| 132 | +; GFX9-GISEL-NEXT: global_load_dword v0, v0, s[4:5] |
| 133 | +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| 134 | +; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0 |
| 135 | +; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[2:3] |
| 136 | +; GFX9-GISEL-NEXT: s_endpgm |
112 | 137 | %tid = call i32 @llvm.amdgcn.workitem.id.x()
|
113 | 138 | %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
|
114 | 139 | %val = load i32, i32 addrspace(1)* %in.gep, align 4
|
@@ -174,6 +199,20 @@ define amdgpu_kernel void @v_ctlz_zero_undef_v2i32(<2 x i32> addrspace(1)* noali
|
174 | 199 | ; EG-NEXT: FFBH_UINT T0.X, T0.X,
|
175 | 200 | ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
|
176 | 201 | ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
| 202 | +; |
| 203 | +; GFX9-GISEL-LABEL: v_ctlz_zero_undef_v2i32: |
| 204 | +; GFX9-GISEL: ; %bb.0: |
| 205 | +; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 |
| 206 | +; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c |
| 207 | +; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 3, v0 |
| 208 | +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 |
| 209 | +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| 210 | +; GFX9-GISEL-NEXT: global_load_dwordx2 v[0:1], v0, s[4:5] |
| 211 | +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| 212 | +; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0 |
| 213 | +; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v1 |
| 214 | +; GFX9-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] |
| 215 | +; GFX9-GISEL-NEXT: s_endpgm |
177 | 216 | %tid = call i32 @llvm.amdgcn.workitem.id.x()
|
178 | 217 | %in.gep = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %valptr, i32 %tid
|
179 | 218 | %val = load <2 x i32>, <2 x i32> addrspace(1)* %in.gep, align 8
|
@@ -245,6 +284,22 @@ define amdgpu_kernel void @v_ctlz_zero_undef_v4i32(<4 x i32> addrspace(1)* noali
|
245 | 284 | ; EG-NEXT: FFBH_UINT T0.X, T0.X,
|
246 | 285 | ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
|
247 | 286 | ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
| 287 | +; |
| 288 | +; GFX9-GISEL-LABEL: v_ctlz_zero_undef_v4i32: |
| 289 | +; GFX9-GISEL: ; %bb.0: |
| 290 | +; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 |
| 291 | +; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c |
| 292 | +; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 4, v0 |
| 293 | +; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0 |
| 294 | +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| 295 | +; GFX9-GISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5] |
| 296 | +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| 297 | +; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0 |
| 298 | +; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v1 |
| 299 | +; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v2, v2 |
| 300 | +; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v3, v3 |
| 301 | +; GFX9-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] |
| 302 | +; GFX9-GISEL-NEXT: s_endpgm |
248 | 303 | %tid = call i32 @llvm.amdgcn.workitem.id.x()
|
249 | 304 | %in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %valptr, i32 %tid
|
250 | 305 | %val = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep, align 16
|
@@ -318,6 +373,24 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i8(i8 addrspace(1)* noalias %out, i
|
318 | 373 | ; EG-NEXT: MOV * T0.Z, 0.0,
|
319 | 374 | ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
|
320 | 375 | ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
| 376 | +; |
| 377 | +; GFX9-GISEL-LABEL: v_ctlz_zero_undef_i8: |
| 378 | +; GFX9-GISEL: ; %bb.0: |
| 379 | +; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 |
| 380 | +; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c |
| 381 | +; GFX9-GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v0 |
| 382 | +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| 383 | +; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s4 |
| 384 | +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s5 |
| 385 | +; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v1, v0 |
| 386 | +; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v3, vcc |
| 387 | +; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off |
| 388 | +; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0 |
| 389 | +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| 390 | +; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 |
| 391 | +; GFX9-GISEL-NEXT: v_subrev_u32_e32 v0, 24, v0 |
| 392 | +; GFX9-GISEL-NEXT: global_store_byte v1, v0, s[2:3] |
| 393 | +; GFX9-GISEL-NEXT: s_endpgm |
321 | 394 | %tid = call i32 @llvm.amdgcn.workitem.id.x()
|
322 | 395 | %in.gep = getelementptr i8, i8 addrspace(1)* %valptr, i32 %tid
|
323 | 396 | %val = load i8, i8 addrspace(1)* %in.gep
|
@@ -377,6 +450,19 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i64(i64 addrspace(1)* noalias %out,
|
377 | 450 | ; EG-NEXT: MOV T0.Y, 0.0,
|
378 | 451 | ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
|
379 | 452 | ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
| 453 | +; |
| 454 | +; GFX9-GISEL-LABEL: s_ctlz_zero_undef_i64: |
| 455 | +; GFX9-GISEL: ; %bb.0: |
| 456 | +; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 |
| 457 | +; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x4c |
| 458 | +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 |
| 459 | +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| 460 | +; GFX9-GISEL-NEXT: s_flbit_i32_b64 s0, s[4:5] |
| 461 | +; GFX9-GISEL-NEXT: s_bfe_u64 s[0:1], s[0:1], 0x200000 |
| 462 | +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0 |
| 463 | +; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1 |
| 464 | +; GFX9-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] |
| 465 | +; GFX9-GISEL-NEXT: s_endpgm |
380 | 466 | %ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 true)
|
381 | 467 | store i64 %ctlz, i64 addrspace(1)* %out
|
382 | 468 | ret void
|
@@ -430,6 +516,17 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias
|
430 | 516 | ; EG-NEXT: CNDE_INT T0.X, KC0[3].X, PS, PV.W,
|
431 | 517 | ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
|
432 | 518 | ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
| 519 | +; |
| 520 | +; GFX9-GISEL-LABEL: s_ctlz_zero_undef_i64_trunc: |
| 521 | +; GFX9-GISEL: ; %bb.0: |
| 522 | +; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 |
| 523 | +; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c |
| 524 | +; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0 |
| 525 | +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| 526 | +; GFX9-GISEL-NEXT: s_flbit_i32_b64 s0, s[4:5] |
| 527 | +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0 |
| 528 | +; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[2:3] |
| 529 | +; GFX9-GISEL-NEXT: s_endpgm |
433 | 530 | %ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 true)
|
434 | 531 | %trunc = trunc i64 %ctlz to i32
|
435 | 532 | store i32 %trunc, i32 addrspace(1)* %out
|
@@ -506,6 +603,23 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i64(i64 addrspace(1)* noalias %out,
|
506 | 603 | ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, T0.W,
|
507 | 604 | ; EG-NEXT: LSHR * T1.X, PV.W, literal.x,
|
508 | 605 | ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
| 606 | +; |
| 607 | +; GFX9-GISEL-LABEL: v_ctlz_zero_undef_i64: |
| 608 | +; GFX9-GISEL: ; %bb.0: |
| 609 | +; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 |
| 610 | +; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c |
| 611 | +; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v3, 3, v0 |
| 612 | +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 |
| 613 | +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| 614 | +; GFX9-GISEL-NEXT: global_load_dwordx2 v[0:1], v3, s[4:5] |
| 615 | +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| 616 | +; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0 |
| 617 | +; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v4, v1 |
| 618 | +; GFX9-GISEL-NEXT: v_add_u32_e32 v0, 32, v0 |
| 619 | +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 |
| 620 | +; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v0, vcc |
| 621 | +; GFX9-GISEL-NEXT: global_store_dwordx2 v3, v[1:2], s[2:3] |
| 622 | +; GFX9-GISEL-NEXT: s_endpgm |
509 | 623 | %tid = call i32 @llvm.amdgcn.workitem.id.x()
|
510 | 624 | %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
|
511 | 625 | %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
|
@@ -585,6 +699,23 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias
|
585 | 699 | ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, PV.Z,
|
586 | 700 | ; EG-NEXT: LSHR * T1.X, PV.W, literal.x,
|
587 | 701 | ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
| 702 | +; |
| 703 | +; GFX9-GISEL-LABEL: v_ctlz_zero_undef_i64_trunc: |
| 704 | +; GFX9-GISEL: ; %bb.0: |
| 705 | +; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 |
| 706 | +; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c |
| 707 | +; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v1, 3, v0 |
| 708 | +; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 709 | +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| 710 | +; GFX9-GISEL-NEXT: global_load_dwordx2 v[1:2], v1, s[4:5] |
| 711 | +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| 712 | +; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v1 |
| 713 | +; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v3, v2 |
| 714 | +; GFX9-GISEL-NEXT: v_add_u32_e32 v1, 32, v1 |
| 715 | +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 |
| 716 | +; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc |
| 717 | +; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[2:3] |
| 718 | +; GFX9-GISEL-NEXT: s_endpgm |
588 | 719 | %tid = call i32 @llvm.amdgcn.workitem.id.x()
|
589 | 720 | %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
|
590 | 721 | %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
@@ -650,6 +781,21 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_neg1(i32 addrspace(1)* n
|
650 | 781 | ; EG-NEXT: CNDE_INT T0.X, T0.X, literal.x, PV.W,
|
651 | 782 | ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
|
652 | 783 | ; EG-NEXT: -1(nan), 2(2.802597e-45)
|
| 784 | +; |
| 785 | +; GFX9-GISEL-LABEL: v_ctlz_zero_undef_i32_sel_eq_neg1: |
| 786 | +; GFX9-GISEL: ; %bb.0: |
| 787 | +; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 |
| 788 | +; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c |
| 789 | +; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 790 | +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| 791 | +; GFX9-GISEL-NEXT: global_load_dword v0, v0, s[4:5] |
| 792 | +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| 793 | +; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v0 |
| 794 | +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 |
| 795 | +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, -1, vcc |
| 796 | +; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0 |
| 797 | +; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[2:3] |
| 798 | +; GFX9-GISEL-NEXT: s_endpgm |
653 | 799 | %tid = call i32 @llvm.amdgcn.workitem.id.x()
|
654 | 800 | %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
|
655 | 801 | %val = load i32, i32 addrspace(1)* %in.gep
|
@@ -715,6 +861,21 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_neg1(i32 addrspace(1)* n
|
715 | 861 | ; EG-NEXT: CNDE_INT T0.X, T0.X, literal.x, PV.W,
|
716 | 862 | ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
|
717 | 863 | ; EG-NEXT: -1(nan), 2(2.802597e-45)
|
| 864 | +; |
| 865 | +; GFX9-GISEL-LABEL: v_ctlz_zero_undef_i32_sel_ne_neg1: |
| 866 | +; GFX9-GISEL: ; %bb.0: |
| 867 | +; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 |
| 868 | +; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c |
| 869 | +; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 870 | +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| 871 | +; GFX9-GISEL-NEXT: global_load_dword v0, v0, s[4:5] |
| 872 | +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| 873 | +; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v0 |
| 874 | +; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 |
| 875 | +; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, -1, v1, vcc |
| 876 | +; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0 |
| 877 | +; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[2:3] |
| 878 | +; GFX9-GISEL-NEXT: s_endpgm |
718 | 879 | %tid = call i32 @llvm.amdgcn.workitem.id.x()
|
719 | 880 | %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
|
720 | 881 | %val = load i32, i32 addrspace(1)* %in.gep
|
@@ -785,6 +946,27 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i8_sel_eq_neg1(i8 addrspace(1)* noa
|
785 | 946 | ; EG-NEXT: MOV * T0.Z, 0.0,
|
786 | 947 | ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
|
787 | 948 | ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
| 949 | +; |
| 950 | +; GFX9-GISEL-LABEL: v_ctlz_zero_undef_i8_sel_eq_neg1: |
| 951 | +; GFX9-GISEL: ; %bb.0: |
| 952 | +; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 |
| 953 | +; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c |
| 954 | +; GFX9-GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v0 |
| 955 | +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| 956 | +; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s4 |
| 957 | +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s5 |
| 958 | +; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v1, v0 |
| 959 | +; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v3, vcc |
| 960 | +; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off |
| 961 | +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| 962 | +; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0 |
| 963 | +; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v0 |
| 964 | +; GFX9-GISEL-NEXT: v_subrev_u32_e32 v1, 24, v1 |
| 965 | +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 |
| 966 | +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, -1, vcc |
| 967 | +; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0 |
| 968 | +; GFX9-GISEL-NEXT: global_store_byte v1, v0, s[2:3] |
| 969 | +; GFX9-GISEL-NEXT: s_endpgm |
788 | 970 | %tid = call i32 @llvm.amdgcn.workitem.id.x()
|
789 | 971 | %valptr.gep = getelementptr i8, i8 addrspace(1)* %valptr, i32 %tid
|
790 | 972 | %val = load i8, i8 addrspace(1)* %valptr.gep
|
@@ -868,6 +1050,25 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_neg1_two_use(i32 addrspa
|
868 | 1050 | ; EG-NEXT: CNDE_INT T0.X, T0.X, literal.x, PV.W,
|
869 | 1051 | ; EG-NEXT: LSHR * T3.X, KC0[2].Y, literal.y,
|
870 | 1052 | ; EG-NEXT: -1(nan), 2(2.802597e-45)
|
| 1053 | +; |
| 1054 | +; GFX9-GISEL-LABEL: v_ctlz_zero_undef_i32_sel_eq_neg1_two_use: |
| 1055 | +; GFX9-GISEL: ; %bb.0: |
| 1056 | +; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 |
| 1057 | +; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c |
| 1058 | +; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 1059 | +; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0 |
| 1060 | +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| 1061 | +; GFX9-GISEL-NEXT: global_load_dword v0, v0, s[4:5] |
| 1062 | +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| 1063 | +; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v2, v0 |
| 1064 | +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 |
| 1065 | +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, -1, vcc |
| 1066 | +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc |
| 1067 | +; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[2:3] |
| 1068 | +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| 1069 | +; GFX9-GISEL-NEXT: global_store_byte v[0:1], v2, off |
| 1070 | +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| 1071 | +; GFX9-GISEL-NEXT: s_endpgm |
871 | 1072 | %tid = call i32 @llvm.amdgcn.workitem.id.x()
|
872 | 1073 | %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
|
873 | 1074 | %val = load i32, i32 addrspace(1)* %in.gep
|
@@ -939,6 +1140,21 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_0(i32 addrspace(1)* noal
|
939 | 1140 | ; EG-NEXT: CNDE_INT T0.X, T0.X, 0.0, PV.W,
|
940 | 1141 | ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
|
941 | 1142 | ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
| 1143 | +; |
| 1144 | +; GFX9-GISEL-LABEL: v_ctlz_zero_undef_i32_sel_eq_0: |
| 1145 | +; GFX9-GISEL: ; %bb.0: |
| 1146 | +; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 |
| 1147 | +; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c |
| 1148 | +; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 1149 | +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| 1150 | +; GFX9-GISEL-NEXT: global_load_dword v0, v0, s[4:5] |
| 1151 | +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| 1152 | +; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v0 |
| 1153 | +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 |
| 1154 | +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc |
| 1155 | +; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0 |
| 1156 | +; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[2:3] |
| 1157 | +; GFX9-GISEL-NEXT: s_endpgm |
942 | 1158 | %tid = call i32 @llvm.amdgcn.workitem.id.x()
|
943 | 1159 | %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
|
944 | 1160 | %val = load i32, i32 addrspace(1)* %in.gep
|
@@ -1009,6 +1225,21 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_0(i32 addrspace(1)* noal
|
1009 | 1225 | ; EG-NEXT: CNDE_INT T0.X, T0.X, 0.0, PV.W,
|
1010 | 1226 | ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
|
1011 | 1227 | ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
| 1228 | +; |
| 1229 | +; GFX9-GISEL-LABEL: v_ctlz_zero_undef_i32_sel_ne_0: |
| 1230 | +; GFX9-GISEL: ; %bb.0: |
| 1231 | +; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 |
| 1232 | +; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c |
| 1233 | +; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 1234 | +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| 1235 | +; GFX9-GISEL-NEXT: global_load_dword v0, v0, s[4:5] |
| 1236 | +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| 1237 | +; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v0 |
| 1238 | +; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 |
| 1239 | +; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc |
| 1240 | +; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0 |
| 1241 | +; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[2:3] |
| 1242 | +; GFX9-GISEL-NEXT: s_endpgm |
1012 | 1243 | %tid = call i32 @llvm.amdgcn.workitem.id.x()
|
1013 | 1244 | %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
|
1014 | 1245 | %val = load i32, i32 addrspace(1)* %in.gep
|
@@ -1080,6 +1311,21 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_cmp_non0(i32 addrspace(1
|
1080 | 1311 | ; EG-NEXT: CNDE_INT T0.X, PS, PV.W, 0.0,
|
1081 | 1312 | ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
|
1082 | 1313 | ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
| 1314 | +; |
| 1315 | +; GFX9-GISEL-LABEL: v_ctlz_zero_undef_i32_sel_eq_cmp_non0: |
| 1316 | +; GFX9-GISEL: ; %bb.0: |
| 1317 | +; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 |
| 1318 | +; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c |
| 1319 | +; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 1320 | +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| 1321 | +; GFX9-GISEL-NEXT: global_load_dword v0, v0, s[4:5] |
| 1322 | +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| 1323 | +; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v0 |
| 1324 | +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 |
| 1325 | +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc |
| 1326 | +; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0 |
| 1327 | +; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[2:3] |
| 1328 | +; GFX9-GISEL-NEXT: s_endpgm |
1083 | 1329 | %tid = call i32 @llvm.amdgcn.workitem.id.x()
|
1084 | 1330 | %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
|
1085 | 1331 | %val = load i32, i32 addrspace(1)* %in.gep
|
@@ -1151,6 +1397,21 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_cmp_non0(i32 addrspace(1
|
1151 | 1397 | ; EG-NEXT: CNDE_INT T0.X, PS, 0.0, PV.W,
|
1152 | 1398 | ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
|
1153 | 1399 | ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
| 1400 | +; |
| 1401 | +; GFX9-GISEL-LABEL: v_ctlz_zero_undef_i32_sel_ne_cmp_non0: |
| 1402 | +; GFX9-GISEL: ; %bb.0: |
| 1403 | +; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 |
| 1404 | +; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c |
| 1405 | +; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 1406 | +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| 1407 | +; GFX9-GISEL-NEXT: global_load_dword v0, v0, s[4:5] |
| 1408 | +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| 1409 | +; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v0 |
| 1410 | +; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0 |
| 1411 | +; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc |
| 1412 | +; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0 |
| 1413 | +; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[2:3] |
| 1414 | +; GFX9-GISEL-NEXT: s_endpgm |
1154 | 1415 | %tid = call i32 @llvm.amdgcn.workitem.id.x()
|
1155 | 1416 | %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
|
1156 | 1417 | %val = load i32, i32 addrspace(1)* %in.gep
|
|
0 commit comments