diff --git a/llvm/test/CodeGen/AMDGPU/uniform-shifts-widening.ll b/llvm/test/CodeGen/AMDGPU/uniform-shifts-widening.ll new file mode 100644 index 0000000000000..386c58ba2145a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/uniform-shifts-widening.ll @@ -0,0 +1,289 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -amdgpu-codegenprepare-widen-16-bit-ops=0 -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s +; RUN: llc -amdgpu-codegenprepare-widen-16-bit-ops=0 -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s +; RUN: llc -amdgpu-codegenprepare-widen-16-bit-ops=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s +; RUN: llc -amdgpu-codegenprepare-widen-16-bit-ops=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s +; RUN: llc -amdgpu-codegenprepare-widen-16-bit-ops=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s + +define amdgpu_ps i8 @s_ashr_i8(i8 inreg %value, i8 inreg %amount) { +; GFX6-LABEL: s_ashr_i8: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_sext_i32_i8 s0, s0 +; GFX6-NEXT: s_ashr_i32 s0, s0, s1 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: s_ashr_i8: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_bfe_i32 s0, s0, 0x80000 +; GFX8-NEXT: s_sext_i32_i16 s0, s0 +; GFX8-NEXT: s_ashr_i32 s0, s0, s1 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_ashr_i8: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_bfe_i32 s0, s0, 0x80000 +; GFX9-NEXT: s_sext_i32_i16 s0, s0 +; GFX9-NEXT: s_ashr_i32 s0, s0, s1 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10PLUS-LABEL: s_ashr_i8: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: s_bfe_i32 s0, s0, 0x80000 +; GFX10PLUS-NEXT: s_sext_i32_i16 s0, s0 +; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, s1 +; GFX10PLUS-NEXT: ; return to shader part epilog + %result = ashr i8 %value, %amount + ret i8 %result +} + +define amdgpu_ps i8 @s_ashr_i8_7(i8 inreg %value) { +; GFX6-LABEL: s_ashr_i8_7: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_sext_i32_i8 s0, s0 +; GFX6-NEXT: s_ashr_i32 s0, s0, 7 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: s_ashr_i8_7: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_bfe_i32 s0, s0, 0x80000 +; GFX8-NEXT: s_sext_i32_i16 s0, s0 +; GFX8-NEXT: s_lshr_b32 s0, s0, 7 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_ashr_i8_7: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_bfe_i32 s0, s0, 0x80000 +; GFX9-NEXT: s_sext_i32_i16 s0, s0 +; GFX9-NEXT: s_lshr_b32 s0, s0, 7 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10PLUS-LABEL: s_ashr_i8_7: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: s_bfe_i32 s0, s0, 0x80000 +; GFX10PLUS-NEXT: s_sext_i32_i16 s0, s0 +; GFX10PLUS-NEXT: s_lshr_b32 s0, s0, 7 +; GFX10PLUS-NEXT: ; return to shader part epilog + %result = ashr i8 %value, 7 + ret i8 %result +} + + +define amdgpu_ps i16 @s_ashr_i16(i16 inreg %value, i16 inreg %amount) { +; GFX6-LABEL: s_ashr_i16: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_sext_i32_i16 s0, s0 +; GFX6-NEXT: s_ashr_i32 s0, s0, s1 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: s_ashr_i16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 +; GFX8-NEXT: s_sext_i32_i16 s0, s0 +; GFX8-NEXT: s_ashr_i32 s0, s0, s1 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_ashr_i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_and_b32 s1, 0xffff, s1 +; GFX9-NEXT: s_sext_i32_i16 s0, s0 +; GFX9-NEXT: s_ashr_i32 s0, s0, s1 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10PLUS-LABEL: s_ashr_i16: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: s_and_b32 s1, 0xffff, s1 +; GFX10PLUS-NEXT: s_sext_i32_i16 s0, s0 +; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, s1 +; GFX10PLUS-NEXT: ; return to shader part epilog + %result = ashr i16 %value, %amount + ret i16 %result +} + +define amdgpu_ps i16 @s_ashr_i16_15(i16 inreg %value) { +; GFX6-LABEL: s_ashr_i16_15: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_sext_i32_i16 s0, s0 +; GFX6-NEXT: s_ashr_i32 s0, s0, 15 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: s_ashr_i16_15: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_sext_i32_i16 s0, s0 +; GFX8-NEXT: s_lshr_b32 s0, s0, 15 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_ashr_i16_15: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_sext_i32_i16 s0, s0 +; GFX9-NEXT: s_lshr_b32 s0, s0, 15 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10PLUS-LABEL: s_ashr_i16_15: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: s_sext_i32_i16 s0, s0 +; GFX10PLUS-NEXT: s_lshr_b32 s0, s0, 15 +; GFX10PLUS-NEXT: ; return to shader part epilog + %result = ashr i16 %value, 15 + ret i16 %result +} + +define amdgpu_ps i16 @s_lshr_i16(i16 inreg %value, i16 inreg %amount) { +; GFX6-LABEL: s_lshr_i16: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_and_b32 s0, s0, 0xffff +; GFX6-NEXT: s_lshr_b32 s0, s0, s1 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: s_lshr_i16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 +; GFX8-NEXT: s_and_b32 s0, 0xffff, s0 +; GFX8-NEXT: s_lshr_b32 s0, s0, s1 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_lshr_i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_and_b32 s1, 0xffff, s1 +; GFX9-NEXT: s_and_b32 s0, 0xffff, s0 +; GFX9-NEXT: s_lshr_b32 s0, s0, s1 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10PLUS-LABEL: s_lshr_i16: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: s_and_b32 s1, 0xffff, s1 +; GFX10PLUS-NEXT: s_and_b32 s0, 0xffff, s0 +; GFX10PLUS-NEXT: s_lshr_b32 s0, s0, s1 +; GFX10PLUS-NEXT: ; return to shader part epilog + %result = lshr i16 %value, %amount + ret i16 %result +} + +define amdgpu_ps i16 @s_lshr_i16_15(i16 inreg %value) { +; GFX6-LABEL: s_lshr_i16_15: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_bfe_u32 s0, s0, 0x1000f +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: s_lshr_i16_15: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_and_b32 s0, 0xffff, s0 +; GFX8-NEXT: s_lshr_b32 s0, s0, 15 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_lshr_i16_15: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_and_b32 s0, 0xffff, s0 +; GFX9-NEXT: s_lshr_b32 s0, s0, 15 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10PLUS-LABEL: s_lshr_i16_15: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: s_and_b32 s0, 0xffff, s0 +; GFX10PLUS-NEXT: s_lshr_b32 s0, s0, 15 +; GFX10PLUS-NEXT: ; return to shader part epilog + %result = lshr i16 %value, 15 + ret i16 %result +} + +define amdgpu_ps i8 @s_lshr_i8(i8 inreg %value, i8 inreg %amount) { +; GCN-LABEL: s_lshr_i8: +; GCN: ; %bb.0: +; GCN-NEXT: s_and_b32 s0, s0, 0xff +; GCN-NEXT: s_lshr_b32 s0, s0, s1 +; GCN-NEXT: ; return to shader part epilog +; +; GFX10PLUS-LABEL: s_lshr_i8: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: s_and_b32 s0, s0, 0xff +; GFX10PLUS-NEXT: s_lshr_b32 s0, s0, s1 +; GFX10PLUS-NEXT: ; return to shader part epilog + %result = lshr i8 %value, %amount + ret i8 %result +} + +define amdgpu_ps i8 @s_lshr_i8_7(i8 inreg %value) { +; GCN-LABEL: s_lshr_i8_7: +; GCN: ; %bb.0: +; GCN-NEXT: s_bfe_u32 s0, s0, 0x10007 +; GCN-NEXT: ; return to shader part epilog +; +; GFX10PLUS-LABEL: s_lshr_i8_7: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: s_bfe_u32 s0, s0, 0x10007 +; GFX10PLUS-NEXT: ; return to shader part epilog + %result = lshr i8 %value, 7 + ret i8 %result +} + +define amdgpu_ps i16 @s_lshl_i16(i16 inreg %value, i16 inreg %amount) { +; GFX6-LABEL: s_lshl_i16: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_lshl_b32 s0, s0, s1 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: s_lshl_i16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 +; GFX8-NEXT: s_lshl_b32 s0, s0, s1 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_lshl_i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_and_b32 s1, 0xffff, s1 +; GFX9-NEXT: s_lshl_b32 s0, s0, s1 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10PLUS-LABEL: s_lshl_i16: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: s_and_b32 s1, 0xffff, s1 +; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, s1 +; GFX10PLUS-NEXT: ; return to shader part epilog + %result = shl i16 %value, %amount + ret i16 %result +} + +define amdgpu_ps i16 @s_lshl_i16_15(i16 inreg %value) { +; GCN-LABEL: s_lshl_i16_15: +; GCN: ; %bb.0: +; GCN-NEXT: s_lshl_b32 s0, s0, 15 +; GCN-NEXT: ; return to shader part epilog +; +; GFX10PLUS-LABEL: s_lshl_i16_15: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 15 +; GFX10PLUS-NEXT: ; return to shader part epilog + %result = shl i16 %value, 15 + ret i16 %result +} + +define amdgpu_ps i8 @s_lshl_i8(i8 inreg %value, i8 inreg %amount) { +; GCN-LABEL: s_lshl_i8: +; GCN: ; %bb.0: +; GCN-NEXT: s_lshl_b32 s0, s0, s1 +; GCN-NEXT: ; return to shader part epilog +; +; GFX10PLUS-LABEL: s_lshl_i8: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, s1 +; GFX10PLUS-NEXT: ; return to shader part epilog + %result = shl i8 %value, %amount + ret i8 %result +} + +define amdgpu_ps i8 @s_lshl_i8_7(i8 inreg %value) { +; GCN-LABEL: s_lshl_i8_7: +; GCN: ; %bb.0: +; GCN-NEXT: s_lshl_b32 s0, s0, 7 +; GCN-NEXT: ; return to shader part epilog +; +; GFX10PLUS-LABEL: s_lshl_i8_7: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 7 +; GFX10PLUS-NEXT: ; return to shader part epilog + %result = shl i8 %value, 7 + ret i8 %result +} + +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GFX10: {{.*}} +; GFX11: {{.*}}