Skip to content

Commit b075896

Browse files
committed
update tests
1 parent 25f6d5b commit b075896

File tree

3 files changed

+50
-17
lines changed

3 files changed

+50
-17
lines changed

llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll

Lines changed: 13 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -74,7 +74,8 @@ define float @test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp
7474
; GFX10-LABEL: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true:
7575
; GFX10: ; %bb.0:
7676
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
77-
; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
77+
; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0
78+
; GFX10-NEXT: v_med3_f32 v0, v0, 1.0, 0
7879
; GFX10-NEXT: s_setpc_b64 s[30:31]
7980
;
8081
; GFX12-LABEL: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true:
@@ -84,7 +85,9 @@ define float @test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp
8485
; GFX12-NEXT: s_wait_samplecnt 0x0
8586
; GFX12-NEXT: s_wait_bvhcnt 0x0
8687
; GFX12-NEXT: s_wait_kmcnt 0x0
87-
; GFX12-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
88+
; GFX12-NEXT: v_mul_f32_e32 v0, 2.0, v0
89+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
90+
; GFX12-NEXT: v_med3_num_f32 v0, v0, 1.0, 0
8891
; GFX12-NEXT: s_setpc_b64 s[30:31]
8992
%fmul = fmul float %a, 2.0
9093
%fmed = call float @llvm.amdgcn.fmed3.f32(float %fmul, float 1.0, float 0.0)
@@ -134,7 +137,9 @@ define float @test_fmed3_f32_maybe_NaN_ieee_false(float %a) #1 {
134137
; GFX12-NEXT: s_wait_samplecnt 0x0
135138
; GFX12-NEXT: s_wait_bvhcnt 0x0
136139
; GFX12-NEXT: s_wait_kmcnt 0x0
137-
; GFX12-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
140+
; GFX12-NEXT: v_mul_f32_e32 v0, 2.0, v0
141+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
142+
; GFX12-NEXT: v_med3_num_f32 v0, v0, 1.0, 0
138143
; GFX12-NEXT: s_setpc_b64 s[30:31]
139144
%fmul = fmul float %a, 2.0
140145
%fmed = call float @llvm.amdgcn.fmed3.f32(float %fmul, float 1.0, float 0.0)
@@ -172,7 +177,8 @@ define float @test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true(float %a) #2
172177
; GFX10-LABEL: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true:
173178
; GFX10: ; %bb.0:
174179
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
175-
; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
180+
; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0
181+
; GFX10-NEXT: v_med3_f32 v0, v0, 0, 1.0
176182
; GFX10-NEXT: s_setpc_b64 s[30:31]
177183
;
178184
; GFX12-LABEL: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true:
@@ -182,7 +188,9 @@ define float @test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true(float %a) #2
182188
; GFX12-NEXT: s_wait_samplecnt 0x0
183189
; GFX12-NEXT: s_wait_bvhcnt 0x0
184190
; GFX12-NEXT: s_wait_kmcnt 0x0
185-
; GFX12-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
191+
; GFX12-NEXT: v_mul_f32_e32 v0, 2.0, v0
192+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
193+
; GFX12-NEXT: v_med3_num_f32 v0, v0, 0, 1.0
186194
; GFX12-NEXT: s_setpc_b64 s[30:31]
187195
%fmul = fmul float %a, 2.0
188196
%fmed = call float @llvm.amdgcn.fmed3.f32(float %fmul, float 0.0, float 1.0)

llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -95,7 +95,9 @@ define <2 x half> @test_min_max_splat_padded_with_undef(<2 x half> %a) #2 {
9595
; GFX10-LABEL: test_min_max_splat_padded_with_undef:
9696
; GFX10: ; %bb.0:
9797
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
98-
; GFX10-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
98+
; GFX10-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0]
99+
; GFX10-NEXT: v_pk_max_f16 v0, v0, 0
100+
; GFX10-NEXT: v_pk_min_f16 v0, v0, 1.0
99101
; GFX10-NEXT: s_setpc_b64 s[30:31]
100102
;
101103
; GFX12-LABEL: test_min_max_splat_padded_with_undef:
@@ -105,7 +107,10 @@ define <2 x half> @test_min_max_splat_padded_with_undef(<2 x half> %a) #2 {
105107
; GFX12-NEXT: s_wait_samplecnt 0x0
106108
; GFX12-NEXT: s_wait_bvhcnt 0x0
107109
; GFX12-NEXT: s_wait_kmcnt 0x0
108-
; GFX12-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
110+
; GFX12-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0]
111+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
112+
; GFX12-NEXT: v_pk_max_num_f16 v0, v0, 0
113+
; GFX12-NEXT: v_pk_min_num_f16 v0, v0, 1.0
109114
; GFX12-NEXT: s_setpc_b64 s[30:31]
110115
%fmul = fmul <2 x half> %a, <half 2.0, half 2.0>
111116
%maxnum = call <2 x half> @llvm.maxnum.v2f16(<2 x half> <half 0.0, half poison>, <2 x half> %fmul)

llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-fmed3-const.mir

Lines changed: 30 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -162,8 +162,12 @@ body: |
162162
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
163163
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
164164
; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
165-
; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
166-
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
165+
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
166+
; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
167+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
168+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
169+
; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FMUL]], [[COPY2]], [[COPY3]]
170+
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
167171
;
168172
; GFX12-LABEL: name: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true
169173
; GFX12: liveins: $vgpr0
@@ -172,8 +176,12 @@ body: |
172176
; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
173177
; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
174178
; GFX12-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
175-
; GFX12-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
176-
; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
179+
; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
180+
; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
181+
; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
182+
; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
183+
; GFX12-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FMUL]], [[COPY2]], [[COPY3]]
184+
; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
177185
%0:vgpr(s32) = COPY $vgpr0
178186
%2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
179187
%8:vgpr(s32) = COPY %2(s32)
@@ -222,8 +230,12 @@ body: |
222230
; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
223231
; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
224232
; GFX12-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
225-
; GFX12-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
226-
; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
233+
; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
234+
; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
235+
; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
236+
; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
237+
; GFX12-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FMUL]], [[COPY2]], [[COPY3]]
238+
; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
227239
%0:vgpr(s32) = COPY $vgpr0
228240
%2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
229241
%8:vgpr(s32) = COPY %2(s32)
@@ -307,8 +319,12 @@ body: |
307319
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
308320
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
309321
; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
310-
; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
311-
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
322+
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
323+
; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
324+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
325+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
326+
; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FMUL]], [[COPY2]], [[COPY3]]
327+
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
312328
;
313329
; GFX12-LABEL: name: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true
314330
; GFX12: liveins: $vgpr0
@@ -317,8 +333,12 @@ body: |
317333
; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
318334
; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
319335
; GFX12-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
320-
; GFX12-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
321-
; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
336+
; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
337+
; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
338+
; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
339+
; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
340+
; GFX12-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FMUL]], [[COPY2]], [[COPY3]]
341+
; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
322342
%0:vgpr(s32) = COPY $vgpr0
323343
%2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
324344
%8:vgpr(s32) = COPY %2(s32)

0 commit comments

Comments
 (0)