Skip to content

Commit afcba08

Browse files
committed
update tests
1 parent 3b299a1 commit afcba08

6 files changed

+78
-84
lines changed

llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,8 @@ define float @test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp
7474
; GFX10-LABEL: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true:
7575
; GFX10: ; %bb.0:
7676
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
77-
; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
77+
; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0
78+
; GFX10-NEXT: v_med3_f32 v0, v0, 1.0, 0
7879
; GFX10-NEXT: s_setpc_b64 s[30:31]
7980
;
8081
; GFX12-LABEL: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true:
@@ -84,7 +85,9 @@ define float @test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp
8485
; GFX12-NEXT: s_wait_samplecnt 0x0
8586
; GFX12-NEXT: s_wait_bvhcnt 0x0
8687
; GFX12-NEXT: s_wait_kmcnt 0x0
87-
; GFX12-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
88+
; GFX12-NEXT: v_mul_f32_e32 v0, 2.0, v0
89+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
90+
; GFX12-NEXT: v_med3_num_f32 v0, v0, 1.0, 0
8891
; GFX12-NEXT: s_setpc_b64 s[30:31]
8992
%fmul = fmul float %a, 2.0
9093
%fmed = call float @llvm.amdgcn.fmed3.f32(float %fmul, float 1.0, float 0.0)
@@ -134,7 +137,9 @@ define float @test_fmed3_f32_maybe_NaN_ieee_false(float %a) #1 {
134137
; GFX12-NEXT: s_wait_samplecnt 0x0
135138
; GFX12-NEXT: s_wait_bvhcnt 0x0
136139
; GFX12-NEXT: s_wait_kmcnt 0x0
137-
; GFX12-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
140+
; GFX12-NEXT: v_mul_f32_e32 v0, 2.0, v0
141+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
142+
; GFX12-NEXT: v_med3_num_f32 v0, v0, 1.0, 0
138143
; GFX12-NEXT: s_setpc_b64 s[30:31]
139144
%fmul = fmul float %a, 2.0
140145
%fmed = call float @llvm.amdgcn.fmed3.f32(float %fmul, float 1.0, float 0.0)
@@ -172,7 +177,8 @@ define float @test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true(float %a) #2
172177
; GFX10-LABEL: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true:
173178
; GFX10: ; %bb.0:
174179
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
175-
; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
180+
; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0
181+
; GFX10-NEXT: v_med3_f32 v0, v0, 0, 1.0
176182
; GFX10-NEXT: s_setpc_b64 s[30:31]
177183
;
178184
; GFX12-LABEL: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true:
@@ -182,7 +188,9 @@ define float @test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true(float %a) #2
182188
; GFX12-NEXT: s_wait_samplecnt 0x0
183189
; GFX12-NEXT: s_wait_bvhcnt 0x0
184190
; GFX12-NEXT: s_wait_kmcnt 0x0
185-
; GFX12-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
191+
; GFX12-NEXT: v_mul_f32_e32 v0, 2.0, v0
192+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
193+
; GFX12-NEXT: v_med3_num_f32 v0, v0, 0, 1.0
186194
; GFX12-NEXT: s_setpc_b64 s[30:31]
187195
%fmul = fmul float %a, 2.0
188196
%fmed = call float @llvm.amdgcn.fmed3.f32(float %fmul, float 0.0, float 1.0)

llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll

Lines changed: 13 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,9 @@ define <2 x half> @test_min_max_splat_padded_with_undef(<2 x half> %a) #2 {
9595
; GFX10-LABEL: test_min_max_splat_padded_with_undef:
9696
; GFX10: ; %bb.0:
9797
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
98-
; GFX10-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
98+
; GFX10-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0]
99+
; GFX10-NEXT: v_pk_max_f16 v0, v0, 0
100+
; GFX10-NEXT: v_pk_min_f16 v0, v0, 1.0
99101
; GFX10-NEXT: s_setpc_b64 s[30:31]
100102
;
101103
; GFX12-LABEL: test_min_max_splat_padded_with_undef:
@@ -105,7 +107,10 @@ define <2 x half> @test_min_max_splat_padded_with_undef(<2 x half> %a) #2 {
105107
; GFX12-NEXT: s_wait_samplecnt 0x0
106108
; GFX12-NEXT: s_wait_bvhcnt 0x0
107109
; GFX12-NEXT: s_wait_kmcnt 0x0
108-
; GFX12-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
110+
; GFX12-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0]
111+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
112+
; GFX12-NEXT: v_pk_max_num_f16 v0, v0, 0
113+
; GFX12-NEXT: v_pk_min_num_f16 v0, v0, 1.0
109114
; GFX12-NEXT: s_setpc_b64 s[30:31]
110115
%fmul = fmul <2 x half> %a, <half 2.0, half 2.0>
111116
%maxnum = call <2 x half> @llvm.maxnum.v2f16(<2 x half> <half 0.0, half poison>, <2 x half> %fmul)
@@ -305,9 +310,7 @@ define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 {
305310
; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_false:
306311
; GFX10: ; %bb.0:
307312
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
308-
; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0
309-
; GFX10-NEXT: v_max_f32_e32 v0, 0, v0
310-
; GFX10-NEXT: v_min_f32_e32 v0, 1.0, v0
313+
; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
311314
; GFX10-NEXT: s_setpc_b64 s[30:31]
312315
;
313316
; GFX12-LABEL: test_min_max_maybe_NaN_input_ieee_false:
@@ -330,8 +333,7 @@ define float @test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false(float %a) #
330333
; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false:
331334
; GFX10: ; %bb.0:
332335
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
333-
; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0
334-
; GFX10-NEXT: v_med3_f32 v0, v0, 0, 1.0
336+
; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
335337
; GFX10-NEXT: s_setpc_b64 s[30:31]
336338
;
337339
; GFX12-LABEL: test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false:
@@ -355,9 +357,7 @@ define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 {
355357
; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_true:
356358
; GFX10: ; %bb.0:
357359
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
358-
; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0
359-
; GFX10-NEXT: v_min_f32_e32 v0, 1.0, v0
360-
; GFX10-NEXT: v_max_f32_e32 v0, 0, v0
360+
; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
361361
; GFX10-NEXT: s_setpc_b64 s[30:31]
362362
;
363363
; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_true:
@@ -367,9 +367,7 @@ define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 {
367367
; GFX12-NEXT: s_wait_samplecnt 0x0
368368
; GFX12-NEXT: s_wait_bvhcnt 0x0
369369
; GFX12-NEXT: s_wait_kmcnt 0x0
370-
; GFX12-NEXT: v_mul_f32_e32 v0, 2.0, v0
371-
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
372-
; GFX12-NEXT: v_minmax_num_f32 v0, v0, 1.0, 0
370+
; GFX12-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
373371
; GFX12-NEXT: s_setpc_b64 s[30:31]
374372
%fmul = fmul float %a, 2.0
375373
%minnum = call float @llvm.minnum.f32(float %fmul, float 1.0)
@@ -381,9 +379,7 @@ define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
381379
; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_false:
382380
; GFX10: ; %bb.0:
383381
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
384-
; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0
385-
; GFX10-NEXT: v_min_f32_e32 v0, 1.0, v0
386-
; GFX10-NEXT: v_max_f32_e32 v0, 0, v0
382+
; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
387383
; GFX10-NEXT: s_setpc_b64 s[30:31]
388384
;
389385
; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_false:
@@ -393,9 +389,7 @@ define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
393389
; GFX12-NEXT: s_wait_samplecnt 0x0
394390
; GFX12-NEXT: s_wait_bvhcnt 0x0
395391
; GFX12-NEXT: s_wait_kmcnt 0x0
396-
; GFX12-NEXT: v_mul_f32_e32 v0, 2.0, v0
397-
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
398-
; GFX12-NEXT: v_minmax_num_f32 v0, v0, 1.0, 0
392+
; GFX12-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
399393
; GFX12-NEXT: s_setpc_b64 s[30:31]
400394
%fmul = fmul float %a, 2.0
401395
%minnum = call float @llvm.minnum.f32(float %fmul, float 1.0)

llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -456,15 +456,13 @@ define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 {
456456
; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_false:
457457
; GFX10: ; %bb.0:
458458
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
459-
; GFX10-NEXT: v_max_f32_e32 v0, 2.0, v0
460-
; GFX10-NEXT: v_min_f32_e32 v0, 4.0, v0
459+
; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
461460
; GFX10-NEXT: s_setpc_b64 s[30:31]
462461
;
463462
; GFX8-LABEL: test_min_max_maybe_NaN_input_ieee_false:
464463
; GFX8: ; %bb.0:
465464
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
466-
; GFX8-NEXT: v_max_f32_e32 v0, 2.0, v0
467-
; GFX8-NEXT: v_min_f32_e32 v0, 4.0, v0
465+
; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
468466
; GFX8-NEXT: s_setpc_b64 s[30:31]
469467
;
470468
; GFX12-LABEL: test_min_max_maybe_NaN_input_ieee_false:
@@ -489,15 +487,13 @@ define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
489487
; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_false:
490488
; GFX10: ; %bb.0:
491489
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
492-
; GFX10-NEXT: v_min_f32_e32 v0, 4.0, v0
493-
; GFX10-NEXT: v_max_f32_e32 v0, 2.0, v0
490+
; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
494491
; GFX10-NEXT: s_setpc_b64 s[30:31]
495492
;
496493
; GFX8-LABEL: test_max_min_maybe_NaN_input_ieee_false:
497494
; GFX8: ; %bb.0:
498495
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
499-
; GFX8-NEXT: v_min_f32_e32 v0, 4.0, v0
500-
; GFX8-NEXT: v_max_f32_e32 v0, 2.0, v0
496+
; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
501497
; GFX8-NEXT: s_setpc_b64 s[30:31]
502498
;
503499
; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_false:
@@ -509,7 +505,7 @@ define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
509505
; GFX12-NEXT: s_wait_kmcnt 0x0
510506
; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0
511507
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
512-
; GFX12-NEXT: v_minmax_num_f32 v0, v0, 4.0, 2.0
508+
; GFX12-NEXT: v_med3_num_f32 v0, v0, 2.0, 4.0
513509
; GFX12-NEXT: s_setpc_b64 s[30:31]
514510
%minnum = call float @llvm.minnum.f32(float %a, float 4.0)
515511
%fmed = call float @llvm.maxnum.f32(float %minnum, float 2.0)
@@ -522,16 +518,14 @@ define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 {
522518
; GFX10: ; %bb.0:
523519
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
524520
; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
525-
; GFX10-NEXT: v_min_f32_e32 v0, 4.0, v0
526-
; GFX10-NEXT: v_max_f32_e32 v0, 2.0, v0
521+
; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
527522
; GFX10-NEXT: s_setpc_b64 s[30:31]
528523
;
529524
; GFX8-LABEL: test_max_min_maybe_NaN_input_ieee_true:
530525
; GFX8: ; %bb.0:
531526
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
532527
; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
533-
; GFX8-NEXT: v_min_f32_e32 v0, 4.0, v0
534-
; GFX8-NEXT: v_max_f32_e32 v0, 2.0, v0
528+
; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
535529
; GFX8-NEXT: s_setpc_b64 s[30:31]
536530
;
537531
; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_true:
@@ -543,7 +537,7 @@ define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 {
543537
; GFX12-NEXT: s_wait_kmcnt 0x0
544538
; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0
545539
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
546-
; GFX12-NEXT: v_minmax_num_f32 v0, v0, 4.0, 2.0
540+
; GFX12-NEXT: v_med3_num_f32 v0, v0, 2.0, 4.0
547541
; GFX12-NEXT: s_setpc_b64 s[30:31]
548542
%minnum = call float @llvm.minnum.f32(float %a, float 4.0)
549543
%fmed = call float @llvm.maxnum.f32(float %minnum, float 2.0)

llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-fmed3-const.mir

Lines changed: 30 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -162,8 +162,12 @@ body: |
162162
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
163163
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
164164
; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
165-
; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
166-
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
165+
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
166+
; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
167+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
168+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
169+
; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FMUL]], [[COPY2]], [[COPY3]]
170+
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
167171
;
168172
; GFX12-LABEL: name: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true
169173
; GFX12: liveins: $vgpr0
@@ -172,8 +176,12 @@ body: |
172176
; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
173177
; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
174178
; GFX12-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
175-
; GFX12-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
176-
; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
179+
; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
180+
; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
181+
; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
182+
; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
183+
; GFX12-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FMUL]], [[COPY2]], [[COPY3]]
184+
; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
177185
%0:vgpr(s32) = COPY $vgpr0
178186
%2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
179187
%8:vgpr(s32) = COPY %2(s32)
@@ -222,8 +230,12 @@ body: |
222230
; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
223231
; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
224232
; GFX12-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
225-
; GFX12-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
226-
; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
233+
; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
234+
; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
235+
; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
236+
; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
237+
; GFX12-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FMUL]], [[COPY2]], [[COPY3]]
238+
; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
227239
%0:vgpr(s32) = COPY $vgpr0
228240
%2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
229241
%8:vgpr(s32) = COPY %2(s32)
@@ -307,8 +319,12 @@ body: |
307319
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
308320
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
309321
; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
310-
; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
311-
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
322+
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
323+
; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
324+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
325+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
326+
; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FMUL]], [[COPY2]], [[COPY3]]
327+
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
312328
;
313329
; GFX12-LABEL: name: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true
314330
; GFX12: liveins: $vgpr0
@@ -317,8 +333,12 @@ body: |
317333
; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
318334
; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
319335
; GFX12-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
320-
; GFX12-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
321-
; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
336+
; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
337+
; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
338+
; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
339+
; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
340+
; GFX12-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FMUL]], [[COPY2]], [[COPY3]]
341+
; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
322342
%0:vgpr(s32) = COPY $vgpr0
323343
%2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
324344
%8:vgpr(s32) = COPY %2(s32)

llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-minmax-const.mir

Lines changed: 8 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -441,13 +441,8 @@ body: |
441441
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
442442
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
443443
; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
444-
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
445-
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
446-
; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(s32) = G_FMAXNUM [[FMUL]], [[COPY2]]
447-
; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
448-
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
449-
; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(s32) = G_FMINNUM [[FMAXNUM]], [[COPY3]]
450-
; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM]](s32)
444+
; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
445+
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
451446
%0:vgpr(s32) = COPY $vgpr0
452447
%2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
453448
%9:vgpr(s32) = COPY %2(s32)
@@ -481,13 +476,9 @@ body: |
481476
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
482477
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
483478
; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
484-
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
485479
; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s32) = G_FCANONICALIZE [[FMUL]]
486-
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
487-
; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
488-
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
489-
; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FCANONICALIZE]], [[COPY2]], [[COPY3]]
490-
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
480+
; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FCANONICALIZE]]
481+
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
491482
%0:vgpr(s32) = COPY $vgpr0
492483
%2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
493484
%10:vgpr(s32) = COPY %2(s32)
@@ -522,14 +513,9 @@ body: |
522513
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
523514
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
524515
; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
525-
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
526516
; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s32) = G_FCANONICALIZE [[FMUL]]
527-
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
528-
; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY2]]
529-
; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
530-
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
531-
; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(s32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[COPY3]]
532-
; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32)
517+
; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FCANONICALIZE]]
518+
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
533519
%0:vgpr(s32) = COPY $vgpr0
534520
%2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
535521
%10:vgpr(s32) = COPY %2(s32)
@@ -564,13 +550,8 @@ body: |
564550
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
565551
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
566552
; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
567-
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
568-
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
569-
; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(s32) = G_FMINNUM [[FMUL]], [[COPY2]]
570-
; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
571-
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
572-
; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(s32) = G_FMAXNUM [[FMINNUM]], [[COPY3]]
573-
; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM]](s32)
553+
; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
554+
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
574555
%0:vgpr(s32) = COPY $vgpr0
575556
%2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
576557
%9:vgpr(s32) = COPY %2(s32)

0 commit comments

Comments
 (0)