@@ -31,7 +31,7 @@ define <32 x i8> @test_pabsb(<32 x i8> %a0, <32 x i8> *%a1) {
; ZNVER1-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50]
; ZNVER1-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
- ; ZNVER1-NEXT: retq # sched: [5:0.50]
+ ; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %a0)
%2 = load <32 x i8>, <32 x i8> *%a1, align 32
%3 = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %2)
@@ -67,7 +67,7 @@ define <8 x i32> @test_pabsd(<8 x i32> %a0, <8 x i32> *%a1) {
; ZNVER1-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50]
; ZNVER1-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
- ; ZNVER1-NEXT: retq # sched: [5:0.50]
+ ; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %a0)
%2 = load <8 x i32>, <8 x i32> *%a1, align 32
%3 = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %2)
@@ -103,7 +103,7 @@ define <16 x i16> @test_pabsw(<16 x i16> %a0, <16 x i16> *%a1) {
; ZNVER1-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50]
; ZNVER1-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
- ; ZNVER1-NEXT: retq # sched: [5:0.50]
+ ; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %a0)
%2 = load <16 x i16>, <16 x i16> *%a1, align 32
%3 = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %2)
@@ -135,7 +135,7 @@ define <32 x i8> @test_paddb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
- ; ZNVER1-NEXT: retq # sched: [5:0.50]
+ ; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = add <32 x i8> %a0, %a1
%2 = load <32 x i8>, <32 x i8> *%a2, align 32
%3 = add <32 x i8> %1, %2
@@ -165,7 +165,7 @@ define <8 x i32> @test_paddd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
- ; ZNVER1-NEXT: retq # sched: [5:0.50]
+ ; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = add <8 x i32> %a0, %a1
%2 = load <8 x i32>, <8 x i32> *%a2, align 32
%3 = add <8 x i32> %1, %2
@@ -195,7 +195,7 @@ define <4 x i64> @test_paddq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
- ; ZNVER1-NEXT: retq # sched: [5:0.50]
+ ; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = add <4 x i64> %a0, %a1
%2 = load <4 x i64>, <4 x i64> *%a2, align 32
%3 = add <4 x i64> %1, %2
@@ -225,7 +225,7 @@ define <16 x i16> @test_paddw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
- ; ZNVER1-NEXT: retq # sched: [5:0.50]
+ ; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = add <16 x i16> %a0, %a1
%2 = load <16 x i16>, <16 x i16> *%a2, align 32
%3 = add <16 x i16> %1, %2
@@ -259,7 +259,7 @@ define <4 x i64> @test_pand(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; ZNVER1-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
- ; ZNVER1-NEXT: retq # sched: [5:0.50]
+ ; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = and <4 x i64> %a0, %a1
%2 = load <4 x i64>, <4 x i64> *%a2, align 32
%3 = and <4 x i64> %1, %2
@@ -294,7 +294,7 @@ define <4 x i64> @test_pandn(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; ZNVER1-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50]
; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
- ; ZNVER1-NEXT: retq # sched: [5:0.50]
+ ; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = xor <4 x i64> %a0, <i64 -1, i64 -1, i64 -1, i64 -1>
%2 = and <4 x i64> %a1, %1
%3 = load <4 x i64>, <4 x i64> *%a2, align 32
@@ -325,9 +325,9 @@ define <8 x i32> @test_pmulld(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
;
; ZNVER1-LABEL: test_pmulld:
; ZNVER1: # BB#0:
- ; ZNVER1-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
- ; ZNVER1-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
- ; ZNVER1-NEXT: retq # sched: [5:0.50]
+ ; ZNVER1-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
+ ; ZNVER1-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
+ ; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = mul <8 x i32> %a0, %a1
%2 = load <8 x i32>, <8 x i32> *%a2, align 32
%3 = mul <8 x i32> %1, %2
@@ -357,7 +357,7 @@ define <16 x i16> @test_pmullw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2)
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
; ZNVER1-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
- ; ZNVER1-NEXT: retq # sched: [5:0.50]
+ ; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = mul <16 x i16> %a0, %a1
%2 = load <16 x i16>, <16 x i16> *%a2, align 32
%3 = mul <16 x i16> %1, %2
@@ -391,7 +391,7 @@ define <4 x i64> @test_por(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
- ; ZNVER1-NEXT: retq # sched: [5:0.50]
+ ; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = or <4 x i64> %a0, %a1
%2 = load <4 x i64>, <4 x i64> *%a2, align 32
%3 = or <4 x i64> %1, %2
@@ -422,7 +422,7 @@ define <32 x i8> @test_psubb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
- ; ZNVER1-NEXT: retq # sched: [5:0.50]
+ ; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = sub <32 x i8> %a0, %a1
%2 = load <32 x i8>, <32 x i8> *%a2, align 32
%3 = sub <32 x i8> %1, %2
@@ -452,7 +452,7 @@ define <8 x i32> @test_psubd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
- ; ZNVER1-NEXT: retq # sched: [5:0.50]
+ ; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = sub <8 x i32> %a0, %a1
%2 = load <8 x i32>, <8 x i32> *%a2, align 32
%3 = sub <8 x i32> %1, %2
@@ -482,7 +482,7 @@ define <4 x i64> @test_psubq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
- ; ZNVER1-NEXT: retq # sched: [5:0.50]
+ ; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = sub <4 x i64> %a0, %a1
%2 = load <4 x i64>, <4 x i64> *%a2, align 32
%3 = sub <4 x i64> %1, %2
@@ -512,7 +512,7 @@ define <16 x i16> @test_psubw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
- ; ZNVER1-NEXT: retq # sched: [5:0.50]
+ ; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = sub <16 x i16> %a0, %a1
%2 = load <16 x i16>, <16 x i16> *%a2, align 32
%3 = sub <16 x i16> %1, %2
@@ -546,7 +546,7 @@ define <4 x i64> @test_pxor(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; ZNVER1-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
- ; ZNVER1-NEXT: retq # sched: [5:0.50]
+ ; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = xor <4 x i64> %a0, %a1
%2 = load <4 x i64>, <4 x i64> *%a2, align 32
%3 = xor <4 x i64> %1, %2