Commit bfcac0b
AMD family 17h (znver1) scheduler model update.
Summary: This patch enables the following:
1) Regex-based instruction itineraries for integer instructions.
2) Instructions grouped by their nature (move, arithmetic, logic, misc, control transfer).
3) FP instructions and their itineraries, including values for SSE4A, BMI, BMI2 and SHA instructions.

Patch by Ganesh Gopalasubramanian

Reviewers: RKSimon, craig.topper

Subscribers: vprasad, shivaram, ddibyend, andreadb, javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D36617

llvm-svn: 312237
Parent: 23a86ea
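To make point 1 of the summary concrete, here is a minimal sketch of what a regex-based itinerary looks like in TableGen. This is an illustrative excerpt, not text from the patch: Znver1Model, ZnALU, and ZnAGU follow the names used by the Znver1 model, while the write classes and patterns (suffixed "Sketch") are hypothetical. The latencies mirror the [1:0.25] register / [5:0.50] memory values the updated tests below check for.

// Hypothetical sketch: grouping integer instructions by name pattern
// instead of enumerating every opcode individually.
let SchedModel = Znver1Model in {
  // Simple ALU ops: 1-cycle latency on any of the integer ALUs.
  def ZnWriteALUSketch : SchedWriteRes<[ZnALU]> { let Latency = 1; }
  def : InstRW<[ZnWriteALUSketch],
        (instregex "(ADD|SUB|AND|OR|XOR)(8|16|32|64)rr")>;

  // Load-op forms pay a 4-cycle integer load on top: 1 + 4 = 5.
  def ZnWriteALULdSketch : SchedWriteRes<[ZnAGU, ZnALU]> {
    let Latency = 5;
    let NumMicroOps = 2;
  }
  def : InstRW<[ZnWriteALULdSketch],
        (instregex "(ADD|SUB|AND|OR|XOR)(8|16|32|64)rm")>;
}

Grouping with instregex keeps the model compact: one entry covers every width and operand form a pattern matches, at the cost of depending on the target's instruction naming conventions.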

20 files changed: +2204 -657 lines

llvm/lib/Target/X86/X86ScheduleZnver1.td

Lines changed: 1550 additions & 3 deletions (large diff not rendered)
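Since this 1550-line diff is not rendered, the following hedged sketch shows the shape an FP entry might take. In the test diffs below, the # sched: [N:M] comments emitted by llc encode the model's predicted latency N (in cycles) and inverse throughput M (cycles per instruction). The AES tests check [4:0.50] for register forms and [11:0.50] with a folded load, consistent with a 7-cycle FP load on top of the 4-cycle op. ZnFPU01 and ZnAGU follow the model's resource naming, but these exact definitions are assumptions for illustration, not the patch's actual text.

// Hypothetical sketch of an FP itinerary entry, chosen to reproduce the
// [4:0.50] / [11:0.50] AES values checked in aes-schedule.ll below.
let SchedModel = Znver1Model in {
  // Two FP pipes can issue AES ops, giving 0.50 inverse throughput.
  def ZnWriteAESSketch : SchedWriteRes<[ZnFPU01]> { let Latency = 4; }
  def : InstRW<[ZnWriteAESSketch],
        (instregex "VAESENC(LAST)?rr", "VAESDEC(LAST)?rr")>;

  // The folded-load form adds the 7-cycle FP load: 4 + 7 = 11.
  def ZnWriteAESLdSketch : SchedWriteRes<[ZnAGU, ZnFPU01]> {
    let Latency = 11;
    let NumMicroOps = 2;
  }
  def : InstRW<[ZnWriteAESLdSketch],
        (instregex "VAESENC(LAST)?rm", "VAESDEC(LAST)?rm")>;
}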

llvm/test/CodeGen/X86/aes-schedule.ll

Lines changed: 6 additions & 6 deletions

@@ -50,7 +50,7 @@ define <2 x i64> @test_aesdec(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
 ; ZNVER1: # BB#0:
 ; ZNVER1-NEXT: vaesdec %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; ZNVER1-NEXT: vaesdec (%rdi), %xmm0, %xmm0 # sched: [11:0.50]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
 %1 = load <2 x i64>, <2 x i64> *%a2, align 16
 %2 = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %a0, <2 x i64> %a1)
 %3 = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %2, <2 x i64> %1)
@@ -99,7 +99,7 @@ define <2 x i64> @test_aesdeclast(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2)
 ; ZNVER1: # BB#0:
 ; ZNVER1-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; ZNVER1-NEXT: vaesdeclast (%rdi), %xmm0, %xmm0 # sched: [11:0.50]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
 %1 = load <2 x i64>, <2 x i64> *%a2, align 16
 %2 = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %a0, <2 x i64> %a1)
 %3 = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %2, <2 x i64> %1)
@@ -148,7 +148,7 @@ define <2 x i64> @test_aesenc(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
 ; ZNVER1: # BB#0:
 ; ZNVER1-NEXT: vaesenc %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; ZNVER1-NEXT: vaesenc (%rdi), %xmm0, %xmm0 # sched: [11:0.50]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
 %1 = load <2 x i64>, <2 x i64> *%a2, align 16
 %2 = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %a0, <2 x i64> %a1)
 %3 = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %2, <2 x i64> %1)
@@ -197,7 +197,7 @@ define <2 x i64> @test_aesenclast(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2)
 ; ZNVER1: # BB#0:
 ; ZNVER1-NEXT: vaesenclast %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
 ; ZNVER1-NEXT: vaesenclast (%rdi), %xmm0, %xmm0 # sched: [11:0.50]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
 %1 = load <2 x i64>, <2 x i64> *%a2, align 16
 %2 = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %a0, <2 x i64> %a1)
 %3 = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %2, <2 x i64> %1)
@@ -253,7 +253,7 @@ define <2 x i64> @test_aesimc(<2 x i64> %a0, <2 x i64> *%a1) {
 ; ZNVER1-NEXT: vaesimc (%rdi), %xmm1 # sched: [11:0.50]
 ; ZNVER1-NEXT: vaesimc %xmm0, %xmm0 # sched: [4:0.50]
 ; ZNVER1-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
 %1 = load <2 x i64>, <2 x i64> *%a1, align 16
 %2 = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %a0)
 %3 = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %1)
@@ -310,7 +310,7 @@ define <2 x i64> @test_aeskeygenassist(<2 x i64> %a0, <2 x i64> *%a1) {
 ; ZNVER1-NEXT: vaeskeygenassist $7, (%rdi), %xmm1 # sched: [11:0.50]
 ; ZNVER1-NEXT: vaeskeygenassist $7, %xmm0, %xmm0 # sched: [4:0.50]
 ; ZNVER1-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
 %1 = load <2 x i64>, <2 x i64> *%a1, align 16
 %2 = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %a0, i8 7)
 %3 = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %1, i8 7)

llvm/test/CodeGen/X86/avx-schedule.ll

Lines changed: 136 additions & 136 deletions (large diff not rendered)

llvm/test/CodeGen/X86/avx2-schedule.ll

Lines changed: 19 additions & 19 deletions

@@ -31,7 +31,7 @@ define <32 x i8> @test_pabsb(<32 x i8> %a0, <32 x i8> *%a1) {
 ; ZNVER1-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50]
 ; ZNVER1-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.25]
 ; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
 %1 = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %a0)
 %2 = load <32 x i8>, <32 x i8> *%a1, align 32
 %3 = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %2)
@@ -67,7 +67,7 @@ define <8 x i32> @test_pabsd(<8 x i32> %a0, <8 x i32> *%a1) {
 ; ZNVER1-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50]
 ; ZNVER1-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.25]
 ; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
 %1 = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %a0)
 %2 = load <8 x i32>, <8 x i32> *%a1, align 32
 %3 = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %2)
@@ -103,7 +103,7 @@ define <16 x i16> @test_pabsw(<16 x i16> %a0, <16 x i16> *%a1) {
 ; ZNVER1-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50]
 ; ZNVER1-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.25]
 ; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
 %1 = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %a0)
 %2 = load <16 x i16>, <16 x i16> *%a1, align 32
 %3 = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %2)
@@ -135,7 +135,7 @@ define <32 x i8> @test_paddb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
 ; ZNVER1: # BB#0:
 ; ZNVER1-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
 ; ZNVER1-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
 %1 = add <32 x i8> %a0, %a1
 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
 %3 = add <32 x i8> %1, %2
@@ -165,7 +165,7 @@ define <8 x i32> @test_paddd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
 ; ZNVER1: # BB#0:
 ; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
 ; ZNVER1-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
 %1 = add <8 x i32> %a0, %a1
 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
 %3 = add <8 x i32> %1, %2
@@ -195,7 +195,7 @@ define <4 x i64> @test_paddq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
 ; ZNVER1: # BB#0:
 ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
 ; ZNVER1-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
 %1 = add <4 x i64> %a0, %a1
 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
 %3 = add <4 x i64> %1, %2
@@ -225,7 +225,7 @@ define <16 x i16> @test_paddw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
 ; ZNVER1: # BB#0:
 ; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
 ; ZNVER1-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
 %1 = add <16 x i16> %a0, %a1
 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
 %3 = add <16 x i16> %1, %2
@@ -259,7 +259,7 @@ define <4 x i64> @test_pand(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
 ; ZNVER1-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
 ; ZNVER1-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
 ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
 %1 = and <4 x i64> %a0, %a1
 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
 %3 = and <4 x i64> %1, %2
@@ -294,7 +294,7 @@ define <4 x i64> @test_pandn(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
 ; ZNVER1-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
 ; ZNVER1-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50]
 ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
 %1 = xor <4 x i64> %a0, <i64 -1, i64 -1, i64 -1, i64 -1>
 %2 = and <4 x i64> %a1, %1
 %3 = load <4 x i64>, <4 x i64> *%a2, align 32
@@ -325,9 +325,9 @@ define <8 x i32> @test_pmulld(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
 ;
 ; ZNVER1-LABEL: test_pmulld:
 ; ZNVER1: # BB#0:
-; ZNVER1-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
-; ZNVER1-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
+; ZNVER1-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
 %1 = mul <8 x i32> %a0, %a1
 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
 %3 = mul <8 x i32> %1, %2
@@ -357,7 +357,7 @@ define <16 x i16> @test_pmullw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2)
 ; ZNVER1: # BB#0:
 ; ZNVER1-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
 ; ZNVER1-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
 %1 = mul <16 x i16> %a0, %a1
 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
 %3 = mul <16 x i16> %1, %2
@@ -391,7 +391,7 @@ define <4 x i64> @test_por(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
 ; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
 ; ZNVER1-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
 ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
 %1 = or <4 x i64> %a0, %a1
 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
 %3 = or <4 x i64> %1, %2
@@ -422,7 +422,7 @@ define <32 x i8> @test_psubb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
 ; ZNVER1: # BB#0:
 ; ZNVER1-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
 ; ZNVER1-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
 %1 = sub <32 x i8> %a0, %a1
 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
 %3 = sub <32 x i8> %1, %2
@@ -452,7 +452,7 @@ define <8 x i32> @test_psubd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
 ; ZNVER1: # BB#0:
 ; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
 ; ZNVER1-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
 %1 = sub <8 x i32> %a0, %a1
 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
 %3 = sub <8 x i32> %1, %2
@@ -482,7 +482,7 @@ define <4 x i64> @test_psubq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
 ; ZNVER1: # BB#0:
 ; ZNVER1-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
 ; ZNVER1-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
 %1 = sub <4 x i64> %a0, %a1
 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
 %3 = sub <4 x i64> %1, %2
@@ -512,7 +512,7 @@ define <16 x i16> @test_psubw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
 ; ZNVER1: # BB#0:
 ; ZNVER1-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
 ; ZNVER1-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
 %1 = sub <16 x i16> %a0, %a1
 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
 %3 = sub <16 x i16> %1, %2
@@ -546,7 +546,7 @@ define <4 x i64> @test_pxor(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
 ; ZNVER1-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
 ; ZNVER1-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
 ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
 %1 = xor <4 x i64> %a0, %a1
 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
 %3 = xor <4 x i64> %1, %2

llvm/test/CodeGen/X86/bmi-schedule.ll

Lines changed: 36 additions & 36 deletions

@@ -41,7 +41,7 @@ define i16 @test_andn_i16(i16 zeroext %a0, i16 zeroext %a1, i16 *%a2) {
 ; ZNVER1-NEXT: andw (%rdx), %di # sched: [5:0.50]
 ; ZNVER1-NEXT: addl %edi, %eax # sched: [1:0.25]
 ; ZNVER1-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
 %1 = load i16, i16 *%a2
 %2 = xor i16 %a0, -1
 %3 = and i16 %2, %a1
@@ -77,7 +77,7 @@ define i32 @test_andn_i32(i32 %a0, i32 %a1, i32 *%a2) {
 ; ZNVER1-NEXT: andnl (%rdx), %edi, %eax # sched: [5:0.50]
 ; ZNVER1-NEXT: andnl %esi, %edi, %ecx # sched: [1:0.25]
 ; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
 %1 = load i32, i32 *%a2
 %2 = xor i32 %a0, -1
 %3 = and i32 %2, %a1
@@ -113,7 +113,7 @@ define i64 @test_andn_i64(i64 %a0, i64 %a1, i64 *%a2) {
 ; ZNVER1-NEXT: andnq (%rdx), %rdi, %rax # sched: [5:0.50]
 ; ZNVER1-NEXT: andnq %rsi, %rdi, %rcx # sched: [1:0.25]
 ; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
 %1 = load i64, i64 *%a2
 %2 = xor i64 %a0, -1
 %3 = and i64 %2, %a1
@@ -146,10 +146,10 @@ define i32 @test_bextr_i32(i32 %a0, i32 %a1, i32 *%a2) {
 ;
 ; ZNVER1-LABEL: test_bextr_i32:
 ; ZNVER1: # BB#0:
-; ZNVER1-NEXT: bextrl %edi, (%rdx), %ecx
-; ZNVER1-NEXT: bextrl %edi, %esi, %eax
+; ZNVER1-NEXT: bextrl %edi, (%rdx), %ecx # sched: [5:0.50]
+; ZNVER1-NEXT: bextrl %edi, %esi, %eax # sched: [1:0.25]
 ; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
 %1 = load i32, i32 *%a2
 %2 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %1, i32 %a0)
 %3 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %a1, i32 %a0)
@@ -182,10 +182,10 @@ define i64 @test_bextr_i64(i64 %a0, i64 %a1, i64 *%a2) {
 ;
 ; ZNVER1-LABEL: test_bextr_i64:
 ; ZNVER1: # BB#0:
-; ZNVER1-NEXT: bextrq %rdi, (%rdx), %rcx
-; ZNVER1-NEXT: bextrq %rdi, %rsi, %rax
+; ZNVER1-NEXT: bextrq %rdi, (%rdx), %rcx # sched: [5:0.50]
+; ZNVER1-NEXT: bextrq %rdi, %rsi, %rax # sched: [1:0.25]
 ; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
 %1 = load i64, i64 *%a2
 %2 = tail call i64 @llvm.x86.bmi.bextr.64(i64 %1, i64 %a0)
 %3 = tail call i64 @llvm.x86.bmi.bextr.64(i64 %a1, i64 %a0)
@@ -218,10 +218,10 @@ define i32 @test_blsi_i32(i32 %a0, i32 *%a1) {
 ;
 ; ZNVER1-LABEL: test_blsi_i32:
 ; ZNVER1: # BB#0:
-; ZNVER1-NEXT: blsil (%rsi), %ecx
-; ZNVER1-NEXT: blsil %edi, %eax
+; ZNVER1-NEXT: blsil (%rsi), %ecx # sched: [6:0.50]
+; ZNVER1-NEXT: blsil %edi, %eax # sched: [2:0.25]
 ; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
 %1 = load i32, i32 *%a1
 %2 = sub i32 0, %1
 %3 = sub i32 0, %a0
@@ -255,10 +255,10 @@ define i64 @test_blsi_i64(i64 %a0, i64 *%a1) {
 ;
 ; ZNVER1-LABEL: test_blsi_i64:
 ; ZNVER1: # BB#0:
-; ZNVER1-NEXT: blsiq (%rsi), %rcx
-; ZNVER1-NEXT: blsiq %rdi, %rax
+; ZNVER1-NEXT: blsiq (%rsi), %rcx # sched: [6:0.50]
+; ZNVER1-NEXT: blsiq %rdi, %rax # sched: [2:0.25]
 ; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
 %1 = load i64, i64 *%a1
 %2 = sub i64 0, %1
 %3 = sub i64 0, %a0
@@ -292,10 +292,10 @@ define i32 @test_blsmsk_i32(i32 %a0, i32 *%a1) {
 ;
 ; ZNVER1-LABEL: test_blsmsk_i32:
 ; ZNVER1: # BB#0:
-; ZNVER1-NEXT: blsmskl (%rsi), %ecx
-; ZNVER1-NEXT: blsmskl %edi, %eax
+; ZNVER1-NEXT: blsmskl (%rsi), %ecx # sched: [6:0.50]
+; ZNVER1-NEXT: blsmskl %edi, %eax # sched: [2:0.25]
 ; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
 %1 = load i32, i32 *%a1
 %2 = sub i32 %1, 1
 %3 = sub i32 %a0, 1
@@ -329,10 +329,10 @@ define i64 @test_blsmsk_i64(i64 %a0, i64 *%a1) {
 ;
 ; ZNVER1-LABEL: test_blsmsk_i64:
 ; ZNVER1: # BB#0:
-; ZNVER1-NEXT: blsmskq (%rsi), %rcx
-; ZNVER1-NEXT: blsmskq %rdi, %rax
+; ZNVER1-NEXT: blsmskq (%rsi), %rcx # sched: [6:0.50]
+; ZNVER1-NEXT: blsmskq %rdi, %rax # sched: [2:0.25]
 ; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
 %1 = load i64, i64 *%a1
 %2 = sub i64 %1, 1
 %3 = sub i64 %a0, 1
@@ -366,10 +366,10 @@ define i32 @test_blsr_i32(i32 %a0, i32 *%a1) {
 ;
 ; ZNVER1-LABEL: test_blsr_i32:
 ; ZNVER1: # BB#0:
-; ZNVER1-NEXT: blsrl (%rsi), %ecx
-; ZNVER1-NEXT: blsrl %edi, %eax
+; ZNVER1-NEXT: blsrl (%rsi), %ecx # sched: [6:0.50]
+; ZNVER1-NEXT: blsrl %edi, %eax # sched: [2:0.25]
 ; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
 %1 = load i32, i32 *%a1
 %2 = sub i32 %1, 1
 %3 = sub i32 %a0, 1
@@ -403,10 +403,10 @@ define i64 @test_blsr_i64(i64 %a0, i64 *%a1) {
 ;
 ; ZNVER1-LABEL: test_blsr_i64:
 ; ZNVER1: # BB#0:
-; ZNVER1-NEXT: blsrq (%rsi), %rcx
-; ZNVER1-NEXT: blsrq %rdi, %rax
+; ZNVER1-NEXT: blsrq (%rsi), %rcx # sched: [6:0.50]
+; ZNVER1-NEXT: blsrq %rdi, %rax # sched: [2:0.25]
 ; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
 %1 = load i64, i64 *%a1
 %2 = sub i64 %1, 1
 %3 = sub i64 %a0, 1
@@ -443,11 +443,11 @@ define i16 @test_cttz_i16(i16 zeroext %a0, i16 *%a1) {
 ;
 ; ZNVER1-LABEL: test_cttz_i16:
 ; ZNVER1: # BB#0:
-; ZNVER1-NEXT: tzcntw (%rsi), %cx
-; ZNVER1-NEXT: tzcntw %di, %ax
+; ZNVER1-NEXT: tzcntw (%rsi), %cx # sched: [6:0.50]
+; ZNVER1-NEXT: tzcntw %di, %ax # sched: [2:0.25]
 ; ZNVER1-NEXT: orl %ecx, %eax # sched: [1:0.25]
 ; ZNVER1-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
 %1 = load i16, i16 *%a1
 %2 = tail call i16 @llvm.cttz.i16( i16 %1, i1 false )
 %3 = tail call i16 @llvm.cttz.i16( i16 %a0, i1 false )
@@ -480,10 +480,10 @@ define i32 @test_cttz_i32(i32 %a0, i32 *%a1) {
 ;
 ; ZNVER1-LABEL: test_cttz_i32:
 ; ZNVER1: # BB#0:
-; ZNVER1-NEXT: tzcntl (%rsi), %ecx
-; ZNVER1-NEXT: tzcntl %edi, %eax
+; ZNVER1-NEXT: tzcntl (%rsi), %ecx # sched: [6:0.50]
+; ZNVER1-NEXT: tzcntl %edi, %eax # sched: [2:0.25]
 ; ZNVER1-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
 %1 = load i32, i32 *%a1
 %2 = tail call i32 @llvm.cttz.i32( i32 %1, i1 false )
 %3 = tail call i32 @llvm.cttz.i32( i32 %a0, i1 false )
@@ -516,10 +516,10 @@ define i64 @test_cttz_i64(i64 %a0, i64 *%a1) {
 ;
 ; ZNVER1-LABEL: test_cttz_i64:
 ; ZNVER1: # BB#0:
-; ZNVER1-NEXT: tzcntq (%rsi), %rcx
-; ZNVER1-NEXT: tzcntq %rdi, %rax
+; ZNVER1-NEXT: tzcntq (%rsi), %rcx # sched: [6:0.50]
+; ZNVER1-NEXT: tzcntq %rdi, %rax # sched: [2:0.25]
 ; ZNVER1-NEXT: orq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
 %1 = load i64, i64 *%a1
 %2 = tail call i64 @llvm.cttz.i64( i64 %1, i1 false )
 %3 = tail call i64 @llvm.cttz.i64( i64 %a0, i1 false )
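One cross-check on the values above: blsi/blsmsk/blsr and tzcnt resolve in 2 cycles ([2:0.25]) and their memory forms in 6 ([6:0.50]), i.e. the 2-cycle op plus a 4-cycle integer load, just as the plain ALU ops go from [1:0.25] to [5:0.50]. A hedged sketch of how such entries could be expressed follows; the class names (suffixed "Sketch") are hypothetical, not the patch's text.

// Hypothetical sketch matching the BMI values checked above.
let SchedModel = Znver1Model in {
  // 2-cycle bit-manipulation ops on the integer ALUs.
  def ZnWriteBitScanSketch : SchedWriteRes<[ZnALU]> { let Latency = 2; }
  def : InstRW<[ZnWriteBitScanSketch],
        (instregex "BLSI(32|64)rr", "BLSMSK(32|64)rr",
                   "BLSR(32|64)rr", "TZCNT(16|32|64)rr")>;

  // Memory forms: 4-cycle integer load + 2-cycle op = 6 cycles.
  def ZnWriteBitScanLdSketch : SchedWriteRes<[ZnAGU, ZnALU]> {
    let Latency = 6;
    let NumMicroOps = 2;
  }
  def : InstRW<[ZnWriteBitScanLdSketch],
        (instregex "BLSI(32|64)rm", "BLSMSK(32|64)rm",
                   "BLSR(32|64)rm", "TZCNT(16|32|64)rm")>;
}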
