@@ -373,3 +373,120 @@ func.func @scaled_ext_scalar_f4e2m1_bf16(%v: vector<2xf4E2M1FN>, %scale: f32) ->
373
373
%ret = amdgpu.scaled_ext_packed %v [0 ], %scale : vector <2 xf4 E2 M1 FN> to vector <2 xbf16 >
374
374
func.return %ret : vector <2 xbf16 >
375
375
}
376
+
377
+ // One-element f8E4M3FN source, vector<2xf32> result: the directives below expect the element to be inserted into a zeroed vector<4xi8> and bitcast to i32.
+ // CHECK-LABEL: func.func @scaled_ext_one_f8e4m3_f32
378
+ // CHECK: [[V:%.+]] = builtin.unrealized_conversion_cast %arg0 : vector<1xf8E4M3FN> to vector<1xi8>
379
+ // CHECK-DAG: [[ZERO:%.+]] = llvm.mlir.zero : vector<4xi8>
380
+ // CHECK-DAG: [[C0:%.+]] = llvm.mlir.constant(0 : i32) : i32
381
+ // CHECK: [[ELEM_0:%.+]] = llvm.extractelement [[V]]{{\[}}[[C0]] : i32] : vector<1xi8>
382
+ // CHECK: [[VEC_0:%.+]] = llvm.insertelement [[ELEM_0]], [[ZERO]]{{\[}}[[C0]] : i32] : vector<4xi8>
383
+ // CHECK: [[BITCAST:%.+]] = llvm.bitcast [[VEC_0]] : vector<4xi8> to i32
384
+ // CHECK: rocdl.cvt.scalef32.pk.f32.fp8 [[BITCAST]][false], %arg1 : vector<2xf32>
385
+ func.func @scaled_ext_one_f8e4m3_f32(%v: vector<1xf8E4M3FN>, %scale: f32) -> vector<2xf32> {
386
+   %ret = amdgpu.scaled_ext_packed %v[0], %scale : vector<1xf8E4M3FN> to vector<2xf32>
387
+   func.return %ret : vector<2xf32>
388
+ }
389
+
390
+ // One-element f8E4M3FN source, vector<2xf16> result: the directives below expect the element to be inserted into a zeroed vector<4xi8> and bitcast to i32.
+ // CHECK-LABEL: func.func @scaled_ext_one_f8e4m3_f16
391
+ // CHECK: [[V:%.+]] = builtin.unrealized_conversion_cast %arg0 : vector<1xf8E4M3FN> to vector<1xi8>
392
+ // CHECK-DAG: [[ZERO:%.+]] = llvm.mlir.zero : vector<4xi8>
393
+ // CHECK-DAG: [[C0:%.+]] = llvm.mlir.constant(0 : i32) : i32
394
+ // CHECK: [[ELEM_0:%.+]] = llvm.extractelement [[V]]{{\[}}[[C0]] : i32] : vector<1xi8>
395
+ // CHECK: [[VEC_0:%.+]] = llvm.insertelement [[ELEM_0]], [[ZERO]]{{\[}}[[C0]] : i32] : vector<4xi8>
396
+ // CHECK: [[BITCAST:%.+]] = llvm.bitcast [[VEC_0]] : vector<4xi8> to i32
397
+ // CHECK: rocdl.cvt.scalef32.pk.f16.fp8 [[BITCAST]][false], %arg1 : vector<2xf16>
398
+ func.func @scaled_ext_one_f8e4m3_f16(%v: vector<1xf8E4M3FN>, %scale: f32) -> vector<2xf16> {
399
+   %ret = amdgpu.scaled_ext_packed %v[0], %scale : vector<1xf8E4M3FN> to vector<2xf16>
400
+   func.return %ret : vector<2xf16>
401
+ }
402
+
403
+ // One-element f8E4M3FN source, vector<2xbf16> result: the directives below expect the element to be inserted into a zeroed vector<4xi8> and bitcast to i32.
+ // CHECK-LABEL: func.func @scaled_ext_one_f8e4m3_bf16
404
+ // CHECK: [[V:%.+]] = builtin.unrealized_conversion_cast %arg0 : vector<1xf8E4M3FN> to vector<1xi8>
405
+ // CHECK-DAG: [[ZERO:%.+]] = llvm.mlir.zero : vector<4xi8>
406
+ // CHECK-DAG: [[C0:%.+]] = llvm.mlir.constant(0 : i32) : i32
407
+ // CHECK: [[ELEM_0:%.+]] = llvm.extractelement [[V]]{{\[}}[[C0]] : i32] : vector<1xi8>
408
+ // CHECK: [[VEC_0:%.+]] = llvm.insertelement [[ELEM_0]], [[ZERO]]{{\[}}[[C0]] : i32] : vector<4xi8>
409
+ // CHECK: [[BITCAST:%.+]] = llvm.bitcast [[VEC_0]] : vector<4xi8> to i32
410
+ // CHECK: rocdl.cvt.scalef32.pk.bf16.fp8 [[BITCAST]][false], %arg1 : vector<2xbf16>
411
+ func.func @scaled_ext_one_f8e4m3_bf16(%v: vector<1xf8E4M3FN>, %scale: f32) -> vector<2xbf16> {
412
+   %ret = amdgpu.scaled_ext_packed %v[0], %scale : vector<1xf8E4M3FN> to vector<2xbf16>
413
+   func.return %ret : vector<2xbf16>
414
+ }
415
+
416
+ // One-element f8E5M2 source, vector<2xf32> result: the directives below expect the element to be inserted into a zeroed vector<4xi8> and bitcast to i32.
+ // CHECK-LABEL: func.func @scaled_ext_one_f8e5m2_f32
417
+ // CHECK: [[V:%.+]] = builtin.unrealized_conversion_cast %arg0 : vector<1xf8E5M2> to vector<1xi8>
418
+ // CHECK-DAG: [[ZERO:%.+]] = llvm.mlir.zero : vector<4xi8>
419
+ // CHECK-DAG: [[C0:%.+]] = llvm.mlir.constant(0 : i32) : i32
420
+ // CHECK: [[ELEM_0:%.+]] = llvm.extractelement [[V]]{{\[}}[[C0]] : i32] : vector<1xi8>
421
+ // CHECK: [[VEC_0:%.+]] = llvm.insertelement [[ELEM_0]], [[ZERO]]{{\[}}[[C0]] : i32] : vector<4xi8>
422
+ // CHECK: [[BITCAST:%.+]] = llvm.bitcast [[VEC_0]] : vector<4xi8> to i32
423
+ // CHECK: rocdl.cvt.scalef32.pk.f32.bf8 [[BITCAST]][false], %arg1 : vector<2xf32>
424
+ func.func @scaled_ext_one_f8e5m2_f32(%v: vector<1xf8E5M2>, %scale: f32) -> vector<2xf32> {
425
+   %ret = amdgpu.scaled_ext_packed %v[0], %scale : vector<1xf8E5M2> to vector<2xf32>
426
+   func.return %ret : vector<2xf32>
427
+ }
428
+
429
+ // One-element f8E5M2 source, vector<2xf16> result: the directives below expect the element to be inserted into a zeroed vector<4xi8> and bitcast to i32.
+ // CHECK-LABEL: func.func @scaled_ext_one_f8e5m2_f16
430
+ // CHECK: [[V:%.+]] = builtin.unrealized_conversion_cast %arg0 : vector<1xf8E5M2> to vector<1xi8>
431
+ // CHECK-DAG: [[ZERO:%.+]] = llvm.mlir.zero : vector<4xi8>
432
+ // CHECK-DAG: [[C0:%.+]] = llvm.mlir.constant(0 : i32) : i32
433
+ // CHECK: [[ELEM_0:%.+]] = llvm.extractelement [[V]]{{\[}}[[C0]] : i32] : vector<1xi8>
434
+ // CHECK: [[VEC_0:%.+]] = llvm.insertelement [[ELEM_0]], [[ZERO]]{{\[}}[[C0]] : i32] : vector<4xi8>
435
+ // CHECK: [[BITCAST:%.+]] = llvm.bitcast [[VEC_0]] : vector<4xi8> to i32
436
+ // CHECK: rocdl.cvt.scalef32.pk.f16.bf8 [[BITCAST]][false], %arg1 : vector<2xf16>
437
+ func.func @scaled_ext_one_f8e5m2_f16(%v: vector<1xf8E5M2>, %scale: f32) -> vector<2xf16> {
438
+   %ret = amdgpu.scaled_ext_packed %v[0], %scale : vector<1xf8E5M2> to vector<2xf16>
439
+   func.return %ret : vector<2xf16>
440
+ }
441
+
442
+ // One-element f8E5M2 source, vector<2xbf16> result: the directives below expect the element to be inserted into a zeroed vector<4xi8> and bitcast to i32.
+ // CHECK-LABEL: func.func @scaled_ext_one_f8e5m2_bf16
443
+ // CHECK: [[V:%.+]] = builtin.unrealized_conversion_cast %arg0 : vector<1xf8E5M2> to vector<1xi8>
444
+ // CHECK-DAG: [[ZERO:%.+]] = llvm.mlir.zero : vector<4xi8>
445
+ // CHECK-DAG: [[C0:%.+]] = llvm.mlir.constant(0 : i32) : i32
446
+ // CHECK: [[ELEM_0:%.+]] = llvm.extractelement [[V]]{{\[}}[[C0]] : i32] : vector<1xi8>
447
+ // CHECK: [[VEC_0:%.+]] = llvm.insertelement [[ELEM_0]], [[ZERO]]{{\[}}[[C0]] : i32] : vector<4xi8>
448
+ // CHECK: [[BITCAST:%.+]] = llvm.bitcast [[VEC_0]] : vector<4xi8> to i32
449
+ // CHECK: rocdl.cvt.scalef32.pk.bf16.bf8 [[BITCAST]][false], %arg1 : vector<2xbf16>
450
+ func.func @scaled_ext_one_f8e5m2_bf16(%v: vector<1xf8E5M2>, %scale: f32) -> vector<2xbf16> {
451
+   %ret = amdgpu.scaled_ext_packed %v[0], %scale : vector<1xf8E5M2> to vector<2xbf16>
452
+   func.return %ret : vector<2xbf16>
453
+ }
454
+
455
+ // One-element f4E2M1FN source, vector<2xf32> result: the directives below expect the element to be inserted into a zeroed vector<8xi4> and bitcast to i32.
+ // CHECK-LABEL: func.func @scaled_ext_one_f4e2m1_f32
456
+ // CHECK: [[V:%.+]] = builtin.unrealized_conversion_cast %arg0 : vector<1xf4E2M1FN> to vector<1xi4>
457
+ // CHECK-DAG: [[ZERO:%.+]] = llvm.mlir.zero : vector<8xi4>
458
+ // CHECK-DAG: [[C0:%.+]] = llvm.mlir.constant(0 : i32) : i32
459
+ // CHECK: [[ELEM_0:%.+]] = llvm.extractelement [[V]]{{\[}}[[C0]] : i32] : vector<1xi4>
460
+ // CHECK: [[VEC_0:%.+]] = llvm.insertelement [[ELEM_0]], [[ZERO]]{{\[}}[[C0]] : i32] : vector<8xi4>
461
+ // CHECK: [[BITCAST:%.+]] = llvm.bitcast [[VEC_0]] : vector<8xi4> to i32
462
+ // CHECK: rocdl.cvt.scalef32.pk.f32.fp4 [[BITCAST]][0], %arg1 : vector<2xf32>
463
+ func.func @scaled_ext_one_f4e2m1_f32(%v: vector<1xf4E2M1FN>, %scale: f32) -> vector<2xf32> {
464
+   %ret = amdgpu.scaled_ext_packed %v[0], %scale : vector<1xf4E2M1FN> to vector<2xf32>
465
+   func.return %ret : vector<2xf32>
466
+ }
467
+
468
+ // One-element f4E2M1FN source, vector<2xf16> result: the directives below expect the element to be inserted into a zeroed vector<8xi4> and bitcast to i32.
+ // CHECK-LABEL: func.func @scaled_ext_one_f4e2m1_f16
469
+ // CHECK: [[V:%.+]] = builtin.unrealized_conversion_cast %arg0 : vector<1xf4E2M1FN> to vector<1xi4>
470
+ // CHECK-DAG: [[ZERO:%.+]] = llvm.mlir.zero : vector<8xi4>
471
+ // CHECK-DAG: [[C0:%.+]] = llvm.mlir.constant(0 : i32) : i32
472
+ // CHECK: [[ELEM_0:%.+]] = llvm.extractelement [[V]]{{\[}}[[C0]] : i32] : vector<1xi4>
473
+ // CHECK: [[VEC_0:%.+]] = llvm.insertelement [[ELEM_0]], [[ZERO]]{{\[}}[[C0]] : i32] : vector<8xi4>
474
+ // CHECK: [[BITCAST:%.+]] = llvm.bitcast [[VEC_0]] : vector<8xi4> to i32
475
+ // CHECK: rocdl.cvt.scalef32.pk.f16.fp4 [[BITCAST]][0], %arg1 : vector<2xf16>
476
+ func.func @scaled_ext_one_f4e2m1_f16(%v: vector<1xf4E2M1FN>, %scale: f32) -> vector<2xf16> {
477
+   %ret = amdgpu.scaled_ext_packed %v[0], %scale : vector<1xf4E2M1FN> to vector<2xf16>
478
+   func.return %ret : vector<2xf16>
479
+ }
480
+
481
+ // One-element f4E2M1FN source, vector<2xbf16> result: the directives below expect the element to be inserted into a zeroed vector<8xi4> and bitcast to i32.
+ // CHECK-LABEL: func.func @scaled_ext_one_f4e2m1_bf16
482
+ // CHECK: [[V:%.+]] = builtin.unrealized_conversion_cast %arg0 : vector<1xf4E2M1FN> to vector<1xi4>
483
+ // CHECK-DAG: [[ZERO:%.+]] = llvm.mlir.zero : vector<8xi4>
484
+ // CHECK-DAG: [[C0:%.+]] = llvm.mlir.constant(0 : i32) : i32
485
+ // CHECK: [[ELEM_0:%.+]] = llvm.extractelement [[V]]{{\[}}[[C0]] : i32] : vector<1xi4>
486
+ // CHECK: [[VEC_0:%.+]] = llvm.insertelement [[ELEM_0]], [[ZERO]]{{\[}}[[C0]] : i32] : vector<8xi4>
487
+ // CHECK: [[BITCAST:%.+]] = llvm.bitcast [[VEC_0]] : vector<8xi4> to i32
488
+ // CHECK: rocdl.cvt.scalef32.pk.bf16.fp4 [[BITCAST]][0], %arg1 : vector<2xbf16>
489
+ func.func @scaled_ext_one_f4e2m1_bf16(%v: vector<1xf4E2M1FN>, %scale: f32) -> vector<2xbf16> {
490
+   %ret = amdgpu.scaled_ext_packed %v[0], %scale : vector<1xf4E2M1FN> to vector<2xbf16>
491
+   func.return %ret : vector<2xbf16>
492
+ }
0 commit comments