@@ -432,3 +432,46 @@ define void @xor_2x2(ptr %lhs, ptr %rhs, ptr %out) {
432
432
store <4 x i32 > %optt , ptr %out
433
433
ret void
434
434
}
435
+
436
; Test: element-wise llvm.fabs on a <4 x double> that is then transposed twice
; as a 2x2 matrix before being stored to %out.
; The CHECK lines below expect the output to contain two <2 x double> column
; loads (the second at element offset 2 from %in), one llvm.fabs.v2f64 call per
; column, and two column stores (the second at element offset 2 from %out),
; with no transpose calls remaining between the loads and the final ret.
+ define void @fabs_2x2f64 (ptr %in , ptr %out ) {
437
+ ; CHECK-LABEL: @fabs_2x2f64(
438
+ ; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x double>, ptr [[IN:%.*]], align 32
439
+ ; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr double, ptr [[IN]], i64 2
440
+ ; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <2 x double>, ptr [[VEC_GEP]], align 16
441
+ ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[COL_LOAD]])
442
+ ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[COL_LOAD1]])
443
+ ; CHECK-NEXT: store <2 x double> [[TMP1]], ptr [[OUT:%.*]], align 32
444
+ ; CHECK-NEXT: [[VEC_GEP2:%.*]] = getelementptr double, ptr [[OUT]], i64 2
445
+ ; CHECK-NEXT: store <2 x double> [[TMP2]], ptr [[VEC_GEP2]], align 16
446
+ ; CHECK-NEXT: ret void
447
+ ;
448
; Input IR: fabs on the whole flat vector, followed by two back-to-back 2x2
; transposes of the result.
+ %load = load <4 x double >, ptr %in
449
+ %fabs = call <4 x double > @llvm.fabs.v4f64 (<4 x double > %load )
450
+ %fabst = call <4 x double > @llvm.matrix.transpose (<4 x double > %fabs , i32 2 , i32 2 )
451
+ %fabstt = call <4 x double > @llvm.matrix.transpose (<4 x double > %fabst , i32 2 , i32 2 )
452
+ store <4 x double > %fabstt , ptr %out
453
+ ret void
454
+ }
455
+
456
; Test: integer llvm.abs applied both before and after a pair of 2x2
; transposes on a <4 x i32> loaded from %in; the final value is stored to %out.
; NOTE(review): the function name says "fabs", but the body calls the integer
; intrinsic llvm.abs.v4i32 - consider renaming (e.g. abs_2x2i32); confirm with
; the test's author before changing the CHECK-LABEL.
; The CHECK lines below expect four llvm.abs.v2i32 calls: one per column with
; `i1 false` on the loaded columns, then one per column with `i1 true` on those
; results, i.e. each call keeps the i1 operand of the abs it came from, and no
; transpose calls remain.
+ define void @fabs_2x2i32 (ptr %in , ptr %out ) {
457
+ ; CHECK-LABEL: @fabs_2x2i32(
458
+ ; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x i32>, ptr [[IN:%.*]], align 16
459
+ ; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr i32, ptr [[IN]], i64 2
460
+ ; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <2 x i32>, ptr [[VEC_GEP]], align 8
461
+ ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.abs.v2i32(<2 x i32> [[COL_LOAD]], i1 false)
462
+ ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.abs.v2i32(<2 x i32> [[COL_LOAD1]], i1 false)
463
+ ; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.abs.v2i32(<2 x i32> [[TMP1]], i1 true)
464
+ ; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i32> @llvm.abs.v2i32(<2 x i32> [[TMP2]], i1 true)
465
+ ; CHECK-NEXT: store <2 x i32> [[TMP3]], ptr [[OUT:%.*]], align 16
466
+ ; CHECK-NEXT: [[VEC_GEP2:%.*]] = getelementptr i32, ptr [[OUT]], i64 2
467
+ ; CHECK-NEXT: store <2 x i32> [[TMP4]], ptr [[VEC_GEP2]], align 8
468
+ ; CHECK-NEXT: ret void
469
+ ;
470
; Input IR: abs (i1 false) -> transpose -> transpose -> abs (i1 true).
+ %load = load <4 x i32 >, ptr %in
471
+ %abs = call <4 x i32 > @llvm.abs.v4i32 (<4 x i32 > %load , i1 false )
472
+ %abst = call <4 x i32 > @llvm.matrix.transpose (<4 x i32 > %abs , i32 2 , i32 2 )
473
+ %abstt = call <4 x i32 > @llvm.matrix.transpose (<4 x i32 > %abst , i32 2 , i32 2 )
474
+ %absabstt = call <4 x i32 > @llvm.abs.v4i32 (<4 x i32 > %abstt , i1 true )
475
+ store <4 x i32 > %absabstt , ptr %out
476
+ ret void
477
+ }
0 commit comments