// Test lowering of the nontemporal attribute: loads/stores marked {nontemporal} in FIR
// (as produced by the lower-nontemporal pass) must keep the attribute on the resulting
// llvm.load/llvm.store after conversion to the LLVM dialect.
// RUN: fir-opt --fir-to-llvm-ir %s | FileCheck %s

// CHECK-LABEL: llvm.func @_QPtest()
// CHECK: %[[CONST_VAL:.*]] = llvm.mlir.constant(1 : i64) : i64
// CHECK: %[[VAL1:.*]] = llvm.alloca %[[CONST_VAL]] x i32 {bindc_name = "n"} : (i64) -> !llvm.ptr
// CHECK: %[[CONST_VAL1:.*]] = llvm.mlir.constant(1 : i64) : i64
// CHECK: %[[VAL2:.*]] = llvm.alloca %[[CONST_VAL1]] x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
// CHECK: %[[CONST_VAL2:.*]] = llvm.mlir.constant(1 : i64) : i64
// CHECK: %[[VAL3:.*]] = llvm.alloca %[[CONST_VAL2]] x i32 {bindc_name = "c"} : (i64) -> !llvm.ptr
// CHECK: %[[CONST_VAL3:.*]] = llvm.mlir.constant(1 : i64) : i64
// CHECK: %[[VAL4:.*]] = llvm.alloca %[[CONST_VAL3]] x i32 {bindc_name = "b"} : (i64) -> !llvm.ptr
// CHECK: %[[CONST_VAL4:.*]] = llvm.mlir.constant(1 : i64) : i64
// CHECK: %[[VAL5:.*]] = llvm.alloca %[[CONST_VAL4]] x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr
// CHECK: %[[CONST_VAL5:.*]] = llvm.mlir.constant(1 : i32) : i32
// CHECK: %[[VAL6:.*]] = llvm.load %[[VAL1]] : !llvm.ptr -> i32
// CHECK: omp.simd nontemporal(%[[VAL5]], %[[VAL3]] : !llvm.ptr, !llvm.ptr) private(@_QFtestEi_private_i32 %[[VAL2]] -> %arg0 : !llvm.ptr) {
// CHECK: omp.loop_nest (%{{.*}}) : i32 = (%[[CONST_VAL5]]) to (%[[VAL6]]) inclusive step (%[[CONST_VAL5]]) {
// CHECK: llvm.store %{{.*}}, %{{.*}} : i32, !llvm.ptr
// CHECK: %[[VAL8:.*]] = llvm.load %[[VAL5]] {nontemporal} : !llvm.ptr -> i32
// CHECK: %[[VAL9:.*]] = llvm.load %[[VAL4]] : !llvm.ptr -> i32
// CHECK: %[[VAL10:.*]] = llvm.add %[[VAL8]], %[[VAL9]] : i32
// CHECK: llvm.store %[[VAL10]], %[[VAL3]] {nontemporal} : i32, !llvm.ptr
// CHECK: omp.yield
// CHECK: }
// CHECK: }

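// Input FIR for the first test: "a" (%0) and "c" (%2) appear in the nontemporal clause and
// their loads/stores inside the loop already carry {nontemporal}; this is roughly what a
// Fortran loop annotated with "!$omp simd nontemporal(a, c)" lowers to. The CHECK lines
// above verify the attribute is preserved on the corresponding llvm.load/llvm.store, while
// accesses to "b" (not in the clause) stay unmarked.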
  func.func @_QPtest() {
    %c1_i32 = arith.constant 1 : i32
    %0 = fir.alloca i32 {bindc_name = "a", uniq_name = "_QFtestEa"}
    %1 = fir.alloca i32 {bindc_name = "b", uniq_name = "_QFtestEb"}
    %2 = fir.alloca i32 {bindc_name = "c", uniq_name = "_QFtestEc"}
    %3 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFtestEi"}
    %4 = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFtestEn"}
    %5 = fir.load %4 : !fir.ref<i32>
    omp.simd nontemporal(%0, %2 : !fir.ref<i32>, !fir.ref<i32>) private(@_QFtestEi_private_i32 %3 -> %arg0 : !fir.ref<i32>) {
      omp.loop_nest (%arg1) : i32 = (%c1_i32) to (%5) inclusive step (%c1_i32) {
        fir.store %arg1 to %arg0 : !fir.ref<i32>
        %6 = fir.load %0 {nontemporal} : !fir.ref<i32>
        %7 = fir.load %1 : !fir.ref<i32>
        %8 = arith.addi %6, %7 : i32
        fir.store %8 to %2 {nontemporal} : !fir.ref<i32>
        omp.yield
      }
    }
    return
  }

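// Second test: the nontemporal clause names an allocatable array. The element address is
// computed through the box descriptor (box_addr/box_dims/ext_array_coor), and only the
// element load/store must carry {nontemporal} after conversion; the descriptor loads and
// the llvm.intr.memcpy of the box itself stay unmarked.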
// CHECK-LABEL: llvm.func @_QPsimd_nontemporal_allocatable
// CHECK: %[[CONST_VAL:.*]] = llvm.mlir.constant(1 : i64) : i64
// CHECK: %[[ALLOCA2:.*]] = llvm.alloca %[[CONST_VAL]] x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
// CHECK: %[[IDX_VAL:.*]] = llvm.mlir.constant(1 : i32) : i32
// CHECK: %[[CONST_VAL1:.*]] = llvm.mlir.constant(0 : index) : i64
// CHECK: %[[END_IDX:.*]] = llvm.mlir.constant(100 : i32) : i32
// CHECK: omp.simd nontemporal(%[[ARG0:.*]] : !llvm.ptr) private(@_QFsimd_nontemporal_allocatableEi_private_i32 %[[ALLOCA2]] -> %[[ARG2:.*]] : !llvm.ptr) {
// CHECK: omp.loop_nest (%[[ARG3:.*]]) : i32 = (%[[IDX_VAL]]) to (%[[END_IDX]]) inclusive step (%[[IDX_VAL]]) {
// CHECK: llvm.store %[[ARG3]], %[[ARG2]] : i32, !llvm.ptr
// CHECK: %[[CONST_VAL2:.*]] = llvm.mlir.constant(48 : i32) : i32
// CHECK: "llvm.intr.memcpy"(%[[ALLOCA1:.*]], %[[ARG0]], %[[CONST_VAL2]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
// CHECK: %[[VAL1:.*]] = llvm.load %[[ARG2]] : !llvm.ptr -> i32
// CHECK: %[[VAL2:.*]] = llvm.sext %[[VAL1]] : i32 to i64
// CHECK: %[[VAL3:.*]] = llvm.getelementptr %[[ALLOCA1]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
// CHECK: %[[VAL4:.*]] = llvm.load %[[VAL3]] : !llvm.ptr -> !llvm.ptr
// CHECK: %[[VAL5:.*]] = llvm.getelementptr %[[ALLOCA1]][0, 7, %[[CONST_VAL1]], 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
// CHECK: %[[VAL6:.*]] = llvm.load %[[VAL5]] : !llvm.ptr -> i64
// CHECK: %[[VAL7:.*]] = llvm.getelementptr %[[ALLOCA1]][0, 7, %[[CONST_VAL1]], 1] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
// CHECK: %[[VAL8:.*]] = llvm.load %[[VAL7]] : !llvm.ptr -> i64
// CHECK: %[[VAL10:.*]] = llvm.mlir.constant(1 : i64) : i64
// CHECK: %[[VAL11:.*]] = llvm.mlir.constant(0 : i64) : i64
// CHECK: %[[VAL12:.*]] = llvm.sub %[[VAL2]], %[[VAL6]] overflow<nsw> : i64
// CHECK: %[[VAL13:.*]] = llvm.mul %[[VAL12]], %[[VAL10]] overflow<nsw> : i64
// CHECK: %[[VAL14:.*]] = llvm.mul %[[VAL13]], %[[VAL10]] overflow<nsw> : i64
// CHECK: %[[VAL15:.*]] = llvm.add %[[VAL14]], %[[VAL11]] overflow<nsw> : i64
// CHECK: %[[VAL16:.*]] = llvm.mul %[[VAL10]], %[[VAL8]] overflow<nsw> : i64
// CHECK: %[[VAL17:.*]] = llvm.getelementptr %[[VAL4]][%[[VAL15]]] : (!llvm.ptr, i64) -> !llvm.ptr, i32
// CHECK: %[[VAL18:.*]] = llvm.load %[[VAL17]] {nontemporal} : !llvm.ptr -> i32
// CHECK: %[[VAL19:.*]] = llvm.load %{{.*}} : !llvm.ptr -> i32
// CHECK: %[[VAL20:.*]] = llvm.add %[[VAL18]], %[[VAL19]] : i32
// CHECK: llvm.store %[[VAL20]], %[[VAL17]] {nontemporal} : i32, !llvm.ptr
// CHECK: omp.yield
// CHECK: }
// CHECK: }
// CHECK: llvm.return

  func.func @_QPsimd_nontemporal_allocatable(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {fir.bindc_name = "x"}, %arg1: !fir.ref<i32> {fir.bindc_name = "y"}) {
    %c100 = arith.constant 100 : index
    %c1_i32 = arith.constant 1 : i32
    %c0 = arith.constant 0 : index
    %c100_i32 = arith.constant 100 : i32
    %0 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimd_nontemporal_allocatableEi"}
    %1 = fir.allocmem !fir.array<?xi32>, %c100 {fir.must_be_heap = true, uniq_name = "_QFsimd_nontemporal_allocatableEx.alloc"}
    %2 = fircg.ext_embox %1(%c100) : (!fir.heap<!fir.array<?xi32>>, index) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
    fir.store %2 to %arg0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
    omp.simd nontemporal(%arg0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) private(@_QFsimd_nontemporal_allocatableEi_private_i32 %0 -> %arg2 : !fir.ref<i32>) {
      omp.loop_nest (%arg3) : i32 = (%c1_i32) to (%c100_i32) inclusive step (%c1_i32) {
        fir.store %arg3 to %arg2 : !fir.ref<i32>
        %7 = fir.load %arg0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
        %8 = fir.load %arg2 : !fir.ref<i32>
        %9 = fir.convert %8 : (i32) -> i64
        %10 = fir.box_addr %7 : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
        %11:3 = fir.box_dims %7, %c0 : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
        %12 = fircg.ext_array_coor %10(%11#1) origin %11#0<%9> : (!fir.heap<!fir.array<?xi32>>, index, index, i64) -> !fir.ref<i32>
        %13 = fir.load %12 {nontemporal} : !fir.ref<i32>
        %14 = fir.load %arg1 : !fir.ref<i32>
        %15 = arith.addi %13, %14 : i32
        fir.store %15 to %12 {nontemporal} : !fir.ref<i32>
        omp.yield
      }
    }
    return
  }