Skip to content
This repository has been archived by the owner on Jun 26, 2020. It is now read-only.

Allow readonly nontrapping loads to be hoisted by licm #727

Merged
merged 4 commits into from Apr 9, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
20 changes: 17 additions & 3 deletions cranelift-codegen/src/licm.rs
Expand Up @@ -5,7 +5,9 @@ use crate::dominator_tree::DominatorTree;
use crate::entity::{EntityList, ListPool};
use crate::flowgraph::{BasicBlock, ControlFlowGraph};
use crate::fx::FxHashSet;
use crate::ir::{DataFlowGraph, Ebb, Function, Inst, InstBuilder, Layout, Opcode, Type, Value};
use crate::ir::{
DataFlowGraph, Ebb, Function, Inst, InstBuilder, InstructionData, Layout, Opcode, Type, Value,
};
use crate::isa::TargetIsa;
use crate::loop_analysis::{Loop, LoopAnalysis};
use crate::timing;
Expand Down Expand Up @@ -145,8 +147,7 @@ fn change_branch_jump_destination(inst: Inst, new_ebb: Ebb, func: &mut Function)

/// Test whether the given opcode is unsafe to even consider for LICM.
fn trivially_unsafe_for_licm(opcode: Opcode) -> bool {
opcode.can_load()
|| opcode.can_store()
opcode.can_store()
|| opcode.is_call()
|| opcode.is_branch()
|| opcode.is_terminator()
Expand All @@ -156,12 +157,25 @@ fn trivially_unsafe_for_licm(opcode: Opcode) -> bool {
|| opcode.writes_cpu_flags()
}

/// Test whether a load is unsafe to hoist out of a loop.
///
/// An ordinary load carries two hazards for LICM: the address may trap,
/// and the loaded value may change between iterations. A load whose
/// memory flags are both `readonly` and `notrap` carries neither hazard,
/// so it may be hoisted. Any other instruction falls back to its
/// opcode's `can_load` property.
fn is_unsafe_load(inst_data: &InstructionData) -> bool {
    match inst_data {
        InstructionData::Load { flags, .. } | InstructionData::LoadComplex { flags, .. } => {
            // Hoistable only when both flags are present.
            !(flags.readonly() && flags.notrap())
        }
        _ => inst_data.opcode().can_load(),
    }
}

bnjbvr marked this conversation as resolved.
Show resolved Hide resolved
/// Test whether the given instruction is loop-invariant.
fn is_loop_invariant(inst: Inst, dfg: &DataFlowGraph, loop_values: &FxHashSet<Value>) -> bool {
if trivially_unsafe_for_licm(dfg[inst].opcode()) {
return false;
}

if is_unsafe_load(&dfg[inst]) {
return false;
}

let inst_args = dfg.inst_args(inst);
for arg in inst_args {
let arg = dfg.resolve_aliases(*arg);
Expand Down
48 changes: 48 additions & 0 deletions filetests/licm/load_readonly_notrap.clif
@@ -0,0 +1,48 @@
test licm

target x86_64

;; Nontrapping readonly load from address that is not loop-dependent
;; should be hoisted out of the loop.

function %hoist_load(i32, i64 vmctx) -> i32 {
gv0 = vmctx
gv1 = load.i64 notrap aligned readonly gv0
heap0 = static gv1, min 0x1_0000, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32

;; Entry block: immediately enter the loop with the counter and vmctx.
ebb0(v0: i32, v1: i64):
jump ebb1(v0, v1)

;; Loop body: v2 counts down to zero. The address (v5) and the load (v6)
;; do not depend on loop-varying values.
ebb1(v2: i32, v3: i64):
v4 = iconst.i32 1
v5 = heap_addr.i64 heap0, v4, 1
;; notrap + readonly: this load is eligible for hoisting by licm
;; (see the expected output below, where it appears in ebb0).
v6 = load.i32 notrap aligned readonly v5
v7 = iadd v2, v6
brz v2, ebb2(v2)
v8 = isub v2, v4
jump ebb1(v8, v3)

;; Loop exit.
ebb2(v9: i32):
return v9
}

; sameln: function %hoist_load(i32, i64 vmctx) -> i32 fast {
; nextln: gv0 = vmctx
; nextln: gv1 = load.i64 notrap aligned readonly gv0
; nextln: heap0 = static gv1, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i32
; nextln:
; nextln: ebb0(v0: i32, v1: i64):
; nextln: v4 = iconst.i32 1
; nextln: v5 = heap_addr.i64 heap0, v4, 1
; nextln: v6 = load.i32 notrap aligned readonly v5
; nextln: jump ebb1(v0, v1)
; nextln:
; nextln: ebb1(v2: i32, v3: i64):
; nextln: v7 = iadd v2, v6
; nextln: brz v2, ebb2(v2)
; nextln: v8 = isub v2, v4
; nextln: jump ebb1(v8, v3)
; nextln:
; nextln: ebb2(v9: i32):
; nextln: return v9
; nextln: }
49 changes: 49 additions & 0 deletions filetests/licm/reject_load_notrap.clif
@@ -0,0 +1,49 @@
test licm

target x86_64

;; Nontrapping possibly-not-readonly load from address that is not
;; loop-dependent should *not* be hoisted out of the loop, though the
;; address computation can be.

function %hoist_load(i32, i64 vmctx) -> i32 {
gv0 = vmctx
gv1 = load.i64 notrap aligned readonly gv0
heap0 = static gv1, min 0x1_0000, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32

;; Entry block: the address computation is already outside the loop.
ebb0(v0: i32, v1: i64):
v4 = iconst.i32 1
v5 = heap_addr.i64 heap0, v4, 1
jump ebb1(v0, v1)

;; Loop body: v2 counts down to zero.
ebb1(v2: i32, v3: i64):
;; notrap but NOT readonly: the loaded value could change between
;; iterations, so licm must leave this load inside the loop.
v6 = load.i32 notrap aligned v5
v7 = iadd v2, v6
brz v2, ebb2(v2)
v8 = isub v2, v4
jump ebb1(v8, v3)

;; Loop exit.
ebb2(v9: i32):
return v9
}

; sameln: function %hoist_load(i32, i64 vmctx) -> i32 fast {
; nextln: gv0 = vmctx
; nextln: gv1 = load.i64 notrap aligned readonly gv0
; nextln: heap0 = static gv1, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i32
; nextln:
; nextln: ebb0(v0: i32, v1: i64):
; nextln: v4 = iconst.i32 1
; nextln: v5 = heap_addr.i64 heap0, v4, 1
; nextln: jump ebb1(v0, v1)
; nextln:
; nextln: ebb1(v2: i32, v3: i64):
; nextln: v6 = load.i32 notrap aligned v5
; nextln: v7 = iadd v2, v6
; nextln: brz v2, ebb2(v2)
; nextln: v8 = isub v2, v4
; nextln: jump ebb1(v8, v3)
; nextln:
; nextln: ebb2(v9: i32):
; nextln: return v9
; nextln: }
49 changes: 49 additions & 0 deletions filetests/licm/reject_load_readonly.clif
@@ -0,0 +1,49 @@
test licm

target x86_64

;; Maybe-trapping readonly load from address that is not
;; loop-dependent should *not* be hoisted out of the loop, though the
;; address computation can be hoisted.

function %hoist_load(i32, i64 vmctx) -> i32 {
gv0 = vmctx
gv1 = load.i64 notrap aligned readonly gv0
heap0 = static gv1, min 0x1_0000, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32

;; Entry block: immediately enter the loop.
ebb0(v0: i32, v1: i64):
jump ebb1(v0, v1)

;; Loop body: v2 counts down to zero. The address computation (v4, v5)
;; is loop-invariant and may be hoisted, but not the load itself.
ebb1(v2: i32, v3: i64):
v4 = iconst.i32 1
v5 = heap_addr.i64 heap0, v4, 1
;; readonly but NOT notrap: the load could trap, so executing it
;; unconditionally before the loop would change behavior; licm must
;; leave it inside the loop.
v6 = load.i32 aligned readonly v5
v7 = iadd v2, v6
brz v2, ebb2(v2)
v8 = isub v2, v4
jump ebb1(v8, v3)

;; Loop exit.
ebb2(v9: i32):
return v9
}

; sameln: function %hoist_load(i32, i64 vmctx) -> i32 fast {
; nextln: gv0 = vmctx
; nextln: gv1 = load.i64 notrap aligned readonly gv0
; nextln: heap0 = static gv1, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i32
; nextln:
; nextln: ebb0(v0: i32, v1: i64):
; nextln: v4 = iconst.i32 1
; nextln: v5 = heap_addr.i64 heap0, v4, 1
; nextln: jump ebb1(v0, v1)
; nextln:
; nextln: ebb1(v2: i32, v3: i64):
; nextln: v6 = load.i32 aligned readonly v5
; nextln: v7 = iadd v2, v6
; nextln: brz v2, ebb2(v2)
; nextln: v8 = isub v2, v4
; nextln: jump ebb1(v8, v3)
; nextln:
; nextln: ebb2(v9: i32):
; nextln: return v9
; nextln: }