Skip to content
This repository has been archived by the owner on Jun 26, 2020. It is now read-only.

Allow readonly nontrapping loads to be hoisted by licm #727

Merged
merged 4 commits into from Apr 9, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
20 changes: 17 additions & 3 deletions cranelift-codegen/src/licm.rs
Expand Up @@ -5,7 +5,9 @@ use crate::dominator_tree::DominatorTree;
use crate::entity::{EntityList, ListPool};
use crate::flowgraph::{BasicBlock, ControlFlowGraph};
use crate::fx::FxHashSet;
use crate::ir::{DataFlowGraph, Ebb, Function, Inst, InstBuilder, Layout, Opcode, Type, Value};
use crate::ir::{
DataFlowGraph, Ebb, Function, Inst, InstBuilder, InstructionData, Layout, Opcode, Type, Value,
};
use crate::isa::TargetIsa;
use crate::loop_analysis::{Loop, LoopAnalysis};
use crate::timing;
Expand Down Expand Up @@ -145,8 +147,7 @@ fn change_branch_jump_destination(inst: Inst, new_ebb: Ebb, func: &mut Function)

/// Test whether the given opcode is unsafe to even consider for LICM.
fn trivially_unsafe_for_licm(opcode: Opcode) -> bool {
opcode.can_load()
|| opcode.can_store()
opcode.can_store()
|| opcode.is_call()
|| opcode.is_branch()
|| opcode.is_terminator()
Expand All @@ -156,12 +157,25 @@ fn trivially_unsafe_for_licm(opcode: Opcode) -> bool {
|| opcode.writes_cpu_flags()
}

/// Test whether a load is unsafe to hoist out of a loop.
///
/// An ordinary load carries two hazards for LICM: the address may trap,
/// and the loaded value may change between iterations. A load whose
/// memory flags are both `readonly` and `notrap` carries neither hazard,
/// so it may be hoisted. Any other instruction falls back to its
/// opcode's `can_load` property.
fn is_unsafe_load(inst_data: &InstructionData) -> bool {
    match inst_data {
        InstructionData::Load { flags, .. } | InstructionData::LoadComplex { flags, .. } => {
            // Hoistable only when both flags are present.
            !(flags.readonly() && flags.notrap())
        }
        _ => inst_data.opcode().can_load(),
    }
}

bnjbvr marked this conversation as resolved.
Show resolved Hide resolved
/// Test whether the given instruction is loop-invariant.
fn is_loop_invariant(inst: Inst, dfg: &DataFlowGraph, loop_values: &FxHashSet<Value>) -> bool {
if trivially_unsafe_for_licm(dfg[inst].opcode()) {
return false;
}

if is_unsafe_load(&dfg[inst]) {
return false;
}

let inst_args = dfg.inst_args(inst);
for arg in inst_args {
let arg = dfg.resolve_aliases(*arg);
Expand Down
48 changes: 48 additions & 0 deletions filetests/licm/load_readonly_notrap.clif
@@ -0,0 +1,48 @@
test licm

target x86_64

;; Nontrapping readonly load from address that is not loop-dependent
;; should be hoisted out of the loop.

function %hoist_load(i32, i64 vmctx) -> i32 {
gv0 = vmctx
gv1 = load.i64 notrap aligned readonly gv0
heap0 = static gv1, min 0x1_0000, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32

;; Entry block: immediately enter the loop with the counter and vmctx.
ebb0(v0: i32, v1: i64):
jump ebb1(v0, v1)

;; Loop body: v2 counts down to zero. The address (v5) and the load (v6)
;; do not depend on loop-varying values.
ebb1(v2: i32, v3: i64):
v4 = iconst.i32 1
v5 = heap_addr.i64 heap0, v4, 1
;; notrap + readonly: this load is eligible for hoisting by licm
;; (see the expected output below, where it appears in ebb0).
v6 = load.i32 notrap aligned readonly v5
v7 = iadd v2, v6
brz v2, ebb2(v2)
v8 = isub v2, v4
jump ebb1(v8, v3)

;; Loop exit.
ebb2(v9: i32):
return v9
}

; sameln: function %hoist_load(i32, i64 vmctx) -> i32 fast {
; nextln: gv0 = vmctx
; nextln: gv1 = load.i64 notrap aligned readonly gv0
; nextln: heap0 = static gv1, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i32
; nextln:
; nextln: ebb0(v0: i32, v1: i64):
; nextln: v4 = iconst.i32 1
; nextln: v5 = heap_addr.i64 heap0, v4, 1
; nextln: v6 = load.i32 notrap aligned readonly v5
; nextln: jump ebb1(v0, v1)
; nextln:
; nextln: ebb1(v2: i32, v3: i64):
; nextln: v7 = iadd v2, v6
; nextln: brz v2, ebb2(v2)
; nextln: v8 = isub v2, v4
; nextln: jump ebb1(v8, v3)
; nextln:
; nextln: ebb2(v9: i32):
; nextln: return v9
; nextln: }
49 changes: 49 additions & 0 deletions filetests/licm/reject_load_notrap.clif
@@ -0,0 +1,49 @@
test licm

target x86_64

;; Nontrapping possibly-not-readonly load from address that is not
;; loop-dependent should *not* be hoisted out of the loop, though the
;; address computation can be.

function %hoist_load(i32, i64 vmctx) -> i32 {
gv0 = vmctx
gv1 = load.i64 notrap aligned readonly gv0
heap0 = static gv1, min 0x1_0000, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32

;; Entry block: the address computation is already outside the loop.
ebb0(v0: i32, v1: i64):
v4 = iconst.i32 1
v5 = heap_addr.i64 heap0, v4, 1
jump ebb1(v0, v1)

;; Loop body: v2 counts down to zero.
ebb1(v2: i32, v3: i64):
;; notrap but NOT readonly: the loaded value could change between
;; iterations, so licm must leave this load inside the loop.
v6 = load.i32 notrap aligned v5
v7 = iadd v2, v6
brz v2, ebb2(v2)
v8 = isub v2, v4
jump ebb1(v8, v3)

;; Loop exit.
ebb2(v9: i32):
return v9
}

; sameln: function %hoist_load(i32, i64 vmctx) -> i32 fast {
; nextln: gv0 = vmctx
; nextln: gv1 = load.i64 notrap aligned readonly gv0
; nextln: heap0 = static gv1, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i32
; nextln:
; nextln: ebb0(v0: i32, v1: i64):
; nextln: v4 = iconst.i32 1
; nextln: v5 = heap_addr.i64 heap0, v4, 1
; nextln: jump ebb1(v0, v1)
; nextln:
; nextln: ebb1(v2: i32, v3: i64):
; nextln: v6 = load.i32 notrap aligned v5
; nextln: v7 = iadd v2, v6
; nextln: brz v2, ebb2(v2)
; nextln: v8 = isub v2, v4
; nextln: jump ebb1(v8, v3)
; nextln:
; nextln: ebb2(v9: i32):
; nextln: return v9
; nextln: }
49 changes: 49 additions & 0 deletions filetests/licm/reject_load_readonly.clif
@@ -0,0 +1,49 @@
test licm

target x86_64

;; Maybe-trapping readonly load from address that is not
;; loop-dependent should *not* be hoisted out of the loop, though the
;; address computation can be hoisted.

function %hoist_load(i32, i64 vmctx) -> i32 {
gv0 = vmctx
gv1 = load.i64 notrap aligned readonly gv0
heap0 = static gv1, min 0x1_0000, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32

;; Entry block: immediately enter the loop.
ebb0(v0: i32, v1: i64):
jump ebb1(v0, v1)

;; Loop body: v2 counts down to zero. The address computation (v4, v5)
;; is loop-invariant and may be hoisted, but not the load itself.
ebb1(v2: i32, v3: i64):
v4 = iconst.i32 1
v5 = heap_addr.i64 heap0, v4, 1
;; readonly but NOT notrap: the load could trap, so executing it
;; unconditionally before the loop would change behavior; licm must
;; leave it inside the loop.
v6 = load.i32 aligned readonly v5
v7 = iadd v2, v6
brz v2, ebb2(v2)
v8 = isub v2, v4
jump ebb1(v8, v3)

;; Loop exit.
ebb2(v9: i32):
return v9
}

; sameln: function %hoist_load(i32, i64 vmctx) -> i32 fast {
; nextln: gv0 = vmctx
; nextln: gv1 = load.i64 notrap aligned readonly gv0
; nextln: heap0 = static gv1, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i32
; nextln:
; nextln: ebb0(v0: i32, v1: i64):
; nextln: v4 = iconst.i32 1
; nextln: v5 = heap_addr.i64 heap0, v4, 1
; nextln: jump ebb1(v0, v1)
; nextln:
; nextln: ebb1(v2: i32, v3: i64):
; nextln: v6 = load.i32 aligned readonly v5
; nextln: v7 = iadd v2, v6
; nextln: brz v2, ebb2(v2)
; nextln: v8 = isub v2, v4
; nextln: jump ebb1(v8, v3)
; nextln:
; nextln: ebb2(v9: i32):
; nextln: return v9
; nextln: }