Skip to content

Commit

Permalink
some updates and fixes to inlining cost
Browse files Browse the repository at this point in the history
- Instead of always inlining functions marked at-inline, increase
  the cost threshold 20x
- Don't inline functions inferred not to return
- statement_cost no longer needs to look at nested Exprs in general
- Fix cost of `:copyast`
  • Loading branch information
JeffBezanson committed Jun 29, 2018
1 parent ba31a93 commit 6cb12f0
Show file tree
Hide file tree
Showing 8 changed files with 64 additions and 49 deletions.
8 changes: 6 additions & 2 deletions base/array.jl
Expand Up @@ -1666,7 +1666,9 @@ julia> findnext(isodd, A, CartesianIndex(1, 1))
CartesianIndex(1, 1)
```
"""
function findnext(testf::Function, A, start)
@inline findnext(testf::Function, A, start) = findnext_internal(testf, A, start)

function findnext_internal(testf::Function, A, start)
l = last(keys(A))
i = start
while i <= l
Expand Down Expand Up @@ -1854,7 +1856,9 @@ julia> findprev(isodd, A, CartesianIndex(1, 2))
CartesianIndex(2, 1)
```
"""
function findprev(testf::Function, A, start)
@inline findprev(testf::Function, A, start) = findprev_internal(testf, A, start)

function findprev_internal(testf::Function, A, start)
i = start
while i >= first(keys(A))
testf(A[i]) && return i
Expand Down
5 changes: 3 additions & 2 deletions base/compiler/abstractinterpretation.jl
Expand Up @@ -129,8 +129,9 @@ function abstract_call_method_with_const_args(@nospecialize(f), argtypes::Vector
code === nothing && return Any
code = code::MethodInstance
# decide if it's likely to be worthwhile
cache_inlineable = false
if isdefined(code, :inferred)
declared_inline = isdefined(method, :source) && ccall(:jl_ast_flag_inlineable, Bool, (Any,), method.source)
cache_inlineable = declared_inline
if isdefined(code, :inferred) && !cache_inlineable
cache_inf = code.inferred
if !(cache_inf === nothing)
cache_src_inferred = ccall(:jl_ast_flag_inferred, Bool, (Any,), cache_inf)
Expand Down
79 changes: 42 additions & 37 deletions base/compiler/optimize.jl
Expand Up @@ -204,14 +204,20 @@ function optimize(opt::OptimizationState, @nospecialize(result))
else
force_noinline = true
end
if !opt.src.inlineable && result === Union{}
force_noinline = true
end
end
if force_noinline
opt.src.inlineable = false
elseif !opt.src.inlineable && isa(def, Method)
elseif isa(def, Method)
bonus = 0
if result Tuple && !isbitstype(widenconst(result))
bonus = opt.params.inline_tupleret_bonus
end
if opt.src.inlineable
bonus += opt.params.inline_cost_threshold*19
end
opt.src.inlineable = isinlineable(def, opt, bonus)
end
nothing
Expand Down Expand Up @@ -374,86 +380,85 @@ isknowntype(@nospecialize T) = (T == Union{}) || isconcretetype(T)

function statement_cost(ex::Expr, line::Int, src::CodeInfo, spvals::SimpleVector, slottypes::Vector{Any}, params::Params)
head = ex.head
if is_meta_expr_head(head) || head == :copyast # not sure if copyast is right
if is_meta_expr_head(head)
return 0
end
argcost = 0
for a in ex.args
if a isa Expr
argcost = plus_saturate(argcost, statement_cost(a, -1, src, spvals, slottypes, params))
end
end
if head == :return || head == :(=)
return argcost
end
extyp = line == -1 ? Any : src.ssavaluetypes[line]
if head == :call
elseif head === :call
ftyp = argextype(ex.args[1], src, spvals, slottypes)
if isa(ftyp, Type)
return argcost
end
if isa(ftyp, Const)
f = (ftyp::Const).val
if isa(f, IntrinsicFunction)
iidx = Int(reinterpret(Int32, f::IntrinsicFunction)) + 1
if !isassigned(T_IFUNC_COST, iidx)
# unknown/unhandled intrinsic
return plus_saturate(argcost, params.inline_nonleaf_penalty)
return params.inline_nonleaf_penalty
end
return plus_saturate(argcost, T_IFUNC_COST[iidx])
return T_IFUNC_COST[iidx]
end
if isa(f, Builtin)
# The efficiency of operations like a[i] and s.b
# depend strongly on whether the result can be
# inferred, so check the type of ex
if f == Main.Core.getfield || f == Main.Core.tuple
if f === Main.Core.getfield || f === Main.Core.tuple
# we might like to penalize non-inferrability, but
# tuple iteration/destructuring makes that
# impossible
# return plus_saturate(argcost, isknowntype(extyp) ? 1 : params.inline_nonleaf_penalty)
return argcost
elseif f == Main.Core.arrayref && length(ex.args) >= 3
return 0
elseif f === Main.Core.arrayref && length(ex.args) >= 3
atyp = argextype(ex.args[3], src, spvals, slottypes)
return plus_saturate(argcost, isknowntype(atyp) ? 4 : params.inline_nonleaf_penalty)
return isknowntype(atyp) ? 4 : params.inline_nonleaf_penalty
end
fidx = findfirst(x->x===f, T_FFUNC_KEY)
if fidx === nothing
# unknown/unhandled builtin or anonymous function
# Use the generic cost of a direct function call
return plus_saturate(argcost, 20)
return 20
end
return plus_saturate(argcost, T_FFUNC_COST[fidx])
return T_FFUNC_COST[fidx]
end
end
return plus_saturate(argcost, params.inline_nonleaf_penalty)
elseif head == :foreigncall || head == :invoke
return params.inline_nonleaf_penalty
elseif head === :foreigncall || head === :invoke
# Calls whose "return type" is Union{} do not actually return:
# they are errors. Since these are not part of the typical
# run-time of the function, we omit them from
# consideration. This way, non-inlined error branches do not
# prevent inlining.
return extyp == Union{} ? 0 : plus_saturate(20, argcost)
elseif head == :llvmcall
return plus_saturate(10, argcost) # a wild guess at typical cost
elseif head == :enter
extyp = line == -1 ? Any : src.ssavaluetypes[line]
return extyp === Union{} ? 0 : 20
elseif head === :return
a = ex.args[1]
if a isa Expr
return statement_cost(a, -1, src, spvals, slottypes, params)
end
return 0
elseif head === :(=)
a = ex.args[2]
if a isa Expr
return statement_cost(a, -1, src, spvals, slottypes, params)
end
return 0
elseif head === :copyast
return 100
elseif head === :enter
# try/catch is a couple function calls,
# but don't inline functions with try/catch
# since these aren't usually performance-sensitive functions,
# and llvm is more likely to miscompile them when these functions get large
return typemax(Int)
elseif head == :gotoifnot
elseif head === :gotoifnot
target = ex.args[2]::Int
# loops are generally always expensive
# but assume that forward jumps are already counted for from
# summing the cost of the not-taken branch
return target < line ? plus_saturate(40, argcost) : argcost
return target < line ? 40 : 0
end
return argcost
return 0
end

function inline_worthy(body::Array{Any,1}, src::CodeInfo, spvals::SimpleVector, slottypes::Vector{Any},
params::Params, cost_threshold::Integer=params.inline_cost_threshold)
bodycost = 0
bodycost::Int = 0
for line = 1:length(body)
stmt = body[line]
if stmt isa Expr
Expand All @@ -467,9 +472,9 @@ function inline_worthy(body::Array{Any,1}, src::CodeInfo, spvals::SimpleVector,
continue
end
bodycost = plus_saturate(bodycost, thiscost)
bodycost == typemax(Int) && return false
bodycost > cost_threshold && return false
end
return bodycost <= cost_threshold
return true
end

function is_known_call(e::Expr, @nospecialize(func), src, spvals::SimpleVector, slottypes::Vector{Any} = empty_slottypes)
Expand Down
4 changes: 3 additions & 1 deletion base/compiler/tfuncs.jl
Expand Up @@ -185,7 +185,9 @@ add_tfunc(checked_smul_int, 2, 2, chk_tfunc, 10)
add_tfunc(checked_umul_int, 2, 2, chk_tfunc, 10)
## other, misc intrinsics ##
add_tfunc(Core.Intrinsics.llvmcall, 3, INT_INF,
(@nospecialize(fptr), @nospecialize(rt), @nospecialize(at), a...) -> instanceof_tfunc(rt)[1], 10)
# TODO: Lower this inlining cost. We currently need to prevent inlining llvmcall
# to avoid issues with its IR.
(@nospecialize(fptr), @nospecialize(rt), @nospecialize(at), a...) -> instanceof_tfunc(rt)[1], 1000)
cglobal_tfunc(@nospecialize(fptr)) = Ptr{Cvoid}
cglobal_tfunc(@nospecialize(fptr), @nospecialize(t)) = (isType(t) ? Ptr{t.parameters[1]} : Ptr)
cglobal_tfunc(@nospecialize(fptr), t::Const) = (isa(t.val, Type) ? Ptr{t.val} : Ptr)
Expand Down
4 changes: 2 additions & 2 deletions base/compiler/validation.jl
Expand Up @@ -211,7 +211,7 @@ is_valid_lvalue(@nospecialize(x)) = isa(x, Slot) || isa(x, GlobalRef)

function is_valid_argument(@nospecialize(x))
if isa(x, Slot) || isa(x, SSAValue) || isa(x, GlobalRef) || isa(x, QuoteNode) ||
(isa(x,Expr) && (x.head in (:static_parameter, :boundscheck, :copyast))) ||
(isa(x,Expr) && (x.head in (:static_parameter, :boundscheck))) ||
isa(x, Number) || isa(x, AbstractString) || isa(x, AbstractChar) || isa(x, Tuple) ||
isa(x, Type) || isa(x, Core.Box) || isa(x, Module) || x === nothing
return true
Expand All @@ -223,7 +223,7 @@ end

function is_valid_rvalue(@nospecialize(x))
is_valid_argument(x) && return true
if isa(x, Expr) && x.head in (:new, :the_exception, :isdefined, :call, :invoke, :foreigncall, :cfunction, :gc_preserve_begin)
if isa(x, Expr) && x.head in (:new, :the_exception, :isdefined, :call, :invoke, :foreigncall, :cfunction, :gc_preserve_begin, :copyast)
return true
end
return false
Expand Down
5 changes: 3 additions & 2 deletions base/deprecated.jl
Expand Up @@ -735,8 +735,9 @@ end
@deprecate findprev(A, v, i::Integer) something(findprev(isequal(v), A, i), 0)
@deprecate findlast(A, v) something(findlast(isequal(v), A), 0)
# to fix ambiguities introduced by deprecations
findnext(pred::Function, A, i::Integer) = invoke(findnext, Tuple{Function, Any, Any}, pred, A, i)
findprev(pred::Function, A, i::Integer) = invoke(findprev, Tuple{Function, Any, Any}, pred, A, i)
# TODO: also remove find*_internal in array.jl
findnext(pred::Function, A, i::Integer) = findnext_internal(pred, A, i)
findprev(pred::Function, A, i::Integer) = findprev_internal(pred, A, i)
# also remove deprecation warnings in find* functions in array.jl, sparse/sparsematrix.jl,
# and sparse/sparsevector.jl.

Expand Down
2 changes: 1 addition & 1 deletion test/compiler/compiler.jl
Expand Up @@ -860,7 +860,7 @@ function break_21369()
local fr
while true
fr = Base.StackTraces.lookup(bt[i])[end]
if !fr.from_c
if !fr.from_c && fr.func !== :error
break
end
i += 1
Expand Down
6 changes: 4 additions & 2 deletions test/show.jl
Expand Up @@ -1234,10 +1234,12 @@ function compute_annotations(f, types)
join((strip(string(a, " "^(max_loc_method-length(a)), b)) for (a, b) in zip(la, lb)), '\n')
end

g_line() = leaf() # Deliberately not implemented to end up as a leaf after inlining
@noinline leaffunc() = print()

@inline g_line() = leaffunc()

# Test that separate instances of the same function do not get merged
function f_line()
@inline function f_line()
g_line()
g_line()
g_line()
Expand Down

0 comments on commit 6cb12f0

Please sign in to comment.