Skip to content

Commit

Permalink
Adapt to LLVM 17. (#583)
Browse files Browse the repository at this point in the history
  • Loading branch information
maleadt committed May 21, 2024
1 parent 104629e commit e18cdd2
Show file tree
Hide file tree
Showing 6 changed files with 115 additions and 61 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ InteractiveUtils = "1"
Libdl = "1"
Logging = "1"
UUIDs = "1"
LLVM = "6.6"
LLVM = "7.1"
Scratch = "1"
TimerOutputs = "0.5"
julia = "1.8"
24 changes: 15 additions & 9 deletions src/driver.jl
Original file line number Diff line number Diff line change
Expand Up @@ -325,16 +325,22 @@ const __llvm_initialized = Ref(false)
# global variables. this makes sure that the optimizer can, e.g.,
# rewrite function signatures.
if toplevel
# TODO: there's no good API to use internalize with the new pass manager yet
@dispose pm=ModulePassManager() begin
exports = collect(values(jobs))
for gvar in globals(ir)
if linkage(gvar) == LLVM.API.LLVMExternalLinkage
push!(exports, LLVM.name(gvar))
end
preserved_gvs = collect(values(jobs))
for gvar in globals(ir)
if linkage(gvar) == LLVM.API.LLVMExternalLinkage
push!(preserved_gvs, LLVM.name(gvar))
end
end
if use_newpm && LLVM.version() >= v"17"
@dispose pb=PassBuilder() mpm=NewPMModulePassManager(pb) begin
add!(mpm, InternalizePass(InternalizePassOptions(; preserved_gvs)))
run!(mpm, ir)
end
else
@dispose pm=ModulePassManager() begin
internalize!(pm, preserved_gvs)
run!(pm, ir)
end
internalize!(pm, exports)
run!(pm, ir)
end
end

Expand Down
40 changes: 21 additions & 19 deletions src/irgen.jl
Original file line number Diff line number Diff line change
Expand Up @@ -95,27 +95,29 @@ function irgen(@nospecialize(job::CompilerJob))
end
end

# TODO: there's no good API to use internalize with the new pass manager yet
@dispose pm=ModulePassManager() begin
global current_job
current_job = job

linkage!(entry, LLVM.API.LLVMExternalLinkage)

# internalize all functions, but keep exported global variables
exports = String[LLVM.name(entry)]
for gvar in globals(mod)
push!(exports, LLVM.name(gvar))
# internalize all functions, but keep exported global variables.
linkage!(entry, LLVM.API.LLVMExternalLinkage)
preserved_gvs = String[LLVM.name(entry)]
for gvar in globals(mod)
push!(preserved_gvs, LLVM.name(gvar))
end
if use_newpm && LLVM.version() >= v"17"
@dispose pb=PassBuilder() mpm=NewPMModulePassManager(pb) begin
add!(mpm, InternalizePass(InternalizePassOptions(; preserved_gvs)))
add!(mpm, AlwaysInlinerPass())
run!(mpm, mod)
end
else
@dispose pm=ModulePassManager() begin
internalize!(pm, preserved_gvs)
always_inliner!(pm)
run!(pm, mod)
end
internalize!(pm, exports)

# inline llvmcall bodies
always_inliner!(pm)

can_throw(job) || add!(pm, ModulePass("LowerThrow", lower_throw!))

run!(pm, mod)
end

global current_job
current_job = job
can_throw(job) || lower_throw!(mod)
end

return mod, compiled
Expand Down
10 changes: 6 additions & 4 deletions src/optim.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ end
## new pm

function optimize_newpm!(@nospecialize(job::CompilerJob), mod::LLVM.Module; opt_level)
triple = llvm_triple(job.config.target)
tm = llvm_machine(job.config.target)

global current_job
Expand All @@ -34,7 +33,9 @@ function buildNewPMPipeline!(mpm, @nospecialize(job::CompilerJob), opt_level)
buildEarlySimplificationPipeline(mpm, job, opt_level)
add!(mpm, AlwaysInlinerPass())
buildEarlyOptimizerPipeline(mpm, job, opt_level)
add!(mpm, LowerSIMDLoopPass())
if VERSION < v"1.10"
add!(mpm, LowerSIMDLoopPass())
end
add!(mpm, NewPMFunctionPassManager) do fpm
buildLoopOptimizerPipeline(fpm, job, opt_level)
buildScalarOptimizerPipeline(fpm, job, opt_level)
Expand Down Expand Up @@ -113,8 +114,9 @@ end

function buildLoopOptimizerPipeline(fpm, @nospecialize(job::CompilerJob), opt_level)
add!(fpm, NewPMLoopPassManager) do lpm
# TODO LowerSIMDLoopPass
# LoopPass since JuliaLang/julia#51883
if VERSION >= v"1.10"
add!(lpm, LowerSIMDLoopPass())
end
if opt_level >= 2
add!(lpm, LoopRotatePass())
end
Expand Down
96 changes: 70 additions & 26 deletions src/ptx.jl
Original file line number Diff line number Diff line change
Expand Up @@ -153,42 +153,86 @@ end
function optimize_module!(@nospecialize(job::CompilerJob{PTXCompilerTarget}),
mod::LLVM.Module)
tm = llvm_machine(job.config.target)
# TODO can't convert to newpm because speculative-execution doesn't have a parameter in the default PassBuilder parser
@dispose pm=ModulePassManager() begin
add_library_info!(pm, triple(mod))
add_transform_info!(pm, tm)

# TODO: need to run this earlier; optimize_module! is called after addOptimizationPasses!
add!(pm, FunctionPass("NVVMReflect", nvvm_reflect!))

# needed by GemmKernels.jl-like code
speculative_execution_if_has_branch_divergence!(pm)
# TODO: Use the registered target passes (JuliaGPU/GPUCompiler.jl#450)
if use_newpm
@dispose pb=PassBuilder(tm) mpm=NewPMModulePassManager(pb) begin
add!(mpm, NewPMFunctionPassManager) do fpm
# TODO: need to run this earlier; optimize_module! is called after addOptimizationPasses!
add!(legacy2newpm(nvvm_reflect!), fpm)

# needed by GemmKernels.jl-like code
add!(fpm, SpeculativeExecutionPass())

# NVPTX's target machine info enables runtime unrolling,
# but Julia's pass sequence only invokes the simple unroller.
add!(fpm, LoopUnrollPass(LoopUnrollOptions(; job.config.opt_level)))
add!(fpm, InstCombinePass()) # clean-up redundancy
add!(fpm, NewPMLoopPassManager) do lpm
add!(lpm, LICMPass()) # the inner runtime check might be
# outer loop invariant
end

# the above loop unroll pass might have unrolled regular, non-runtime nested loops.
# that code still needs to be optimized (arguably, multiple unroll passes should be
# scheduled by the Julia optimizer). do so here, instead of re-optimizing entirely.
if job.config.opt_level == 2
add!(fpm, GVNPass())
elseif job.config.opt_level == 1
add!(fpm, EarlyCSEPass())
end
add!(fpm, DSEPass())

add!(fpm, SimplifyCFGPass())
end

# NVPTX's target machine info enables runtime unrolling,
# but Julia's pass sequence only invokes the simple unroller.
loop_unroll!(pm)
instruction_combining!(pm) # clean-up redundancy
licm!(pm) # the inner runtime check might be outer loop invariant
# get rid of the internalized functions; now possibly unused
add!(mpm, GlobalDCEPass())

# the above loop unroll pass might have unrolled regular, non-runtime nested loops.
# that code still needs to be optimized (arguably, multiple unroll passes should be
# scheduled by the Julia optimizer). do so here, instead of re-optimizing entirely.
early_csemem_ssa!(pm) # TODO: gvn instead? see NVPTXTargetMachine.cpp::addEarlyCSEOrGVNPass
dead_store_elimination!(pm)
run!(mpm, mod, tm)
end
else
@dispose pm=ModulePassManager() begin
add_library_info!(pm, triple(mod))
add_transform_info!(pm, tm)

# TODO: need to run this earlier; optimize_module! is called after addOptimizationPasses!
add!(pm, FunctionPass("NVVMReflect", nvvm_reflect!))

# needed by GemmKernels.jl-like code
speculative_execution_if_has_branch_divergence!(pm)

# NVPTX's target machine info enables runtime unrolling,
# but Julia's pass sequence only invokes the simple unroller.
loop_unroll!(pm)
instruction_combining!(pm) # clean-up redundancy
licm!(pm) # the inner runtime check might be outer loop invariant

# the above loop unroll pass might have unrolled regular, non-runtime nested loops.
# that code still needs to be optimized (arguably, multiple unroll passes should be
# scheduled by the Julia optimizer). do so here, instead of re-optimizing entirely.
if job.config.opt_level == 2
gvn!(pm)
elseif job.config.opt_level == 1
early_cse!(pm)
end
dead_store_elimination!(pm)

cfgsimplification!(pm)
cfgsimplification!(pm)

# get rid of the internalized functions; now possibly unused
global_dce!(pm)
# get rid of the internalized functions; now possibly unused
global_dce!(pm)

run!(pm, mod)
run!(pm, mod)
end
end
end

function finish_ir!(@nospecialize(job::CompilerJob{PTXCompilerTarget}),
mod::LLVM.Module, entry::LLVM.Function)
for f in functions(mod)
lower_unreachable!(f)
if LLVM.version() < v"17"
for f in functions(mod)
lower_unreachable!(f)
end
end

if job.config.kernel
Expand Down
4 changes: 2 additions & 2 deletions test/ptx_tests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ end
@test occursin(r"@\w*kernel\w*\(\[1 x i64\] %state", ir)

# child1 doesn't use the state
@test occursin(r"@\w*child1\w*\((i64|i8\*)", ir)
@test occursin(r"@\w*child1\w*\((i64|i8\*|ptr)", ir)

# child2 does
@test occursin(r"@\w*child2\w*\(\[1 x i64\] %state", ir)
Expand Down Expand Up @@ -341,7 +341,7 @@ precompile_test_harness("Inference caching") do load_path
job, _ = PTXCompiler.create_job(kernel, ())
GPUCompiler.code_typed(job)
end

# identity is foreign
@setup_workload begin
job, _ = PTXCompiler.create_job(identity, (Int,))
Expand Down

0 comments on commit e18cdd2

Please sign in to comment.