Merge pull request #69 from LuxDL/ap/mark_inactive
[Enzyme] Mark certain operations as Enzyme inactive
avik-pal committed May 12, 2024
2 parents ea65d23 + bb79996 commit e829b63
Showing 11 changed files with 30 additions and 288 deletions.
8 changes: 4 additions & 4 deletions Project.toml
@@ -1,15 +1,15 @@
name = "LuxLib"
uuid = "82251201-b29d-42c6-8e01-566dec8acb11"
authors = ["Avik Pal <avikpal@mit.edu> and contributors"]
version = "0.3.22"
version = "0.3.23"

[deps]
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869"
FastBroadcast = "7034ab61-46d4-4ed7-9d0f-46aef9175898"
FastClosures = "9aa1b823-49e4-5ca5-8b0f-3971ec8bab6a"
GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527"
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
LuxCore = "bb33d45b-7691-41d6-9220-0943567d0623"
Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
@@ -44,19 +44,19 @@ ArrayInterface = "7.9"
CUDA = "5.3.2"
ChainRulesCore = "1.23"
ComponentArrays = "0.15.8"
EnzymeCore = "0.7"
ExplicitImports = "1.4.1"
FastBroadcast = "0.2.8"
FastClosures = "0.3.2"
ForwardDiff = "0.10.36"
GPUArraysCore = "0.1.6"
KernelAbstractions = "0.9.15"
LinearAlgebra = "1.10"
LuxAMDGPU = "0.2.1"
LuxCUDA = "0.3.1"
LuxCore = "0.1.13"
LuxTestUtils = "0.1.15"
Markdown = "1.10"
NNlib = "0.9.10"
NNlib = "0.9.13"
PrecompileTools = "1.2"
Random = "1.10"
ReTestItems = "1.23.1"
2 changes: 0 additions & 2 deletions ext/LuxLibReverseDiffExt.jl
@@ -21,8 +21,6 @@ end
@grad_from_chainrules LuxLib._copy_autodiff_barrier(x::TrackedArray)
@grad_from_chainrules LuxLib._copy_autodiff_barrier(x::TrackedReal)

LuxLib._get_backend(x::TrackedArray) = LuxLib._get_backend(ReverseDiff.value(x))

# api/dropout.jl
LuxLib._dropout_fptype(x::TrackedArray) = LuxLib._dropout_fptype(ReverseDiff.value(x))

13 changes: 0 additions & 13 deletions ext/LuxLibTrackerExt.jl
@@ -41,20 +41,7 @@ function LuxLib._copy_autodiff_barrier(x::Union{TrackedArray, TrackedReal})
return LuxLib._copy_autodiff_barrier(Tracker.data(x))
end

LuxLib._get_backend(x::TrackedArray) = LuxLib._get_backend(Tracker.data(x))

# api/dropout.jl
LuxLib._dropout_fptype(x::TrackedArray) = LuxLib._dropout_fptype(Tracker.data(x))

# api/groupnorm.jl
for T1 in (:TrackedArray, :AbstractArray),
T2 in (:TrackedVector, :AbstractVector),
T3 in (:TrackedVector, :AbstractVector)

LuxLib.__is_tracked(T1, T2, T3) || continue

@eval Tracker.@grad_from_chainrules LuxLib.__fast_groupnorm(
x::$T1, groups, scale::$T2, bias::$T3, epsilon::Real)
end

end
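
The methods kept in these extensions all follow the same unwrapping pattern: a non-differentiable utility forwards the tracked value to its underlying data so the plain-array method handles it. A minimal sketch of that pattern, using a hypothetical helper named `_mask_fptype` (not part of the package):

```julia
using Tracker: Tracker, TrackedArray

# Hypothetical utility that only inspects its input's element type;
# its output carries no gradient, so no custom rule is required.
@inline _mask_fptype(x) = float(real(eltype(x)))

# Unwrap the tracked array so the plain-array method above is reused.
_mask_fptype(x::TrackedArray) = _mask_fptype(Tracker.data(x))
```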
6 changes: 2 additions & 4 deletions src/LuxLib.jl
@@ -5,28 +5,26 @@ using PrecompileTools: @recompile_invalidations
@recompile_invalidations begin
using ArrayInterface: ArrayInterface
using ChainRulesCore: ChainRulesCore, NoTangent
using EnzymeCore: EnzymeCore, EnzymeRules
using FastBroadcast: @..
using FastClosures: @closure
using GPUArraysCore: GPUArraysCore, AnyGPUArray
using KernelAbstractions: KernelAbstractions, @Const, @index, @kernel
using LinearAlgebra: LinearAlgebra, BLAS, mul!
using LuxCore: LuxCore
using Markdown: @doc_str
using NNlib: NNlib
using Random: Random, AbstractRNG, rand!
using Reexport: @reexport
using Statistics: Statistics, mean, std, var
using Statistics: Statistics, mean, var
end

@reexport using NNlib

const CRC = ChainRulesCore
const KA = KernelAbstractions

include("utils.jl")

# Low-Level Implementations
include("impl/groupnorm.jl")
include("impl/normalization.jl")
include("impl/fused_dense.jl")
include("impl/fused_conv.jl")
4 changes: 4 additions & 0 deletions src/api/dropout.jl
@@ -130,6 +130,7 @@ end
@inline _dropout_fptype(x) = float(real(eltype(x)))

CRC.@non_differentiable _dropout_fptype(::Any...)
EnzymeRules.inactive_noinl(::typeof(_dropout_fptype), ::Any...) = nothing

@inline function _alpha_dropout_noise(rng, x)
rng = LuxCore.replicate(rng)
@@ -139,6 +140,7 @@ CRC.@non_differentiable _alpha_dropout_noise(::Any...)
end

CRC.@non_differentiable _alpha_dropout_noise(::Any...)
EnzymeRules.inactive_noinl(::typeof(_alpha_dropout_noise), ::Any...) = nothing

@inline function _generate_dropout_mask(rng::AbstractRNG, x, p, invp; dims)
realfptype = _dropout_fptype(x)
@@ -148,4 +150,6 @@ CRC.@non_differentiable _alpha_dropout_noise(::Any...)
end

CRC.@non_differentiable _generate_dropout_mask(::Any...)
EnzymeRules.inactive_noinl(::typeof(_generate_dropout_mask), ::Any...) = nothing
CRC.@non_differentiable _dropout_shape(::Any...)
EnzymeRules.inactive_noinl(::typeof(_dropout_shape), ::Any...) = nothing
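
The heart of the change is visible above: each helper whose output carries no gradient information was already marked `CRC.@non_differentiable` for ChainRules-based backends, and this commit adds a matching `EnzymeRules.inactive_noinl` method so Enzyme also skips it. A minimal standalone sketch of the pattern, with a hypothetical helper named `_mask_fptype` (not part of the package):

```julia
using ChainRulesCore: ChainRulesCore
using EnzymeCore: EnzymeRules

const CRC = ChainRulesCore

# Hypothetical helper: picks the float type used for a dropout mask.
# Its result is type information only, so no AD backend should differentiate it.
@inline _mask_fptype(x) = float(real(eltype(x)))

# ChainRules-based backends (Zygote, ReverseDiff, ...) treat the call as constant.
CRC.@non_differentiable _mask_fptype(::Any...)

# Enzyme marks the call as inactive and generates no adjoint code for it.
EnzymeRules.inactive_noinl(::typeof(_mask_fptype), ::Any...) = nothing
```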
40 changes: 1 addition & 39 deletions src/api/groupnorm.jl
@@ -21,39 +21,11 @@ statistics.
The normalized array is returned.
## Performance Considerations
The most common case of this Op -- `x` is a 4D array -- is optimized using
KernelAbstractions and has a fast custom backwards pass implemented. All other cases have a
fallback implementation which is not especially optimized.
We have tested the code path for `Float16` and it works, but gradient accumulation is
extremely fragile. Hence, for `Float16` inputs, it uses the fallback implementation.
If the batch size is small (< 16), then the fallback implementation will be faster than the
KA version. However, this customization is not possible using the direct `groupnorm`
interface.
## References
[1] Wu, Yuxin, and Kaiming He. "Group normalization." Proceedings of the European conference
on computer vision (ECCV). 2018.
"""
function groupnorm(x::AbstractArray{<:Union{Float32, Float64}, 4},
scale::AbstractVector{<:Union{Float32, Float64}},
bias::AbstractVector{<:Union{Float32, Float64}},
groups::Int, σ::F=identity, epsilon::Real=1.0f-5) where {F}
_test_valid_groupnorm_arguments(x, scale, bias, groups)
# FIXME: We need to fuse the activation function into the kernel for optimal performance
return fast_activation!!(σ, __fast_groupnorm(x, groups, scale, bias, epsilon))
end

# Separate this out for a cleaner rrule later on
@inline function __fast_groupnorm(x, groups, scale, bias, epsilon)
return first(_groupnorm(x, groups, scale, bias, epsilon))
end

# Slow Fallback (without custom Pullback Implementation)
function groupnorm(x::AbstractArray{<:Real, N}, scale::Union{Nothing, <:AbstractVector},
bias::Union{Nothing, <:AbstractVector}, groups::Int,
σ::F=identity, epsilon::Real=1.0f-5) where {F, N}
@@ -71,19 +43,8 @@ end
return :($(Val(Tuple(collect(1:(N - 1))))))
end

# Custom Pullbacks
function CRC.rrule(::typeof(__fast_groupnorm), x, groups, scale, bias, epsilon)
y, μ, σ⁻¹ = _groupnorm(x, groups, scale, bias, epsilon)
∇groupnorm = @closure Δ -> begin
∂x, ∂scale, ∂bias = _∇groupnorm(Δ, y, x, groups, scale, bias, μ, σ⁻¹)
return NoTangent(), ∂x, NoTangent(), ∂scale, ∂bias, NoTangent()
end
return y, ∇groupnorm
end

function _test_valid_groupnorm_arguments(
x::AbstractArray{T, N}, scale, bias, groups) where {T, N}
_assert_same_backend(x, scale, bias)
if scale !== nothing && bias !== nothing && length(scale) != length(bias) != size(x, 3)
throw(ArgumentError("Length of `scale` and `bias` must be equal to the number of \
channels (N - 1 dim of the input array)."))
@@ -95,3 +56,4 @@ function _test_valid_groupnorm_arguments(
end

CRC.@non_differentiable _test_valid_groupnorm_arguments(::Any...)
EnzymeRules.inactive_noinl(::typeof(_test_valid_groupnorm_arguments), ::Any...) = nothing
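
With the KernelAbstractions path removed, inputs go through the generic method whose signature appears in the diff above. A usage sketch, assuming that positional signature (`groupnorm(x, scale, bias, groups, σ, epsilon)`) is the one exported by this version:

```julia
using LuxLib

x     = randn(Float32, 8, 8, 6, 4)  # W × H × C × N layout with C = 6 channels
scale = ones(Float32, 6)
bias  = zeros(Float32, 6)

# 3 groups of 2 channels each; activation and epsilon keep their defaults.
y = groupnorm(x, scale, bias, 3)
@assert size(y) == size(x)          # group normalization preserves the shape
```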
1 change: 1 addition & 0 deletions src/api/instancenorm.jl
@@ -47,3 +47,4 @@ function _test_valid_instancenorm_arguments(::AbstractArray{T, N}) where {T, N}
end

CRC.@non_differentiable _test_valid_instancenorm_arguments(::Any...)
EnzymeRules.inactive_noinl(::typeof(_test_valid_instancenorm_arguments), ::Any...) = nothing
113 changes: 0 additions & 113 deletions src/impl/groupnorm.jl

This file was deleted.

1 change: 1 addition & 0 deletions src/impl/normalization.jl
@@ -20,6 +20,7 @@ end
@inline __accum_size(x, ::Val{dims}) where {dims} = prod(Base.Fix1(size, x), dims)

CRC.@non_differentiable __accum_size(::Any...)
EnzymeRules.inactive_noinl(::typeof(__accum_size), ::Any...) = nothing

@inline function _get_batch_statistics(x::AbstractArray, ::Nothing, ::Nothing,
::Val{rdims}, ::Val{false}, momentum) where {rdims}

2 comments on commit e829b63

@avik-pal (Member Author)


@JuliaRegistrator

Registration pull request created: JuliaRegistries/General/106664

Tip: Release Notes

Did you know you can add release notes too? Just add markdown formatted text underneath the comment after the text
"Release notes:" and it will be added to the registry PR, and if TagBot is installed it will also be added to the
release that TagBot creates. i.e.

@JuliaRegistrator register

Release notes:

## Breaking changes

- blah

To add them here just re-invoke and the PR will be updated.

Tagging

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the GitHub interface, or via:

git tag -a v0.3.23 -m "<description of version>" e829b63957b383eed6f0360b6c02232c57a23195
git push origin v0.3.23
