From 15d1e43fcd63e96bcdeab98b3869c80cb19a0766 Mon Sep 17 00:00:00 2001 From: Dennis Yatunin Date: Tue, 31 Jan 2023 12:02:07 -0800 Subject: [PATCH 1/3] Try just removing inlines --- src/Operators/finitedifference.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Operators/finitedifference.jl b/src/Operators/finitedifference.jl index 6e8fd39a25..01a24b71ab 100644 --- a/src/Operators/finitedifference.jl +++ b/src/Operators/finitedifference.jl @@ -3275,7 +3275,7 @@ function Base.similar( return Field(Eltype, sp) end -@inline function _serial_copyto!( +function _serial_copyto!( field_out::Field, bc::Base.Broadcast.Broadcasted{S}, Ni::Int, @@ -3288,7 +3288,7 @@ end return field_out end -@inline function _threaded_copyto!( +function _threaded_copyto!( field_out::Field, bc::Base.Broadcast.Broadcasted{S}, Ni::Int, @@ -3305,7 +3305,7 @@ end return field_out end -@inline function Base.copyto!( +function Base.copyto!( field_out::Field, bc::Base.Broadcast.Broadcasted{S}, ) where {S <: AbstractStencilStyle} From fe2299a59f76c8d8259686cd344dc2bb8a1619a4 Mon Sep 17 00:00:00 2001 From: Dennis Yatunin Date: Tue, 31 Jan 2023 17:11:13 -0800 Subject: [PATCH 2/3] Try replacing Ref broadcasts with Tuple broadcasts in field_opt.jl --- src/DataLayouts/broadcast.jl | 35 ++++++++------- src/Fields/broadcast.jl | 31 ++++++++++--- src/Operators/finitedifference.jl | 1 + test/Fields/field_opt.jl | 43 +++++++++++-------- .../finitedifference/opt_examples.jl | 4 +- 5 files changed, 70 insertions(+), 44 deletions(-) diff --git a/src/DataLayouts/broadcast.jl b/src/DataLayouts/broadcast.jl index 01e504b220..69d7308bf5 100644 --- a/src/DataLayouts/broadcast.jl +++ b/src/DataLayouts/broadcast.jl @@ -407,10 +407,13 @@ end # broadcasting scalar assignment # Performance optimization for the common identity scalar case: dest .= val -@inline function Base.copyto!( +function Base.copyto!( dest::AbstractData, bc::Base.Broadcast.Broadcasted{Style}, -) where {Style <: Base.Broadcast.AbstractArrayStyle{0}} +) where { + Style <: + Union{Base.Broadcast.AbstractArrayStyle{0}, Base.Broadcast.Style{Tuple}}, +} bc = Base.Broadcast.instantiate( Base.Broadcast.Broadcasted{Style}(bc.f, bc.args, ()), ) @@ -418,7 +421,7 @@ end fill!(dest, bc0) end -@inline function Base.copyto!( +function Base.copyto!( dest::DataF{S}, bc::Union{DataF{S, A}, Base.Broadcast.Broadcasted{DataFStyle{A}}}, ) where {S, A} @@ -426,7 +429,7 @@ end return dest end -@inline function Base.copyto!( +function Base.copyto!( dest::IJFH{S, Nij}, bc::Union{IJFH{S, Nij}, Base.Broadcast.Broadcasted{<:IJFHStyle{Nij}}}, ) where {S, Nij} @@ -439,7 +442,7 @@ end return dest end -@inline function Base.copyto!( +function Base.copyto!( dest::IFH{S, Ni}, bc::Union{IFH{S, Ni}, Base.Broadcast.Broadcasted{<:IFHStyle{Ni}}}, ) where {S, Ni} @@ -453,7 +456,7 @@ end end # inline inner slab(::DataSlab2D) copy -@inline function Base.copyto!( +function Base.copyto!( dest::IJF{S, Nij}, bc::Union{IJF{S, Nij, A}, Base.Broadcast.Broadcasted{IJFStyle{Nij, A}}}, ) where {S, Nij, A} @@ -465,7 +468,7 @@ end end # inline inner slab(::DataSlab1D) copy -@inline function Base.copyto!( +function Base.copyto!( dest::IF{S, Ni}, bc::Base.Broadcast.Broadcasted{IFStyle{Ni, A}}, ) where {S, Ni, A} @@ -477,7 +480,7 @@ end end # inline inner column(::DataColumn) copy -@inline function Base.copyto!( +function Base.copyto!( dest::VF{S}, bc::Union{VF{S, A}, Base.Broadcast.Broadcasted{VFStyle{A}}}, ) where {S, A} @@ -489,7 +492,7 @@ end return dest end -@inline function _serial_copyto!( +function _serial_copyto!( dest::VIFH{S, Ni}, bc::Union{VIFH{S, Ni, A}, Base.Broadcast.Broadcasted{VIFHStyle{Ni, A}}}, ) where {S, Ni, A} @@ -503,7 +506,7 @@ end return dest end -@inline function _threaded_copyto!( +function _threaded_copyto!( dest::VIFH{S, Ni}, bc::Base.Broadcast.Broadcasted{VIFHStyle{Ni, A}}, ) where {S, Ni, A} @@ -522,14 +525,14 @@ end return dest end -@inline function Base.copyto!( +function Base.copyto!( dest::VIFH{S, Ni}, source::VIFH{S, Ni, A}, ) where {S, Ni, A} return _serial_copyto!(dest, source) end -@inline function Base.copyto!( +function Base.copyto!( dest::VIFH{S, Ni}, bc::Base.Broadcast.Broadcasted{VIFHStyle{Ni, A}}, ) where {S, Ni, A} @@ -539,7 +542,7 @@ end return _serial_copyto!(dest, bc) end -@inline function _serial_copyto!( +function _serial_copyto!( dest::VIJFH{S, Nij}, bc::Union{VIJFH{S, Nij, A}, Base.Broadcast.Broadcasted{VIJFHStyle{Nij, A}}}, ) where {S, Nij, A} @@ -553,7 +556,7 @@ end return dest end -@inline function _threaded_copyto!( +function _threaded_copyto!( dest::VIJFH{S, Nij}, bc::Base.Broadcast.Broadcasted{VIJFHStyle{Nij, A}}, ) where {S, Nij, A} @@ -572,14 +575,14 @@ end return dest end -@inline function Base.copyto!( +function Base.copyto!( dest::VIJFH{S, Nij}, source::VIJFH{S, Nij, A}, ) where {S, Nij, A} return _serial_copyto!(dest, source) end -@inline function Base.copyto!( +function Base.copyto!( dest::VIJFH{S, Nij}, bc::Base.Broadcast.Broadcasted{VIJFHStyle{Nij, A}}, ) where {S, Nij, A} diff --git a/src/Fields/broadcast.jl b/src/Fields/broadcast.jl index fc0d247fa2..9ba25dc58d 100644 --- a/src/Fields/broadcast.jl +++ b/src/Fields/broadcast.jl @@ -18,10 +18,15 @@ FieldStyle(x::Base.Broadcast.Unknown) = x Base.Broadcast.BroadcastStyle(::Type{Field{V, S}}) where {V, S} = FieldStyle(DataStyle(V)) +# Broadcasting over scalars (Ref or Tuple) Base.Broadcast.BroadcastStyle( ::Base.Broadcast.AbstractArrayStyle{0}, fs::AbstractFieldStyle, ) = fs +Base.Broadcast.BroadcastStyle( + ::Base.Broadcast.Style{Tuple}, + fs::AbstractFieldStyle, +) = fs Base.Broadcast.BroadcastStyle( ::FieldStyle{DS1}, @@ -42,15 +47,14 @@ _first(data::DataLayouts.VF) = data[] _first(field::Field) = _first_data_layout(field_values(column(field, 1, 1, 1))) _first(space::Spaces.AbstractSpace) = _first_data_layout(field_values(column(space, 1, 1, 1))) -_first(bc::Base.Broadcast.Broadcasted) = _first.(bc.args) # Is this case necessary? -_first(x::Base.RefValue{T}) where {T} = x -unref(x::Ref{T}) where {T} = x.x -unref(T) = T +_first(bc::Base.Broadcast.Broadcasted) = _first(copy(bc)) +_first(x::Ref{T}) where {T} = x.x +_first(x::Tuple{T}) where {T} = x[1] function call_with_first(bc) # Try calling with first applied to all arguments: bc′ = Base.Broadcast.preprocess(nothing, bc) - first_args = map(arg -> unref(_first(arg)), bc′.args) + first_args = map(arg -> _first(arg), bc′.args) bc.f(first_args...) end @@ -183,8 +187,8 @@ end end return space1 end -@inline Base.Broadcast.broadcast_shape(space::AbstractSpace, ::Tuple{}) = space -@inline Base.Broadcast.broadcast_shape(::Tuple{}, space::AbstractSpace) = space +@inline Base.Broadcast.broadcast_shape(space::AbstractSpace, ::Tuple) = space +@inline Base.Broadcast.broadcast_shape(::Tuple, space::AbstractSpace) = space @inline Base.Broadcast.broadcast_shape( pointspace::AbstractPointSpace, @@ -232,6 +236,12 @@ end ) return nothing end +@inline function Base.Broadcast.check_broadcast_shape( + ::AbstractSpace, + ::Tuple{T}, +) where {T} + return nothing +end @inline function Base.Broadcast.check_broadcast_shape( ::AbstractSpace, ::AbstractPointSpace, @@ -380,6 +390,13 @@ function Base.Broadcast.copyto!( copyto!(Fields.field_values(field), bc) return field end +function Base.Broadcast.copyto!( + field::Field, + bc::Base.Broadcast.Broadcasted{Base.Broadcast.Style{Tuple}}, +) + copyto!(Fields.field_values(field), bc) + return field +end function Base.Broadcast.copyto!(field::Field, nt::NamedTuple) copyto!( diff --git a/src/Operators/finitedifference.jl b/src/Operators/finitedifference.jl index 01a24b71ab..27851f9c1a 100644 --- a/src/Operators/finitedifference.jl +++ b/src/Operators/finitedifference.jl @@ -3109,6 +3109,7 @@ Base.@propagate_inbounds function getidx( end # unwap boxed scalars +@inline getidx(scalar::Tuple{T}, loc::Location, idx, hidx) where {T} = scalar[1] @inline getidx(scalar::Ref, loc::Location, idx, hidx) = scalar[] @inline getidx(field::Fields.PointField, loc::Location, idx, hidx) = field[] @inline getidx(field::Fields.PointField, loc::Location, idx) = field[] diff --git a/test/Fields/field_opt.jl b/test/Fields/field_opt.jl index f2bd65baea..89a57fdd67 100644 --- a/test/Fields/field_opt.jl +++ b/test/Fields/field_opt.jl @@ -1,4 +1,5 @@ # These tests require running with `--check-bounds=[auto|no]` +using Revise using Test using StaticArrays, IntervalSets import ClimaCore @@ -19,15 +20,15 @@ end include(joinpath(@__DIR__, "util_spaces.jl")) # https://github.com/CliMA/ClimaCore.jl/issues/946 -@testset "Allocations with broadcasting Refs" begin +@testset "Allocations with broadcasting Scalars" begin FT = Float64 function foo!(Yx::Fields.Field) - Yx .= Ref(1) .+ Yx + Yx .= (1,) .+ Yx return nothing end function foocolumn!(Yx::Fields.Field) Fields.bycolumn(axes(Yx)) do colidx - Yx[colidx] .= Ref(1) .+ Yx[colidx] + Yx[colidx] .= (1,) .+ Yx[colidx] nothing end return nothing @@ -58,7 +59,7 @@ end nothing end function callfill!(Y) - fill!(Y, Ref((; x = 2.0))) + fill!(Y, ((; x = 2.0),)) nothing end for space in all_spaces(FT) @@ -80,13 +81,13 @@ function allocs_test1!(Y) x = Y.x FT = Spaces.undertype(axes(x)) I = sc(FT) - x .= x .+ Ref(I) + x .= x .+ (I,) nothing end function allocs_test2!(Y) x = Y.x FT = Spaces.undertype(axes(x)) - IR = Ref(sc(FT)) + IR = (sc(FT),) @. x += IR nothing end @@ -96,7 +97,7 @@ function allocs_test1_column!(Y) FT = Spaces.undertype(axes(x)) # I = sc(FT) I = Operators.StencilCoefs{-1, 1}((zero(FT), one(FT), zero(FT))) - x[colidx] .= x[colidx] .+ Ref(I) + x[colidx] .= x[colidx] .+ (I,) end nothing end @@ -104,7 +105,7 @@ function allocs_test2_column!(Y) Fields.bycolumn(axes(Y.x)) do colidx x = Y.x FT = Spaces.undertype(axes(x)) - IR = Ref(sc(FT)) + IR = (sc(FT),) @. x[colidx] += IR end nothing @@ -119,10 +120,10 @@ end function allocs_test3_column!(x) FT = Spaces.undertype(axes(x)) - IR = Ref(Operators.StencilCoefs{-1, 1}((zero(FT), one(FT), zero(FT)))) + IR = (Operators.StencilCoefs{-1, 1}((zero(FT), one(FT), zero(FT))),) @. x += IR I = Operators.StencilCoefs{-1, 1}((zero(FT), one(FT), zero(FT))) - x .+= Ref(I) + x .+= (I,) nothing end @@ -154,9 +155,9 @@ end end nothing -function allocs_test_Ref_with_compose!(S, ∂ᶠ𝕄ₜ∂ᶜρ, ∂ᶜρₜ∂ᶠ𝕄) +function allocs_test_scalar_with_compose!(S, ∂ᶠ𝕄ₜ∂ᶜρ, ∂ᶜρₜ∂ᶠ𝕄) Fields.bycolumn(axes(S)) do colidx - allocs_test_Ref_with_compose_column!( + allocs_test_scalar_with_compose_column!( S[colidx], ∂ᶠ𝕄ₜ∂ᶜρ[colidx], ∂ᶜρₜ∂ᶠ𝕄[colidx], @@ -165,15 +166,15 @@ function allocs_test_Ref_with_compose!(S, ∂ᶠ𝕄ₜ∂ᶜρ, ∂ᶜρₜ∂ nothing end -function allocs_test_Ref_with_compose_column!(S, ∂ᶠ𝕄ₜ∂ᶜρ, ∂ᶜρₜ∂ᶠ𝕄) +function allocs_test_scalar_with_compose_column!(S, ∂ᶠ𝕄ₜ∂ᶜρ, ∂ᶜρₜ∂ᶠ𝕄) compose = Operators.ComposeStencils() FT = Spaces.undertype(axes(S)) - IR = Ref(Operators.StencilCoefs{-1, 1}((zero(FT), one(FT), zero(FT)))) + IR = (Operators.StencilCoefs{-1, 1}((zero(FT), one(FT), zero(FT))),) @. S = compose(∂ᶠ𝕄ₜ∂ᶜρ, ∂ᶜρₜ∂ᶠ𝕄) - IR nothing end -@testset "Allocations StencilCoefs Ref with ComposeStencils broadcasting" begin +@testset "Allocations StencilCoefs scalar with ComposeStencils broadcasting" begin FT = Float64 for space in all_spaces(FT) space isa Spaces.CenterExtrudedFiniteDifferenceSpace || continue @@ -185,12 +186,16 @@ end tridiag_type = Operators.StencilCoefs{-1, 1, NTuple{3, FT}} S = Fields.Field(tridiag_type, fspace) - allocs_test_Ref_with_compose!(S, ∂ᶠ𝕄ₜ∂ᶜρ, ∂ᶜρₜ∂ᶠ𝕄) - p = @allocated allocs_test_Ref_with_compose!(S, ∂ᶠ𝕄ₜ∂ᶜρ, ∂ᶜρₜ∂ᶠ𝕄) + allocs_test_scalar_with_compose!(S, ∂ᶠ𝕄ₜ∂ᶜρ, ∂ᶜρₜ∂ᶠ𝕄) + p = @allocated allocs_test_scalar_with_compose!(S, ∂ᶠ𝕄ₜ∂ᶜρ, ∂ᶜρₜ∂ᶠ𝕄) @test p == 0 - allocs_test_Ref_with_compose_column!(S, ∂ᶠ𝕄ₜ∂ᶜρ, ∂ᶜρₜ∂ᶠ𝕄) - p = @allocated allocs_test_Ref_with_compose_column!(S, ∂ᶠ𝕄ₜ∂ᶜρ, ∂ᶜρₜ∂ᶠ𝕄) + allocs_test_scalar_with_compose_column!(S, ∂ᶠ𝕄ₜ∂ᶜρ, ∂ᶜρₜ∂ᶠ𝕄) + p = @allocated allocs_test_scalar_with_compose_column!( + S, + ∂ᶠ𝕄ₜ∂ᶜρ, + ∂ᶜρₜ∂ᶠ𝕄, + ) @test p == 0 end end diff --git a/test/Operators/finitedifference/opt_examples.jl b/test/Operators/finitedifference/opt_examples.jl index 64590e65fa..757a11648f 100644 --- a/test/Operators/finitedifference/opt_examples.jl +++ b/test/Operators/finitedifference/opt_examples.jl @@ -98,7 +98,7 @@ function alloc_test_derivative(cfield, ffield, ∇c, ∇f) p = @allocated begin c∇closure() end - @test_broken p == 0 + @test p == 0 ##### C2F # wvec = Geometry.WVector # cannot re-define, otherwise many allocations @@ -168,7 +168,7 @@ function alloc_test_operators_in_loops(cfield, ffield) p = @allocated begin c∇closure() end - @test_broken p == 0 + @test p == 0 end end function alloc_test_nested_expressions_1(cfield, ffield) From 18a9cf4d486f70f77472b57c459aa7d1b3695d7a Mon Sep 17 00:00:00 2001 From: Dennis Yatunin Date: Tue, 31 Jan 2023 21:52:13 -0800 Subject: [PATCH 3/3] Remove accidental --- test/Fields/field_opt.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/test/Fields/field_opt.jl b/test/Fields/field_opt.jl index 89a57fdd67..8539373b79 100644 --- a/test/Fields/field_opt.jl +++ b/test/Fields/field_opt.jl @@ -1,5 +1,4 @@ # These tests require running with `--check-bounds=[auto|no]` -using Revise using Test using StaticArrays, IntervalSets import ClimaCore