Add @autosize #2078

Merged
merged 10 commits into from Oct 10, 2022
Changes from 4 commits
3 changes: 3 additions & 0 deletions NEWS.md
@@ -1,5 +1,8 @@
# Flux Release Notes

## v0.13.7
* Added [`@autosize` macro](https://github.com/FluxML/Flux.jl/pull/2078)

## v0.13.4
* Added [`PairwiseFusion` layer](https://github.com/FluxML/Flux.jl/pull/1983)

1 change: 1 addition & 0 deletions src/Flux.jl
@@ -55,6 +55,7 @@ include("layers/show.jl")
include("loading.jl")

include("outputsize.jl")
export @autosize

include("data/Data.jl")
using .Data
170 changes: 168 additions & 2 deletions src/outputsize.jl
@@ -147,8 +147,12 @@ outputsize(m::AbstractVector, input::Tuple...; padbatch=false) = outputsize(Chai

## bypass statistics in normalization layers

for layer in (:LayerNorm, :BatchNorm, :InstanceNorm, :GroupNorm)
@eval (l::$layer)(x::AbstractArray{Nil}) = x
for layer in (:BatchNorm, :InstanceNorm, :GroupNorm) # LayerNorm works fine
@eval function (l::$layer)(x::AbstractArray{Nil})
l.chs == size(x, ndims(x)-1) || throw(DimensionMismatch(
string($layer, " expected ", l.chs, " channels, but got size(x) == ", size(x))))
x
end
end

## fixes for layers that don't work out of the box
@@ -168,3 +172,165 @@ for (fn, Dims) in ((:conv, DenseConvDims),)
end
end
end


"""
@autosize (size...,) Chain(Layer(_ => 2), Layer(_), ...)

Returns the specified model, with each `_` replaced by an inferred number,
for input of the given `size`.

The unknown sizes are usually the second-last dimension of that layer's input,
which Flux regards as the channel dimension.
(A few layers, `Dense` & [`LayerNorm`](@ref), instead always use the first dimension.)
The underscore may appear as an argument of a layer, or inside a `=>`.
It may be used in further calculations, such as `Dense(_ => _÷4)`.

# Examples
```
julia> @autosize (3, 1) Chain(Dense(_ => 2, sigmoid), BatchNorm(_, affine=false))
Chain(
Dense(3 => 2, σ), # 8 parameters
BatchNorm(2, affine=false),
)

julia> img = [28, 28];

julia> @autosize (img..., 1, 32) Chain( # size is only needed at runtime
Chain(c = Conv((3,3), _ => 5; stride=2, pad=SamePad()),
p = MeanPool((3,3)),
b = BatchNorm(_),
f = Flux.flatten),
Dense(_ => _÷4, relu, init=Flux.rand32), # can calculate output size _÷4
SkipConnection(Dense(_ => _, relu), +),
Dense(_ => 10),
) |> gpu # moves to GPU after initialisation
Chain(
Chain(
c = Conv((3, 3), 1 => 5, pad=1, stride=2), # 50 parameters
p = MeanPool((3, 3)),
b = BatchNorm(5), # 10 parameters, plus 10
f = Flux.flatten,
),
Dense(80 => 20, relu), # 1_620 parameters
SkipConnection(
Dense(20 => 20, relu), # 420 parameters
+,
),
Dense(20 => 10), # 210 parameters
) # Total: 10 trainable arrays, 2_310 parameters,
# plus 2 non-trainable, 10 parameters, summarysize 10.469 KiB.

julia> outputsize(ans, (28, 28, 1, 32))
(10, 32)
```

Limitations:
* While `@autosize (5, 32) Flux.Bilinear(_ => 7)` is OK, something like `Bilinear((_, _) => 7)` will fail.
* While `Scale(_)` and `LayerNorm(_)` are fine (and use the first dimension), `Scale(_,_)` and `LayerNorm(_,_)`
will fail if `size(x,1) != size(x,2)`.
* RNNs won't work: `@autosize (7, 11) LSTM(_ => 5)` fails, because `outputsize(RNN(3=>7), (3,))` also fails, a known issue.
"""
macro autosize(size, model)
Meta.isexpr(size, :tuple) || error("@autosize's first argument must be a tuple, the size of the input")
Meta.isexpr(model, :call) || error("@autosize's second argument must be something like Chain(layers...)")
ex = _makelazy(model)
@gensym m
quote
$m = $ex
$outputsize($m, $size)
$striplazy($m)
end |> esc
end

function _makelazy(ex::Expr)
n = _underscoredepth(ex)
n == 0 && return ex
n == 1 && error("@autosize doesn't expect an underscore here: $ex")
n == 2 && return :($LazyLayer($(string(ex)), $(_makefun(ex)), nothing))
n > 2 && return Expr(ex.head, ex.args[1], map(_makelazy, ex.args[2:end])...)
end
_makelazy(x) = x

function _underscoredepth(ex::Expr)
# Meta.isexpr(ex, :tuple) && :_ in ex.args && return 10
ex.head in (:call, :kw, :(->), :block) || return 0
ex.args[1] == :(=>) && ex.args[2] == :_ && return 1
m = maximum(_underscoredepth, ex.args)
m == 0 ? 0 : m+1
end
_underscoredepth(ex) = Int(ex == :_)

function _makefun(ex)
T = Meta.isexpr(ex, :call) ? ex.args[1] : Type
@gensym x s
Expr(:(->), x, Expr(:block, :($s = $autosizefor($T, $x)), _replaceunderscore(ex, s)))
end

"""
autosizefor(::Type, x)

If an `_` in your layer's constructor, used within `@autosize`, should
*not* mean the 2nd-last dimension, then you can overload this.

For instance `autosizefor(::Type{<:Dense}, x::AbstractArray) = size(x, 1)`
is needed to make `@autosize (2,3,4) Dense(_ => 5)` return
`Dense(2 => 5)` rather than `Dense(3 => 5)`.
"""
autosizefor(::Type, x::AbstractArray) = size(x, max(1, ndims(x)-1))
autosizefor(::Type{<:Dense}, x::AbstractArray) = size(x, 1)
autosizefor(::Type{<:LayerNorm}, x::AbstractArray) = size(x, 1)

_replaceunderscore(e, s) = e == :_ ? s : e
_replaceunderscore(ex::Expr, s) = Expr(ex.head, map(a -> _replaceunderscore(a, s), ex.args)...)

mutable struct LazyLayer
str::String
make::Function
layer
end

function (l::LazyLayer)(x::AbstractArray)
l.layer == nothing || return l.layer(x)
lay = l.make(x)
y = lay(x)
l.layer = lay # mutate after we know that call worked
return y
end

#=

Flux.outputsize(Chain(Dense(2=>3)), (4,)) # nice error
Flux.outputsize(Dense(2=>3), (4,)) # no nice error
@autosize (4,) Dense(2=>3) # no nice error

@autosize (3,) Dense(2 => _) # shouldn't work, weird error


@autosize (3,5,6) LayerNorm(_,_) # no complaint, but
ans(rand(3,5,6)) # this fails

=#

@functor LazyLayer

function striplazy(x)
fs, re = functor(x)
re(map(striplazy, fs))
end
striplazy(l::LazyLayer) = l.layer == nothing ? error("should be initialised!") : l.layer

# Could make LazyLayer usable outside of @autosize, for instance allow Chain(@lazy Dense(_ => 2))?
# But then it will survive to produce weird structural gradients etc.
Member:

Could we force users to call recursive_striplazy(model, input_size) or something before using an incrementally constructed network like this? Maybe define a rrule which throws an error?

Member Author:

striplazy should be fully recursive. We could make a function that calls this after outputsize & returns the model. And indeed an rrule would be one way to ensure the model is stripped before it's used for real.

I suppose the other policy would just be to allow these things to survive in the model. As long as you never change it, and don't care about the cost of the if & type instability, it should work?

But any use outside of @autosize probably needs another macro... writing Flux.LazyLayer("", x -> Dense(size(x,1) => 10), nothing) seems sufficiently obscure that perhaps it's OK to say that's obviously at your own risk, for now? @autosize can be the only API until we decide if we want more.
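As a concrete sketch of the rrule idea discussed above (not part of this PR; it assumes ChainRulesCore is loaded, and the error message is made up), an error-throwing rule could look like:

```julia
using ChainRulesCore

# Hypothetical: refuse to differentiate through a LazyLayer that hasn't been
# stripped, so a model must pass through @autosize / striplazy before training.
function ChainRulesCore.rrule(l::LazyLayer, x::AbstractArray)
    error("LazyLayer is a placeholder used by @autosize; call striplazy on the model before taking gradients")
end
```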


function Base.show(io::IO, l::LazyLayer)
printstyled(io, "LazyLayer(", color=:light_black)
if l.layer == nothing
printstyled(io, l.str, color=:red)
else
printstyled(io, l.layer, color=:green)
end
printstyled(io, ")", color=:light_black)
end

_big_show(io::IO, l::LazyLayer, indent::Int=0, name=nothing) = _layer_show(io, l, indent, name)
65 changes: 65 additions & 0 deletions test/outputsize.jl
@@ -142,16 +142,81 @@ end
m = LayerNorm(32)
@test outputsize(m, (32, 32, 3, 16)) == (32, 32, 3, 16)
@test outputsize(m, (32, 32, 3); padbatch=true) == (32, 32, 3, 1)
m2 = LayerNorm(3, 2)
@test outputsize(m2, (3, 2)) == (3, 2) == size(m2(randn(3, 2)))
@test outputsize(m2, (3,)) == (3, 2) == size(m2(randn(3, 2)))

m = BatchNorm(3)
@test outputsize(m, (32, 32, 3, 16)) == (32, 32, 3, 16)
@test outputsize(m, (32, 32, 3); padbatch=true) == (32, 32, 3, 1)
@test_throws Exception m(randn(Float32, 32, 32, 5, 1))
@test_throws DimensionMismatch outputsize(m, (32, 32, 5, 1))

m = InstanceNorm(3)
@test outputsize(m, (32, 32, 3, 16)) == (32, 32, 3, 16)
@test outputsize(m, (32, 32, 3); padbatch=true) == (32, 32, 3, 1)
@test_throws Exception m(randn(Float32, 32, 32, 5, 1))
@test_throws DimensionMismatch outputsize(m, (32, 32, 5, 1))

m = GroupNorm(16, 4)
@test outputsize(m, (32, 32, 16, 16)) == (32, 32, 16, 16)
@test outputsize(m, (32, 32, 16); padbatch=true) == (32, 32, 16, 1)
@test_throws Exception m(randn(Float32, 32, 32, 15, 4))
@test_throws DimensionMismatch outputsize(m, (32, 32, 15, 4))
end

@testset "autosize macro" begin
m = @autosize (3,) Dense(_ => 4)
@test randn(3) |> m |> size == (4,)

m = @autosize (3, 1) Chain(Dense(_ => 4), Dense(4 => 10), softmax)
@test randn(3, 5) |> m |> size == (10, 5)

m = @autosize (2, 3, 4, 5) Dense(_ => 10) # goes by first dim, not 2nd-last
@test randn(2, 3, 4, 5) |> m |> size == (10, 3, 4, 5)

m = @autosize (9,) Dense(_ => div(_,2))
@test randn(9) |> m |> size == (4,)

m = @autosize (3,) Chain(one = Dense(_ => 4), two = softmax) # needs kw
@test randn(3) |> m |> size == (4,)

m = @autosize (3, 45) Maxout(() -> Dense(_ => 6, tanh), 2) # needs ->, block
@test randn(3, 45) |> m |> size == (6, 45)

# here Parallel gets two inputs, no problem:
m = @autosize (3,) Chain(SkipConnection(Dense(_ => 4), Parallel(vcat, Dense(_ => 5), Dense(_ => 6))), Flux.Scale(_))
@test randn(3) |> m |> size == (11,)

# like Dense, LayerNorm goes by the first dimension:
m = @autosize (3, 4, 5) LayerNorm(_)
@test rand(3, 6, 7) |> m |> size == (3, 6, 7)

m = @autosize (3, 3, 10) LayerNorm(_, _) # does not check that sizes match
@test rand(3, 3, 10) |> m |> size == (3, 3, 10)

m = @autosize (3,) Flux.Bilinear(_ => 10)
@test randn(3) |> m |> size == (10,)

m = @autosize (3, 1) Flux.Bilinear(_ => 10)
@test randn(3, 4) |> m |> size == (10, 4)

@test_throws Exception @eval @autosize (3,) Flux.Bilinear((_,3) => 10)

# first docstring example
m = @autosize (3, 1) Chain(Dense(_ => 2, sigmoid), BatchNorm(_, affine=false))
@test randn(3, 4) |> m |> size == (2, 4)

# evil docstring example
img = [28, 28];
m = @autosize (img..., 1, 32) Chain( # size is only needed at runtime
Chain(c = Conv((3,3), _ => 5; stride=2, pad=SamePad()),
p = MeanPool((3,3)),
b = BatchNorm(_),
f = Flux.flatten),
Dense(_ => _÷4, relu, init=Flux.rand32), # can calculate output size _÷4
SkipConnection(Dense(_ => _, relu), +),
Dense(_ => 10),
) |> gpu # moves to GPU after initialisation
Member:

@mcabbott I missed this in the review, but GPU tests are failing because the |> here is binding more tightly than @autosize, and thus the model isn't actually moved onto the GPU. Is there anything we can do about that other than adding more parens? Whatever changes are made would affect the @autosize docstring as well, since it shares this example.

Member Author:

Oh no, I thought I checked this, sorry. It's not working, but the binding is what I expected:

julia> (@autosize (1,2) Dense(_, 3) |> f64).bias
3-element Vector{Float32}:
 0.0
 0.0
 0.0

julia> :(@autosize (1,2) Dense(_, 3) |> f64) |> dump
Expr
  head: Symbol macrocall
  args: Array{Any}((4,))
    1: Symbol @autosize
    2: LineNumberNode
      line: Int64 1
      file: Symbol REPL[16]
    3: Expr
      head: Symbol tuple
      args: Array{Any}((2,))
        1: Int64 1
        2: Int64 2
    4: Expr
      head: Symbol call
      args: Array{Any}((3,))
        1: Symbol |>
        2: Expr
          head: Symbol call
          args: Array{Any}((3,))
            1: Symbol Dense
            2: Symbol _
            3: Int64 3
        3: Symbol f64

It seems the gpu walk is taking place too early. Maybe I never implemented my scheme to delay it.

Member Author:

My scheme was, I think, aimed at adapt. There you can grab the function being mapped, and replace the layer's maker function with one composed with it. But for Functors.jl I think that's impossible.

So we should just make it an error. And remove this use from the docs. Call gpu once it's done, i.e. with brackets.
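For reference, a minimal sketch of the bracketed form proposed here (the model itself is only an illustration, not taken from the PR): the parentheses close the macro call before `|>`, so `gpu` is applied to the finished, stripped model rather than being captured inside `@autosize`.

```julia
using Flux

# Parentheses end the macro call, so |> gpu sees the already-built model.
m = (@autosize (28, 28, 1, 32) Chain(Conv((3, 3), _ => 5, relu), Flux.flatten, Dense(_ => 10))) |> gpu
```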

Member:

Sounds good to me.

@test randn(Float32, img..., 1, 32) |> gpu |> m |> size == (10, 32)
end