"""
    dwsep_conv_norm(kernel_size::Dims{2}, inplanes::Integer, outplanes::Integer,
                    activation = relu; norm_layer = BatchNorm, stride::Integer = 1,
                    bias::Bool = norm_layer === identity, pad::Integer = 0, [weight, init])
Create a depthwise separable convolution chain, as used in MobileNetv1.

This is a sequence of layers:
- a `kernel_size` depthwise convolution from `inplanes => inplanes`
- a (batch) normalisation layer + `activation` (if `norm_layer !== identity`; otherwise
`activation` is applied to the convolution output)
- a `1x1` pointwise convolution from `inplanes => outplanes`
- a (batch) normalisation layer + `activation` (if `norm_layer !== identity`; otherwise
`activation` is applied to the convolution output)
See Fig. 3 in [reference](https://arxiv.org/abs/1704.04861v1).

# Arguments
- `kernel_size`: size of the convolution kernel (tuple)
- `inplanes`: number of input feature maps
- `outplanes`: number of output feature maps
- `activation`: the activation function applied after each convolution
- `norm_layer`: the normalisation layer used. Note that using `identity` as the normalisation
layer will result in no normalisation being applied.
- `bias`: whether to use bias in the convolution layers.
- `stride`: stride of the first convolution kernel
- `pad`: padding of the first convolution kernel
- `weight`, `init`: initialization for the convolution kernel (see [`Flux.Conv`](@ref))
"""
function dwsep_conv_norm(kernel_size::Dims{2}, inplanes::Integer, outplanes::Integer,
                         activation = relu; norm_layer = BatchNorm, stride::Integer = 1,
                         bias::Bool = norm_layer === identity, kwargs...)
    return vcat(conv_norm(kernel_size, inplanes, inplanes, activation; norm_layer,
                          stride, bias, groups = inplanes, kwargs...), # depthwise convolution
                conv_norm((1, 1), inplanes, outplanes, activation; norm_layer, bias)) # pointwise convolution
end
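
# Hedged usage sketch (an illustration, not part of the upstream API): assuming
# Flux is in scope, as it is elsewhere in this package, the returned layer
# vector can be splatted into a `Chain`:
#
#   layers = dwsep_conv_norm((3, 3), 32, 64, relu; stride = 2, pad = 1)
#   model = Chain(layers...)              # the helper returns a vector of layers
#   x = rand(Float32, 224, 224, 32, 1)    # WHCN order: a 224×224 input with 32 channels
#   size(model(x))                        # (112, 112, 64, 1)
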
"""
    mbconv(kernel_size::Dims{2}, inplanes::Integer, explanes::Integer,
           outplanes::Integer, activation = relu; stride::Integer,
           reduction::Union{Nothing, Real} = nothing,
           se_round_fn = x -> round(Int, x), norm_layer = BatchNorm)
Create a basic inverted residual block for MobileNet and EfficientNet variants.

This is a sequence of layers:
- a 1x1 expansion convolution from `inplanes => explanes` followed by a (batch)
  normalisation layer + `activation` (skipped entirely if `inplanes == explanes`)
- a `kernel_size` depthwise convolution from `explanes => explanes` followed by a
  (batch) normalisation layer + `activation`
- a squeeze-and-excitation block (if `reduction !== nothing`) from
  `explanes => se_round_fn(explanes / reduction)` and back to `explanes`
- a 1x1 projection convolution from `explanes => outplanes` followed by a (batch)
  normalisation layer, with no `activation` (the linear bottleneck of MobileNetv2)
!!! warning

    This function does not handle the residual connection by default. The user must add
    this manually to use this block as a standalone. To construct a model, check out the
    builders, which handle the residual connection and other details.
First introduced in the MobileNetv2 paper.
(See Fig. 3 in [reference](https://arxiv.org/abs/1801.04381v4).)

# Arguments
- `kernel_size`: kernel size of the convolutional layers
- `inplanes`: number of input feature maps
- `explanes`: The number of expanded feature maps. This is the number of feature maps
after the first 1x1 convolution.
- `outplanes`: The number of output feature maps
- `activation`: The activation function for the expansion and depthwise convolution
  layers (also passed to the squeeze-and-excitation block)
- `stride`: The stride of the depthwise convolution kernel; has to be either 1 or 2
- `reduction`: The reduction factor for the number of hidden feature maps
in a squeeze and excite layer (see [`squeeze_excite`](@ref))
- `se_round_fn`: The function to round the number of reduced feature maps
in the squeeze and excite layer
- `norm_layer`: The normalization layer to use
"""
function mbconv(kernel_size::Dims{2}, inplanes::Integer, explanes::Integer,
outplanes::Integer, activation = relu; stride::Integer,
reduction::Union{Nothing, Real} = nothing,
se_round_fn = x -> round(Int, x), norm_layer = BatchNorm)
@assert stride in [1, 2] "`stride` has to be 1 or 2 for `mbconv`"
layers = []
# expand
if inplanes != explanes
append!(layers,
conv_norm((1, 1), inplanes, explanes, activation; norm_layer))
end
# depthwise
append!(layers,
conv_norm(kernel_size, explanes, explanes, activation; norm_layer,
stride, pad = SamePad(), groups = explanes))
# squeeze-excite layer
if !isnothing(reduction)
push!(layers,
squeeze_excite(explanes; round_fn = se_round_fn, reduction,
activation, gate_activation = hardσ))
end
    # project (linear bottleneck: no activation)
    append!(layers, conv_norm((1, 1), explanes, outplanes, identity; norm_layer))
return Chain(layers...)
end
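
# Hedged usage sketch (an illustration, not part of the upstream API): since
# `mbconv` omits the residual connection, a standalone block can be wrapped in
# Flux's `SkipConnection` when the shapes permit (stride 1, inplanes == outplanes):
#
#   block = mbconv((3, 3), 16, 64, 16, swish; stride = 1, reduction = 4)
#   res_block = SkipConnection(block, +)   # identity shortcut around the block
#   x = rand(Float32, 56, 56, 16, 1)
#   size(res_block(x))                     # (56, 56, 16, 1), same as the input
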
"""
    fused_mbconv(kernel_size::Dims{2}, inplanes::Integer, explanes::Integer,
                 outplanes::Integer, activation = relu;
                 stride::Integer, norm_layer = BatchNorm)
Create a fused inverted residual block.

This is a sequence of layers:
- a `kernel_size` convolution from `inplanes => explanes` followed by a (batch)
  normalisation layer + `activation`
- a 1x1 projection convolution from `explanes => outplanes` followed by a (batch)
  normalisation layer (only added when an expansion takes place, i.e. when
  `explanes != inplanes`; otherwise the first convolution maps directly to `outplanes`)
!!! warning

    This function does not handle the residual connection by default. The user must add
    this manually to use this block as a standalone. To construct a model, check out the
    builders, which handle the residual connection and other details.
Originally introduced by Google in [EfficientNet-EdgeTPU: Creating Accelerator-Optimized Neural Networks with AutoML](https://ai.googleblog.com/2019/08/efficientnet-edgetpu-creating.html).
Later used in the EfficientNetv2 paper.

# Arguments
- `kernel_size`: kernel size of the convolutional layers
- `inplanes`: number of input feature maps
- `explanes`: The number of expanded feature maps
- `outplanes`: The number of output feature maps
- `activation`: The activation function for the fused expansion convolution
- `stride`: The stride of the fused convolution kernel; has to be either 1 or 2
- `norm_layer`: The normalization layer to use
"""
function fused_mbconv(kernel_size::Dims{2}, inplanes::Integer,
explanes::Integer, outplanes::Integer, activation = relu;
stride::Integer, norm_layer = BatchNorm)
@assert stride in [1, 2] "`stride` has to be 1 or 2 for `fused_mbconv`"
    layers = []
    if explanes != inplanes
        # fused expand
        append!(layers,
                conv_norm(kernel_size, inplanes, explanes, activation; norm_layer,
                          stride, pad = SamePad()))
        # project
        append!(layers, conv_norm((1, 1), explanes, outplanes, identity; norm_layer))
    else
        # no expansion: a single fused convolution maps straight to `outplanes`
        append!(layers,
                conv_norm(kernel_size, inplanes, outplanes, activation; norm_layer,
                          stride, pad = SamePad()))
    end
return Chain(layers...)
end
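
# Hedged usage sketch (an illustration, not part of the upstream API): the two
# forms `fused_mbconv` can take, depending on whether an expansion happens:
#
#   expanded = fused_mbconv((3, 3), 24, 96, 24, swish; stride = 1)  # 3x3 expand + 1x1 project
#   plain = fused_mbconv((3, 3), 24, 24, 24, swish; stride = 1)     # single fused 3x3 conv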