diff --git a/.travis.yml b/.travis.yml index ab560b3b4..044dee26a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,24 +9,9 @@ notifications: email: false git: depth: 99999999 +sudo: required +dist: trusty -## uncomment the following lines to allow failures on nightly julia -## (tests will run but not make your overall status red) -#matrix: -# allow_failures: -# - julia: nightly - -## uncomment and modify the following lines to manually install system packages -#addons: -# apt: # apt-get for linux -# packages: -# - gfortran -#before_script: # homebrew for mac -# - if [ $TRAVIS_OS_NAME = osx ]; then brew install gcc; fi - -## uncomment the following lines to override the default test script -#script: -# - julia -e 'Pkg.clone(pwd()); Pkg.build("Metalhead"); Pkg.test("Metalhead"; coverage=true)' after_success: # push coverage results to Coveralls - julia -e 'cd(Pkg.dir("Metalhead")); Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())' diff --git a/README.md b/README.md index bd251a5e4..c723fa802 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,22 @@ julia> vgg.layers[1:21](x) ⋮ ``` +# Available Models for Object Classification + +1. VGG - VGG11, VGG13, VGG16, VGG19 +2. ResNet - ResNet18, ResNet34, ResNet50, ResNet101, ResNet152 +3. GoogleNet +4. SqueezeNet - v1.0 and v1.1 +5. DenseNet - DenseNet121, DenseNet169, DenseNet201, DenseNet264 + +# Available Pretrained Models for Object Classification + +1. VGG19 +2. ResNet50 +3. GoogleNet +4. SqueezeNet v1.1 +5. DenseNet121 + # Working with common datasets Metalhead includes support for working with several common object recognition datasets. The `datasets()` function will attempt to auto-detect any common dataset placed in diff --git a/src/Metalhead.jl b/src/Metalhead.jl index 151ed87c4..b7f8b1281 100644 --- a/src/Metalhead.jl +++ b/src/Metalhead.jl @@ -5,7 +5,12 @@ using Flux, Images, ImageFiltering, BSON, REPL, Requires, Statistics using Flux: @treelike # Models -export VGG19, SqueezeNet, DenseNet, ResNet, GoogleNet +export VGG19, VGG16, VGG13, VGG11, SqueezeNet, DenseNet121, + DenseNet169, DenseNet201, DenseNet264, ResNet18, + ResNet34, ResNet50, ResNet101, ResNet152, GoogleNet + +# Trained Models Loader +export trained + # Useful re-export from Images export load @@ -30,7 +35,7 @@ include("display/terminal.jl") include("datasets/imagenet.jl") include("datasets/cifar10.jl") include("datasets/autodetect.jl") -include("vgg19.jl") +include("vgg.jl") include("squeezenet.jl") include("densenet.jl") include("resnet.jl") diff --git a/src/densenet.jl b/src/densenet.jl index edee81d10..a4a134489 100644 --- a/src/densenet.jl +++ b/src/densenet.jl @@ -14,7 +14,7 @@ Bottleneck(in_planes, growth_rate) = Bottleneck( Transition(chs::Pair{<:Int, <:Int}) = Chain(BatchNorm(chs[1], relu), Conv((1, 1), chs), - x -> meanpool(x, (2, 2))) + MeanPool((2, 2))) function _make_dense_layers(block, in_planes, growth_rate, nblock) local layers = [] @@ -25,70 +25,130 @@ function _make_dense_layers(block, in_planes, growth_rate, nblock) Chain(layers...)
end -function _densenet(nblocks = [6, 12, 24, 16]; block = Bottleneck, growth_rate = 32, reduction = 0.5, num_classes = 1000) - num_planes = 2growth_rate - layers = [] - push!(layers, Conv((7, 7), 3=>num_planes, stride = (2, 2), pad = (3, 3))) - push!(layers, BatchNorm(num_planes, relu)) - push!(layers, x -> maxpool(x, (3, 3), stride = (2, 2), pad = (1, 1))) - - for i in 1:3 - push!(layers, _make_dense_layers(block, num_planes, growth_rate, nblocks[i])) - num_planes += nblocks[i] * growth_rate - out_planes = Int(floor(num_planes * reduction)) - push!(layers, Transition(num_planes=>out_planes)) - num_planes = out_planes - end - - push!(layers, _make_dense_layers(block, num_planes, growth_rate, nblocks[4])) - num_planes += nblocks[4] * growth_rate - push!(layers, BatchNorm(num_planes, relu)) - - Chain(layers..., x -> meanpool(x, (7, 7)), - x -> reshape(x, :, size(x, 4)), - Dense(num_planes, num_classes), softmax) -end - -function densenet_layers() +function trained_densenet121_layers() weight = Metalhead.weights("densenet.bson") weights = Dict{Any, Any}() for ele in keys(weight) - weights[string(ele)] = convert(Array{Float64, N} where N ,weight[ele]) + weights[string(ele)] = weight[ele] end - ls = _densenet() - ls[1].weight.data .= weights["conv1_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:] + ls = load_densenet(densenet_configs["densenet121"]...) + ls[1].weight.data .= flipkernel(weights["conv1_w_0"]) ls[2].β.data .= weights["conv1/bn_b_0"] ls[2].γ.data .= weights["conv1/bn_w_0"] + ls[2].σ² .= weights["conv1/bn_var_0"] + ls[2].μ .= weights["conv1/bn_mean_0"] l = 4 for (c, n) in enumerate([6, 12, 24, 16]) for i in 1:n for j in [2, 4] - ls[l][i].layer[j].weight.data .= weights["conv$(c+1)_$i/x$(j÷2)_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:] + ls[l][i].layer[j].weight.data .= flipkernel(weights["conv$(c+1)_$i/x$(j÷2)_w_0"]) ls[l][i].layer[j-1].β.data .= weights["conv$(c+1)_$i/x$(j÷2)/bn_b_0"] ls[l][i].layer[j-1].γ.data .= weights["conv$(c+1)_$i/x$(j÷2)/bn_w_0"] + ls[l][i].layer[j-1].σ² .= weights["conv$(c+1)_$i/x$(j÷2)/bn_var_0"] + ls[l][i].layer[j-1].μ .= weights["conv$(c+1)_$i/x$(j÷2)/bn_mean_0"] end end l += 2 end for i in [5, 7, 9] # Transition Block Conv Layers - ls[i][2].weight.data .= weights["conv$(i÷2)_blk_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:] + ls[i][2].weight.data .= flipkernel(weights["conv$(i÷2)_blk_w_0"]) ls[i][1].β.data .= weights["conv$(i÷2)_blk/bn_b_0"] ls[i][1].γ.data .= weights["conv$(i÷2)_blk/bn_w_0"] + ls[i][1].σ² .= weights["conv$(i÷2)_blk/bn_var_0"] + ls[i][1].μ .= weights["conv$(i÷2)_blk/bn_mean_0"] end + ls[11].β.data .= weights["conv5_blk/bn_b_0"] + ls[11].γ.data .= weights["conv5_blk/bn_w_0"] + ls[11].σ² .= weights["conv5_blk/bn_var_0"] + ls[11].μ .= weights["conv5_blk/bn_mean_0"] ls[end-1].W.data .= transpose(dropdims(weights["fc6_w_0"], dims = (1, 2))) # Dense Layers ls[end-1].b.data .= weights["fc6_b_0"] Flux.testmode!(ls) return ls end -struct DenseNet <: ClassificationModel{ImageNet.ImageNet1k} +function load_densenet(block, nblocks; growth_rate = 32, reduction = 0.5, num_classes = 1000) + num_planes = 2growth_rate + layers = [] + push!(layers, Conv((7, 7), 3=>num_planes, stride = (2, 2), pad = (3, 3))) + push!(layers, BatchNorm(num_planes, relu)) + push!(layers, MaxPool((3, 3), stride = (2, 2), pad = (1, 1))) + + for i in 1:3 + push!(layers, _make_dense_layers(block, num_planes, growth_rate, nblocks[i])) + num_planes += nblocks[i] * growth_rate + out_planes = Int(floor(num_planes * reduction)) + push!(layers, Transition(num_planes=>out_planes)) + num_planes = out_planes + 
end + + push!(layers, _make_dense_layers(block, num_planes, growth_rate, nblocks[4])) + num_planes += nblocks[4] * growth_rate + push!(layers, BatchNorm(num_planes, relu)) + + Chain(layers..., MeanPool((7, 7)), + x -> reshape(x, :, size(x, 4)), + Dense(num_planes, num_classes), softmax) +end + +densenet_configs = + Dict("densenet121" => (Bottleneck, [6, 12, 24, 16]), + "densenet169" => (Bottleneck, [6, 12, 32, 32]), + "densenet201" => (Bottleneck, [6, 12, 48, 32]), + "densenet264" => (Bottleneck, [6, 12, 64, 48])) + +struct DenseNet121 <: ClassificationModel{ImageNet.ImageNet1k} + layers::Chain +end + +DenseNet121() = DenseNet121(load_densenet(densenet_configs["densenet121"]...)) + +trained(::Type{DenseNet121}) = DenseNet121(trained_densenet121_layers()) + +Base.show(io::IO, ::DenseNet121) = print(io, "DenseNet121()") + +@treelike DenseNet121 + +(m::DenseNet121)(x) = m.layers(x) + +struct DenseNet169 <: ClassificationModel{ImageNet.ImageNet1k} + layers::Chain +end + +DenseNet169() = DenseNet169(load_densenet(densenet_configs["densenet169"]...)) + +trained(::Type{DenseNet169}) = error("Pretrained Weights for DenseNet169 are not available") + +Base.show(io::IO, ::DenseNet169) = print(io, "DenseNet169()") + +@treelike DenseNet169 + +(m::DenseNet169)(x) = m.layers(x) + +struct DenseNet201 <: ClassificationModel{ImageNet.ImageNet1k} layers::Chain end -DenseNet() = DenseNet(densenet_layers()) +DenseNet201() = DenseNet201(load_densenet(densenet_configs["densenet201"]...)) + +trained(::Type{DenseNet201}) = error("Pretrained Weights for DenseNet201 are not available") + +Base.show(io::IO, ::DenseNet201) = print(io, "DenseNet201()") + +@treelike DenseNet201 + +(m::DenseNet201)(x) = m.layers(x) + +struct DenseNet264 <: ClassificationModel{ImageNet.ImageNet1k} + layers::Chain +end + +DenseNet264() = DenseNet264(load_densenet(densenet_configs["densenet264"]..., growth_rate=48)) + +trained(::Type{DenseNet264}) = error("Pretrained Weights for DenseNet264 are not available") -Base.show(io::IO, ::DenseNet) = print(io, "DenseNet()") +Base.show(io::IO, ::DenseNet264) = print(io, "DenseNet264()") -@treelike DenseNet +@treelike DenseNet264 -(m::DenseNet)(x) = m.layers(x) +(m::DenseNet264)(x) = m.layers(x) diff --git a/src/googlenet.jl b/src/googlenet.jl index 80fa29f4a..c2f250fff 100644 --- a/src/googlenet.jl +++ b/src/googlenet.jl @@ -9,61 +9,57 @@ end function InceptionBlock(in_chs, chs_1x1, chs_3x3_reduce, chs_3x3, chs_5x5_reduce, chs_5x5, pool_proj) path_1 = Conv((1, 1), in_chs=>chs_1x1, relu) - - path_2 = (Conv((1, 1), in_chs=>chs_3x3_reduce, relu), - Conv((3, 3), chs_3x3_reduce=>chs_3x3, relu, pad = (1, 1))) - - path_3 = (Conv((1, 1), in_chs=>chs_5x5_reduce, relu), - Conv((5, 5), chs_5x5_reduce=>chs_5x5, relu, pad = (2, 2))) - - path_4 = (x -> maxpool(x, (3,3), stride = (1, 1), pad = (1, 1)), - Conv((1, 1), in_chs=>pool_proj, relu)) - + path_2 = Chain(Conv((1, 1), in_chs=>chs_3x3_reduce, relu), + Conv((3, 3), chs_3x3_reduce=>chs_3x3, relu, pad = (1, 1))) + path_3 = Chain(Conv((1, 1), in_chs=>chs_5x5_reduce, relu), + Conv((5, 5), chs_5x5_reduce=>chs_5x5, relu, pad = (2, 2))) + path_4 = Chain(MaxPool((3,3), stride = (1, 1), pad = (1, 1)), + Conv((1, 1), in_chs=>pool_proj, relu)) InceptionBlock(path_1, path_2, path_3, path_4) end function (m::InceptionBlock)(x) - cat(m.path_1(x), m.path_2[2](m.path_2[1](x)), m.path_3[2](m.path_3[1](x)), m.path_4[2](m.path_4[1](x)), dims = 3) + cat(m.path_1(x), m.path_2(x), m.path_3(x), m.path_4(x), dims = 3) end -_googlenet() = Chain(Conv((7, 7), 3=>64, stride = (2, 2), 
relu, pad = (3, 3)), - x -> maxpool(x, (3, 3), stride = (2, 2), pad = (1, 1)), +load_googlenet() = Chain(Conv((7, 7), 3=>64, stride = (2, 2), relu, pad = (3, 3)), + MaxPool((3, 3), stride = (2, 2), pad = (1, 1)), Conv((1, 1), 64=>64, relu), Conv((3, 3), 64=>192, relu, pad = (1, 1)), - x -> maxpool(x, (3, 3), stride = (2, 2), pad = (1, 1)), + MaxPool((3, 3), stride = (2, 2), pad = (1, 1)), InceptionBlock(192, 64, 96, 128, 16, 32, 32), InceptionBlock(256, 128, 128, 192, 32, 96, 64), - x -> maxpool(x, (3, 3), stride = (2, 2), pad = (1, 1)), + MaxPool((3, 3), stride = (2, 2), pad = (1, 1)), InceptionBlock(480, 192, 96, 208, 16, 48, 64), InceptionBlock(512, 160, 112, 224, 24, 64, 64), InceptionBlock(512, 128, 128, 256, 24, 64, 64), InceptionBlock(512, 112, 144, 288, 32, 64, 64), InceptionBlock(528, 256, 160, 320, 32, 128, 128), - x -> maxpool(x, (3, 3), stride = (2, 2), pad = (1, 1)), + MaxPool((3, 3), stride = (2, 2), pad = (1, 1)), InceptionBlock(832, 256, 160, 320, 32, 128, 128), InceptionBlock(832, 384, 192, 384, 48, 128, 128), - x -> meanpool(x, (7, 7), stride = (1, 1), pad = (0, 0)), + MeanPool((7, 7), stride = (1, 1), pad = (0, 0)), x -> reshape(x, :, size(x, 4)), Dropout(0.4), Dense(1024, 1000), softmax) -function googlenet_layers() +function trained_googlenet_layers() weight = Metalhead.weights("googlenet.bson") weights = Dict{Any, Any}() for ele in keys(weight) - weights[string(ele)] = convert(Array{Float64, N} where N, weight[ele]) + weights[string(ele)] = weight[ele] end - ls = _googlenet() - ls[1].weight.data .= weights["conv1/7x7_s2_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:]; ls[1].bias.data .= weights["conv1/7x7_s2_b_0"] - ls[3].weight.data .= weights["conv2/3x3_reduce_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:]; ls[3].bias.data .= weights["conv2/3x3_reduce_b_0"] - ls[4].weight.data .= weights["conv2/3x3_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:]; ls[4].bias.data .= weights["conv2/3x3_b_0"] + ls = load_googlenet() + ls[1].weight.data .= flipkernel(weights["conv1/7x7_s2_w_0"]); ls[1].bias.data .= weights["conv1/7x7_s2_b_0"] + ls[3].weight.data .= flipkernel(weights["conv2/3x3_reduce_w_0"]); ls[3].bias.data .= weights["conv2/3x3_reduce_b_0"] + ls[4].weight.data .= flipkernel(weights["conv2/3x3_w_0"]); ls[4].bias.data .= weights["conv2/3x3_b_0"] for (a, b) in [(6, "3a"), (7, "3b"), (9, "4a"), (10, "4b"), (11, "4c"), (12, "4d"), (13, "4e"), (15, "5a"), (16, "5b")] - ls[a].path_1.weight.data .= weights["inception_$b/1x1_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:]; ls[a].path_1.bias.data .= weights["inception_$b/1x1_b_0"] - ls[a].path_2[1].weight.data .= weights["inception_$b/3x3_reduce_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:]; ls[a].path_2[1].bias.data .= weights["inception_$b/3x3_reduce_b_0"] - ls[a].path_2[2].weight.data .= weights["inception_$b/3x3_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:]; ls[a].path_2[2].bias.data .= weights["inception_$b/3x3_b_0"] - ls[a].path_3[1].weight.data .= weights["inception_$b/5x5_reduce_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:]; ls[a].path_3[1].bias.data .= weights["inception_$b/5x5_reduce_b_0"] - ls[a].path_3[2].weight.data .= weights["inception_$b/5x5_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:]; ls[a].path_3[2].bias.data .= weights["inception_$b/5x5_b_0"] - ls[a].path_4[2].weight.data .= weights["inception_$b/pool_proj_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:]; ls[a].path_4[2].bias.data .= weights["inception_$b/pool_proj_b_0"] + ls[a].path_1.weight.data .= flipkernel(weights["inception_$b/1x1_w_0"]); ls[a].path_1.bias.data .= weights["inception_$b/1x1_b_0"] + ls[a].path_2[1].weight.data .= 
flipkernel(weights["inception_$b/3x3_reduce_w_0"]); ls[a].path_2[1].bias.data .= weights["inception_$b/3x3_reduce_b_0"] + ls[a].path_2[2].weight.data .= flipkernel(weights["inception_$b/3x3_w_0"]); ls[a].path_2[2].bias.data .= weights["inception_$b/3x3_b_0"] + ls[a].path_3[1].weight.data .= flipkernel(weights["inception_$b/5x5_reduce_w_0"]); ls[a].path_3[1].bias.data .= weights["inception_$b/5x5_reduce_b_0"] + ls[a].path_3[2].weight.data .= flipkernel(weights["inception_$b/5x5_w_0"]); ls[a].path_3[2].bias.data .= weights["inception_$b/5x5_b_0"] + ls[a].path_4[2].weight.data .= flipkernel(weights["inception_$b/pool_proj_w_0"]); ls[a].path_4[2].bias.data .= weights["inception_$b/pool_proj_b_0"] end ls[20].W.data .= transpose(weights["loss3/classifier_w_0"]); ls[20].b.data .= weights["loss3/classifier_b_0"] Flux.testmode!(ls) @@ -74,7 +70,9 @@ struct GoogleNet <: ClassificationModel{ImageNet.ImageNet1k} layers::Chain end -GoogleNet() = GoogleNet(googlenet_layers()) +GoogleNet() = GoogleNet(load_googlenet()) + +trained(::Type{GoogleNet}) = GoogleNet(trained_googlenet_layers()) Base.show(io::IO, ::GoogleNet) = print(io, "GoogleNet()") diff --git a/src/resnet.jl b/src/resnet.jl index 3277632ee..729627697 100644 --- a/src/resnet.jl +++ b/src/resnet.jl @@ -1,83 +1,87 @@ struct ResidualBlock - conv_layers - norm_layers + layers shortcut end @treelike ResidualBlock function ResidualBlock(filters, kernels::Array{Tuple{Int,Int}}, pads::Array{Tuple{Int,Int}}, strides::Array{Tuple{Int,Int}}, shortcut = identity) - local conv_layers = [] - local norm_layers = [] + layers = [] for i in 2:length(filters) - push!(conv_layers, Conv(kernels[i-1], filters[i-1]=>filters[i], pad = pads[i-1], stride = strides[i-1])) - push!(norm_layers, BatchNorm(filters[i])) + push!(layers, Conv(kernels[i-1], filters[i-1]=>filters[i], pad = pads[i-1], stride = strides[i-1])) + if i != length(filters) + push!(layers, BatchNorm(filters[i], relu)) + else + push!(layers, BatchNorm(filters[i])) + end end - ResidualBlock(Tuple(conv_layers),Tuple(norm_layers),shortcut) + ResidualBlock(Chain(layers...), shortcut) end -function ResidualBlock(filters, kernels::Array{Int}, pads::Array{Int}, strides::Array{Int}, shortcut = identity) +ResidualBlock(filters, kernels::Array{Int}, pads::Array{Int}, strides::Array{Int}, shortcut = identity) = ResidualBlock(filters, [(i,i) for i in kernels], [(i,i) for i in pads], [(i,i) for i in strides], shortcut) -end -function (block::ResidualBlock)(input) - local value = copy.(input) - for i in 1:length(block.conv_layers)-1 - value = relu.((block.norm_layers[i])((block.conv_layers[i])(value))) +(r::ResidualBlock)(input) = relu.(r.layers(input) + r.shortcut(input)) + +function BasicBlock(filters::Int, downsample::Bool = false, res_top::Bool = false) + # NOTE: res_top is set to true if this is the first residual connection of the architecture + # If the number of channels is to be halved set the downsample argument to true + if !downsample || res_top + return ResidualBlock([filters for i in 1:3], [3,3], [1,1], [1,1]) end - relu.(((block.norm_layers[end])((block.conv_layers[end])(value))) + block.shortcut(input)) + shortcut = Chain(Conv((3,3), filters÷2=>filters, pad = (1,1), stride = (2,2)), BatchNorm(filters)) + ResidualBlock([filters÷2, filters, filters], [3,3], [1,1], [1,2], shortcut) end function Bottleneck(filters::Int, downsample::Bool = false, res_top::Bool = false) - if(!downsample && !res_top) - return ResidualBlock([4 * filters, filters, filters, 4 * filters], [1,3,1], [0,1,0], [1,1,1]) - 
elseif(downsample && res_top) - return ResidualBlock([filters, filters, filters, 4 * filters], [1,3,1], [0,1,0], [1,1,1], Chain(Conv((1,1), filters=>4 * filters, pad = (0,0), stride = (1,1)), BatchNorm(4 * filters))) + # NOTE: res_top is set to true if this is the first residual connection of the architecture + # If the number of channels is to be halved set the downsample argument to true + if !downsample && !res_top + ResidualBlock([4 * filters, filters, filters, 4 * filters], [1,3,1], [0,1,0], [1,1,1]) + elseif downsample && res_top + ResidualBlock([filters, filters, filters, 4 * filters], [1,3,1], [0,1,0], [1,1,1], Chain(Conv((1,1), filters=>4 * filters, pad = (0,0), stride = (1,1)), BatchNorm(4 * filters))) else shortcut = Chain(Conv((1,1), 2 * filters=>4 * filters, pad = (0,0), stride = (2,2)), BatchNorm(4 * filters)) - return ResidualBlock([2 * filters, filters, filters, 4 * filters], [1,3,1], [0,1,0], [1,1,2], shortcut) - end -end - -function resnet50() - local layers = [3, 4, 6, 3] - local layer_arr = [] - - push!(layer_arr, Conv((7,7), 3=>64, pad = (3,3), stride = (2,2))) - push!(layer_arr, x -> maxpool(x, (3,3), pad = (1,1), stride = (2,2))) - - initial_filters = 64 - for i in 1:length(layers) - push!(layer_arr, Bottleneck(initial_filters, true, i==1)) - for j in 2:layers[i] - push!(layer_arr, Bottleneck(initial_filters)) - end - initial_filters *= 2 + ResidualBlock([2 * filters, filters, filters, 4 * filters], [1,3,1], [0,1,0], [1,1,2], shortcut) end - - push!(layer_arr, x -> meanpool(x, (7,7))) - push!(layer_arr, x -> reshape(x, :, size(x,4))) - push!(layer_arr, (Dense(2048, 1000))) - push!(layer_arr, softmax) - - Chain(layer_arr...) end -function resnet_layers() +function trained_resnet50_layers() weight = Metalhead.weights("resnet.bson") weights = Dict{Any ,Any}() for ele in keys(weight) - weights[string(ele)] = convert(Array{Float64, N} where N, weight[ele]) + weights[string(ele)] = weight[ele] end - ls = resnet50() - ls[1].weight.data .= weights["gpu_0/conv1_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:] + ls = load_resnet(resnet_configs["resnet50"]...) 
+ ls[1][1].weight.data .= flipkernel(weights["gpu_0/conv1_w_0"]) + ls[1][2].σ² .= weights["gpu_0/res_conv1_bn_riv_0"] + ls[1][2].μ .= weights["gpu_0/res_conv1_bn_rm_0"] + ls[1][2].β.data .= weights["gpu_0/res_conv1_bn_b_0"] + ls[1][2].γ.data .= weights["gpu_0/res_conv1_bn_s_0"] count = 2 for j in [3:5, 6:9, 10:15, 16:18] for p in j - ls[p].conv_layers[1].weight.data .= weights["gpu_0/res$(count)_$(p-j[1])_branch2a_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:] - ls[p].conv_layers[2].weight.data .= weights["gpu_0/res$(count)_$(p-j[1])_branch2b_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:] - ls[p].conv_layers[3].weight.data .= weights["gpu_0/res$(count)_$(p-j[1])_branch2c_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:] + ls[p].layers[1].weight.data .= flipkernel(weights["gpu_0/res$(count)_$(p-j[1])_branch2a_w_0"]) + ls[p].layers[2].σ² .= weights["gpu_0/res$(count)_$(p-j[1])_branch2a_bn_riv_0"] + ls[p].layers[2].μ .= weights["gpu_0/res$(count)_$(p-j[1])_branch2a_bn_rm_0"] + ls[p].layers[2].β.data .= weights["gpu_0/res$(count)_$(p-j[1])_branch2a_bn_b_0"] + ls[p].layers[2].γ.data .= weights["gpu_0/res$(count)_$(p-j[1])_branch2a_bn_s_0"] + ls[p].layers[3].weight.data .= flipkernel(weights["gpu_0/res$(count)_$(p-j[1])_branch2b_w_0"]) + ls[p].layers[4].σ² .= weights["gpu_0/res$(count)_$(p-j[1])_branch2b_bn_riv_0"] + ls[p].layers[4].μ .= weights["gpu_0/res$(count)_$(p-j[1])_branch2b_bn_rm_0"] + ls[p].layers[4].β.data .= weights["gpu_0/res$(count)_$(p-j[1])_branch2b_bn_b_0"] + ls[p].layers[4].γ.data .= weights["gpu_0/res$(count)_$(p-j[1])_branch2b_bn_s_0"] + ls[p].layers[5].weight.data .= flipkernel(weights["gpu_0/res$(count)_$(p-j[1])_branch2c_w_0"]) + ls[p].layers[6].σ² .= weights["gpu_0/res$(count)_$(p-j[1])_branch2c_bn_riv_0"] + ls[p].layers[6].μ .= weights["gpu_0/res$(count)_$(p-j[1])_branch2c_bn_rm_0"] + ls[p].layers[6].β.data .= weights["gpu_0/res$(count)_$(p-j[1])_branch2c_bn_b_0"] + ls[p].layers[6].γ.data .= weights["gpu_0/res$(count)_$(p-j[1])_branch2c_bn_s_0"] end + ls[j[1]].shortcut[1].weight.data .= flipkernel(weights["gpu_0/res$(count)_0_branch1_w_0"]) + ls[j[1]].shortcut[2].σ² .= weights["gpu_0/res$(count)_0_branch1_bn_riv_0"] + ls[j[1]].shortcut[2].μ .= weights["gpu_0/res$(count)_0_branch1_bn_rm_0"] + ls[j[1]].shortcut[2].β.data .= weights["gpu_0/res$(count)_0_branch1_bn_b_0"] + ls[j[1]].shortcut[2].γ.data .= weights["gpu_0/res$(count)_0_branch1_bn_s_0"] count += 1 end ls[21].W.data .= transpose(weights["gpu_0/pred_w_0"]); ls[21].b.data .= weights["gpu_0/pred_b_0"] @@ -85,14 +89,108 @@ function resnet_layers() return ls end -struct ResNet <: ClassificationModel{ImageNet.ImageNet1k} +function load_resnet(Block, layers, initial_filters::Int = 64, nclasses::Int = 1000) + local top = [] + local residual = [] + local bottom = [] + + push!(top, Chain(Conv((7,7), 3=>initial_filters, pad = (3,3), stride = (2,2)), + BatchNorm(initial_filters))) + push!(top, MaxPool((3,3), pad = (1,1), stride = (2,2))) + + for i in 1:length(layers) + push!(residual, Block(initial_filters, true, i==1)) + for j in 2:layers[i] + push!(residual, Block(initial_filters)) + end + initial_filters *= 2 + end + + push!(bottom, MeanPool((7,7))) + push!(bottom, x -> reshape(x, :, size(x,4))) + if Block == Bottleneck + push!(bottom, (Dense(2048, nclasses))) + else + push!(bottom, (Dense(512, nclasses))) + end + push!(bottom, softmax) + + Chain(top..., residual..., bottom...) 
+end + +resnet_configs = + Dict("resnet18" => (BasicBlock, [2, 2, 2, 2]), + "resnet34" => (BasicBlock, [3, 4, 6, 3]), + "resnet50" => (Bottleneck, [3, 4, 6, 3]), + "resnet101" => (Bottleneck, [3, 4, 23, 3]), + "resnet152" => (Bottleneck, [3, 8, 36, 3])) + +struct ResNet18 <: ClassificationModel{ImageNet.ImageNet1k} layers::Chain end -ResNet() = ResNet(resnet_layers()) +ResNet18() = ResNet18(load_resnet(resnet_configs["resnet18"]...)) + +trained(::Type{ResNet18}) = error("Pretrained Weights for ResNet18 are not available") + +Base.show(io::IO, ::ResNet18) = print(io, "ResNet18()") + +@treelike ResNet18 + +(m::ResNet18)(x) = m.layers(x) + +struct ResNet34 <: ClassificationModel{ImageNet.ImageNet1k} + layers::Chain +end + +ResNet34() = ResNet34(load_resnet(resnet_configs["resnet34"]...)) + +trained(::Type{ResNet34}) = error("Pretrained Weights for ResNet34 are not available") + +Base.show(io::IO, ::ResNet34) = print(io, "ResNet34()") + +@treelike ResNet34 + +(m::ResNet34)(x) = m.layers(x) + +struct ResNet50 <: ClassificationModel{ImageNet.ImageNet1k} + layers::Chain +end + +ResNet50() = ResNet50(load_resnet(resnet_configs["resnet50"]...)) + +trained(::Type{ResNet50}) = ResNet50(trained_resnet50_layers()) + +Base.show(io::IO, ::ResNet50) = print(io, "ResNet50()") + +@treelike ResNet50 + +(m::ResNet50)(x) = m.layers(x) + +struct ResNet101 <: ClassificationModel{ImageNet.ImageNet1k} + layers::Chain +end + +ResNet101() = ResNet101(load_resnet(resnet_configs["resnet101"]...)) + +trained(::Type{ResNet101}) = error("Pretrained Weights for ResNet101 are not available") + +Base.show(io::IO, ::ResNet101) = print(io, "ResNet101()") + +@treelike ResNet101 + +(m::ResNet101)(x) = m.layers(x) + +struct ResNet152 <: ClassificationModel{ImageNet.ImageNet1k} + layers::Chain +end + +ResNet152() = ResNet152(load_resnet(resnet_configs["resnet152"]...)) + +trained(::Type{ResNet152}) = error("Pretrained Weights for ResNet152 are not available") -Base.show(io::IO, ::ResNet) = print(io, "ResNet()") +Base.show(io::IO, ::ResNet152) = print(io, "ResNet152()") -@treelike ResNet +@treelike ResNet152 -(m::ResNet)(x) = m.layers(x) +(m::ResNet152)(x) = m.layers(x) diff --git a/src/squeezenet.jl b/src/squeezenet.jl index e9fcce46f..40d5981b0 100644 --- a/src/squeezenet.jl +++ b/src/squeezenet.jl @@ -1,63 +1,117 @@ -function squeezenet_layers() +struct Fire + squeeze + expand1x1 + expand3x3 +end + +@treelike Fire + +Fire(inplanes, squeeze_planes, expand1x1_planes, expand3x3_planes) = + Fire(Conv((1, 1), inplanes=>squeeze_planes, relu), + Conv((1, 1), squeeze_planes=>expand1x1_planes, relu), + Conv((3, 3), squeeze_planes=>expand3x3_planes, relu, pad=(1, 1))) + +function (f::Fire)(x) + x = f.squeeze(x) + cat(f.expand1x1(x), f.expand3x3(x), dims=3) + end + +# NOTE: The initialization of the Conv layers is different in the paper.
They are Kaiming Normal +load_squeezenetv1_0() = Chain(Conv((7, 7), 3=>96, relu, stride = (2, 2)), + MaxPool((3, 3), stride = (2, 2)), + Fire(96, 16, 64, 64), + Fire(128, 16, 64, 64), + Fire(128, 32, 128, 128), + MaxPool((3, 3), stride = (2, 2)), + Fire(256, 32, 128, 128), + Fire(256, 48, 192, 192), + Fire(384, 48, 192, 192), + Fire(384, 64, 256, 256), + MaxPool((3, 3), stride = (2, 2)), + Fire(512, 64, 256, 256), + Dropout(0.5), + Conv((1, 1), 512=>1000, relu), + MeanPool((12, 12), stride = (1, 1)), + x -> reshape(x, :, size(x, 4)), + softmax) + +load_squeezenetv1_1() = Chain(Conv((3, 3), 3=>64, relu, stride = (2, 2)), + MaxPool((3, 3), stride = (2, 2)), + Fire(64, 16, 64, 64), + Fire(128, 16, 64, 64), + MaxPool((3, 3), stride = (2, 2)), + Fire(128, 32, 128, 128), + Fire(256, 32, 128, 128), + MaxPool((3, 3), stride = (2, 2)), + Fire(256, 48, 192, 192), + Fire(384, 48, 192, 192), + Fire(384, 64, 256, 256), + Fire(512, 64, 256, 256), + Dropout(0.5), + Conv((1, 1), 512=>1000, relu), + MeanPool((13, 13), stride = (1, 1)), + x -> reshape(x, :, size(x, 4)), + softmax) + +function trained_squeezenetv1_1_layers() weight = Metalhead.weights("squeezenet.bson") weights = Dict{Any ,Any}() for ele in keys(weight) weights[string(ele)] = weight[ele] end - c_1 = Conv(weights["conv10_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:], weights["conv10_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)) + c_1 = Conv(flipkernel(weights["conv10_w_0"]), weights["conv10_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)) c_2 = Dropout(0.5f0) - c_3 = Conv(weights["fire9/expand1x1_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:], weights["fire9/expand1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)) - c_4 = Conv(weights["fire9/squeeze1x1_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:], weights["fire9/squeeze1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)) - c_5 = Conv(weights["fire8/expand1x1_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:], weights["fire8/expand1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)) - c_6 = Conv(weights["fire8/squeeze1x1_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:], weights["fire8/squeeze1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)) - c_7 = Conv(weights["fire7/expand1x1_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:], weights["fire7/expand1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)) - c_8 = Conv(weights["fire7/squeeze1x1_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:], weights["fire7/squeeze1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)) - c_9 = Conv(weights["fire6/expand1x1_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:], weights["fire6/expand1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)) - c_10 = Conv(weights["fire6/squeeze1x1_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:], weights["fire6/squeeze1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)) - c_11 = Conv(weights["fire5/expand1x1_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:], weights["fire5/expand1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)) - c_12 = Conv(weights["fire5/squeeze1x1_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:], weights["fire5/squeeze1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)) - c_13 = Conv(weights["fire4/expand1x1_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:], weights["fire4/expand1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)) - c_14 = Conv(weights["fire4/squeeze1x1_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:], weights["fire4/squeeze1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)) - c_15 = Conv(weights["fire3/expand1x1_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:], weights["fire3/expand1x1_b_0"], stride=(1, 1), pad=(0, 0), 
dilation = (1, 1)) - c_16 = Conv(weights["fire3/squeeze1x1_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:], weights["fire3/squeeze1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)) - c_17 = Conv(weights["fire2/expand1x1_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:], weights["fire2/expand1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)) - c_18 = Conv(weights["fire2/squeeze1x1_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:], weights["fire2/squeeze1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)) - c_19 = Conv(weights["conv1_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:], weights["conv1_b_0"], stride=(2, 2), pad=(0, 0), dilation = (1, 1)) - c_20 = Conv(weights["fire2/expand3x3_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:], weights["fire2/expand3x3_b_0"], stride=(1, 1), pad=(1, 1), dilation = (1, 1)) - c_21 = Conv(weights["fire3/expand3x3_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:], weights["fire3/expand3x3_b_0"], stride=(1, 1), pad=(1, 1), dilation = (1, 1)) - c_22 = Conv(weights["fire4/expand3x3_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:], weights["fire4/expand3x3_b_0"], stride=(1, 1), pad=(1, 1), dilation = (1, 1)) - c_23 = Conv(weights["fire5/expand3x3_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:], weights["fire5/expand3x3_b_0"], stride=(1, 1), pad=(1, 1), dilation = (1, 1)) - c_24 = Conv(weights["fire6/expand3x3_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:], weights["fire6/expand3x3_b_0"], stride=(1, 1), pad=(1, 1), dilation = (1, 1)) - c_25 = Conv(weights["fire7/expand3x3_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:], weights["fire7/expand3x3_b_0"], stride=(1, 1), pad=(1, 1), dilation = (1, 1)) - c_26 = Conv(weights["fire8/expand3x3_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:], weights["fire8/expand3x3_b_0"], stride=(1, 1), pad=(1, 1), dilation = (1, 1)) - c_27 = Conv(weights["fire9/expand3x3_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:], weights["fire9/expand3x3_b_0"], stride=(1, 1), pad=(1, 1), dilation = (1, 1)) + c_3 = Conv(flipkernel(weights["fire9/expand1x1_w_0"]), weights["fire9/expand1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)) + c_4 = Conv(flipkernel(weights["fire9/squeeze1x1_w_0"]), weights["fire9/squeeze1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)) + c_5 = Conv(flipkernel(weights["fire8/expand1x1_w_0"]), weights["fire8/expand1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)) + c_6 = Conv(flipkernel(weights["fire8/squeeze1x1_w_0"]), weights["fire8/squeeze1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)) + c_7 = Conv(flipkernel(weights["fire7/expand1x1_w_0"]), weights["fire7/expand1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)) + c_8 = Conv(flipkernel(weights["fire7/squeeze1x1_w_0"]), weights["fire7/squeeze1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)) + c_9 = Conv(flipkernel(weights["fire6/expand1x1_w_0"]), weights["fire6/expand1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)) + c_10 = Conv(flipkernel(weights["fire6/squeeze1x1_w_0"]), weights["fire6/squeeze1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)) + c_11 = Conv(flipkernel(weights["fire5/expand1x1_w_0"]), weights["fire5/expand1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)) + c_12 = Conv(flipkernel(weights["fire5/squeeze1x1_w_0"]), weights["fire5/squeeze1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)) + c_13 = Conv(flipkernel(weights["fire4/expand1x1_w_0"]), weights["fire4/expand1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)) + c_14 = Conv(flipkernel(weights["fire4/squeeze1x1_w_0"]), weights["fire4/squeeze1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)) + c_15 = 
Conv(flipkernel(weights["fire3/expand1x1_w_0"]), weights["fire3/expand1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)) + c_16 = Conv(flipkernel(weights["fire3/squeeze1x1_w_0"]), weights["fire3/squeeze1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)) + c_17 = Conv(flipkernel(weights["fire2/expand1x1_w_0"]), weights["fire2/expand1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)) + c_18 = Conv(flipkernel(weights["fire2/squeeze1x1_w_0"]), weights["fire2/squeeze1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)) + c_19 = Conv(flipkernel(weights["conv1_w_0"]), weights["conv1_b_0"], stride=(2, 2), pad=(0, 0), dilation = (1, 1)) + c_20 = Conv(flipkernel(weights["fire2/expand3x3_w_0"]), weights["fire2/expand3x3_b_0"], stride=(1, 1), pad=(1, 1), dilation = (1, 1)) + c_21 = Conv(flipkernel(weights["fire3/expand3x3_w_0"]), weights["fire3/expand3x3_b_0"], stride=(1, 1), pad=(1, 1), dilation = (1, 1)) + c_22 = Conv(flipkernel(weights["fire4/expand3x3_w_0"]), weights["fire4/expand3x3_b_0"], stride=(1, 1), pad=(1, 1), dilation = (1, 1)) + c_23 = Conv(flipkernel(weights["fire5/expand3x3_w_0"]), weights["fire5/expand3x3_b_0"], stride=(1, 1), pad=(1, 1), dilation = (1, 1)) + c_24 = Conv(flipkernel(weights["fire6/expand3x3_w_0"]), weights["fire6/expand3x3_b_0"], stride=(1, 1), pad=(1, 1), dilation = (1, 1)) + c_25 = Conv(flipkernel(weights["fire7/expand3x3_w_0"]), weights["fire7/expand3x3_b_0"], stride=(1, 1), pad=(1, 1), dilation = (1, 1)) + c_26 = Conv(flipkernel(weights["fire8/expand3x3_w_0"]), weights["fire8/expand3x3_b_0"], stride=(1, 1), pad=(1, 1), dilation = (1, 1)) + c_27 = Conv(flipkernel(weights["fire9/expand3x3_w_0"]), weights["fire9/expand3x3_b_0"], stride=(1, 1), pad=(1, 1), dilation = (1, 1)) - ls = Chain(Conv(weights["conv1_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:], weights["conv1_b_0"], stride=(2, 2), pad=(0, 0), dilation = (1, 1)), - x -> relu.(x), x->maxpool(x, (3,3), pad=(0,0), stride=(2,2)), - Conv(weights["fire2/squeeze1x1_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:], weights["fire2/squeeze1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)), + ls = Chain(Conv(flipkernel(weights["conv1_w_0"]), weights["conv1_b_0"], stride=(2, 2), pad=(0, 0), dilation = (1, 1)), + x -> relu.(x), MaxPool((3,3), pad=(0,0), stride=(2,2)), + Conv(flipkernel(weights["fire2/squeeze1x1_w_0"]), weights["fire2/squeeze1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)), x -> relu.(x), x->cat(relu.(c_17(x)), relu.(c_20(x)), dims=3), - Conv(weights["fire3/squeeze1x1_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:], weights["fire3/squeeze1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)), + Conv(flipkernel(weights["fire3/squeeze1x1_w_0"]), weights["fire3/squeeze1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)), x -> relu.(x), x->cat(relu.(c_15(x)), relu.(c_21(x)), dims=3), - x->maxpool(x, (3, 3), pad=(0, 0), stride=(2, 2)), - Conv(weights["fire4/squeeze1x1_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:], weights["fire4/squeeze1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)), + MaxPool((3, 3), pad=(0, 0), stride=(2, 2)), + Conv(flipkernel(weights["fire4/squeeze1x1_w_0"]), weights["fire4/squeeze1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)), x -> relu.(x), x->cat(relu.(c_13(x)), relu.(c_22(x)), dims=3), - Conv(weights["fire5/squeeze1x1_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:], weights["fire5/squeeze1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)), + Conv(flipkernel(weights["fire5/squeeze1x1_w_0"]), weights["fire5/squeeze1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)), x -> 
relu.(x), x->cat(relu.(c_11(x)), relu.(c_23(x)), dims=3), - x->maxpool(x, (3, 3), pad=(0, 0), stride=(2, 2)), - Conv(weights["fire6/squeeze1x1_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:], weights["fire6/squeeze1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)), + MaxPool((3, 3), pad=(0, 0), stride=(2, 2)), + Conv(flipkernel(weights["fire6/squeeze1x1_w_0"]), weights["fire6/squeeze1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)), x -> relu.(x), x->cat(relu.(c_9(x)), relu.(c_24(x)), dims=3), - Conv(weights["fire7/squeeze1x1_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:], weights["fire7/squeeze1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)), + Conv(flipkernel(weights["fire7/squeeze1x1_w_0"]), weights["fire7/squeeze1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)), x -> relu.(x), x->cat(relu.(c_7(x)), relu.(c_25(x)), dims=3), - Conv(weights["fire8/squeeze1x1_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:], weights["fire8/squeeze1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)), + Conv(flipkernel(weights["fire8/squeeze1x1_w_0"]), weights["fire8/squeeze1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)), x -> relu.(x), x->cat(relu.(c_5(x)), relu.(c_26(x)), dims=3), - Conv(weights["fire9/squeeze1x1_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:], weights["fire9/squeeze1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)), + Conv(flipkernel(weights["fire9/squeeze1x1_w_0"]), weights["fire9/squeeze1x1_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)), x -> relu.(x), x->cat(relu.(c_3(x)), relu.(c_27(x)), dims=3), Dropout(0.5f0), - Conv(weights["conv10_w_0"][end:-1:1,:,:,:][:,end:-1:1,:,:], weights["conv10_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)), + Conv(flipkernel(weights["conv10_w_0"]), weights["conv10_b_0"], stride=(1, 1), pad=(0, 0), dilation = (1, 1)), x -> relu.(x), x->mean(x, dims=[1,2]), - vec, softmax + x -> reshape(x, :, size(x, 4)), softmax ) -#end Flux.testmode!(ls) return ls end @@ -66,7 +120,25 @@ struct SqueezeNet <: ClassificationModel{ImageNet.ImageNet1k} layers::Chain end -SqueezeNet() = SqueezeNet(squeezenet_layers()) +function SqueezeNet(version::String = "1.1") + if version == "1.0" + SqueezeNet(load_squeezenetv1_0()) + elseif version == "1.1" + SqueezeNet(load_squeezenetv1_1()) + else + error("Only SqueezeNet versions 1.1 and 1.0 available") + end +end + +function trained(::Type{SqueezeNet}, version = "1.1") + if version == "1.0" + error("Pretrained Weights for SqueezeNet v1.0 are not available") + elseif version == "1.1" + SqueezeNet(trained_squeezenetv1_1_layers()) + else + error("Only SqueezeNet versions 1.1 and 1.0 available") + end +end Base.show(io::IO, ::SqueezeNet) = print(io, "SqueezeNet()") diff --git a/src/utils.jl b/src/utils.jl index 4cd55f57f..21e5e594b 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -21,6 +21,9 @@ function weights(name) BSON.load(joinpath(deps, name)) end +# TODO: Remove after NNlib supports flip kernel through https://github.com/FluxML/NNlib.jl/pull/53 +flipkernel(x::AbstractArray) = x[end:-1:1, end:-1:1, :, :] + load_img(im::AbstractMatrix{<:Color}) = im load_img(str::AbstractString) = load(str) load_img(val::ValidationImage) = load_img(val.img) diff --git a/src/vgg.jl b/src/vgg.jl new file mode 100644 index 000000000..043fcc030 --- /dev/null +++ b/src/vgg.jl @@ -0,0 +1,120 @@ +function trained_vgg19_layers() + ws = weights("vgg19.bson") + ls = Chain( + Conv(flipkernel(ws[:conv1_1_w_0]), ws[:conv1_1_b_0], relu, pad = (1,1), stride = (1,1), dilation = (1,1)), + Conv(flipkernel(ws[:conv1_2_w_0]), ws[:conv1_2_b_0], relu, pad 
= (1,1), stride = (1,1), dilation = (1,1)), + MaxPool((2,2)), + Conv(flipkernel(ws[:conv2_1_w_0]), ws[:conv2_1_b_0], relu, pad = (1,1), stride = (1,1), dilation = (1,1)), + Conv(flipkernel(ws[:conv2_2_w_0]), ws[:conv2_2_b_0], relu, pad = (1,1), stride = (1,1), dilation = (1,1)), + MaxPool((2,2)), + Conv(flipkernel(ws[:conv3_1_w_0]), ws[:conv3_1_b_0], relu, pad = (1,1), stride = (1,1), dilation = (1,1)), + Conv(flipkernel(ws[:conv3_2_w_0]), ws[:conv3_2_b_0], relu, pad = (1,1), stride = (1,1), dilation = (1,1)), + Conv(flipkernel(ws[:conv3_3_w_0]), ws[:conv3_3_b_0], relu, pad = (1,1), stride = (1,1), dilation = (1,1)), + Conv(flipkernel(ws[:conv3_4_w_0]), ws[:conv3_4_b_0], relu, pad = (1,1), stride = (1,1), dilation = (1,1)), + MaxPool((2,2)), + Conv(flipkernel(ws[:conv4_1_w_0]), ws[:conv4_1_b_0], relu, pad = (1,1), stride = (1,1), dilation = (1,1)), + Conv(flipkernel(ws[:conv4_2_w_0]), ws[:conv4_2_b_0], relu, pad = (1,1), stride = (1,1), dilation = (1,1)), + Conv(flipkernel(ws[:conv4_3_w_0]), ws[:conv4_3_b_0], relu, pad = (1,1), stride = (1,1), dilation = (1,1)), + Conv(flipkernel(ws[:conv4_4_w_0]), ws[:conv4_4_b_0], relu, pad = (1,1), stride = (1,1), dilation = (1,1)), + MaxPool((2,2)), + Conv(flipkernel(ws[:conv5_1_w_0]), ws[:conv5_1_b_0], relu, pad = (1,1), stride = (1,1), dilation = (1,1)), + Conv(flipkernel(ws[:conv5_2_w_0]), ws[:conv5_2_b_0], relu, pad = (1,1), stride = (1,1), dilation = (1,1)), + Conv(flipkernel(ws[:conv5_3_w_0]), ws[:conv5_3_b_0], relu, pad = (1,1), stride = (1,1), dilation = (1,1)), + Conv(flipkernel(ws[:conv5_4_w_0]), ws[:conv5_4_b_0], relu, pad = (1,1), stride = (1,1), dilation = (1,1)), + MaxPool((2,2)), + x -> reshape(x, :, size(x, 4)), + Dense(ws[:fc6_w_0]', ws[:fc6_b_0], relu), + Dropout(0.5f0), + Dense(ws[:fc7_w_0]', ws[:fc7_b_0], relu), + Dropout(0.5f0), + Dense(ws[:fc8_w_0]', ws[:fc8_b_0]), + softmax) + Flux.testmode!(ls) + return ls +end + +function load_vgg(arr, batchnorm::Bool = false) + layers = [] + in_chs = 3 + for i in arr + if i != 0 + push!(layers, Conv((3, 3), in_chs=>i, pad = (1, 1))) + if batchnorm + push!(layers, BatchNorm(i)) + end + push!(layers, x -> relu.(x)) + in_chs = i + else + push!(layers, MaxPool((2, 2))) + end + end + push!(layers, [x -> reshape(x, :, size(x, 4)), Dense(25088, 4096, relu), Dropout(0.5), + Dense(4096, 4096, relu), Dropout(0.5), Dense(4096, 1000), softmax]...) + Chain(layers...) +end + +vgg_configs = + Dict("vgg11" => [64, 0, 128, 0, 256, 256, 0, 512, 512, 0, 512, 512, 0], + "vgg13" => [64, 64, 0, 128, 128, 0, 256, 256, 0, 512, 512, 0, 512, 512, 0], + "vgg16" => [64, 64, 0, 128, 128, 0, 256, 256, 256, 0, 512, 512, 512, 0, 512, 512, 512, 0], + "vgg19" => [64, 64, 0, 128, 128, 0, 256, 256, 256, 256, 0, 512, 512, 512, 512, 0, 512, 512, 512, 512, 0]) + +struct VGG11 <: ClassificationModel{ImageNet.ImageNet1k} + layers::Chain +end + +VGG11(batchnorm::Bool = false) = VGG11(load_vgg(vgg_configs["vgg11"], batchnorm)) + +trained(::Type{VGG11}, batchnorm::Bool = false) = + batchnorm ? error("Pretrained Weights for VGG11 BatchNorm are not available") : error("Pretrained Weights for VGG11 are not available") + +Base.show(io::IO, ::VGG11) = print(io, "VGG11()") + +@treelike VGG11 + +(m::VGG11)(x) = m.layers(x) + +struct VGG13 <: ClassificationModel{ImageNet.ImageNet1k} + layers::Chain +end + +VGG13(batchnorm::Bool = false) = VGG13(load_vgg(vgg_configs["vgg13"], batchnorm)) + +trained(::Type{VGG13}, batchnorm::Bool = false) = + batchnorm ? 
error("Pretrained Weights for VGG13 BatchNorm are not available") : error("Pretrained Weights for VGG13 are not available") + +Base.show(io::IO, ::VGG13) = print(io, "VGG13()") + +@treelike VGG13 + +(m::VGG13)(x) = m.layers(x) + +struct VGG16 <: ClassificationModel{ImageNet.ImageNet1k} + layers::Chain +end + +VGG16(batchnorm::Bool = false) = VGG16(load_vgg(vgg_configs["vgg16"], batchnorm)) + +trained(::Type{VGG16}, batchnorm::Bool = false) = + batchnorm ? error("Pretrained Weights for VGG16 BatchNorm are not available") : error("Pretrained Weights for VGG16 are not available") + +Base.show(io::IO, ::VGG16) = print(io, "VGG16()") + +@treelike VGG16 + +(m::VGG16)(x) = m.layers(x) + +struct VGG19 <: ClassificationModel{ImageNet.ImageNet1k} + layers::Chain +end + +VGG19(batchnorm::Bool = false) = VGG19(load_vgg(vgg_configs["vgg19"], batchnorm)) + +trained(::Type{VGG19}, batchnorm::Bool = false) = + batchnorm ? error("Pretrained Weights for VGG19 BatchNorm are not available") : VGG19(trained_vgg19_layers()) + +Base.show(io::IO, ::VGG19) = print(io, "VGG19()") + +@treelike VGG19 + +(m::VGG19)(x) = m.layers(x) diff --git a/src/vgg19.jl b/src/vgg19.jl deleted file mode 100644 index 071e52313..000000000 --- a/src/vgg19.jl +++ /dev/null @@ -1,46 +0,0 @@ -function vgg19_layers() - ws = weights("vgg19.bson") - ls = Chain( - Conv(ws[:conv1_1_w_0][end:-1:1,:,:,:][:,end:-1:1,:,:], ws[:conv1_1_b_0], relu, pad = (1,1), stride = (1,1), dilation = (1,1)), - Conv(ws[:conv1_2_w_0][end:-1:1,:,:,:][:,end:-1:1,:,:], ws[:conv1_2_b_0], relu, pad = (1,1), stride = (1,1), dilation = (1,1)), - x -> maxpool(x, (2,2)), - Conv(ws[:conv2_1_w_0][end:-1:1,:,:,:][:,end:-1:1,:,:], ws[:conv2_1_b_0], relu, pad = (1,1), stride = (1,1), dilation = (1,1)), - Conv(ws[:conv2_2_w_0][end:-1:1,:,:,:][:,end:-1:1,:,:], ws[:conv2_2_b_0], relu, pad = (1,1), stride = (1,1), dilation = (1,1)), - x -> maxpool(x, (2,2)), - Conv(ws[:conv3_1_w_0][end:-1:1,:,:,:][:,end:-1:1,:,:], ws[:conv3_1_b_0], relu, pad = (1,1), stride = (1,1), dilation = (1,1)), - Conv(ws[:conv3_2_w_0][end:-1:1,:,:,:][:,end:-1:1,:,:], ws[:conv3_2_b_0], relu, pad = (1,1), stride = (1,1), dilation = (1,1)), - Conv(ws[:conv3_3_w_0][end:-1:1,:,:,:][:,end:-1:1,:,:], ws[:conv3_3_b_0], relu, pad = (1,1), stride = (1,1), dilation = (1,1)), - Conv(ws[:conv3_4_w_0][end:-1:1,:,:,:][:,end:-1:1,:,:], ws[:conv3_4_b_0], relu, pad = (1,1), stride = (1,1), dilation = (1,1)), - x -> maxpool(x, (2,2)), - Conv(ws[:conv4_1_w_0][end:-1:1,:,:,:][:,end:-1:1,:,:], ws[:conv4_1_b_0], relu, pad = (1,1), stride = (1,1), dilation = (1,1)), - Conv(ws[:conv4_2_w_0][end:-1:1,:,:,:][:,end:-1:1,:,:], ws[:conv4_2_b_0], relu, pad = (1,1), stride = (1,1), dilation = (1,1)), - Conv(ws[:conv4_3_w_0][end:-1:1,:,:,:][:,end:-1:1,:,:], ws[:conv4_3_b_0], relu, pad = (1,1), stride = (1,1), dilation = (1,1)), - Conv(ws[:conv4_4_w_0][end:-1:1,:,:,:][:,end:-1:1,:,:], ws[:conv4_4_b_0], relu, pad = (1,1), stride = (1,1), dilation = (1,1)), - x -> maxpool(x, (2,2)), - Conv(ws[:conv5_1_w_0][end:-1:1,:,:,:][:,end:-1:1,:,:], ws[:conv5_1_b_0], relu, pad = (1,1), stride = (1,1), dilation = (1,1)), - Conv(ws[:conv5_2_w_0][end:-1:1,:,:,:][:,end:-1:1,:,:], ws[:conv5_2_b_0], relu, pad = (1,1), stride = (1,1), dilation = (1,1)), - Conv(ws[:conv5_3_w_0][end:-1:1,:,:,:][:,end:-1:1,:,:], ws[:conv5_3_b_0], relu, pad = (1,1), stride = (1,1), dilation = (1,1)), - Conv(ws[:conv5_4_w_0][end:-1:1,:,:,:][:,end:-1:1,:,:], ws[:conv5_4_b_0], relu, pad = (1,1), stride = (1,1), dilation = (1,1)), - x -> maxpool(x, (2,2)), - x -> reshape(x, :, size(x, 
4)), - Dense(ws[:fc6_w_0]', ws[:fc6_b_0], relu), - Dropout(0.5f0), - Dense(ws[:fc7_w_0]', ws[:fc7_b_0], relu), - Dropout(0.5f0), - Dense(ws[:fc8_w_0]', ws[:fc8_b_0]), - softmax) - Flux.testmode!(ls) - return ls -end - -struct VGG19 <: ClassificationModel{ImageNet.ImageNet1k} - layers::Chain -end - -VGG19() = VGG19(vgg19_layers()) - -Base.show(io::IO, ::VGG19) = print(io, "VGG19()") - -@treelike VGG19 - -(m::VGG19)(x) = m.layers(x) diff --git a/test/runtests.jl b/test/runtests.jl index 4fd9fd7dc..2a564c35e 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,14 +1,112 @@ -using Metalhead, Test +using Metalhead, Flux, Test, InteractiveUtils # Standardized testing for the models of tomorrow -@testset "Basic Model Tests" begin +@testset "Untrained Model Tests" begin for (T, MODEL) in [ - (Float32, VGG19), - (Float32, SqueezeNet), - (Float64, DenseNet), - (Float64, GoogleNet), + (Float64, VGG11), + (Float64, VGG13), + (Float64, VGG16), + (Float64, VGG19), + (Float64, ResNet18), + (Float64, ResNet34), + (Float64, ResNet50), + (Float64, ResNet101), + (Float64, ResNet152), + (Float64, DenseNet121), + (Float64, DenseNet169), + (Float64, DenseNet201), + (Float64, DenseNet264), + (Float64, GoogleNet) ] + GC.gc() + model = MODEL() + model = Flux.mapleaves(Flux.Tracker.data, model) + + x_test = rand(T, 224, 224, 3, 1) + y_test = model(x_test) + + # Test that types and shapes work out as we expect + @test y_test isa AbstractArray + @test length(y_test) == 1000 + + # Test that the models can be indexed + @test length(model.layers[1:4].layers) == 4 + + # Make all the allocations nothing for GC to free them + model = nothing + x_test = nothing + y_test = nothing + end + GC.gc() + # Test if batchnorm models work properly + for (T, MODEL) in [ + (Float64, VGG19), + (Float64, VGG16), + (Float64, VGG13), + (Float64, VGG11) + ] + GC.gc() + + model = MODEL(true) + model = Flux.mapleaves(Flux.Tracker.data, model) + + x_test = rand(T, 224, 224, 3, 1) + y_test = model(x_test) + + # Test that types and shapes work out as we expect + @test y_test isa AbstractArray + @test length(y_test) == 1000 + + # Test that the models can be indexed + @test length(model.layers[1:4].layers) == 4 + + # Make all the allocations nothing for GC to free them + model = nothing + x_test = nothing + y_test = nothing + end + GC.gc() + # Test models which have a version parameter + for (T, version, MODEL) in [ + (Float64, "1.0", SqueezeNet), + (Float64, "1.1", SqueezeNet) + ] + GC.gc() + + model = MODEL(version) + model = Flux.mapleaves(Flux.Tracker.data, model) + + x_test = rand(T, 224, 224, 3, 1) + y_test = model(x_test) + + # Test that types and shapes work out as we expect + @test y_test isa AbstractArray + @test length(y_test) == 1000 + + # Test that the models can be indexed + @test length(model.layers[1:4].layers) == 4 + + # Make all the allocations nothing for GC to free them + model = nothing + x_test = nothing + y_test = nothing + end + GC.gc() +end + +@testset "Trained Model Tests" begin + for (T, MODEL) in [ + (Float32, SqueezeNet), + (Float32, VGG19), + (Float32, ResNet50), + (Float32, DenseNet121), + (Float32, GoogleNet) + ] + GC.gc() + + model = trained(MODEL) + model = Flux.mapleaves(Flux.Tracker.data, model) x_test = rand(T, 224, 224, 3, 1) y_test = model(x_test) @@ -19,6 +117,11 @@ using Metalhead, Test # Test that the models can be indexed @test length(model.layers[1:4].layers) == 4 + + # Make all the allocations nothing for GC to free them + model = nothing + x_test = nothing + y_test = nothing end end @@ -35,13 
+138,14 @@ end # Test printing of prediction @testset "Prediction table display" begin x = valimgs(CIFAR10)[1] - m = VGG19() + m = trained(VGG19) predict(m, x) end # Just run the prediction code end-to-end # TODO: Set up travis to actually run these if length(datasets()) == 2 + vgg19 = trained(VGG19) for dataset in (ImageNet, CIFAR10) val1 = valimgs(dataset)[1] predict(vgg19, val1)
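
For quick reference while reviewing, a minimal usage sketch of the API introduced by this diff, assuming the package and its bundled weight files are installed; every identifier below (the model constructors, `trained`, `predict`, `valimgs`, `CIFAR10`) comes from the exports and tests in the patch, and the chosen models are only illustrative:

using Metalhead

# Randomly initialised models: each exported architecture has a zero-argument constructor.
resnet = ResNet34()

# Pretrained ImageNet weights load through `trained`; per the README list above only
# VGG19, ResNet50, GoogleNet, SqueezeNet v1.1 and DenseNet121 ship weights, the rest error.
vgg = trained(VGG19)

# VGG constructors take an optional batch-norm flag; SqueezeNet takes a version string.
vgg16_bn = VGG16(true)
squeeze  = trained(SqueezeNet, "1.1")

# Classify a validation image, mirroring the updated test suite.
img = valimgs(CIFAR10)[1]
predict(vgg, img)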