In [1]:
# GPU: 32*40 images in 8.00s = 160 images/s
# CPU: 32*8 images in 115.0s ≈ 2.2 images/s

In [2]:
# After installing and starting Julia run the following to install the required packages:
# Pkg.init(); Pkg.update()
# for p in ("CUDAapi","CUDAdrv","MAT","Images","IJulia","Knet"); Pkg.add(p); end
# Pkg.checkout("Knet","ilkarman") # make sure we have the right Knet version
# Pkg.build("Knet")

In [3]:
using Knet
include(Knet.dir("examples","resnet", "resnetlib.jl"))
using ResNetLib: resnet50init, resnet50
# Record the environment (kernel, Julia version, installed Knet version) the
# timings below were measured in
println("OS: ", Sys.KERNEL)
println("Julia: ", VERSION)
println("Knet: ", Pkg.installed("Knet"))

OS: Linux
Julia: 0.6.1
Knet: 0.8.5+


In [4]:
;cat /proc/cpuinfo '|' grep processor '|' wc -l

6


In [5]:
;nvidia-smi --query-gpu=gpu_name --format=csv

name
Tesla K80


In [6]:
# Size of each minibatch; fakedata passes this to minibatch()
const BATCH_SIZE = 32
# Not referenced in the visible cells; presumably the length of the ResNet-50
# feature vector — TODO confirm
const RESNET_FEATURES = 2048
# Number of minibatches generated for the GPU cold-start and benchmark runs
const BATCHES_GPU = 40
# Number of minibatches generated for the CPU benchmark run
const BATCHES_CPU = 8

8

In [7]:
# Build `batches` minibatches of random input.
# A Float32 array of size 224 x 224 x 3 x (BATCH_SIZE * batches) is filled by
# rand and handed to minibatch with batch size BATCH_SIZE; `atype` is forwarded
# as minibatch's `xtype` keyword.
function fakedata(batches; atype=KnetArray)
    nsamples = BATCH_SIZE * batches
    images = rand(Float32, 224, 224, 3, nsamples)
    return minibatch(images, BATCH_SIZE, xtype=atype)
end

fakedata (generic function with 1 method)

In [8]:
# Return features from the classifier for every minibatch in `data`.
# Each minibatch is run through resnet50 (stage=5) with the given weights and
# moments, the result is passed through mat, and all per-batch results are
# concatenated horizontally and converted to an Array.
function predictfn(weights, moments, data)
    features = []
    for batch in data
        push!(features, mat(resnet50(weights, moments, batch; stage=5)))
    end
    joined = hcat(features...)
    return Array(joined)
end

predictfn (generic function with 1 method)

## 1. GPU

In [9]:
# Initialize resnet weights and moments with atype=KnetArray
# (the fake input data is created in the timing cells, not here)
# Drop any references left from a previous run before calling knetgc()
gpuweights = gpumoments = nothing; knetgc() # clear memory from previous run
# trained=true: the cell output shows the pretrained weights being loaded
gpuweights, gpumoments = resnet50init(;stage=5, trained=true, atype=KnetArray);

[1m[36mINFO: [39m[22m[36mLoading pretrained weights...
[39m[1m[36mINFO: [39m[22m[36mLoading imagenet-resnet-50-dag.mat...
[39m

In [10]:
# First GPU run, timed separately so that one-time costs of the first call
# (e.g. compilation) are not counted in the benchmark run
info("Cold start")
gpudata1 = fakedata(BATCHES_GPU, atype=KnetArray)
@time predictfn(gpuweights, gpumoments, gpudata1);

[1m[36mINFO: [39m[22m[36mCold start
[39m

 21.220333 seconds (1.93 M allocations: 842.832 MiB, 35.05% gc time)


In [11]:
# Timed GPU run on freshly generated data: BATCHES_GPU minibatches of
# BATCH_SIZE samples each
info("Benchmarking")
gpudata = fakedata(BATCHES_GPU, atype=KnetArray)
@time predictfn(gpuweights, gpumoments, gpudata);

[1m[36mINFO: [39m[22m[36mBenchmarking
[39m

  8.002292 seconds (360.61 k allocations: 760.376 MiB, 3.82% gc time)


## 2. CPU

In [12]:
# Initialize resnet weights and moments with atype=Array for the CPU run
# (same stage and pretrained setting as the GPU initialization)
cpuweights, cpumoments = resnet50init(;stage=5, trained=true, atype=Array);

[1m[36mINFO: [39m[22m[36mLoading pretrained weights...
[39m

In [13]:
# First CPU run on a single minibatch, timed separately so that one-time costs
# of the first call (e.g. compilation) are not counted in the benchmark run
info("Cold start")
cpudata1 = fakedata(1, atype=Array);
@time predictfn(cpuweights, cpumoments, cpudata1);

[1m[36mINFO: [39m[22m[36mCold start
[39m

 25.160136 seconds (14.20 M allocations: 4.351 GiB, 10.91% gc time)


In [14]:
# Timed CPU run on freshly generated data: BATCHES_CPU minibatches of
# BATCH_SIZE samples each
info("Benchmarking")
cpudata = fakedata(BATCHES_CPU, atype=Array);
@time predictfn(cpuweights, cpumoments, cpudata);

[1m[36mINFO: [39m[22m[36mBenchmarking
[39m

115.024997 seconds (174.89 k allocations: 30.150 GiB, 15.85% gc time)
