## A complete MNIST training code from the codes at L8-Knet and L9

In [1]:
# Definition of pacakages and functions as well as the download of MNIST data go here.
using AutoGrad
using Knet, Plots, DataFrames
gr()

for p in ("GZip",)
    Pkg.installed(p) == nothing && Pkg.add(p)
end

using GZip

# atype definition:
atype=Array{Float32};

# Functions for MNIST data download next:
"Where to download mnist from"
mnisturl = "http://yann.lecun.com/exdb/mnist"

"Where to download mnist to"
mnistdir = "./"

"""
This utility loads the [MNIST](http://yann.lecun.com/exdb/mnist)
hand-written digits dataset.  There are 60000 training and 10000 test
examples. Each input x consists of 784 pixels representing a 28x28
grayscale image.  The pixel values are converted to Float32 and
normalized to [0,1].  Each output y is a UInt8 indicating the correct
class.  10 is used to represent the digit 0.
```
# Usage:
include(Pkg.dir("Knet/data/mnist.jl"))
xtrn, ytrn, xtst, ytst = mnist()
# xtrn: 28×28×1×60000 Array{Float32,4}
# ytrn: 60000-element Array{UInt8,1}
# xtst: 28×28×1×10000 Array{Float32,4}
# ytst: 10000-element Array{UInt8,1}
```
"""
function mnist()
    global _mnist_xtrn,_mnist_ytrn,_mnist_xtst,_mnist_ytst
    if !isdefined(:_mnist_xtrn)
        info("Loading MNIST...")
        _mnist_xtrn = _mnist_xdata("train-images-idx3-ubyte.gz")
        _mnist_xtst = _mnist_xdata("t10k-images-idx3-ubyte.gz")
        _mnist_ytrn = _mnist_ydata("train-labels-idx1-ubyte.gz")
        _mnist_ytst = _mnist_ydata("t10k-labels-idx1-ubyte.gz")
    end
    return _mnist_xtrn,_mnist_ytrn,_mnist_xtst,_mnist_ytst
end

"Utility to view a MNIST image, requires the Images package"
mnistview(x,i)=colorview(Gray,permutedims(x[:,:,1,i],(2,1)))

function _mnist_xdata(file)
    a = _mnist_gzload(file)[17:end]
    reshape(a ./ 255f0, (28,28,1,div(length(a),784)))
end

function _mnist_ydata(file)
    a = _mnist_gzload(file)[9:end]
    a[a.==0] = 10
    # full(sparse(a,1:length(a),1f0,10,length(a)))
    return a
end

function _mnist_gzload(file)
    if !isdir(mnistdir)
        mkpath(mnistdir)
    end
    path = joinpath(mnistdir,file)
    if !isfile(path)
        url = "$mnisturl/$file"
        download(url, path)
    end
    f = gzopen(path)
    a = read(f)
    close(f)
    return(a)
end

# The function to set the initial weights of Network:
function initweights(d, scale=0.01; hidden=[2], atype=Array{Float32})
    model = Vector{Any}(2 * length(hidden))
    X = d
    for k = 1:length(hidden)
        H = hidden[k]
        model[2k - 1] = scale * randn(H, X) 
        model[2k]     = scale * randn(H, 1)
        X = H
    end
    return map(atype, model)
end

# Function to initialize the model Neural Network:
#    num_inputs: Number of input values in input layer
#    num_hidden: Number of nodes at each hidden layers
#    num_outputs: Number of values at output layer
#    hidden: the list of layers that does not include input layer
function initmodel(atype;num_inputs=784,num_hidden=256,num_outputs=10)
    return initweights(num_inputs,hidden=[num_hidden,num_hidden,num_outputs]; atype=atype);
end

# The next function defines how we determine the prediction:
#  w: tensor of weights (See length(w) for first dimension, which is the number of layers in NN!)
#     But wait, the number of layers are doubled since there are also bias values. 
#     Check the initweights function!
#  x: input values
function predict(w, x)
    x = mat(x)
    for i=1:2:length(w) - 2
        x = relu.(w[i] * x .+ w[i+1]) # bias an weights are concatendated 
    end
    return w[end - 1]*x .+ w[end]
end

# Definition of optimizer (Here it is SGD, Stochastic Gradient Descent)
optim(w; lr=0.01) = optimizers(w, Sgd;  lr=lr);

# Definition of loss function and its gradient:
loss(w, x, ygold, predict) = nll(predict(w, x), ygold);
lossgradient = grad(loss); # AutoGrad means we don't need backpropagation

# Definition of training function:
function train(w, dtrn, optim, predict; epochs=10)
    for epoch = 1:epochs
        for (x, y) in dtrn
            g = lossgradient(w, x, y, predict)
            update!(w, g, optim) ## this a generic train loop the gradient update can be replaced as appropriate
        end
    end
end

# Fancy printing of how successful is the network predictions: accuracy of training (trn) and test (tst) datasets
function report(epoch, w, dtrn, dtst, predict)
    println((:epoch, epoch, :trn, accuracy(w, dtrn, predict), :tst, accuracy(w, dtst, predict)))
end

# MNIST download:
xtrn, ytrn, xtst, ytst = mnist()


[1m[36mINFO: [39m[22m[36mLoading MNIST...
[39m

(Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]

Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]

Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]

...

Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]

Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]

Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], UInt8[0x05, 0x0a, 0x04, 0x01, 0x09, 0x02, 0x01, 0x03, 0x01, 0x04  …  0x09, 0x02, 0x09, 0x05, 0x01, 0x08, 0x03, 0x05, 0x06, 0x08], Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]

Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]

Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]

...

Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 

In [2]:
# Initialization of the model and training go here.
 
# Minibatch definitions:
size_of_batch = 100
dtrn = minibatch(xtrn, ytrn, size_of_batch; xtype=atype);
dtst = minibatch(xtst, ytst, size_of_batch; xtype=atype);

# Activation function definitions:
#softmax(z) = exp.(z) ./ sum(exp.(z), 1)
#cross_entropy(yhat, y) = - sum(y .* log.(yhat), 1)

# And finally training the network!
w   = initmodel(atype);
opt = optim(w, lr=1e-1);
fast=true
nepochs=20
if fast
    train(w, dtrn, opt, predict; epochs=nepochs)
    report(nepochs, w, dtrn, dtst, predict)
else
    for epoch = 1:nepochs
        train(w, dtrn, opt, predict, epochs=epoch)
        report(epoch, w, dtrn, dtst, predict)
    end
end

(:epoch, 1, :trn, 0.9137833333333333, :tst, 0.9113)


In [3]:
# The weights of the trained NN so far
w

6-element Array{Array{Float32,2},1}:
 Float32[0.017359 0.00673276 … -0.00325596 -0.00699815; -0.0089351 0.00633085 … -0.0150963 0.0129551; … ; -0.00258225 0.0124222 … -0.00728731 -0.014594; -0.00345332 0.00509166 … -0.00439187 -0.00410329]
 Float32[0.0214714; -0.00607682; … ; 0.00876192; -0.0190833]                                                                                                                                              
 Float32[0.0144674 0.00230694 … 0.019803 0.0121752; 0.00831516 -0.0247813 … 0.119893 0.0385364; … ; 0.0319464 0.0447994 … 0.0185448 -0.000125911; 0.0415982 -0.00412595 … 0.0238766 -0.0213153]           
 Float32[-0.0216828; 0.0630762; … ; -0.0639897; 0.0787055]                                                                                                                                                
 Float32[-0.0368239 0.205578 … -0.189907 0.219468; 0.0419893 0.0474131 … 0.111218 -0.149322; … ; 0.0144785 0.0665358 … 0.0546254 0.114373; 0.00943776 -

In [4]:
# Accuracy of the trained NN for training (trn) and test (tst) datasets (minibatches)
println((:trn, accuracy(w, dtrn, predict), :tst, accuracy(w, dtst, predict)))

(:trn, 0.9137833333333333, :tst, 0.9113)


## Finding BEST classified MNIST digits using a pretrained NN

In [20]:
using JLD
# Loading weights of pretrained NN.
w = load("MNISTbestWeights.jld","w")

8-element Array{Array{Float32,N} where N,1}:
 Float32[-0.20782 -0.0893097 … 0.164467 -0.0792276; 0.159104 0.033714 … -0.032242 0.0341693; … ; -0.116886 -0.105403 … -0.0528396 0.150365; -0.121344 0.0353723 … 0.143916 -0.0214616]

Float32[-0.204196 0.0813816 … 0.21809 0.282282; -0.238804 -0.247869 … 0.126207 0.160308; … ; -0.258774 -0.108804 … 0.0509252 0.204242; 0.0627572 -0.210232 … -0.00881229 -0.0925261]

Float32[-0.0375886 -0.0965139 … -0.0443734 -0.156997; -0.151887 0.168425 … -0.0559682 0.011378; … ; -0.103611 -0.169165 … -0.119457 0.131616; 0.191033 -0.107757 … 0.192663 0.220189]

...

Float32[0.0623074 0.185271 … -0.206776 -0.155101; -0.0142799 0.215816 … -0.198127 0.0495622; … ; 0.0103011 0.190939 … 0.0226201 -0.133638; -0.158723 0.279516 … -0.0424811 -0.191937]

Float32[-0.0137954 -0.0169536 … -0.0646714 0.0466605; 0.13701 0.352682 … 0.00912433 -0.0173363; … ; 0.172771 0.248079 … 0.00444711 -0.155721; 0.129662 -0.143648 … 0.135474 -0.2017]

Float32[0.231305 0.0467362 … -0.0464

In [21]:
# Input images of test dataset
xtst

28×28×1×10000 Array{Float32,4}:
[:, :, 1, 1] =
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0       0.0       0.0       0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0       0.0       0.0       0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0       0.0       0.0       0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0       0.0       0.0       0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0       0.0       0.0       0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0       0.0       0.0       0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0       0.0       0.0       0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0       0.0       0.0       0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0       0.0       0.0       0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0       0.0       0.0       0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.239216  0.47451   0.47451   0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.94902   0.996078  0.996078  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.996078  0.996078  0.811765  0.0
 ⋮  

In [22]:
# Target digits for test dataset
# Labels are from 0 to 9
1ytst-1

10000-element Array{Int64,1}:
 6
 1
 0
 9
 3
 0
 3
 8
 4
 8
 9
 5
 8
 ⋮
 4
 5
 6
 7
 8
 9
 0
 1
 2
 3
 4
 5

In [23]:
# Select all test data for one big batch. mini batch step = 1 <==> every image in dataset
dtrn = minibatch(xtrn, ytrn, 1; xtype=atype);
dtst = minibatch(xtst, ytst, 1; xtype=atype);

In [24]:
dtst

Knet.MB(Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], UInt8[0x07 0x02 … 0x05 0x06], 1, 10000, false, 1:10000, [28, 28, 1, 10000], [10000], Array{Float32,N} where N, Array{UInt8,1})

In [27]:
# Let's check test dataset of 10000 images
for (x, y) in dtst
    pred = predict(w, x)
    correct = accuracy(pred, y)
    acc = nll(pred, y)
    if correct > 0 && acc < 1.0e-4
        println((:correct, correct, :accuracy, acc, :digit, 1y[1]))
    end
end

LoadError: [91mMethodError: no method matching *(::Array{Float32,4}, ::Array{Float32,2})[0m
Closest candidates are:
  *(::Any, ::Any, [91m::Any[39m, [91m::Any...[39m) at operators.jl:424
  *([91m::Type{AutoGrad.Grad{1}}[39m, ::Any, [91m::Any[39m, [91m::AutoGrad.Rec{##1109<:Number}[39m, [91m::AutoGrad.Rec{##1110<:Number}[39m) where {##1109<:Number, ##1110<:Number} at :0
  *([91m::Type{AutoGrad.Grad{1}}[39m, ::Any, [91m::Any[39m, [91m::##1109<:Number[39m, [91m::AutoGrad.Rec{##1110<:Number}[39m) where {##1109<:Number, ##1110<:Number} at :0
  ...[39m

## How to download noMNIST data

In [None]:
using AutoGrad
using Knet, Plots, DataFrames
gr()
using MAT

# Functions for MNIST data download next:
"Where to download mnist from"
mnisturl = "http://yaroslavvb.com/upload/notMNIST/"

"Where to download mnist to"
mnistdir = "./"

function _mnist_load_mat(file)
    if !isdir(mnistdir)
        mkpath(mnistdir)
    end
    path = joinpath(mnistdir,file)
    if !isfile(path)
        url = "$mnisturl/$file"
        download(url, path)
    end
    f = matopen(path)
    a = read(f)
    close(f)
    return(a)
end

noMNIST_data = _mnist_load_mat("notMNIST_small.mat")


Dict{String,Any} with 2 entries:
  "images" => [0.0 0.0 … 255.0 255.0; 0.0 0.0 … 255.0 255.0; … ; 237.0 255.0 … …
  "labels" => [9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0  …  2.0, 2.0, 2…

In [None]:
xall = noMNIST_data["images"]
yall = noMNIST_data["labels"]
xtrn_ids = rand(length(xall)) .< 1/2 
xtst_ids = .!xtrn_ids
xtrn = xall[xtrn_ids]
xtst = xall[xtst_ids]
ytrn = yall[xtrn_ids]
ytst = yall[xtst_ids]

In [6]:
data["labels"]

18724-element Array{Float64,1}:
 9.0
 9.0
 9.0
 9.0
 9.0
 9.0
 9.0
 9.0
 9.0
 9.0
 9.0
 9.0
 9.0
 ⋮  
 2.0
 2.0
 2.0
 2.0
 2.0
 2.0
 2.0
 2.0
 2.0
 2.0
 2.0
 2.0