In [1]:
"""
    sigmoid(z)

Compute the logistic function `1 / (1 + exp(-z))` element-wise.

`z` may be a scalar or an array; the result has the same shape as `z`.
"""
function sigmoid(z)
    # Every operator is dotted so the expression broadcasts over arrays and
    # scalars alike, instead of relying on implicit scalar/array arithmetic
    # (and on the ambiguous `1./` juxtaposition of a numeric literal and a dot).
    s = 1 ./ (1 .+ exp.(-z))

    return s
end

sigmoid (generic function with 1 method)

In [2]:
"""
    initialize_with_zeros(dim::Integer)

Return the starting parameters `(w, b)`: `w` is a `dim`×1 matrix of zeros and
`b` is the scalar `0.0`.
"""
function initialize_with_zeros(dim::Integer)
    weights = zeros(dim, 1)
    bias = 0.0
    return (weights, bias)
end

initialize_with_zeros (generic function with 1 method)

In [3]:
"""
    propagate(w, b, X, Y)

Evaluate the logistic-regression cost and its gradients for the parameters
`w` and `b`.

The activations are `sigmoid(w' * X .+ b)`. The cost is the binary
cross-entropy between those activations and `Y`, divided by `m`, the number
of columns of `X`.

# Returns
A tuple `(grads, cost)` where `grads` is a `Dict` holding the gradient with
respect to `w` under `"dw"` and the gradient with respect to `b` under `"db"`.
"""
function propagate(w, b, X, Y)
    m = size(X, 2)

    # Scalar/array operations are written with dotted operators so they
    # broadcast explicitly rather than relying on implicit scalar/array arithmetic.
    A = sigmoid(w' * X .+ b)
    cost = -sum(Y .* log.(A) .+ (1 .- Y) .* log.(1 .- A)) / m

    # Plain (non-broadcast) subtraction on purpose: A and Y must have matching
    # shapes, and a mismatch should raise instead of silently broadcasting.
    A_Y = A - Y

    dw = (X * A_Y') / m
    db = sum(A_Y) / m

    grads = Dict("dw" => dw,
                 "db" => db)
    return grads, cost
end

propagate (generic function with 1 method)

In [4]:
"""
    optimize(w, b, X, Y; num_iterations = 1000, learning_rate = 0.05,
             print_cost = false, print_every = 100)

Run `num_iterations` steps of gradient descent on `w` and `b`, using
`propagate` to obtain the cost and gradients at every step.

# Keywords
- `num_iterations`: number of update steps.
- `learning_rate`: step size multiplied into each gradient.
- `print_cost`: when `true`, print the cost each time it is recorded.
- `print_every`: the cost is recorded on every iteration that is a multiple of
  this value.

# Returns
A tuple `(params, grads, costs)`:
- `params`: `Dict` with the final `"w"` and `"b"`.
- `grads`: `Dict` with the gradients `"dw"` and `"db"` from the last iteration.
- `costs`: the recorded costs, in iteration order.
"""
function optimize(w, b, X, Y; num_iterations = 1000, learning_rate = 0.05,
                  print_cost = false, print_every = 100)

    # Concretely typed so the cost history is not stored as a Vector{Any}.
    costs = Float64[]

    # Start from zero gradients shaped like the parameters, so the returned
    # gradients are well-formed even when the loop body never runs.
    dw, db = zero(w), zero(b)

    for i in 1:num_iterations
        grads, cost = propagate(w, b, X, Y)

        dw = grads["dw"]
        db = grads["db"]

        w = w - learning_rate .* dw
        b = b - learning_rate .* db

        if i % print_every == 0
            push!(costs, cost)
            if print_cost
                println("Cost after iteration $i :  $cost")
            end
        end
    end

    params = Dict("w" => w,
                  "b" => b)

    grads = Dict("dw" => dw,
                 "db" => db)

    return params, grads, costs
end

optimize (generic function with 1 method)

In [5]:
"""
    predict(w, b, X)

Classify every column of `X` with the logistic-regression parameters `w`, `b`.

`w` is reshaped to a `size(X, 1)`×1 column before use. The result is a
1×`size(X, 2)` `Float64` array containing `1.0` where the activation is at
least `0.5` and `0.0` elsewhere.
"""
function predict(w, b, X)
    w = reshape(w, size(X, 1), 1)

    # `.+` broadcasts the scalar bias over the row of scores explicitly.
    A = sigmoid(w' * X .+ b)

    # Threshold the activations and convert the Bool mask to 0.0 / 1.0.
    Y_prediction = Float64.(A .>= 0.5)

    return Y_prediction
end

predict (generic function with 1 method)

In [6]:
"""
    model(X_train, Y_train, X_test, Y_test; num_iterations = 2000,
          learning_rate = 0.05, print_cost = false, print_every = 100)

Train a logistic-regression classifier on `(X_train, Y_train)` and report its
accuracy on both the training and the test data.

`X_train` and `X_test` are two-dimensional and must have the same number of
rows. All four inputs are converted with `float`, and the labels are reshaped
to 1×(number of columns of the matching `X`).

# Keywords
Forwarded unchanged to `optimize`: `num_iterations`, `learning_rate`,
`print_cost`, `print_every`.

# Returns
A `Dict` with the keys `"costs"`, `"Y_prediction_test"`,
`"Y_prediction_train"`, `"w"`, `"b"`, `"learning_rate"` and `"num_iterations"`.

# Throws
- `AssertionError` if `X_train` and `X_test` have different numbers of rows.
"""
function model(X_train, Y_train, X_test, Y_test;
        num_iterations = 2000, learning_rate = 0.05, print_cost = false, print_every = 100)

    nx_train, mtrain = size(X_train)
    nx_test, mtest = size(X_test)

    # Explicit check (never compiled out) that raises the same exception type
    # as an assertion, with a message that names both sizes.
    nx_train == nx_test || throw(AssertionError(
        "X_train has $nx_train rows but X_test has $nx_test rows"))

    X_train, Y_train, X_test, Y_test = float(X_train), float(Y_train), float(X_test), float(Y_test)
    Y_train = reshape(Y_train, (1, mtrain))
    Y_test = reshape(Y_test, (1, mtest))

    w, b = initialize_with_zeros(size(X_train, 1))

    parameters, grads, costs = optimize(w, b, X_train, Y_train,
                                        num_iterations = num_iterations,
                                        learning_rate = learning_rate,
                                        print_cost = print_cost,
                                        print_every = print_every)

    w = parameters["w"]
    b = parameters["b"]

    Y_prediction_test = predict(w, b, X_test)
    Y_prediction_train = predict(w, b, X_train)

    # Mean absolute error written as sum / length so it needs nothing beyond Base.
    train_error = sum(abs.(Y_prediction_train - Y_train)) / length(Y_train)
    test_error = sum(abs.(Y_prediction_test - Y_test)) / length(Y_test)

    println("train accuracy: $(100 - train_error * 100)")
    println("test accuracy: $(100 - test_error * 100)")

    d = Dict("costs" => costs,
             "Y_prediction_test" => Y_prediction_test,
             "Y_prediction_train" => Y_prediction_train,
             "w" => w,
             "b" => b,
             "learning_rate" => learning_rate,
             "num_iterations" => num_iterations)

    return d
end

model (generic function with 1 method)

In [7]:
# One-time setup: uncomment the next line to install MLDatasets if it is missing.
#Pkg.add("MLDatasets")
using MLDatasets

# Load the MNIST training and test splits. Each call returns a tuple that is
# destructured into the feature array (`*_x`) and the label vector (`*_y`).
train_x, train_y = MNIST.traindata()
test_x,  test_y  = MNIST.testdata()

(FixedPointNumbers.Normed{UInt8,8}[0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8; 0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8; … ; 0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8; 0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8]

FixedPointNumbers.Normed{UInt8,8}[0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8; 0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8; … ; 0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8; 0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8]

FixedPointNumbers.Normed{UInt8,8}[0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8; 0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8; … ; 0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8; 0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8]

...

FixedPointNumbers.Normed{UInt8,8}[0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8; 0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8; … ; 0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8; 0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8]

FixedPointNumbers.Normed{UInt8,8}[0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8; 0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8; … ; 0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8; 0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8]

FixedPointNumbers.Normed{UInt8,8}[0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8; 0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N

In [8]:
# Print the dimensions of the training features, then of the training labels.
for arr in (train_x, train_y)
    println(size(arr))
end

(28, 28, 60000)
(60000,)


In [9]:
# Flatten each 28×28 image into one column of 28*28 = 784 values. The number of
# columns is inferred with `:` instead of being hard-coded, so the same lines
# work for any number of samples.
train_x = reshape(train_x, 28 * 28, :)
test_x = reshape(test_x, 28 * 28, :)

784×10000 Array{N0f8,2}:
 0.0N0f8  0.0N0f8  0.0N0f8  0.0N0f8  …  0.0N0f8  0.0N0f8  0.0N0f8  0.0N0f8
 0.0N0f8  0.0N0f8  0.0N0f8  0.0N0f8     0.0N0f8  0.0N0f8  0.0N0f8  0.0N0f8
 0.0N0f8  0.0N0f8  0.0N0f8  0.0N0f8     0.0N0f8  0.0N0f8  0.0N0f8  0.0N0f8
 0.0N0f8  0.0N0f8  0.0N0f8  0.0N0f8     0.0N0f8  0.0N0f8  0.0N0f8  0.0N0f8
 0.0N0f8  0.0N0f8  0.0N0f8  0.0N0f8     0.0N0f8  0.0N0f8  0.0N0f8  0.0N0f8
 0.0N0f8  0.0N0f8  0.0N0f8  0.0N0f8  …  0.0N0f8  0.0N0f8  0.0N0f8  0.0N0f8
 0.0N0f8  0.0N0f8  0.0N0f8  0.0N0f8     0.0N0f8  0.0N0f8  0.0N0f8  0.0N0f8
 0.0N0f8  0.0N0f8  0.0N0f8  0.0N0f8     0.0N0f8  0.0N0f8  0.0N0f8  0.0N0f8
 0.0N0f8  0.0N0f8  0.0N0f8  0.0N0f8     0.0N0f8  0.0N0f8  0.0N0f8  0.0N0f8
 0.0N0f8  0.0N0f8  0.0N0f8  0.0N0f8     0.0N0f8  0.0N0f8  0.0N0f8  0.0N0f8
 0.0N0f8  0.0N0f8  0.0N0f8  0.0N0f8  …  0.0N0f8  0.0N0f8  0.0N0f8  0.0N0f8
 0.0N0f8  0.0N0f8  0.0N0f8  0.0N0f8     0.0N0f8  0.0N0f8  0.0N0f8  0.0N0f8
 0.0N0f8  0.0N0f8  0.0N0f8  0.0N0f8     0.0N0f8  0.0N0f8  0.0N0f8  0.0N0f8


In [10]:
# The transposes below are disabled (commented out), so all four arrays keep
# the orientation they already have.
#test_x = test_x'
#train_x = train_x'
#test_y = test_y'
#train_y = train_y'
# Display the size of the training label vector.
size(train_y)

(60000,)

In [11]:
# Keep only the samples whose label is strictly between 4 and 7 (i.e. 5 or 6).
# Order matters: the feature columns must be selected BEFORE the label vector
# is overwritten, because the mask is recomputed from the labels on each line.
train_x = train_x[:, 4 .< train_y .< 7]
train_y = train_y[4 .< train_y .< 7]
test_x = test_x[:, 4 .< test_y .< 7]
test_y = test_y[4 .< test_y .< 7]

1850-element Array{Int64,1}:
 5
 6
 5
 6
 6
 5
 5
 6
 5
 5
 6
 5
 6
 ⋮
 6
 6
 6
 6
 5
 6
 6
 5
 5
 6
 5
 6

In [12]:
# Shift the remaining labels down by 5 so that 5 becomes 0 and 6 becomes 1.
# `.-` broadcasts the scalar over each label vector explicitly.
# NOTE: not idempotent -- running this line twice shifts the labels twice.
train_y, test_y = train_y .- 5, test_y .- 5

([0, 0, 1, 1, 1, 0, 1, 1, 0, 1  …  0, 1, 0, 1, 1, 1, 1, 0, 0, 1], [0, 1, 0, 1, 1, 0, 0, 1, 0, 0  …  1, 1, 0, 1, 1, 0, 0, 1, 0, 1])

In [13]:
# Display how many training labels remain after the filtering above.
size(train_y)

(11339,)

In [15]:
# Train on the filtered data for 500 iterations with step size 0.005, printing
# the cost every 100 iterations; `d` receives the dictionary returned by `model`.
d = model(
    train_x, train_y, test_x, test_y;
    num_iterations = 500,
    learning_rate = 0.005,
    print_cost = true,
    print_every = 100
)

Cost after iteration 100 :  0.38911032236086024
Cost after iteration 200 :  0.2896087332469981
Cost after iteration 300 :  0.2409613741290059
Cost after iteration 400 :  0.21169460371917376
Cost after iteration 500 :  0.19189982014900067
train accuracy: 95.92556662845048
test accuracy: 95.94594594594595


Dict{String,Any} with 7 entries:
  "w"                  => [0.0; 0.0; … ; 0.0; 0.0]
  "Y_prediction_test"  => [1.0 1.0 … 0.0 1.0]
  "b"                  => -0.0444569
  "learning_rate"      => 0.005
  "Y_prediction_train" => [0.0 0.0 … 0.0 1.0]
  "num_iterations"     => 500
  "costs"              => Any[0.38911, 0.289609, 0.240961, 0.211695, 0.1919]