In [1]:
using Optim
using Plots

In [2]:
# Set the Plots default `size` attribute to (300, 200) so figures in this notebook stay small.
default(size = (300, 200)) # plot size

# Generate data

In [3]:
# Target function to be fitted: a shallow parabola plus a sine ripple.
function f1(x)
    return x^2 / 20 + sin(2x)
end

f1 (generic function with 1 method)

In [4]:
# Inputs are stored as a 1×n row matrix, i.e. shape = [n_input, n_sample],
# so that each column is one sample.
x = reshape(collect(-10:0.1:10), 1, :)
# Evaluate the target element-wise; the result keeps the shape of `x`.
y_true = map(f1, x)
print(size(x), size(y_true))
# Flatten both matrices to vectors for plotting.
plot(x[:], y_true[:])

(1, 201)(1, 201)

# Build NN

In [5]:
# Build the initial parameters of a network with one hidden layer.
#
# Keywords: `n_in` (input features), `n_hidden` (hidden units), `n_out` (outputs).
# Returns the 4-element array `[W1, b1, W2, b2]` where
#   W1 is n_hidden×n_in and W2 is n_out×n_hidden, both drawn with `randn`;
#   b1 (length n_hidden) and b2 (length n_out) are all zeros.
function init_weights(; n_in=1, n_hidden=10, n_out=1)
    # Weight matrices are oriented for left multiplication, e.g. W1*x.
    hidden_w = randn(n_hidden, n_in)
    hidden_b = zeros(n_hidden)
    out_w = randn(n_out, n_hidden)
    out_b = zeros(n_out)
    return [hidden_w, hidden_b, out_w, out_b]
end

params = init_weights()

4-element Array{Array{Float64,N} where N,1}:
 [0.177902; -1.77137; … ; 1.36901; -0.175861]      
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
 [1.22351 -0.932874 … -0.920088 -0.529654]         
 [0.0]                                             

In [6]:
# Concatenate every parameter array (in order, each read column by column) into one
# flat vector, and record each array's shape so the vector can be unpacked again.
params_flat = vcat(map(vec, params)...)
sizes = map(size, params)

4-element Array{Tuple{Int64,Vararg{Int64,N} where N},1}:
 (10, 1)
 (10,)  
 (1, 10)
 (1,)   

In [7]:
# `const` binding for the recorded parameter shapes: a non-const global is untyped,
# which slows down any function that reads it. Note that this is the same array
# object as `sizes`, not a copy.
const sizes_const = sizes

4-element Array{Tuple{Int64,Vararg{Int64,N} where N},1}:
 (10, 1)
 (10,)  
 (1, 10)
 (1,)   

In [8]:
# Evaluate the one-hidden-layer network  y = W2 * act.(W1*x .+ b1) .+ b2.
#
# Arguments
#   params_flat : flat vector holding W1, b1, W2, b2 back to back, in that order
#   x           : input matrix; `W1*x` must be defined (here: n_in × n_sample)
# Keywords
#   act   : activation applied element-wise to the hidden layer (default `tanh`)
#   sizes : shapes of W1, b1, W2, b2 used to unpack `params_flat`; defaults to the
#           global constant `sizes_const`
# Returns the network output `W2*a .+ b2`.
# Throws `DimensionMismatch` when `params_flat` does not hold exactly the number
# of values that `sizes` describes.
function predict(params_flat, x; act=tanh, sizes=sizes_const)
    n_expected = sum(prod, sizes)
    if length(params_flat) != n_expected
        throw(DimensionMismatch(
            "params_flat has length $(length(params_flat)), but sizes describe $n_expected values"))
    end

    # unflatten: cut consecutive slices out of params_flat and restore each shape
    params = Array{eltype(params_flat)}[]
    i1 = 1
    for s in sizes
        i2 = i1 + prod(s) # size -> length
        push!(params, reshape(params_flat[i1:i2-1], s))
        i1 = i2
    end

    W1, b1, W2, b2 = params

    # normal NN calculation
    a = act.(W1*x .+ b1)
    y = W2*a .+ b2
    return y
end

predict (generic function with 1 method)

In [9]:
# Forward pass with the freshly initialised (not yet optimised) parameters.
y_pred = predict(params_flat, x)

1×201 Array{Float64,2}:
 -3.33572  -3.3355  -3.33519  -3.33479  …  3.33479  3.33519  3.3355  3.33572

In [10]:
# Plot the output of the not-yet-optimised network against the inputs.
plot(x[:], y_pred[:])

In [11]:
# Mean squared error between the network prediction and the target.
#
# Arguments
#   params_flat : flat parameter vector, passed straight to `predict`
#   x           : network input, passed straight to `predict`
#   y_true      : target values; must have the same shape as `predict(params_flat, x)`
# Returns the average of the squared residuals as a scalar.
function loss_func(params_flat, x, y_true)
    # Plain `-` (not `.-`) so that a shape mismatch raises instead of broadcasting.
    residual = predict(params_flat, x) - y_true
    # sum/length instead of `mean`: `mean` is only in Base on Julia 0.6; from 0.7 on
    # it lives in the Statistics stdlib, which this file does not load.
    loss = sum(abs2, residual) / length(residual)
    return loss
end

loss_func (generic function with 1 method)

In [12]:
# Loss of the not-yet-optimised parameters on the whole data set (baseline value).
loss_func(params_flat, x, y_true)

13.685127558542781

# Optim.jl

In [13]:
# Objective in the single-argument form handed to `optimize`: only the flat
# parameter vector varies, while the data come from the globals `x` and `y_true`
# (use global x and y for now).
function loss_wrap(params_flat)
    return loss_func(params_flat, x, y_true)
end

loss_wrap (generic function with 1 method)

In [14]:
# re-initialize weight: draw a new random starting point for the optimisation
params = init_weights(; n_hidden=10)
# Same flattening as before: all parameter arrays back to back in one vector.
params_flat = vcat(map(vec, params)...);

In [15]:
# Optimizer settings: only the iteration limit (2000) is set explicitly here.
option = Optim.Options(iterations = 2000)

Optim.Options{Void}(1.0e-32, 1.0e-32, 1.0e-8, 0, 0, 0, false, 2000, false, false, false, 1, nothing, NaN)

In [16]:
# Minimise the loss with BFGS, starting from `params_flat`. Only the objective is
# passed (no gradient function). `@time` on a first call also includes compilation,
# so the reported time overstates the pure optimisation cost.
@time opt = optimize(loss_wrap, params_flat, BFGS(), option)

 30.022794 seconds (30.32 M allocations: 12.413 GiB, 6.05% gc time)


Results of Optimization Algorithm
 * Algorithm: BFGS
 * Starting Point: [-0.0014598533367757788,-1.7767793156770935, ...]
 * Minimizer: [0.8413179480694707,-0.3771537230331488, ...]
 * Minimum: 5.249643e-04
 * Iterations: 2000
 * Convergence: false
   * |x - x'| < 1.0e-32: false 
     |x - x'| = 8.83e-02 
   * |f(x) - f(x')| / |f(x)| < 1.0e-32: false
     |f(x) - f(x')| / |f(x)| = NaN 
   * |g(x)| < 1.0e-08: false 
     |g(x)| = 3.91e-02 
   * stopped by an increasing objective: false
   * Reached Maximum Number of Iterations: true
 * Objective Calls: 5042
 * Gradient Calls: 5042

In [17]:
# Loss at the best parameters found. The minimizer is read through Optim's
# accessor function rather than the result object's field.
loss_wrap(Optim.minimizer(opt))

0.0005249642960746887

In [18]:
# Prediction of the optimised network. The minimizer is read through Optim's
# accessor function rather than the result object's field.
y_pred = predict(Optim.minimizer(opt), x)

1×201 Array{Float64,2}:
 3.92036  4.02945  4.13717  4.24251  …  5.21609  5.46376  5.70045  5.936

In [19]:
# Compare fit and target: the target is drawn as a line of width 2, the prediction
# is added to the same figure as borderless circle markers with no connecting line.
plot(x[:], y_true[:]; lw=2)
plot!(x[:], y_pred[:];
      lw=0, marker=:circle, markerstrokewidth=0, markersize=3)