In [2]:
# notebook for trying out RA and lbfgs with optim.jl instead of custom implementation.
# not reusing past data proves to be costly here.
using LinearAlgebra, Distributions, Random, Optim, LineSearches
using CSV, DataFrames, DelimitedFiles

In [3]:
# beta is (1, 2, 3, ... p)ᵀ, given in paper.
data = CSV.read("../data/lin_reg.csv", DataFrame);
data = Matrix(data);

In [4]:
const X = data[:, 1:end - 1];
const y = data[:, end];

In [5]:
n, p = size(X)

(10000, 1000)

In [6]:
function batch_shuffle(X, y, mb_size)
    n = size(X)[1]
    perm = randperm(n)[1:mb_size]
    X_mini = @view X[perm, :]
    y_mini = @view y[perm, :]
    return X_mini, y_mini
end

batch_shuffle (generic function with 1 method)

In [7]:
# function loss(X, y, β)
    
#     res = y - X * β
#     l  = sum(abs2, res)
#     return l
# end

In [8]:
# function gradient(X, y, β)
#     ∇β = -X' * (y .- X * β)
#     return ∇β
# end

In [9]:
function f(β)
    res = y .- X * β
    l = sum(abs2, res)
    return l
end
function g!(G, β)
    G .= -X' * (y .- X * β)
    println(norm(G))
end
function fg!(F, G, β)
    res = y .- X * β
    
    if G != nothing
        G .= -X' * res
    end
    if F != nothing
        l = sum(abs2, res)
        return l
    end
end

fg! (generic function with 1 method)

In [10]:
β₀ = zeros(p);

In [None]:
res_f = optimize(f, g!, β₀, method = LBFGS(
            alphaguess = LineSearches.InitialStatic(),
            linesearch = LineSearches.BackTracking()
        ),
        g_tol = 1e-10,
        store_trace = true,
        extended_trace = true
    )

In [18]:
β₀ = zeros(p)
res_fg = optimize(Optim.only_fg!(fg!), β₀, method = LBFGS(
            alphaguess = LineSearches.InitialStatic(),
            linesearch = LineSearches.BackTracking()
        ),
    store_trace = true
    )

 * Status: success

 * Candidate solution
    Final objective value:     1.264382e-18

 * Found with
    Algorithm:     L-BFGS

 * Convergence measures
    |x - x'|               = 4.43e-12 ≰ 0.0e+00
    |x - x'|/|x'|          = 4.43e-15 ≰ 0.0e+00
    |f(x) - f(x')|         = 8.93e-18 ≰ 0.0e+00
    |f(x) - f(x')|/|f(x')| = 7.07e+00 ≰ 0.0e+00
    |g(x)|                 = 9.30e-09 ≤ 1.0e-08

 * Work counters
    Seconds run:   2  (vs limit Inf)
    Iterations:    32
    f(x) calls:    38
    ∇f(x) calls:   33


In [105]:
eps = [1e0, 1e-1, 1e-2, 1e-2, 1e-3]

5-element Vector{Float64}:
 1.0
 0.1
 0.01
 0.01
 0.001

In [106]:
# make first element 2
nₒ = size(eps)[1] 
mbs = zeros(Int64, nₒ)
mbs[1] = 2
mbs, nₒ

([2, 0, 0, 0, 0], 5)

In [107]:
for i in 1:(nₒ - 1)
    qᵢ₊₁ = ceil(Int, 1 + 7 * (i^-1.7))
    mbs[i + 1] = mbs[i] * qᵢ₊₁
end
mbs

5-element Vector{Int64}:
   2
  16
  64
 192
 384

In [108]:
function ra_lbfgs(eps, mbs, β₀)
    
    @assert(size(mbs) == size(eps))
    nₒ = size(mbs)[1]
    niter_total = 0
    ngrad_total = 0
    βᵢ = β₀
    
    for i in 1:nₒ
        X_inner, y_inner = batch_shuffle(X, y, mbs[i])
        @show i
        function fg_inner!(F, G, β)
            res = y_inner .- X_inner * β

            if G != nothing
                G .= -X_inner' * res
            end
            if F != nothing
                l = sum(abs2, res)
                return l
            end
        end
       res_fg = optimize(Optim.only_fg!(fg_inner!), βᵢ, method = LBFGS(
                alphaguess = LineSearches.InitialStatic(),
                linesearch = LineSearches.BackTracking()
            ),
            g_tol = eps[i],
            time_limit = 5.0
        )
        println(res_fg)
        βᵢ = Optim.minimizer(res_fg)
    end
    
    return βᵢ
end

ra_lbfgs (generic function with 1 method)

In [109]:
βₗ =  ra_lbfgs(eps, mbs, zeros(p))

i = 1
 * Status: success

 * Candidate solution
    Final objective value:     2.714554e-02

 * Found with
    Algorithm:     L-BFGS

 * Convergence measures
    |x - x'|               = 1.02e-02 ≰ 0.0e+00
    |x - x'|/|x'|          = 4.81e-04 ≰ 0.0e+00
    |f(x) - f(x')|         = 8.22e+00 ≰ 0.0e+00
    |f(x) - f(x')|/|f(x')| = 3.03e+02 ≰ 0.0e+00
    |g(x)|                 = 5.06e-01 ≤ 1.0e+00

 * Work counters
    Seconds run:   0  (vs limit 5)
    Iterations:    3
    f(x) calls:    7
    ∇f(x) calls:   4

i = 2
 * Status: success

 * Candidate solution
    Final objective value:     4.566229e-05

 * Found with
    Algorithm:     L-BFGS

 * Convergence measures
    |x - x'|               = 1.92e-04 ≰ 0.0e+00
    |x - x'|/|x'|          = 1.25e-06 ≰ 0.0e+00
    |f(x) - f(x')|         = 3.60e-03 ≰ 0.0e+00
    |f(x) - f(x')|/|f(x')| = 7.88e+01 ≰ 0.0e+00
    |g(x)|                 = 2.02e-02 ≤ 1.0e-01

 * Work counters
    Seconds run:   0  (vs limit 5)
    Iterations:    7
    f(x) call

1000-element Vector{Float64}:
  138.35033734761888
 -469.35693443384685
  815.929803486784
  176.1439997053514
 -116.06183114186759
   69.51238432936839
  567.9800892406682
  -33.58391142097452
  659.4265425962129
 -288.62809440055065
  -66.63552635754452
  477.6613690854057
  273.56681737181503
    ⋮
  731.866917321932
  847.6893598865106
  441.56862890966363
  922.5730126128735
  292.31370333654337
  394.28749982890497
  520.0956805737047
 1004.2753184924477
  534.5337693452221
  325.29541364213236
  991.9938682844304
  503.5421679096379