/
solvers.jl
183 lines (145 loc) · 5.59 KB
/
solvers.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
# Public solver configurations. Each exported name is a `Solver` subtype (or a
# convenience constructor for one) selecting a fitting strategy for the models.
export Analytical, CG,
Newton, NewtonCG,
LBFGS,
ProxGrad, FISTA, ISTA,
IWLSCG
# ADMM, FADMM # NOTE: these do not work currently, hence not exported
# =====
# TODO
# * all - allow picking the linesearch
# * NewtonCG - expose the number of inner iterations
# * FISTA - add a field to enforce descent
# ====
abstract type Solver end
# ===================== analytical.jl
"""
$SIGNATURES

Analytical solver (Cholesky). If the `iterative` parameter is set to `true`
then a CG solver is used. The CG solver is matrix-free and should be preferred
in "large scale" cases (when the Gram matrix `X'X` is "big").

## Parameters

* `iterative` (Bool): whether to use CG (iterative) or not
* `max_inner` (Int): in the iterative mode, how many inner iterations to do.
"""
@with_kw struct Analytical <: Solver
iterative::Bool = false  # matrix-free CG instead of a direct Cholesky solve
max_inner::Int = 200     # cap on CG iterations (only used when iterative=true)
end
CG() = Analytical(; iterative=true)
# ===================== newton.jl
"""
$SIGNATURES

Newton solver. This is a full Hessian solver and should be avoided for
"large scale" cases.

`optim_options` are the [general Optim Options](https://julianlsolvers.github.io/Optim.jl/stable/#user/config/).
`newton_options` are the [options of Newton's method](https://julianlsolvers.github.io/Optim.jl/stable/#algo/newton/)

## Example

```julia
using MLJLinearModels, Optim
solver = MLJLinearModels.Newton(
    optim_options = Optim.Options(time_limit = 20),
    newton_options = (linesearch = Optim.LineSearches.HagerZhang(),),
)
```
"""
@with_kw struct Newton{O,S} <: Solver
optim_options::O = Optim.Options(f_tol=1e-4)  # generic Optim.jl stopping options
newton_options::S = (; )                      # NamedTuple forwarded to Optim.Newton
end
"""
$SIGNATURES

Newton CG solver. This is the same as the Newton solver except that instead
of solving systems of the form `H\\b` where `H` is the full Hessian, it uses
a matrix-free conjugate gradient approach to solving that system. This should
generally be preferred for larger scale cases.

`optim_options` are the [general Optim Options](https://julianlsolvers.github.io/Optim.jl/stable/#user/config/).
`newtoncg_options` are the [options of Krylov Trust Region method](https://github.com/JuliaNLSolvers/Optim.jl/blob/master/src/multivariate/solvers/second_order/krylov_trust_region.jl)

## Example

```julia
using MLJLinearModels, Optim
solver = MLJLinearModels.NewtonCG(
    optim_options = Optim.Options(time_limit = 20),
    newtoncg_options = (eta = 0.2,)
)
```
"""
@with_kw struct NewtonCG{O,S} <: Solver
optim_options::O = Optim.Options(f_tol=1e-4)  # generic Optim.jl stopping options
newtoncg_options::S = (; )                    # NamedTuple forwarded to KrylovTrustRegion
end
"""
$SIGNATURES

LBFGS quasi-Newton solver. See [the wikipedia entry](https://en.wikipedia.org/wiki/Limited-memory_BFGS).

`optim_options` are the [general Optim Options](https://julianlsolvers.github.io/Optim.jl/stable/#user/config/).
`lbfgs_options` are the [options of LBFGS method](https://julianlsolvers.github.io/Optim.jl/stable/#algo/lbfgs/)

## Example

```julia
using MLJLinearModels, Optim
solver = MLJLinearModels.LBFGS(
    optim_options = Optim.Options(time_limit = 20),
    lbfgs_options = (linesearch = Optim.LineSearches.HagerZhang(),),
)
```
"""
@with_kw struct LBFGS{O,S} <: Solver
optim_options::O = Optim.Options(f_tol=1e-4)  # generic Optim.jl stopping options
lbfgs_options::S = (; )                       # NamedTuple forwarded to Optim.LBFGS
end
# ===================== pgrad.jl
"""
$SIGNATURES

Proximal Gradient solver for non-smooth objective functions.

## Parameters

* `accel` (Bool): whether to use Nesterov-style acceleration
* `max_iter` (Int): number of overall iterations
* `tol` (Float64): tolerance for the relative change θ ie `norm(θ-θ_)/norm(θ)`
* `max_inner`: number of inner steps when searching for a stepsize in the
backtracking step
* `beta`: rate of shrinkage in the backtracking step (between 0 and 1)
* `gram` (Bool): whether to use a precomputed Gram matrix for least squares
where applicable
"""
@with_kw struct ProxGrad <: Solver
accel::Bool = false # use Nesterov style acceleration (see also FISTA)
max_iter::Int = 1000 # max number of overall iterations
tol::Float64 = 1e-4 # tol relative change of θ i.e. norm(θ-θ_)/norm(θ)
max_inner::Int = 100 # β^max_inner should be > 1e-10
beta::Float64 = 0.8 # in (0, 1); shrinkage in the backtracking step
gram::Bool = false # use precomputed Gramian for lsq where possible
end
"""
$SIGNATURES

Convenience constructor: a [`ProxGrad`](@ref) solver with Nesterov-style
acceleration enabled (FISTA). Extra keyword arguments are forwarded as-is.
"""
function FISTA(; kwa...)
    return ProxGrad(; accel = true, kwa...)
end

"""
$SIGNATURES

Convenience constructor: a [`ProxGrad`](@ref) solver without acceleration
(plain ISTA). Extra keyword arguments are forwarded as-is.
"""
function ISTA(; kwa...)
    return ProxGrad(; accel = false, kwa...)
end
# ===================== iwls.jl
"""
$SIGNATURES

Iteratively Reweighted Least Square with Conjugate Gradient. This is the
standard (expensive) IWLS but with more efficient solves to avoid full matrix
computations.

## Parameters

* `max_iter` (Int): number of max iterations (outer)
* `max_inner` (Int): number of iterations for the CG solves
* `tol` (Float64): tolerance for the relative change θ ie `norm(θ-θ_)/norm(θ)`
* `damping` (Float64): how much to trust iterates (1=full trust)
* `threshold` (Float64): threshold for the residuals
"""
@with_kw struct IWLSCG <: Solver
max_iter::Int = 100       # outer (reweighting) iterations
max_inner::Int = 200      # inner CG iterations per solve
tol::Float64 = 1e-4       # stop when norm(θ-θ_)/norm(θ) falls below this
damping::Float64 = 1.0 # should be between 0 and 1, 1 = trust iterates
threshold::Float64 = 1e-6 # thresh for residuals; used eg in quantile reg
end
# ===================== admm.jl
# @with_kw struct ADMM <: Solver
# max_iter::Int = 100
# tol::Float64 = 1e-4
# alpha::Float64 = 1.5 # over-relaxation (recommended between 1.5 and 1.8)
# rho::Float64 = 1.0 # Lagrangian parameter (should be decreased if poor condition)
# end
#
# @with_kw struct FADMM <: Solver
# max_iter::Int = 100
# tol::Float64 = 1e-4
# eta::Float64 = 0.999 # restart parameter
# rho::Float64 = 1.0 # Lagrangian parameter (should be decreased if poor condition)
# tau::Float64 = 2.0 # Increase / decrease of ρ should be > 1
# mu::Float64 = 10.0 #
# end