forked from statsmodels/statsmodels
-
Notifications
You must be signed in to change notification settings - Fork 0
/
l1_slsqp.py
166 lines (141 loc) · 5.4 KB
/
l1_slsqp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
"""
Holds files for l1 regularization of LikelihoodModel, using
scipy.optimize.slsqp
"""
import numpy as np
from scipy.optimize import fmin_slsqp
import statsmodels.base.l1_solvers_common as l1_solvers_common
def fit_l1_slsqp(
        f, score, start_params, args, kwargs, disp=False, maxiter=1000,
        callback=None, retall=False, full_output=False, hess=None):
    """
    Solve the l1 regularized problem using scipy.optimize.fmin_slsqp().

    Specifically: We convert the convex but non-smooth problem

    .. math:: \\min_\\beta f(\\beta) + \\sum_k\\alpha_k |\\beta_k|

    via the transformation to the smooth, convex, constrained problem in twice
    as many variables (adding the "added variables" :math:`u_k`)

    .. math:: \\min_{\\beta,u} f(\\beta) + \\sum_k\\alpha_k u_k,

    subject to

    .. math:: -u_k \\leq \\beta_k \\leq u_k.

    Parameters
    ----------
    All the usual parameters from LikelihoodModel.fit.  In addition, the
    following are read from ``kwargs``:
    alpha : non-negative scalar or numpy array (same size as parameters)
        The weight multiplying the l1 penalty term
    trim_mode : 'auto', 'size', or 'off'
        If not 'off', trim (set to zero) parameters that would have been zero
        if the solver reached the theoretical minimum.
        If 'auto', trim params using the Theory above.
        If 'size', trim params if they have very small absolute value.
    size_trim_tol : float or 'auto' (default = 'auto')
        For use when trim_mode == 'size'.
    auto_trim_tol : float
        For use when trim_mode == 'auto'.
    qc_tol : float
        Print warning and don't allow auto trim when (ii) in "Theory" (above)
        is violated by this much.
    qc_verbose : bool
        If true, print out a full QC report upon failure.
    acc : float (default 1e-10)
        Requested accuracy as used by slsqp.
    """
    start_params = np.array(start_params).ravel('F')

    ### Extract values
    # k_params is total number of covariates,
    # possibly including a leading constant.
    k_params = len(start_params)
    # The start point: beta followed by u = |beta|, which is feasible for the
    # constraints -u <= beta <= u by construction.
    x0 = np.append(start_params, np.fabs(start_params))
    # alpha is the regularization parameter
    alpha = np.array(kwargs['alpha_rescaled']).ravel('F')
    # Make sure it's a vector (broadcast a scalar alpha across all params)
    alpha = alpha * np.ones(k_params)
    assert alpha.min() >= 0
    # Convert display parameters to scipy.optimize form
    disp_slsqp = _get_disp_slsqp(disp, retall)
    # Set/retrieve the desired accuracy
    acc = kwargs.setdefault('acc', 1e-10)

    ### Wrap up for use in fmin_slsqp
    func = lambda x_full: _objective_func(f, x_full, k_params, alpha, *args)
    f_ieqcons_wrap = lambda x_full: _f_ieqcons(x_full, k_params)
    fprime_wrap = lambda x_full: _fprime(score, x_full, k_params, alpha)
    fprime_ieqcons_wrap = lambda x_full: _fprime_ieqcons(x_full, k_params)

    ### Call the solver
    results = fmin_slsqp(
        func, x0, f_ieqcons=f_ieqcons_wrap, fprime=fprime_wrap, acc=acc,
        iter=maxiter, disp=disp_slsqp, full_output=full_output,
        fprime_ieqcons=fprime_ieqcons_wrap)
    # fmin_slsqp returns a tuple (x, fx, its, imode, smode) when
    # full_output=True, but just the solution array x otherwise.  The
    # original code unconditionally did results[0][:k_params], which raises
    # a TypeError when full_output=False (results[0] is then a scalar).
    if full_output:
        params = np.asarray(results[0][:k_params])
    else:
        params = np.asarray(results[:k_params])

    ### Post-process
    # QC
    qc_tol = kwargs['qc_tol']
    qc_verbose = kwargs['qc_verbose']
    passed = l1_solvers_common.qc_results(
        params, alpha, score, qc_tol, qc_verbose)
    # Possibly trim
    trim_mode = kwargs['trim_mode']
    size_trim_tol = kwargs['size_trim_tol']
    auto_trim_tol = kwargs['auto_trim_tol']
    params, trimmed = l1_solvers_common.do_trim_params(
        params, k_params, alpha, score, passed, trim_mode, size_trim_tol,
        auto_trim_tol)

    ### Pack up return values for statsmodels optimizers
    # TODO These retvals are returned as mle_retvals...but the fit wasn't ML.
    # This could be confusing someday.
    if full_output:
        x_full, fx, its, imode, smode = results
        fopt = func(np.asarray(x_full))
        # NOTE(review): 'converged' is the *string* 'True' on success, or the
        # slsqp exit-mode message on failure; downstream code appears to
        # treat it as printable/truthy rather than a bool -- confirm before
        # changing to a real boolean.
        converged = 'True' if imode == 0 else smode
        iterations = its
        gopt = float('nan')  # Objective is non-differentiable
        hopt = float('nan')
        retvals = {
            'fopt': fopt, 'converged': converged, 'iterations': iterations,
            'gopt': gopt, 'hopt': hopt, 'trimmed': trimmed}

    ### Return
    if full_output:
        return params, retvals
    else:
        return params
def _get_disp_slsqp(disp, retall):
if disp or retall:
if disp:
disp_slsqp = 1
if retall:
disp_slsqp = 2
else:
disp_slsqp = 0
return disp_slsqp
def _objective_func(f, x_full, k_params, alpha, *args):
"""
The regularized objective function
"""
x_params = x_full[:k_params]
x_added = x_full[k_params:]
## Return
return f(x_params, *args) + (alpha * x_added).sum()
def _fprime(score, x_full, k_params, alpha):
"""
The regularized derivative
"""
x_params = x_full[:k_params]
# The derivative just appends a vector of constants
return np.append(score(x_params), alpha)
def _f_ieqcons(x_full, k_params):
"""
The inequality constraints.
"""
x_params = x_full[:k_params]
x_added = x_full[k_params:]
# All entries in this vector must be \geq 0 in a feasible solution
return np.append(x_params + x_added, x_added - x_params)
def _fprime_ieqcons(x_full, k_params):
"""
Derivative of the inequality constraints
"""
I = np.eye(k_params)
A = np.concatenate((I, I), axis=1)
B = np.concatenate((-I, I), axis=1)
C = np.concatenate((A, B), axis=0)
## Return
return C