Merge pull request #201 from JesseLivezey/index_mask
Index mask in Logistic regression
JesseLivezey committed Apr 28, 2021
2 parents 4766eac + f719da9 commit dbfeacc
Showing 9 changed files with 429 additions and 66 deletions.
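The diff below switches the logistic solvers from zeroing out coefficients after the fit (multiplying by a 0/1 `coef_mask`) to optimizing only the coefficients the mask selects, addressed by index. A minimal usage sketch of the affected code path follows; `pyuoi.datasets.make_classification` appears in this commit, while the estimator name `UoI_L1Logistic` and the exact return values of `make_classification` are assumptions for illustration, not verified here.

# Illustrative sketch only; not part of the commit.
import numpy as np
from pyuoi.datasets import make_classification   # modified in this commit
from pyuoi.linear_model import UoI_L1Logistic    # assumed estimator name

# make_classification's first two return values are assumed to be (X, y).
data = make_classification(n_samples=200, n_features=20, n_informative=5,
                           random_state=0)
X, y = data[0], data[1]

model = UoI_L1Logistic().fit(X, y)
# With index-based masking, unselected coefficients are never optimized,
# so they come back exactly zero rather than being zeroed after the fit.
print(np.count_nonzero(model.coef_), "nonzero coefficients")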
2 changes: 1 addition & 1 deletion LICENSE.txt
@@ -1,4 +1,4 @@
-PyUol Copyright (c) 2019, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved.
+PyUoI Copyright (c) 2019, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved.

Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

6 changes: 3 additions & 3 deletions README.md
@@ -46,7 +46,7 @@ PyUoI requires

* numpy>=1.14
* h5py>=2.8
-* scikit-learn>=0.20
+* scikit-learn>=0.24

and optionally

@@ -89,8 +89,8 @@ Please see our <a href="https://pyuoi.readthedocs.io/en/latest/">ReadTheDocs</a>

# Copyright

-PyUol Copyright (c) 2019, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved.
+PyUoI Copyright (c) 2019, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved.

-If you have questions about your rights to use or distribute this software, please contact Berkeley Lab's Innovation & Partnerships Office at IPO@lbl.gov referring to " PyUol" (LBNL Ref 2019-157)."
+If you have questions about your rights to use or distribute this software, please contact Berkeley Lab's Innovation & Partnerships Office at IPO@lbl.gov referring to " PyUoI" (LBNL Ref 2019-157)."

NOTICE. This software was developed under funding from the U.S. Department of Energy. As such, the U.S. Government has been granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable, worldwide license in the Software to reproduce, prepare derivative works, and perform publicly and display publicly. The U.S. Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable, worldwide license in the Software to reproduce, prepare derivative works, distribute copies to the public, perform publicly and display publicly, and to permit others to do so.
2 changes: 1 addition & 1 deletion bin/generate_build.sh
File mode changed: 100644 → 100755
@@ -1,6 +1,6 @@
eval "$(conda shell.bash hook)"
mkdir dist
-for py in 3.6 3.7; do
+for py in 3.6 3.7 3.8; do
git clone https://github.com/BouchardLab/pyuoi.git
cd pyuoi
conda create -y -n temp_build_env python=$py
2 changes: 1 addition & 1 deletion docs/source/installation.rst
@@ -28,7 +28,7 @@ PyUoI requires

* numpy>=1.14
* h5py>=2.8
-* scikit-learn>=0.20
+* scikit-learn>=0.24

and optionally

5 changes: 4 additions & 1 deletion pyuoi/datasets/__init__.py
@@ -126,7 +126,10 @@ def make_classification(n_samples=100, n_features=20, n_informative=2,
    if isinstance(random_state, int):
        rng = np.random.RandomState(random_state)
    else:
-        rng = random_state
+        if random_state is None:
+            rng = np.random
+        else:
+            rng = random_state
    n_not_informative = n_features - n_informative

    X = rng.randn(n_samples, n_features)
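The hunk above lets `make_classification` accept `random_state=None` (falling back to the global `np.random` module) in addition to an int seed or an existing generator object. A standalone sketch of the same dispatch is below; the helper name `resolve_rng` is ours for illustration, and scikit-learn's `sklearn.utils.check_random_state` provides equivalent behavior.

import numpy as np

def resolve_rng(random_state):
    # Mirrors the branching added above: int -> seeded RandomState,
    # None -> the global np.random module, otherwise assume the object
    # already exposes the RandomState API and pass it through.
    if isinstance(random_state, int):
        return np.random.RandomState(random_state)
    if random_state is None:
        return np.random
    return random_state

rng = resolve_rng(0)
X = rng.randn(5, 3)   # same call pattern make_classification uses on rng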
146 changes: 92 additions & 54 deletions pyuoi/linear_model/logistic.py
Expand Up @@ -541,9 +541,10 @@ def _logistic_regression_path(X, y, Cs=48, fit_intercept=True,
    _, n_features = X.shape

    classes = np.unique(y)
+    n_classes = len(classes)

    if multi_class == 'auto':
-        if len(classes) > 2:
+        if n_classes > 2:
            multi_class = 'multinomial'
        else:
            multi_class = 'ovr'
@@ -616,9 +617,19 @@ def _logistic_regression_path(X, y, Cs=48, fit_intercept=True,
        target = Y_multi
        if penalty == 'l2':
            w0 = w0.ravel()
+            if coef_mask is not None:
+                x0 = np.zeros_like(w0)

            def func(x, *args):
-                return _multinomial_loss_grad(x, *args)[0:2]
+                mask = args[3]
+                if mask is not None:
+                    x0[mask] = x
+                    args = args[:3] + (None,) + (args[-1],)
+                    f, df = _multinomial_loss_grad(x0, *args)[0:2]
+                    df = df[mask]
+                else:
+                    f, df = _multinomial_loss_grad(x, *args)[0:2]
+                return f, df
        else:
            w0 = w0.T.ravel().copy()

@@ -634,7 +645,18 @@ def func(x, g, *args):
    else:
        target = y_bin
        if penalty == 'l2':
-            func = _logistic_loss_and_grad
+            x0 = np.zeros_like(w0)
+
+            def func(x, *args):
+                mask = args[3]
+                if mask is not None:
+                    x0[mask] = x
+                    args = args[:3] + (None,) + (args[-1],)
+                    f, df = _logistic_loss_and_grad(x0, *args)
+                    df = df[mask]
+                else:
+                    f, df = _logistic_loss_and_grad(x, *args)
+                return f, df
        else:
            def func(x, g, *args):
                loss, grad = _logistic_loss_and_grad(x, *args)
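Both wrapped `func` definitions above implement the same trick: the optimizer only sees the entries selected by `mask`, and the wrapper scatters that reduced vector into a persistent full-length buffer before calling the unchanged loss/gradient routine, then returns only the masked slice of the gradient. A self-contained sketch of the pattern with a plain least-squares loss (standing in for `_logistic_loss_and_grad`) follows:

import numpy as np
from scipy.optimize import fmin_l_bfgs_b

# Toy full-dimensional objective: f(w) = 0.5 * ||A w - b||^2, gradient A^T (A w - b).
rng = np.random.RandomState(0)
A = rng.randn(30, 6)
b = rng.randn(30)

def full_loss_grad(w):
    r = A @ w - b
    return 0.5 * r @ r, A.T @ r

mask = np.array([0, 2, 5])      # indices the optimizer may move
x_full = np.zeros(A.shape[1])   # persistent full-length buffer; masked-out entries stay 0

def masked_loss_grad(x_reduced):
    # Embed the reduced parameters, evaluate the full objective,
    # then hand back only the gradient entries under optimization.
    x_full[mask] = x_reduced
    f, g = full_loss_grad(x_full)
    return f, g[mask]

w_reduced, f_min, info = fmin_l_bfgs_b(masked_loss_grad, np.zeros(mask.size))
w_full = np.zeros(A.shape[1])
w_full[mask] = w_reduced        # scatter back, as the diff does with w0[mask] = wp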
@@ -644,63 +666,79 @@ def func(x, g, *args):
    coefs = list()
    n_iter = np.zeros(len(Cs), dtype=np.int32)
    for i, C in enumerate(Cs):
-        iprint = [-1, 50, 1, 100, 101][
-            np.searchsorted(np.array([0, 1, 2, 3]), verbose)]
-        if penalty == 'l2':
-            w0, loss, info = optimize.fmin_l_bfgs_b(
-                func, w0, fprime=None,
-                args=(X, target, 1. / C, coef_mask, sample_weight),
-                iprint=iprint, pgtol=tol, maxiter=max_iter)
-        else:
-            zeros_seen = [0]
-
-            def zero_coef(x, *args):
-                if multi_class == 'multinomial':
-                    x = x.reshape(-1, classes.size)[:-1]
-                else:
-                    x = x[:-1]
-                now_zeros = np.array_equiv(x, 0.)
-                if now_zeros:
-                    zeros_seen[0] += 1
-                else:
-                    zeros_seen[0] = 0
-                if zeros_seen[0] > 1:
-                    return -2048
-            try:
-                w0 = fmin_lbfgs(func, w0, orthantwise_c=1. / C,
-                                args=(X, target, 0., coef_mask, sample_weight),
-                                max_iterations=max_iter,
-                                epsilon=tol,
-                                orthantwise_end=coef_size,
-                                progress=zero_coef)
-            except AllZeroLBFGSError:
-                w0 *= 0.
-                info = None
-        if info is not None and info["warnflag"] == 1:
-            warnings.warn("lbfgs failed to converge. Increase the number "
-                          "of iterations.", ConvergenceWarning)
-        # In scipy <= 1.0.0, nit may exceed maxiter.
-        # See https://github.com/scipy/scipy/issues/7854.
-        if info is None:
-            n_iter_i = -1
-        else:
-            n_iter_i = min(info['nit'], max_iter)
+        if coef_mask is None or coef_mask.sum():
+            iprint = [-1, 50, 1, 100, 101][
+                np.searchsorted(np.array([0, 1, 2, 3]), verbose)]
+            if penalty == 'l2':
+                if coef_mask is None:
+                    w0, loss, info = optimize.fmin_l_bfgs_b(
+                        func, w0, fprime=None,
+                        args=(X, target, 1. / C, coef_mask, sample_weight),
+                        iprint=iprint, pgtol=tol, maxiter=max_iter)
+                else:
+                    if fit_intercept:
+                        if multi_class == 'multinomial':
+                            mask = [coef_mask,
+                                    np.ones(n_classes)[:, np.newaxis]]
+                            mask = np.concatenate(mask, axis=1)
+                        else:
+                            mask = np.concatenate([coef_mask, np.ones(1)])
+                    else:
+                        mask = coef_mask
+                    mask = np.nonzero(mask.ravel())[0]
+                    wp = w0[mask]
+                    wp, loss, info = optimize.fmin_l_bfgs_b(
+                        func, wp, fprime=None,
+                        args=(X, target, 1. / C, mask, sample_weight),
+                        iprint=iprint, pgtol=tol, maxiter=max_iter)
+                    w0 = np.zeros_like(w0)
+                    w0[mask] = wp
+
+            else:
+                zeros_seen = [0]
+
+                def zero_coef(x, *args):
+                    if multi_class == 'multinomial':
+                        x = x.reshape(-1, classes.size)[:-1]
+                    else:
+                        x = x[:-1]
+                    now_zeros = np.array_equiv(x, 0.)
+                    if now_zeros:
+                        zeros_seen[0] += 1
+                    else:
+                        zeros_seen[0] = 0
+                    if zeros_seen[0] > 1:
+                        return -2048
+                try:
+                    args = (X, target, 0., coef_mask, sample_weight)
+                    w0 = fmin_lbfgs(func, w0, orthantwise_c=1. / C,
+                                    args=args,
+                                    max_iterations=max_iter,
+                                    epsilon=tol,
+                                    orthantwise_end=coef_size,
+                                    progress=zero_coef)
+                except AllZeroLBFGSError:
+                    w0 *= 0.
+                    info = None
+            if info is not None and info["warnflag"] == 1:
+                warnings.warn("lbfgs failed to converge. Increase the number "
+                              "of iterations.", ConvergenceWarning)
+            # In scipy <= 1.0.0, nit may exceed maxiter.
+            # See https://github.com/scipy/scipy/issues/7854.
+            if info is None:
+                n_iter_i = -1
+            else:
+                n_iter_i = min(info['nit'], max_iter)
+
+            n_iter[i] = n_iter_i

        if multi_class == 'multinomial':
-            n_classes = max(2, classes.size)
            if penalty == 'l2':
-                multi_w0 = np.reshape(w0, (n_classes, -1))
+                w0 = np.reshape(w0, (n_classes, -1))
            else:
-                multi_w0 = np.reshape(w0, (-1, n_classes)).T
-            if coef_mask is not None:
-                multi_w0[:, :n_features] *= coef_mask
-            coefs.append(multi_w0.copy())
-        else:
-            if coef_mask is not None:
-                w0[:n_features] *= coef_mask
-            coefs.append(w0.copy())
-
-        n_iter[i] = n_iter_i
+                w0 = np.reshape(w0, (-1, n_classes)).T
+        coefs.append(w0.copy())

    return np.array(coefs), np.array(Cs), n_iter
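In the l2 branch above, a supplied `coef_mask` is now converted to integer indices (with a column of ones appended when `fit_intercept=True`, so intercepts are never masked), only those entries are handed to `fmin_l_bfgs_b`, and the solution is scattered back into a zero vector; previously the full fit was simply multiplied by the mask afterwards. A sketch of just that index bookkeeping for the binary case, with hypothetical shapes:

import numpy as np

# Hypothetical shapes: 5 features, binary (ovr) case, fit_intercept=True.
n_features = 5
coef_mask = np.array([1, 0, 1, 0, 0])           # 0/1 support from the selection step
w0 = np.zeros(n_features + 1)                   # ravelled coefficients + intercept

# Mirror the mask construction above for the binary case: the intercept
# entry is appended as a one so it is never masked out.
mask = np.concatenate([coef_mask, np.ones(1)])
mask = np.nonzero(mask.ravel())[0]              # integer indices, here [0, 2, 5]

wp = w0[mask]                                   # reduced starting point for L-BFGS
# ... wp would be optimized with fmin_l_bfgs_b against the masked wrapper ...
wp = wp + 1.0                                   # stand-in for the optimizer's output

w0 = np.zeros_like(w0)
w0[mask] = wp                                   # scatter back; masked-out entries stay exactly 0
print(mask, w0)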

2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,3 +1,3 @@
numpy>=1.14
h5py>=2.8
-scikit-learn>=0.20
+scikit-learn>=0.24
2 changes: 1 addition & 1 deletion setup.py
@@ -46,7 +46,7 @@ def finalize_options(self):
    # Versions should comply with PEP440. For a discussion on single-sourcing
    # the version across setup.py and the project code, see
    # https://packaging.python.org/en/latest/single_source_version.html
-    version='1.0.0',
+    version='1.1.0',

    description='The Union of Intersections framework in Python.',
    long_description=long_description,
