Commit: Support tf.keras

CyberZHG committed May 12, 2019
1 parent 9cf93e9 commit 39bd05c
Showing 9 changed files with 151 additions and 24 deletions.
36 changes: 33 additions & 3 deletions .travis.yml
@@ -1,14 +1,44 @@
dist: xenial
language: python
python:
  - 2.7
  - 3.6
env:
  - KERAS_BACKEND=tensorflow
  - KERAS_BACKEND=tensorflow TF_KERAS=1
  - KERAS_BACKEND=theano THEANO_FLAGS=optimizer=fast_compile
  # - KERAS_BACKEND=cntk PYTHONWARNINGS=ignore
install:
  - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then
      wget https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh;
    else
      wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh;
    fi
  - bash miniconda.sh -b -p $HOME/miniconda
  - export PATH="$HOME/miniconda/bin:$PATH"
  - conda config --set always_yes yes --set changeps1 no
  - conda update -q conda
  - conda info -a
  - conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION
  - source activate test-environment
  - export LD_LIBRARY_PATH=$HOME/miniconda/envs/test-environment/lib/:$LD_LIBRARY_PATH
  - pip install --upgrade pip
  - pip install -r requirements.txt
  - pip install -r requirements-dev.txt
  - if [[ $KERAS_BACKEND == "theano" ]]; then pip install theano && conda install mkl mkl-service; fi
  - if [[ "$KERAS_BACKEND" == "cntk" ]]; then
      set -e &&
      pip install cntk &&
      mkdir -p ~/mpi &&
      pushd ~/mpi &&
      wget http://cntk.ai/PythonWheel/ForKeras/depends/openmpi_1.10-3.zip &&
      unzip ./openmpi_1.10-3.zip &&
      sudo dpkg -i openmpi_1.10-3.deb &&
      popd;
    fi
  - if [[ $TRAVIS_PYTHON_VERSION == 3.6 ]]; then pip install adabound; fi
  - pip install coveralls
before_script:
  - bash lint.sh
script:
  - bash test.sh
  - ./test.sh
after_success:
  coveralls
46 changes: 46 additions & 0 deletions keras_adabound/backend.py
@@ -0,0 +1,46 @@
import os

__all__ = [
    'keras', 'utils', 'activations', 'applications', 'backend', 'datasets', 'engine',
    'layers', 'preprocessing', 'wrappers', 'callbacks', 'constraints', 'initializers',
    'metrics', 'models', 'losses', 'optimizers', 'regularizers',
]

if 'TF_KERAS' in os.environ and os.environ['TF_KERAS'] != '0':
    from tensorflow.python import keras
    from tensorflow.python.keras import utils
    from tensorflow.python.keras import activations
    from tensorflow.python.keras import applications
    from tensorflow.python.keras import backend
    from tensorflow.python.keras import datasets
    from tensorflow.python.keras import engine
    from tensorflow.python.keras import layers
    from tensorflow.python.keras import preprocessing
    from tensorflow.python.keras import wrappers
    from tensorflow.python.keras import callbacks
    from tensorflow.python.keras import constraints
    from tensorflow.python.keras import initializers
    from tensorflow.python.keras import metrics
    from tensorflow.python.keras import models
    from tensorflow.python.keras import losses
    from tensorflow.python.keras import optimizers
    from tensorflow.python.keras import regularizers
else:
    import keras
    from keras import utils
    from keras import activations
    from keras import applications
    from keras import backend
    from keras import datasets
    from keras import engine
    from keras import layers
    from keras import preprocessing
    from keras import wrappers
    from keras import callbacks
    from keras import constraints
    from keras import initializers
    from keras import metrics
    from keras import models
    from keras import losses
    from keras import optimizers
    from keras import regularizers
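
With this shim, the rest of the package never imports Keras directly. A minimal sketch (not part of the commit) of how a caller opts in to the tf.keras path; note the guard accepts any value other than '0':

import os

# Must be set before keras_adabound (or its backend module) is first
# imported, since the shim reads the variable once at import time.
os.environ['TF_KERAS'] = '1'

from keras_adabound.backend import keras

# Under TF_KERAS=1 this reports tf.keras; otherwise standalone Keras.
print(keras.__name__)
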
16 changes: 12 additions & 4 deletions keras_adabound/optimizers.py
@@ -1,5 +1,5 @@
import keras
import keras.backend as K
from .backend import keras
from .backend import backend as K


class AdaBound(keras.optimizers.Optimizer):
@@ -14,6 +14,7 @@ class AdaBound(keras.optimizers.Optimizer):
        gamma: float, 0 < gamma < 1. Convergence speed of the bound functions.
        epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`.
        decay: float >= 0. Learning rate decay over each update.
        weight_decay: float >= 0. Weight decay, added to the gradient as `weight_decay * p`.
        amsgrad: boolean. Whether to apply the AMSGrad variant of this algorithm.
    # References
@@ -23,7 +24,7 @@ class AdaBound(keras.optimizers.Optimizer):

    def __init__(self, lr=0.001, final_lr=0.1, base_lr=None,
                 beta_1=0.9, beta_2=0.999, gamma=0.001,
                 epsilon=None, decay=0., amsgrad=False, **kwargs):
                 epsilon=None, decay=0., weight_decay=0., amsgrad=False, **kwargs):
        super(AdaBound, self).__init__(**kwargs)
        with K.name_scope(self.__class__.__name__):
            self.iterations = K.variable(0, dtype='int64', name='iterations')
@@ -33,6 +34,7 @@ def __init__(self, lr=0.001, final_lr=0.1, base_lr=None,
            self.beta_2 = K.variable(beta_2, name='beta_2')
            self.gamma = K.variable(gamma, name='gamma')
            self.decay = K.variable(decay, name='decay')
            self.weight_decay = K.variable(weight_decay, name='weight_decay')
        if epsilon is None:
            epsilon = K.epsilon()
        if base_lr is None:
@@ -41,6 +43,7 @@ def __init__(self, lr=0.001, final_lr=0.1, base_lr=None,
        self.base_lr = base_lr
        self.epsilon = epsilon
        self.initial_decay = decay
        self.initial_weight_decay = weight_decay
        self.amsgrad = amsgrad

    def get_updates(self, loss, params):
@@ -68,6 +71,11 @@ def get_updates(self, loss, params):
        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            if self.initial_weight_decay > 0.:
                # The decay term is added to the gradient, so it also flows
                # into the moment estimates; the mechanism is the same as in
                # the official repo.
                g += self.weight_decay * p

            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
@@ -78,7 +86,6 @@ def get_updates(self, loss, params):
            else:
                step = lr_t / (K.sqrt(v_t) + self.epsilon)
            p_t = p - K.minimum(K.maximum(step, lower_bound), upper_bound) * m_t

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t
@@ -98,6 +105,7 @@ def get_config(self):
            'beta_2': float(K.get_value(self.beta_2)),
            'gamma': float(K.get_value(self.gamma)),
            'decay': float(K.get_value(self.decay)),
            'weight_decay': float(K.get_value(self.weight_decay)),
            'epsilon': self.epsilon,
            'amsgrad': self.amsgrad}
        base_config = super(AdaBound, self).get_config()
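For intuition, one AdaBound update with the new weight decay can be written in plain NumPy. This is a sketch, not the shipped code: the variable names are hypothetical, and the bound formulas follow the AdaBound paper rather than the hunks above (which omit the bound computation).

import numpy as np

def adabound_step(p, g, m, v, t, lr=1e-3, final_lr=0.1, base_lr=1e-3,
                  beta_1=0.9, beta_2=0.999, gamma=1e-3, epsilon=1e-8,
                  weight_decay=0.):
    """One optimizer step; t is the 1-based iteration count."""
    if weight_decay > 0.:
        g = g + weight_decay * p  # decay enters through the gradient
    m = beta_1 * m + (1. - beta_1) * g
    v = beta_2 * v + (1. - beta_2) * np.square(g)
    # Bias-corrected step size, as in Adam.
    lr_t = lr * np.sqrt(1. - beta_2 ** t) / (1. - beta_1 ** t)
    # The bounds tighten around final_lr (rescaled when lr != base_lr).
    final = final_lr * lr / base_lr
    lower = final * (1. - 1. / (gamma * t + 1.))
    upper = final * (1. + 1. / (gamma * t))
    step = np.clip(lr_t / (np.sqrt(v) + epsilon), lower, upper)
    return p - step * m, m, v

Because the decay term passes through m and v, this behaves like classic L2 regularization rather than decoupled (AdamW-style) weight decay.
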
2 changes: 0 additions & 2 deletions lint.sh

This file was deleted.

3 changes: 1 addition & 2 deletions requirements-dev.txt
@@ -2,8 +2,7 @@ setuptools>=38.6.0
twine>=1.11.0
wheel>=0.31.0
torch
tensorflow
adabound
nose
tensorflow
pycodestyle
coverage
2 changes: 1 addition & 1 deletion setup.py
@@ -12,7 +12,7 @@

setup(
    name='keras-adabound',
    version='0.4.1',
    version='0.5.0',
    packages=find_packages(),
    url='https://github.com/CyberZHG/keras-adabound',
    license='MIT',
3 changes: 2 additions & 1 deletion test.sh
@@ -1,2 +1,3 @@
#!/usr/bin/env bash
nosetests --with-coverage --cover-erase --cover-html --cover-html-dir=htmlcov --cover-package="keras_adabound" tests
pycodestyle --max-line-length=120 keras_adabound tests && \
nosetests --with-coverage --cover-erase --cover-html --cover-html-dir=htmlcov --cover-package=keras_adabound tests
8 changes: 3 additions & 5 deletions tests/test_optimize.py
@@ -1,27 +1,25 @@
import os
import tempfile
import random
from unittest import TestCase
import keras
import keras.backend as K
import numpy as np
import tensorflow as tf
from keras_adabound.backend import keras
from keras_adabound.backend import backend as K
from keras_adabound import AdaBound


class TestOptimizers(TestCase):

    @staticmethod
    def reset_seed(seed):
        random.seed(seed)
        np.random.seed(seed)
        tf.set_random_seed(seed)

    @staticmethod
    def gen_keras_linear(w, b, amsgrad=False):
        model = keras.models.Sequential()
        model.add(keras.layers.Dense(input_shape=(3,), units=5, weights=[w, b]))
        model.compile(optimizer=AdaBound(lr=1e-3, final_lr=0.1, amsgrad=amsgrad), loss='mse')
        model.compile(optimizer=AdaBound(lr=1e-3, final_lr=0.1, amsgrad=amsgrad, weight_decay=1e-3), loss='mse')
        return model

    @staticmethod
59 changes: 53 additions & 6 deletions tests/test_similar.py
@@ -1,12 +1,12 @@
import os
import sys
import tempfile
import random
from unittest import TestCase
import keras
import keras.backend as K
import torch
import numpy as np
import tensorflow as tf
from keras_adabound.backend import keras
from keras_adabound.backend import backend as K
from adabound import AdaBound as OfficialAdaBound
from keras_adabound import AdaBound

@@ -15,7 +15,6 @@ class TestOptimizers(TestCase):

    @staticmethod
    def reset_seed(seed):
        random.seed(seed)
        np.random.seed(seed)
        tf.set_random_seed(seed)
        torch.manual_seed(seed)
@@ -28,17 +27,24 @@ def gen_torch_linear(w, b):
        return linear

    @staticmethod
    def gen_keras_linear(w, b, amsgrad=False):
    def gen_keras_linear(w, b, amsgrad=False, weight_decay=0.):
        model = keras.models.Sequential()
        model.add(keras.layers.Dense(input_shape=(3,), units=5, weights=[w, b]))
        model.compile(optimizer=AdaBound(lr=1e-3, final_lr=0.1, amsgrad=amsgrad), loss='mse')
        model.compile(optimizer=AdaBound(
            lr=1e-3,
            final_lr=0.1,
            amsgrad=amsgrad,
            weight_decay=weight_decay,
        ), loss='mse')
        return model

    @staticmethod
    def gen_random_weights():
        return np.random.standard_normal((3, 5)), np.random.standard_normal((5,))

    def test_same(self):
        if sys.version_info[0] < 3:
            return
        self.reset_seed(0xcafe)
        w, b = self.gen_random_weights()
        torch_linear = self.gen_torch_linear(w, b)
@@ -73,6 +79,8 @@ def test_same(self):
        ))

    def test_same_amsgrad(self):
        if sys.version_info[0] < 3:
            return
        self.reset_seed(0xcafe)
        w, b = self.gen_random_weights()
        torch_linear = self.gen_torch_linear(w, b)
@@ -108,3 +116,42 @@ def test_same_amsgrad(self):
            keras_linear.get_weights()[1],
            atol=1e-2,
        ))

    def test_same_weight_decay(self):
        if sys.version_info[0] < 3:
            return
        self.reset_seed(0xcafe)
        w, b = self.gen_random_weights()
        torch_linear = self.gen_torch_linear(w, b)
        keras_linear = self.gen_keras_linear(w, b, weight_decay=0.1)
        w, b = self.gen_random_weights()
        criterion = torch.nn.MSELoss()
        optimizer = OfficialAdaBound(
            torch_linear.parameters(),
            lr=1e-3,
            final_lr=0.1,
            eps=K.epsilon(),
            weight_decay=0.1,
        )
        for i in range(300):
            x = np.random.standard_normal((1, 3))
            y = np.dot(x, w) + b
            optimizer.zero_grad()
            y_hat = torch_linear(torch.Tensor(x.tolist()))
            loss = criterion(y_hat, torch.Tensor(y.tolist()))
            torch_loss = loss.tolist()
            loss.backward()
            optimizer.step()
            keras_loss = keras_linear.train_on_batch(x, y).tolist()
            # print(i, torch_loss, keras_loss)
            self.assertTrue(abs(torch_loss - keras_loss) < 1e-2)
        self.assertTrue(np.allclose(
            torch_linear.weight.detach().numpy().transpose(),
            keras_linear.get_weights()[0],
            atol=1e-2,
        ))
        self.assertTrue(np.allclose(
            torch_linear.bias.detach().numpy(),
            keras_linear.get_weights()[1],
            atol=1e-2,
        ))
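
Putting the changes together, the new argument is passed like any other optimizer option. A minimal end-to-end sketch under the tf.keras path (layer sizes chosen arbitrarily; assumes the 0.5.0 package is installed):

import os
os.environ['TF_KERAS'] = '1'  # opt in before the first import

import numpy as np
from keras_adabound.backend import keras
from keras_adabound import AdaBound

model = keras.models.Sequential()
model.add(keras.layers.Dense(input_shape=(3,), units=5))
model.compile(
    optimizer=AdaBound(lr=1e-3, final_lr=0.1, weight_decay=1e-3),
    loss='mse',
)
x = np.random.standard_normal((32, 3))
y = np.random.standard_normal((32, 5))
model.fit(x, y, epochs=1, verbose=0)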
