diff --git a/.travis.yml b/.travis.yml
index 194dec3..88aaffe 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,13 +1,9 @@
 dist: xenial
 language: python
-python:
-  - 3.6
+python: "3.6"
 env:
   - KERAS_BACKEND=tensorflow
   - KERAS_BACKEND=tensorflow TF_KERAS=1
-  - KERAS_BACKEND=tensorflow TF_KERAS=1 TF_EAGER=1
-  - KERAS_BACKEND=tensorflow TF_KERAS=1 TF_2=1
-  - KERAS_BACKEND=theano THEANO_FLAGS=optimizer=fast_compile
 install:
   - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh;
   - bash miniconda.sh -b -p $HOME/miniconda
@@ -21,8 +17,6 @@ install:
   - pip install --upgrade pip
   - pip install -r requirements.txt
   - pip install -r requirements-dev.txt
-  - if [[ $TF_2 == "1" ]]; then pip install tensorflow==2.0.0-beta1; fi
-  - if [[ $KERAS_BACKEND == "theano" ]]; then pip install theano && conda install mkl mkl-service; fi
   - pip install coveralls
 script:
   - ./test.sh
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 70c28d0..4f12062 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,8 @@
 
 ## [Unreleased]
 
-## [0.0.0] - 2077-07-07
+## [0.0.6] - 2019-10-29
 
-### Added
+### Fixed
+
+* Compatible with Keras 2.3.0 and TensorFlow 2.0.0
diff --git a/README.md b/README.md
index 38c0d10..f928eb7 100644
--- a/README.md
+++ b/README.md
@@ -7,10 +7,7 @@
 ![License](https://img.shields.io/pypi/l/keras-gradient-accumulation.svg)
 ![](https://img.shields.io/badge/keras-tensorflow-blue.svg)
-![](https://img.shields.io/badge/keras-theano-blue.svg)
 ![](https://img.shields.io/badge/keras-tf.keras-blue.svg)
-![](https://img.shields.io/badge/keras-tf.keras/eager-blue.svg)
-![](https://img.shields.io/badge/keras-tf.keras/2.0_beta-blue.svg)
 
 \[[中文](https://github.com/CyberZHG/keras-gradient-accumulation/blob/master/README.zh-CN.md)|[English](https://github.com/CyberZHG/keras-gradient-accumulation/blob/master/README.md)\]
diff --git a/README.zh-CN.md b/README.zh-CN.md
index c11f805..b294e7a 100644
--- a/README.zh-CN.md
+++ b/README.zh-CN.md
@@ -7,10 +7,7 @@
 ![License](https://img.shields.io/pypi/l/keras-gradient-accumulation.svg)
 ![](https://img.shields.io/badge/keras-tensorflow-blue.svg)
-![](https://img.shields.io/badge/keras-theano-blue.svg)
 ![](https://img.shields.io/badge/keras-tf.keras-blue.svg)
-![](https://img.shields.io/badge/keras-tf.keras/eager-blue.svg)
-![](https://img.shields.io/badge/keras-tf.keras/2.0_beta-blue.svg)
 
 \[[中文](https://github.com/CyberZHG/keras-gradient-accumulation/blob/master/README.zh-CN.md)|[English](https://github.com/CyberZHG/keras-gradient-accumulation/blob/master/README.md)\]
diff --git a/keras_gradient_accumulation/__init__.py b/keras_gradient_accumulation/__init__.py
index 01e8e91..18e9691 100644
--- a/keras_gradient_accumulation/__init__.py
+++ b/keras_gradient_accumulation/__init__.py
@@ -1,2 +1,4 @@
 from .optimizers import *
 from .selection import AdamAccumulated
+
+__version__ = '0.0.6'
diff --git a/keras_gradient_accumulation/backend.py b/keras_gradient_accumulation/backend.py
index dae3a96..edb6a55 100644
--- a/keras_gradient_accumulation/backend.py
+++ b/keras_gradient_accumulation/backend.py
@@ -1,25 +1,16 @@
 import os
+from distutils.util import strtobool
 
 __all__ = [
     'keras', 'utils', 'activations', 'applications', 'backend', 'datasets', 'engine',
     'layers', 'preprocessing', 'wrappers', 'callbacks', 'constraints', 'initializers',
-    'metrics', 'models', 'losses', 'optimizers', 'regularizers', 'TF_KERAS', 'EAGER_MODE'
+    'metrics', 'models', 'losses', 'optimizers', 'regularizers', 'TF_KERAS',
 ]
 
-TF_KERAS = False
-EAGER_MODE = False
+TF_KERAS = strtobool(os.environ.get('TF_KERAS', '0'))
 
-if os.environ.get('TF_KERAS', '0') != '0':
-    import tensorflow as tf
-    from tensorflow.python import keras
-    TF_KERAS = True
-    if os.environ.get('TF_EAGER', '0') != '0':
-        try:
-            tf.enable_eager_execution()
-            raise AttributeError()
-        except AttributeError as e:
-            pass
-        EAGER_MODE = tf.executing_eagerly()
+if TF_KERAS:
+    from tensorflow import keras
 else:
     import keras
@@ -28,7 +19,6 @@
 applications = keras.applications
 backend = keras.backend
 datasets = keras.datasets
-engine = keras.engine
 layers = keras.layers
 preprocessing = keras.preprocessing
 wrappers = keras.wrappers
diff --git a/keras_gradient_accumulation/optimizer_v1.py b/keras_gradient_accumulation/optimizer_v1.py
index dc5dde4..c84dd03 100644
--- a/keras_gradient_accumulation/optimizer_v1.py
+++ b/keras_gradient_accumulation/optimizer_v1.py
@@ -4,6 +4,15 @@
 __all__ = ['AdamAccumulated']
 
 
+def identity(x):
+    return x
+
+
+symbolic = identity
+if hasattr(K, 'symbolic'):
+    symbolic = K.symbolic
+
+
 class AdamAccumulated(keras.optimizers.Optimizer):
     """Adam optimizer with gradient accumulation.
 
@@ -11,7 +20,7 @@ class AdamAccumulated(keras.optimizers.Optimizer):
 
     # Arguments
         accumulation_steps: int > 0. Update gradient in every accumulation steps.
-        lr: float >= 0. Learning rate.
+        learning_rate: float >= 0. Learning rate.
         beta_1: float, 0 < beta < 1. Generally close to 1.
         beta_2: float, 0 < beta < 1. Generally close to 1.
         epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`.
@@ -24,13 +33,14 @@ class AdamAccumulated(keras.optimizers.Optimizer):
         - [On the Convergence of Adam and Beyond](https://openreview.net/forum?id=ryQu7f-RZ)
     """
 
-    def __init__(self, accumulation_steps, lr=0.001, beta_1=0.9, beta_2=0.999,
+    def __init__(self, accumulation_steps, learning_rate=0.001, beta_1=0.9, beta_2=0.999,
                  epsilon=None, decay=0., amsgrad=False, **kwargs):
+        learning_rate = kwargs.pop('lr', learning_rate)
         super(AdamAccumulated, self).__init__(**kwargs)
         with K.name_scope(self.__class__.__name__):
             self.iterations = K.variable(0, dtype='int64', name='iterations')
             self.accumulation_steps = K.variable(accumulation_steps, dtype='int64', name='accumulation_steps')
-            self.lr = K.variable(lr, name='lr')
+            self.learning_rate = K.variable(learning_rate, name='learning_rate')
             self.beta_1 = K.variable(beta_1, name='beta_1')
             self.beta_2 = K.variable(beta_2, name='beta_2')
             self.decay = K.variable(decay, name='decay')
@@ -40,6 +50,7 @@ def __init__(self, accumulation_steps, lr=0.001, beta_1=0.9, beta_2=0.999,
         self.initial_decay = decay
         self.amsgrad = amsgrad
 
+    @symbolic
     def get_updates(self, loss, params):
         grads = self.get_gradients(loss, params)
         self.updates = [K.update_add(self.iterations, 1)]
@@ -99,7 +110,7 @@ def get_updates(self, loss, params):
 
     def get_config(self):
         config = {'accumulation_steps': int(K.get_value(self.accumulation_steps)),
-                  'lr': float(K.get_value(self.lr)),
+                  'learning_rate': float(K.get_value(self.learning_rate)),
                   'beta_1': float(K.get_value(self.beta_1)),
                   'beta_2': float(K.get_value(self.beta_2)),
                   'decay': float(K.get_value(self.decay)),
diff --git a/keras_gradient_accumulation/optimizers.py b/keras_gradient_accumulation/optimizers.py
index 55f8ea8..be15bae 100644
--- a/keras_gradient_accumulation/optimizers.py
+++ b/keras_gradient_accumulation/optimizers.py
@@ -1,9 +1,20 @@
+import tensorflow as tf
+
 from .backend import keras, optimizers, TF_KERAS
 from .backend import backend as K
 
 __all__ = ['GradientAccumulation']
 
 
+def identity(x):
+    return x
+
+
+symbolic = identity
+if hasattr(K, 'symbolic'):
+    symbolic = K.symbolic
+
+
 class GradientAccumulation(keras.optimizers.Optimizer):
     """Optimizer wrapper for gradient accumulation.
 
@@ -13,7 +24,11 @@ class GradientAccumulation(keras.optimizers.Optimizer):
         momentum_names: A collection of strings. Names of momentum terms.
     """
 
-    def __init__(self, optimizer, accumulation_steps, momentum_names=None, **kwargs):
+    def __init__(self,
+                 optimizer,
+                 accumulation_steps,
+                 momentum_names=None,
+                 **kwargs):
         super(GradientAccumulation, self).__init__(**kwargs)
         self.optimizer = optimizers.get(optimizer)
         with K.name_scope(self.__class__.__name__):
@@ -22,33 +37,25 @@ def __init__(self, optimizer, accumulation_steps, momentum_names=None, **kwargs)
         if momentum_names is None:
             momentum_names = ['momentum', 'rho', 'beta_1', 'beta_2']
         self.momentum_names = momentum_names
-        self._lr = self.optimizer.lr
+        self._lr = self.optimizer.learning_rate
 
     @property
-    def lr(self):
-        return self._lr
+    def learning_rate(self):
+        return self.optimizer.learning_rate
 
-    @lr.setter
-    def lr(self, lr):
-        self._lr = lr
+    @learning_rate.setter
+    def learning_rate(self, learning_rate):
+        self.optimizer.learning_rate = learning_rate
 
+    @symbolic
     def get_updates(self, loss, params):
         # Create accumulated gradients
         grads = self.get_gradients(loss, params)
-        if K.backend() == 'tensorflow':
-            from tensorflow.python.framework import ops
-            from tensorflow.python import state_ops
-            self.updates = []
-            with ops.control_dependencies([state_ops.assign_add(self.iterations, 1)]):
-                update_cond = K.equal(self.iterations % self.accumulation_steps, 0)
-                sub_step = (self.iterations - 1) % self.accumulation_steps + 1
-                fake_iterations = (self.iterations - 1) // self.accumulation_steps + 1
-        else:
-            self.updates = [K.update_add(self.iterations, 1)]
+        self.updates = []
+        with tf.control_dependencies([self.iterations.assign_add(1)]):
             update_cond = K.equal(self.iterations % self.accumulation_steps, 0)
             sub_step = (self.iterations - 1) % self.accumulation_steps + 1
             fake_iterations = (self.iterations - 1) // self.accumulation_steps + 1
-            fake_iterations = K.maximum(fake_iterations, 1)
         acc_grads = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
         for grad, acc_grad in zip(grads, acc_grads):
             ave_grad = grad / K.cast(self.accumulation_steps, K.floatx())
@@ -85,7 +92,7 @@ def get_updates(self, loss, params):
         self.optimizer.iterations = fake_iterations
 
         # Use fake learning rate
-        self.optimizer.lr = K.switch(update_cond, self.lr, 0.0)
+        self.optimizer.learning_rate = K.switch(update_cond, self.lr, 0.0)
 
         # Freeze momentum
         momentum = {}
@@ -101,7 +108,7 @@ def get_updates(self, loss, params):
         # Restore variables
         for name, value in momentum.items():
             setattr(self.optimizer, name, value)
-        self.optimizer.lr = self._lr
+        self.optimizer.learning_rate = self._lr
         self.optimizer.iterations = original_iterations
         if TF_KERAS:
             from tensorflow.python import state_ops
diff --git a/setup.py b/setup.py
index 3b61650..dd35b26 100644
--- a/setup.py
+++ b/setup.py
@@ -1,16 +1,31 @@
+import os
+import re
 import codecs
 from setuptools import setup, find_packages
 
-with codecs.open('README.md', 'r', 'utf8') as reader:
-    long_description = reader.read()
+current_path = os.path.abspath(os.path.dirname(__file__))
 
-with codecs.open('requirements.txt', 'r', 'utf8') as reader:
-    install_requires = list(map(lambda x: x.strip(), reader.readlines()))
+
+def read_file(*parts):
+    with codecs.open(os.path.join(current_path, *parts), 'r', 'utf8') as reader:
+        return reader.read()
+
+
+def get_requirements(*parts):
+    with codecs.open(os.path.join(current_path, *parts), 'r', 'utf8') as reader:
+        return list(map(lambda x: x.strip(), reader.readlines()))
+
+
+def find_version(*file_paths):
+    version_file = read_file(*file_paths)
+    version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", version_file, re.M)
+    if version_match:
+        return version_match.group(1)
+    raise RuntimeError('Unable to find version string.')
 
 setup(
     name='keras-gradient-accumulation',
-    version='0.0.5',
+    version=find_version('keras_gradient_accumulation', '__init__.py'),
     packages=find_packages(),
     url='https://github.com/CyberZHG/keras-gradient-accumulation',
@@ -18,11 +33,11 @@
     author='CyberZHG',
     author_email='CyberZHG@users.noreply.github.com',
     description='Gradient accumulation for Keras',
-    long_description=long_description,
+    long_description=read_file('README.md'),
     long_description_content_type='text/markdown',
-    install_requires=install_requires,
+    install_requires=get_requirements('requirements.txt'),
     classifiers=(
-        "Programming Language :: Python :: 3.6",
+        "Programming Language :: Python :: 3",
         "License :: OSI Approved :: MIT License",
         "Operating System :: OS Independent",
     ),
diff --git a/tests/test_optimizers.py b/tests/test_optimizers.py
index fadc10a..91e5c50 100644
--- a/tests/test_optimizers.py
+++ b/tests/test_optimizers.py
@@ -3,7 +3,6 @@
 from unittest import TestCase
 
 import numpy as np
-from tensorflow.python.keras import optimizers
 
 from keras_gradient_accumulation.backend import keras, TF_KERAS
 from keras_gradient_accumulation.backend import backend as K
@@ -67,40 +66,28 @@ def _test_accumulation(self, optimizer, acc_optimizer=None, **kwargs):
         self.assertTrue(np.allclose(actual, expected, atol=0.1), (actual, expected))
 
     def test_update_lr(self):
-        optimizer = GradientAccumulation(keras.optimizers.SGD(), 128)
-        optimizer.lr = K.variable(K.get_value(optimizer.lr) * 0.5)
+        if TF_KERAS:
+            return
+        optimizer = GradientAccumulation('sgd', 128)
+        optimizer.learning_rate = K.get_value(optimizer.learning_rate) * 0.5
 
     def test_sgd(self):
         if TF_KERAS:
-            optimizer = optimizers.SGD()
-        else:
-            optimizer = 'sgd'
-        self._test_accumulation(optimizer)
+            return
+        self._test_accumulation('sgd')
 
     def test_rmsprop(self):
         if TF_KERAS:
-            optimizer = optimizers.RMSprop()
-        else:
-            optimizer = 'rmsprop'
-        self._test_accumulation(optimizer)
+            return
+        self._test_accumulation('rmsprop')
 
     def test_adam(self):
         if TF_KERAS:
-            optimizer = optimizers.Adam()
-        else:
-            optimizer = 'adam'
-        self._test_accumulation(optimizer)
+            return
+        self._test_accumulation('adam')
 
     def test_adam_acc(self):
-        if TF_KERAS:
-            optimizer = optimizers.Adam()
-        else:
-            optimizer = 'adam'
-        self._test_accumulation(optimizer, AdamAccumulated(128, decay=1e-3), amsgrad=False, decay=1e-3)
+        self._test_accumulation('adam', AdamAccumulated(128, decay=1e-3), amsgrad=False, decay=1e-3)
 
     def test_adam_acc_amsgrad(self):
-        if TF_KERAS:
-            optimizer = optimizers.Adam()
-        else:
-            optimizer = 'adam'
-        self._test_accumulation(optimizer, AdamAccumulated(128, amsgrad=True, decay=1e-4), amsgrad=True, decay=1e-4)
+        self._test_accumulation('adam', AdamAccumulated(128, amsgrad=True, decay=1e-4), amsgrad=True, decay=1e-4)
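For reference, a minimal usage sketch against the API after this change; the toy model, random data, and `accumulation_steps=8` are illustrative assumptions and not part of the diff:

```python
import numpy as np

from keras_gradient_accumulation import GradientAccumulation, AdamAccumulated
from keras_gradient_accumulation.backend import keras

# Hypothetical toy model; only the optimizer wiring below reflects this change.
model = keras.models.Sequential([
    keras.layers.Dense(units=2, input_shape=(5,), activation='softmax'),
])

# Wrap any Keras optimizer, or use the dedicated Adam variant with the renamed
# `learning_rate` argument (`lr` is still accepted via the kwargs fallback).
optimizer = GradientAccumulation('adam', accumulation_steps=8)
# optimizer = AdamAccumulated(accumulation_steps=8, learning_rate=1e-3)

model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy')

# Weights are only updated once every `accumulation_steps` batches.
x = np.random.random((64, 5))
y = np.random.randint(0, 2, (64,))
model.fit(x, y, epochs=2, batch_size=8, verbose=0)
```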