This repository has been archived by the owner on Jun 27, 2021. It is now read-only.

#2 Compatible with Keras 2.3.0
CyberZHG committed Oct 29, 2019
1 parent 7d0b512 commit 453d1f0
Showing 10 changed files with 89 additions and 87 deletions.
8 changes: 1 addition & 7 deletions .travis.yml
@@ -1,13 +1,9 @@
dist: xenial
language: python
python:
- 3.6
python: "3.6"
env:
- KERAS_BACKEND=tensorflow
- KERAS_BACKEND=tensorflow TF_KERAS=1
- KERAS_BACKEND=tensorflow TF_KERAS=1 TF_EAGER=1
- KERAS_BACKEND=tensorflow TF_KERAS=1 TF_2=1
- KERAS_BACKEND=theano THEANO_FLAGS=optimizer=fast_compile
install:
- wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh;
- bash miniconda.sh -b -p $HOME/miniconda
@@ -21,8 +17,6 @@ install:
- pip install --upgrade pip
- pip install -r requirements.txt
- pip install -r requirements-dev.txt
- if [[ $TF_2 == "1" ]]; then pip install tensorflow==2.0.0-beta1; fi
- if [[ $KERAS_BACKEND == "theano" ]]; then pip install theano && conda install mkl mkl-service; fi
- pip install coveralls
script:
- ./test.sh
6 changes: 4 additions & 2 deletions CHANGELOG.md
@@ -2,6 +2,8 @@

## [Unreleased]

## [0.0.0] - 2077-07-07
## [0.0.6] - 2019-10-29

### Added
### Fixed

* Compatible with Keras 2.3.0 and TensorFlow 2.0.0
3 changes: 0 additions & 3 deletions README.md
@@ -7,10 +7,7 @@
![License](https://img.shields.io/pypi/l/keras-gradient-accumulation.svg)

![](https://img.shields.io/badge/keras-tensorflow-blue.svg)
![](https://img.shields.io/badge/keras-theano-blue.svg)
![](https://img.shields.io/badge/keras-tf.keras-blue.svg)
![](https://img.shields.io/badge/keras-tf.keras/eager-blue.svg)
![](https://img.shields.io/badge/keras-tf.keras/2.0_beta-blue.svg)

\[[中文](https://github.com/CyberZHG/keras-gradient-accumulation/blob/master/README.zh-CN.md)|[English](https://github.com/CyberZHG/keras-gradient-accumulation/blob/master/README.md)\]

3 changes: 0 additions & 3 deletions README.zh-CN.md
@@ -7,10 +7,7 @@
![License](https://img.shields.io/pypi/l/keras-gradient-accumulation.svg)

![](https://img.shields.io/badge/keras-tensorflow-blue.svg)
![](https://img.shields.io/badge/keras-theano-blue.svg)
![](https://img.shields.io/badge/keras-tf.keras-blue.svg)
![](https://img.shields.io/badge/keras-tf.keras/eager-blue.svg)
![](https://img.shields.io/badge/keras-tf.keras/2.0_beta-blue.svg)

\[[中文](https://github.com/CyberZHG/keras-gradient-accumulation/blob/master/README.zh-CN.md)|[English](https://github.com/CyberZHG/keras-gradient-accumulation/blob/master/README.md)\]

2 changes: 2 additions & 0 deletions keras_gradient_accumulation/__init__.py
@@ -1,2 +1,4 @@
from .optimizers import *
from .selection import AdamAccumulated

__version__ = '0.0.6'
20 changes: 5 additions & 15 deletions keras_gradient_accumulation/backend.py
@@ -1,25 +1,16 @@
import os
from distutils.util import strtobool

__all__ = [
'keras', 'utils', 'activations', 'applications', 'backend', 'datasets', 'engine',
'layers', 'preprocessing', 'wrappers', 'callbacks', 'constraints', 'initializers',
'metrics', 'models', 'losses', 'optimizers', 'regularizers', 'TF_KERAS', 'EAGER_MODE'
'metrics', 'models', 'losses', 'optimizers', 'regularizers', 'TF_KERAS',
]

TF_KERAS = False
EAGER_MODE = False
TF_KERAS = strtobool(os.environ.get('TF_KERAS', '0'))

if os.environ.get('TF_KERAS', '0') != '0':
import tensorflow as tf
from tensorflow.python import keras
TF_KERAS = True
if os.environ.get('TF_EAGER', '0') != '0':
try:
tf.enable_eager_execution()
raise AttributeError()
except AttributeError as e:
pass
EAGER_MODE = tf.executing_eagerly()
if TF_KERAS:
from tensorflow import keras
else:
import keras

@@ -28,7 +19,6 @@
applications = keras.applications
backend = keras.backend
datasets = keras.datasets
engine = keras.engine
layers = keras.layers
preprocessing = keras.preprocessing
wrappers = keras.wrappers
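For reference, the selection logic that replaces the old eager-mode handling boils down to the minimal sketch below (only two aliases are shown; the real backend.py aliases every submodule listed in `__all__`):

```python
import os
from distutils.util import strtobool

# TF_KERAS=1 (or "true"/"yes") selects tf.keras; unset or "0" falls back to
# the standalone keras package, as in the rewritten backend.py above.
TF_KERAS = strtobool(os.environ.get('TF_KERAS', '0'))

if TF_KERAS:
    from tensorflow import keras
else:
    import keras

backend = keras.backend
optimizers = keras.optimizers
```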
19 changes: 15 additions & 4 deletions keras_gradient_accumulation/optimizer_v1.py
@@ -4,14 +4,23 @@
__all__ = ['AdamAccumulated']


def identity(x):
return x


symbolic = identity
if hasattr(K, 'symbolic'):
symbolic = K.symbolic


class AdamAccumulated(keras.optimizers.Optimizer):
"""Adam optimizer with gradient accumulation.
Default parameters follow those provided in the original paper.
# Arguments
accumulation_steps: int > 0. Update gradient in every accumulation steps.
lr: float >= 0. Learning rate.
learning_rate: float >= 0. Learning rate.
beta_1: float, 0 < beta < 1. Generally close to 1.
beta_2: float, 0 < beta < 1. Generally close to 1.
epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`.
@@ -24,13 +33,14 @@ class AdamAccumulated(keras.optimizers.Optimizer):
- [On the Convergence of Adam and Beyond](https://openreview.net/forum?id=ryQu7f-RZ)
"""

def __init__(self, accumulation_steps, lr=0.001, beta_1=0.9, beta_2=0.999,
def __init__(self, accumulation_steps, learning_rate=0.001, beta_1=0.9, beta_2=0.999,
epsilon=None, decay=0., amsgrad=False, **kwargs):
learning_rate = kwargs.pop('lr', learning_rate)
super(AdamAccumulated, self).__init__(**kwargs)
with K.name_scope(self.__class__.__name__):
self.iterations = K.variable(0, dtype='int64', name='iterations')
self.accumulation_steps = K.variable(accumulation_steps, dtype='int64', name='accumulation_steps')
self.lr = K.variable(lr, name='lr')
self.learning_rate = K.variable(learning_rate, name='learning_rate')
self.beta_1 = K.variable(beta_1, name='beta_1')
self.beta_2 = K.variable(beta_2, name='beta_2')
self.decay = K.variable(decay, name='decay')
@@ -40,6 +50,7 @@ def __init__(self, accumulation_steps, lr=0.001, beta_1=0.9, beta_2=0.999,
self.initial_decay = decay
self.amsgrad = amsgrad

@symbolic
def get_updates(self, loss, params):
grads = self.get_gradients(loss, params)
self.updates = [K.update_add(self.iterations, 1)]
@@ -99,7 +110,7 @@ def get_updates(self, loss, params):

def get_config(self):
config = {'accumulation_steps': int(K.get_value(self.accumulation_steps)),
'lr': float(K.get_value(self.lr)),
'learning_rate': float(K.get_value(self.learning_rate)),
'beta_1': float(K.get_value(self.beta_1)),
'beta_2': float(K.get_value(self.beta_2)),
'decay': float(K.get_value(self.decay)),
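For context, here is a hypothetical end-to-end sketch of the renamed optimizer in use; the toy data and model are illustrative only and not part of the repository:

```python
import numpy as np

from keras_gradient_accumulation import AdamAccumulated
from keras_gradient_accumulation.backend import keras

# Toy regression data, purely for illustration.
x = np.random.random((256, 16))
y = np.random.random((256, 1))

model = keras.models.Sequential()
model.add(keras.layers.Dense(1, input_shape=(16,)))

# `learning_rate` is the new argument name; `lr` is still accepted and popped
# from kwargs for backward compatibility (see the __init__ change above).
model.compile(optimizer=AdamAccumulated(accumulation_steps=8, learning_rate=1e-3),
              loss='mse')

# With accumulation_steps=8, eight batches of 32 samples contribute to a single
# weight update, i.e. an effective batch size of 256.
model.fit(x, y, batch_size=32, epochs=1, verbose=0)
```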
47 changes: 27 additions & 20 deletions keras_gradient_accumulation/optimizers.py
@@ -1,9 +1,20 @@
import tensorflow as tf

from .backend import keras, optimizers, TF_KERAS
from .backend import backend as K

__all__ = ['GradientAccumulation']


def identity(x):
return x


symbolic = identity
if hasattr(K, 'symbolic'):
symbolic = K.symbolic


class GradientAccumulation(keras.optimizers.Optimizer):
"""Optimizer wrapper for gradient accumulation.
@@ -13,7 +24,11 @@ class GradientAccumulation(keras.optimizers.Optimizer):
momentum_names: A collection of strings. Names of momentum terms.
"""

def __init__(self, optimizer, accumulation_steps, momentum_names=None, **kwargs):
def __init__(self,
optimizer,
accumulation_steps,
momentum_names=None,
**kwargs):
super(GradientAccumulation, self).__init__(**kwargs)
self.optimizer = optimizers.get(optimizer)
with K.name_scope(self.__class__.__name__):
@@ -22,33 +37,25 @@ def __init__(self, optimizer, accumulation_steps, momentum_names=None, **kwargs)
if momentum_names is None:
momentum_names = ['momentum', 'rho', 'beta_1', 'beta_2']
self.momentum_names = momentum_names
self._lr = self.optimizer.lr
self._lr = self.optimizer.learning_rate

@property
def lr(self):
return self._lr
def learning_rate(self):
return self.optimizer.learning_rate

@lr.setter
def lr(self, lr):
self._lr = lr
@learning_rate.setter
def learning_rate(self, learning_rate):
self.optimizer.learning_rate = learning_rate

@symbolic
def get_updates(self, loss, params):
# Create accumulated gradients
grads = self.get_gradients(loss, params)
if K.backend() == 'tensorflow':
from tensorflow.python.framework import ops
from tensorflow.python import state_ops
self.updates = []
with ops.control_dependencies([state_ops.assign_add(self.iterations, 1)]):
update_cond = K.equal(self.iterations % self.accumulation_steps, 0)
sub_step = (self.iterations - 1) % self.accumulation_steps + 1
fake_iterations = (self.iterations - 1) // self.accumulation_steps + 1
else:
self.updates = [K.update_add(self.iterations, 1)]
self.updates = []
with tf.control_dependencies([self.iterations.assign_add(1)]):
update_cond = K.equal(self.iterations % self.accumulation_steps, 0)
sub_step = (self.iterations - 1) % self.accumulation_steps + 1
fake_iterations = (self.iterations - 1) // self.accumulation_steps + 1
fake_iterations = K.maximum(fake_iterations, 1)
acc_grads = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
for grad, acc_grad in zip(grads, acc_grads):
ave_grad = grad / K.cast(self.accumulation_steps, K.floatx())
@@ -85,7 +92,7 @@ def get_updates(self, loss, params):
self.optimizer.iterations = fake_iterations

# Use fake learning rate
self.optimizer.lr = K.switch(update_cond, self.lr, 0.0)
self.optimizer.learning_rate = K.switch(update_cond, self.lr, 0.0)

# Freeze momentum
momentum = {}
Expand All @@ -101,7 +108,7 @@ def get_updates(self, loss, params):
# Restore variables
for name, value in momentum.items():
setattr(self.optimizer, name, value)
self.optimizer.lr = self._lr
self.optimizer.learning_rate = self._lr
self.optimizer.iterations = original_iterations
if TF_KERAS:
from tensorflow.python import state_ops
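The counter bookkeeping in the rewritten `get_updates` is easier to follow with concrete numbers. The plain-Python sketch below (assuming `accumulation_steps = 4`; not a real optimizer run) evaluates the same expressions: updates fire only when `update_cond` holds, and the wrapped optimizer is handed `fake_iterations` in place of the real counter:

```python
# Walk-through of the counters used in get_updates() above, assuming
# accumulation_steps = 4. `iterations` is the value after assign_add(1).
accumulation_steps = 4

print('iter  update?  sub_step  fake_iter')
for iterations in range(1, 13):
    update_cond = iterations % accumulation_steps == 0
    sub_step = (iterations - 1) % accumulation_steps + 1
    fake_iterations = (iterations - 1) // accumulation_steps + 1
    print(f'{iterations:4d}  {str(update_cond):7s}  {sub_step:8d}  {fake_iterations:9d}')

# Updates fire at iterations 4, 8, 12, ...; in between, the wrapped learning
# rate is switched to 0, so the inner optimizer's update is a no-op while the
# averaged gradients keep accumulating.
```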
31 changes: 23 additions & 8 deletions setup.py
@@ -1,28 +1,43 @@
import os
import re
import codecs
from setuptools import setup, find_packages

with codecs.open('README.md', 'r', 'utf8') as reader:
long_description = reader.read()
current_path = os.path.abspath(os.path.dirname(__file__))


with codecs.open('requirements.txt', 'r', 'utf8') as reader:
install_requires = list(map(lambda x: x.strip(), reader.readlines()))
def read_file(*parts):
with codecs.open(os.path.join(current_path, *parts), 'r', 'utf8') as reader:
return reader.read()


def get_requirements(*parts):
with codecs.open(os.path.join(current_path, *parts), 'r', 'utf8') as reader:
return list(map(lambda x: x.strip(), reader.readlines()))


def find_version(*file_paths):
version_file = read_file(*file_paths)
version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", version_file, re.M)
if version_match:
return version_match.group(1)
raise RuntimeError('Unable to find version string.')


setup(
name='keras-gradient-accumulation',
version='0.0.5',
version=find_version('keras_gradient_accumulation', '__init__.py'),
packages=find_packages(),
url='https://github.com/CyberZHG/keras-gradient-accumulation',
license='MIT',
author='CyberZHG',
author_email='CyberZHG@users.noreply.github.com',
description='Gradient accumulation for Keras',
long_description=long_description,
long_description=read_file('README.md'),
long_description_content_type='text/markdown',
install_requires=install_requires,
install_requires=get_requirements('requirements.txt'),
classifiers=(
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
),
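The new `find_version` helper keeps the version string in one place (`__init__.py`) instead of hard-coding it in `setup.py`. A stand-alone sketch of the same regex, run against an assumed file body rather than the real file:

```python
import re

# Assumed contents standing in for keras_gradient_accumulation/__init__.py.
sample = (
    "from .optimizers import *\n"
    "from .selection import AdamAccumulated\n"
    "\n"
    "__version__ = '0.0.6'\n"
)

match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", sample, re.M)
if match is None:
    raise RuntimeError('Unable to find version string.')
print(match.group(1))  # -> 0.0.6
```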
37 changes: 12 additions & 25 deletions tests/test_optimizers.py
@@ -3,7 +3,6 @@
from unittest import TestCase

import numpy as np
from tensorflow.python.keras import optimizers

from keras_gradient_accumulation.backend import keras, TF_KERAS
from keras_gradient_accumulation.backend import backend as K
@@ -67,40 +66,28 @@ def _test_accumulation(self, optimizer, acc_optimizer=None, **kwargs):
self.assertTrue(np.allclose(actual, expected, atol=0.1), (actual, expected))

def test_update_lr(self):
optimizer = GradientAccumulation(keras.optimizers.SGD(), 128)
optimizer.lr = K.variable(K.get_value(optimizer.lr) * 0.5)
if TF_KERAS:
return
optimizer = GradientAccumulation('sgd', 128)
optimizer.learning_rate = K.get_value(optimizer.learning_rate) * 0.5

def test_sgd(self):
if TF_KERAS:
optimizer = optimizers.SGD()
else:
optimizer = 'sgd'
self._test_accumulation(optimizer)
return
self._test_accumulation('sgd')

def test_rmsprop(self):
if TF_KERAS:
optimizer = optimizers.RMSprop()
else:
optimizer = 'rmsprop'
self._test_accumulation(optimizer)
return
self._test_accumulation('rmsprop')

def test_adam(self):
if TF_KERAS:
optimizer = optimizers.Adam()
else:
optimizer = 'adam'
self._test_accumulation(optimizer)
return
self._test_accumulation('adam')

def test_adam_acc(self):
if TF_KERAS:
optimizer = optimizers.Adam()
else:
optimizer = 'adam'
self._test_accumulation(optimizer, AdamAccumulated(128, decay=1e-3), amsgrad=False, decay=1e-3)
self._test_accumulation('adam', AdamAccumulated(128, decay=1e-3), amsgrad=False, decay=1e-3)

def test_adam_acc_amsgrad(self):
if TF_KERAS:
optimizer = optimizers.Adam()
else:
optimizer = 'adam'
self._test_accumulation(optimizer, AdamAccumulated(128, amsgrad=True, decay=1e-4), amsgrad=True, decay=1e-4)
self._test_accumulation('adam', AdamAccumulated(128, amsgrad=True, decay=1e-4), amsgrad=True, decay=1e-4)
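The simplified tests rely on the new `learning_rate` property, which proxies the wrapped optimizer's rate. A small sketch of that round trip, assuming standalone Keras (the updated test skips this path when `TF_KERAS` is set):

```python
from keras_gradient_accumulation import GradientAccumulation
from keras_gradient_accumulation.backend import backend as K

# Wrap a plain SGD optimizer; string identifiers are resolved via optimizers.get().
optimizer = GradientAccumulation('sgd', accumulation_steps=128)

# `learning_rate` reads from and writes to the wrapped SGD instance, so this
# halves the underlying learning rate (mirroring test_update_lr above).
optimizer.learning_rate = K.get_value(optimizer.learning_rate) * 0.5
```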
