This repository has been archived by the owner on Jun 27, 2021. It is now read-only.

#2 Compatible with Keras 2.3.0
CyberZHG committed Oct 29, 2019
1 parent 7d0b512 commit 453d1f0
Showing 10 changed files with 89 additions and 87 deletions.
8 changes: 1 addition & 7 deletions .travis.yml
@@ -1,13 +1,9 @@
dist: xenial
language: python
python:
- 3.6
python: "3.6"
env:
- KERAS_BACKEND=tensorflow
- KERAS_BACKEND=tensorflow TF_KERAS=1
- KERAS_BACKEND=tensorflow TF_KERAS=1 TF_EAGER=1
- KERAS_BACKEND=tensorflow TF_KERAS=1 TF_2=1
- KERAS_BACKEND=theano THEANO_FLAGS=optimizer=fast_compile
install:
- wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh;
- bash miniconda.sh -b -p $HOME/miniconda
@@ -21,8 +17,6 @@ install:
- pip install --upgrade pip
- pip install -r requirements.txt
- pip install -r requirements-dev.txt
- if [[ $TF_2 == "1" ]]; then pip install tensorflow==2.0.0-beta1; fi
- if [[ $KERAS_BACKEND == "theano" ]]; then pip install theano && conda install mkl mkl-service; fi
- pip install coveralls
script:
- ./test.sh
6 changes: 4 additions & 2 deletions CHANGELOG.md
@@ -2,6 +2,8 @@

## [Unreleased]

## [0.0.0] - 2077-07-07
## [0.0.6] - 2019-10-29

### Added
### Fixed

* Compatible with Keras 2.3.0 and TensorFlow 2.0.0
3 changes: 0 additions & 3 deletions README.md
@@ -7,10 +7,7 @@
![License](https://img.shields.io/pypi/l/keras-gradient-accumulation.svg)

![](https://img.shields.io/badge/keras-tensorflow-blue.svg)
![](https://img.shields.io/badge/keras-theano-blue.svg)
![](https://img.shields.io/badge/keras-tf.keras-blue.svg)
![](https://img.shields.io/badge/keras-tf.keras/eager-blue.svg)
![](https://img.shields.io/badge/keras-tf.keras/2.0_beta-blue.svg)

\[[中文](https://github.com/CyberZHG/keras-gradient-accumulation/blob/master/README.zh-CN.md)|[English](https://github.com/CyberZHG/keras-gradient-accumulation/blob/master/README.md)\]

3 changes: 0 additions & 3 deletions README.zh-CN.md
@@ -7,10 +7,7 @@
![License](https://img.shields.io/pypi/l/keras-gradient-accumulation.svg)

![](https://img.shields.io/badge/keras-tensorflow-blue.svg)
![](https://img.shields.io/badge/keras-theano-blue.svg)
![](https://img.shields.io/badge/keras-tf.keras-blue.svg)
![](https://img.shields.io/badge/keras-tf.keras/eager-blue.svg)
![](https://img.shields.io/badge/keras-tf.keras/2.0_beta-blue.svg)

\[[中文](https://github.com/CyberZHG/keras-gradient-accumulation/blob/master/README.zh-CN.md)|[English](https://github.com/CyberZHG/keras-gradient-accumulation/blob/master/README.md)\]

2 changes: 2 additions & 0 deletions keras_gradient_accumulation/__init__.py
@@ -1,2 +1,4 @@
from .optimizers import *
from .selection import AdamAccumulated

__version__ = '0.0.6'
20 changes: 5 additions & 15 deletions keras_gradient_accumulation/backend.py
@@ -1,25 +1,16 @@
import os
from distutils.util import strtobool

__all__ = [
'keras', 'utils', 'activations', 'applications', 'backend', 'datasets', 'engine',
'layers', 'preprocessing', 'wrappers', 'callbacks', 'constraints', 'initializers',
'metrics', 'models', 'losses', 'optimizers', 'regularizers', 'TF_KERAS', 'EAGER_MODE'
'metrics', 'models', 'losses', 'optimizers', 'regularizers', 'TF_KERAS',
]

TF_KERAS = False
EAGER_MODE = False
TF_KERAS = strtobool(os.environ.get('TF_KERAS', '0'))

if os.environ.get('TF_KERAS', '0') != '0':
import tensorflow as tf
from tensorflow.python import keras
TF_KERAS = True
if os.environ.get('TF_EAGER', '0') != '0':
try:
tf.enable_eager_execution()
raise AttributeError()
except AttributeError as e:
pass
EAGER_MODE = tf.executing_eagerly()
if TF_KERAS:
from tensorflow import keras
else:
import keras

@@ -28,7 +19,6 @@
applications = keras.applications
backend = keras.backend
datasets = keras.datasets
engine = keras.engine
layers = keras.layers
preprocessing = keras.preprocessing
wrappers = keras.wrappers
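For reference, the selection logic that replaces the old eager-mode handling boils down to the minimal sketch below (only two aliases are shown; the real backend.py aliases every submodule listed in `__all__`):

```python
import os
from distutils.util import strtobool

# TF_KERAS=1 (or "true"/"yes") selects tf.keras; unset or "0" falls back to
# the standalone keras package, as in the rewritten backend.py above.
TF_KERAS = strtobool(os.environ.get('TF_KERAS', '0'))

if TF_KERAS:
    from tensorflow import keras
else:
    import keras

backend = keras.backend
optimizers = keras.optimizers
```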
19 changes: 15 additions & 4 deletions keras_gradient_accumulation/optimizer_v1.py
@@ -4,14 +4,23 @@
__all__ = ['AdamAccumulated']


def identity(x):
return x


symbolic = identity
if hasattr(K, 'symbolic'):
symbolic = K.symbolic


class AdamAccumulated(keras.optimizers.Optimizer):
"""Adam optimizer with gradient accumulation.
Default parameters follow those provided in the original paper.
# Arguments
accumulation_steps: int > 0. Update gradient in every accumulation steps.
lr: float >= 0. Learning rate.
learning_rate: float >= 0. Learning rate.
beta_1: float, 0 < beta < 1. Generally close to 1.
beta_2: float, 0 < beta < 1. Generally close to 1.
epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`.
@@ -24,13 +33,14 @@ class AdamAccumulated(keras.optimizers.Optimizer):
- [On the Convergence of Adam and Beyond](https://openreview.net/forum?id=ryQu7f-RZ)
"""

def __init__(self, accumulation_steps, lr=0.001, beta_1=0.9, beta_2=0.999,
def __init__(self, accumulation_steps, learning_rate=0.001, beta_1=0.9, beta_2=0.999,
epsilon=None, decay=0., amsgrad=False, **kwargs):
learning_rate = kwargs.pop('lr', learning_rate)
super(AdamAccumulated, self).__init__(**kwargs)
with K.name_scope(self.__class__.__name__):
self.iterations = K.variable(0, dtype='int64', name='iterations')
self.accumulation_steps = K.variable(accumulation_steps, dtype='int64', name='accumulation_steps')
self.lr = K.variable(lr, name='lr')
self.learning_rate = K.variable(learning_rate, name='learning_rate')
self.beta_1 = K.variable(beta_1, name='beta_1')
self.beta_2 = K.variable(beta_2, name='beta_2')
self.decay = K.variable(decay, name='decay')
@@ -40,6 +50,7 @@ def __init__(self, accumulation_steps, lr=0.001, beta_1=0.9, beta_2=0.999,
self.initial_decay = decay
self.amsgrad = amsgrad

@symbolic
def get_updates(self, loss, params):
grads = self.get_gradients(loss, params)
self.updates = [K.update_add(self.iterations, 1)]
@@ -99,7 +110,7 @@ def get_updates(self, loss, params):

def get_config(self):
config = {'accumulation_steps': int(K.get_value(self.accumulation_steps)),
'lr': float(K.get_value(self.lr)),
'learning_rate': float(K.get_value(self.learning_rate)),
'beta_1': float(K.get_value(self.beta_1)),
'beta_2': float(K.get_value(self.beta_2)),
'decay': float(K.get_value(self.decay)),
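For context, here is a hypothetical end-to-end sketch of the renamed optimizer in use; the toy data and model are illustrative only and not part of the repository:

```python
import numpy as np

from keras_gradient_accumulation import AdamAccumulated
from keras_gradient_accumulation.backend import keras

# Toy regression data, purely for illustration.
x = np.random.random((256, 16))
y = np.random.random((256, 1))

model = keras.models.Sequential()
model.add(keras.layers.Dense(1, input_shape=(16,)))

# `learning_rate` is the new argument name; `lr` is still accepted and popped
# from kwargs for backward compatibility (see the __init__ change above).
model.compile(optimizer=AdamAccumulated(accumulation_steps=8, learning_rate=1e-3),
              loss='mse')

# With accumulation_steps=8, eight batches of 32 samples contribute to a single
# weight update, i.e. an effective batch size of 256.
model.fit(x, y, batch_size=32, epochs=1, verbose=0)
```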
47 changes: 27 additions & 20 deletions keras_gradient_accumulation/optimizers.py
@@ -1,9 +1,20 @@
import tensorflow as tf

from .backend import keras, optimizers, TF_KERAS
from .backend import backend as K

__all__ = ['GradientAccumulation']


def identity(x):
return x


symbolic = identity
if hasattr(K, 'symbolic'):
symbolic = K.symbolic


class GradientAccumulation(keras.optimizers.Optimizer):
"""Optimizer wrapper for gradient accumulation.
@@ -13,7 +24,11 @@ class GradientAccumulation(keras.optimizers.Optimizer):
momentum_names: A collection of strings. Names of momentum terms.
"""

def __init__(self, optimizer, accumulation_steps, momentum_names=None, **kwargs):
def __init__(self,
optimizer,
accumulation_steps,
momentum_names=None,
**kwargs):
super(GradientAccumulation, self).__init__(**kwargs)
self.optimizer = optimizers.get(optimizer)
with K.name_scope(self.__class__.__name__):
@@ -22,33 +37,25 @@ def __init__(self, optimizer, accumulation_steps, momentum_names=None, **kwargs)
if momentum_names is None:
momentum_names = ['momentum', 'rho', 'beta_1', 'beta_2']
self.momentum_names = momentum_names
self._lr = self.optimizer.lr
self._lr = self.optimizer.learning_rate

@property
def lr(self):
return self._lr
def learning_rate(self):
return self.optimizer.learning_rate

@lr.setter
def lr(self, lr):
self._lr = lr
@learning_rate.setter
def learning_rate(self, learning_rate):
self.optimizer.learning_rate = learning_rate

@symbolic
def get_updates(self, loss, params):
# Create accumulated gradients
grads = self.get_gradients(loss, params)
if K.backend() == 'tensorflow':
from tensorflow.python.framework import ops
from tensorflow.python import state_ops
self.updates = []
with ops.control_dependencies([state_ops.assign_add(self.iterations, 1)]):
update_cond = K.equal(self.iterations % self.accumulation_steps, 0)
sub_step = (self.iterations - 1) % self.accumulation_steps + 1
fake_iterations = (self.iterations - 1) // self.accumulation_steps + 1
else:
self.updates = [K.update_add(self.iterations, 1)]
self.updates = []
with tf.control_dependencies([self.iterations.assign_add(1)]):
update_cond = K.equal(self.iterations % self.accumulation_steps, 0)
sub_step = (self.iterations - 1) % self.accumulation_steps + 1
fake_iterations = (self.iterations - 1) // self.accumulation_steps + 1
fake_iterations = K.maximum(fake_iterations, 1)
acc_grads = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
for grad, acc_grad in zip(grads, acc_grads):
ave_grad = grad / K.cast(self.accumulation_steps, K.floatx())
@@ -85,7 +92,7 @@ def get_updates(self, loss, params):
self.optimizer.iterations = fake_iterations

# Use fake learning rate
self.optimizer.lr = K.switch(update_cond, self.lr, 0.0)
self.optimizer.learning_rate = K.switch(update_cond, self.lr, 0.0)

# Freeze momentum
momentum = {}
Expand All @@ -101,7 +108,7 @@ def get_updates(self, loss, params):
# Restore variables
for name, value in momentum.items():
setattr(self.optimizer, name, value)
self.optimizer.lr = self._lr
self.optimizer.learning_rate = self._lr
self.optimizer.iterations = original_iterations
if TF_KERAS:
from tensorflow.python import state_ops
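The counter bookkeeping in the rewritten `get_updates` is easier to follow with concrete numbers. The plain-Python sketch below (assuming `accumulation_steps = 4`; not a real optimizer run) evaluates the same expressions: updates fire only when `update_cond` holds, and the wrapped optimizer is handed `fake_iterations` in place of the real counter:

```python
# Walk-through of the counters used in get_updates() above, assuming
# accumulation_steps = 4. `iterations` is the value after assign_add(1).
accumulation_steps = 4

print('iter  update?  sub_step  fake_iter')
for iterations in range(1, 13):
    update_cond = iterations % accumulation_steps == 0
    sub_step = (iterations - 1) % accumulation_steps + 1
    fake_iterations = (iterations - 1) // accumulation_steps + 1
    print(f'{iterations:4d}  {str(update_cond):7s}  {sub_step:8d}  {fake_iterations:9d}')

# Updates fire at iterations 4, 8, 12, ...; in between, the wrapped learning
# rate is switched to 0, so the inner optimizer's update is a no-op while the
# averaged gradients keep accumulating.
```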
31 changes: 23 additions & 8 deletions setup.py
@@ -1,28 +1,43 @@
import os
import re
import codecs
from setuptools import setup, find_packages

with codecs.open('README.md', 'r', 'utf8') as reader:
long_description = reader.read()
current_path = os.path.abspath(os.path.dirname(__file__))


with codecs.open('requirements.txt', 'r', 'utf8') as reader:
install_requires = list(map(lambda x: x.strip(), reader.readlines()))
def read_file(*parts):
with codecs.open(os.path.join(current_path, *parts), 'r', 'utf8') as reader:
return reader.read()


def get_requirements(*parts):
with codecs.open(os.path.join(current_path, *parts), 'r', 'utf8') as reader:
return list(map(lambda x: x.strip(), reader.readlines()))


def find_version(*file_paths):
version_file = read_file(*file_paths)
version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", version_file, re.M)
if version_match:
return version_match.group(1)
raise RuntimeError('Unable to find version string.')


setup(
name='keras-gradient-accumulation',
version='0.0.5',
version=find_version('keras_gradient_accumulation', '__init__.py'),
packages=find_packages(),
url='https://github.com/CyberZHG/keras-gradient-accumulation',
license='MIT',
author='CyberZHG',
author_email='CyberZHG@users.noreply.github.com',
description='Gradient accumulation for Keras',
long_description=long_description,
long_description=read_file('README.md'),
long_description_content_type='text/markdown',
install_requires=install_requires,
install_requires=get_requirements('requirements.txt'),
classifiers=(
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
),
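The new `find_version` helper keeps the version string in one place (`__init__.py`) instead of hard-coding it in `setup.py`. A stand-alone sketch of the same regex, run against an assumed file body rather than the real file:

```python
import re

# Assumed contents standing in for keras_gradient_accumulation/__init__.py.
sample = (
    "from .optimizers import *\n"
    "from .selection import AdamAccumulated\n"
    "\n"
    "__version__ = '0.0.6'\n"
)

match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", sample, re.M)
if match is None:
    raise RuntimeError('Unable to find version string.')
print(match.group(1))  # -> 0.0.6
```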
37 changes: 12 additions & 25 deletions tests/test_optimizers.py
@@ -3,7 +3,6 @@
from unittest import TestCase

import numpy as np
from tensorflow.python.keras import optimizers

from keras_gradient_accumulation.backend import keras, TF_KERAS
from keras_gradient_accumulation.backend import backend as K
@@ -67,40 +66,28 @@ def _test_accumulation(self, optimizer, acc_optimizer=None, **kwargs):
self.assertTrue(np.allclose(actual, expected, atol=0.1), (actual, expected))

def test_update_lr(self):
optimizer = GradientAccumulation(keras.optimizers.SGD(), 128)
optimizer.lr = K.variable(K.get_value(optimizer.lr) * 0.5)
if TF_KERAS:
return
optimizer = GradientAccumulation('sgd', 128)
optimizer.learning_rate = K.get_value(optimizer.learning_rate) * 0.5

def test_sgd(self):
if TF_KERAS:
optimizer = optimizers.SGD()
else:
optimizer = 'sgd'
self._test_accumulation(optimizer)
return
self._test_accumulation('sgd')

def test_rmsprop(self):
if TF_KERAS:
optimizer = optimizers.RMSprop()
else:
optimizer = 'rmsprop'
self._test_accumulation(optimizer)
return
self._test_accumulation('rmsprop')

def test_adam(self):
if TF_KERAS:
optimizer = optimizers.Adam()
else:
optimizer = 'adam'
self._test_accumulation(optimizer)
return
self._test_accumulation('adam')

def test_adam_acc(self):
if TF_KERAS:
optimizer = optimizers.Adam()
else:
optimizer = 'adam'
self._test_accumulation(optimizer, AdamAccumulated(128, decay=1e-3), amsgrad=False, decay=1e-3)
self._test_accumulation('adam', AdamAccumulated(128, decay=1e-3), amsgrad=False, decay=1e-3)

def test_adam_acc_amsgrad(self):
if TF_KERAS:
optimizer = optimizers.Adam()
else:
optimizer = 'adam'
self._test_accumulation(optimizer, AdamAccumulated(128, amsgrad=True, decay=1e-4), amsgrad=True, decay=1e-4)
self._test_accumulation('adam', AdamAccumulated(128, amsgrad=True, decay=1e-4), amsgrad=True, decay=1e-4)
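The simplified tests rely on the new `learning_rate` property, which proxies the wrapped optimizer's rate. A small sketch of that round trip, assuming standalone Keras (the updated test skips this path when `TF_KERAS` is set):

```python
from keras_gradient_accumulation import GradientAccumulation
from keras_gradient_accumulation.backend import backend as K

# Wrap a plain SGD optimizer; string identifiers are resolved via optimizers.get().
optimizer = GradientAccumulation('sgd', accumulation_steps=128)

# `learning_rate` reads from and writes to the wrapped SGD instance, so this
# halves the underlying learning rate (mirroring test_update_lr above).
optimizer.learning_rate = K.get_value(optimizer.learning_rate) * 0.5
```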
