In [2]:
#======================================================================
#======================================================================
#                  MNIST
#======================================================================
#======================================================================

from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K

num_classes = 10

# Height and width of one MNIST digit image.
img_rows, img_cols = 28, 28

# Load MNIST, already divided into a training and a test split.
(x_train_mnist, y_train_mnist), (x_test_mnist, y_test_mnist) = mnist.load_data()

# Pick the per-sample shape first: the single channel axis goes in front of or
# behind the spatial axes, depending on the backend's image data format.
if K.image_data_format() == 'channels_first':
    input_shape_mnist = (1, img_rows, img_cols)
else:
    input_shape_mnist = (img_rows, img_cols, 1)

# Reshape both splits to (samples,) + per-sample shape.
x_train_mnist = x_train_mnist.reshape((x_train_mnist.shape[0],) + input_shape_mnist)
x_test_mnist = x_test_mnist.reshape((x_test_mnist.shape[0],) + input_shape_mnist)

# Cast to float32 and divide by 255.
x_train_mnist = x_train_mnist.astype('float32') / 255
x_test_mnist = x_test_mnist.astype('float32') / 255

print('x_train shape:', x_train_mnist.shape)
print(x_train_mnist.shape[0], 'train samples')
print(x_test_mnist.shape[0], 'test samples')

# Turn the integer class labels into binary class matrices with num_classes columns.
y_train_mnist = keras.utils.to_categorical(y_train_mnist, num_classes)
y_test_mnist = keras.utils.to_categorical(y_test_mnist, num_classes)

Using TensorFlow backend.
DEBUG:matplotlib:$HOME=/home/jan
DEBUG:matplotlib:matplotlib data path /usr/lib/python2.7/site-packages/matplotlib/mpl-data
DEBUG:matplotlib:loaded rc file /usr/lib/python2.7/site-packages/matplotlib/mpl-data/matplotlibrc
DEBUG:matplotlib:matplotlib version 2.2.3
DEBUG:matplotlib:interactive is False
DEBUG:matplotlib:platform is linux2


x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


In [3]:
#======================================================================
#======================================================================
#                  CIFAR 10
#======================================================================
#======================================================================

import tensorflow
import numpy as np
from keras import datasets

def load_cifar10_data():
    """Load CIFAR-10 and scale the images of both splits.

    Returns:
        A pair ``((x_train, y_train), (x_test, y_test))``. The image arrays
        are cast to float32 and divided by 255.0; the label arrays are
        returned exactly as delivered by ``datasets.cifar10.load_data()``.
    """
    train_split, test_split = datasets.cifar10.load_data()

    scaled_splits = []
    for images, labels in (train_split, test_split):
        scaled_splits.append((images.astype('float32') / 255.0, labels))

    return scaled_splits[0], scaled_splits[1]

# Per-sample shape of a CIFAR-10 image as declared for this notebook.
input_shape_cifar = (32, 32, 3)

# Each split is an (images, labels) pair, as returned by load_cifar10_data().
train_cifar, test_cifar = load_cifar10_data()

# Images of both splits.
x_train_cifar, x_test_cifar = train_cifar[0], test_cifar[0]

# Labels of both splits, converted to binary class matrices.
y_train_cifar = keras.utils.to_categorical(train_cifar[1], num_classes)
y_test_cifar = keras.utils.to_categorical(test_cifar[1], num_classes)

In [4]:
from keras import backend as K
from keras.utils.generic_utils import serialize_keras_object
from keras.utils.generic_utils import deserialize_keras_object
from keras.legacy import interfaces
from keras.optimizers import Optimizer

def buildMnist():
    """Build and compile the convolutional classifier used for MNIST.

    The network is three 3x3 'same'-padded ReLU convolutions (32, 64 and 128
    filters) with 2x2 max pooling after the second and third, followed by a
    128-unit dense layer and a softmax layer with ``num_classes`` outputs.
    The input shape is taken from the module-level ``input_shape_mnist``.

    Returns:
        The Sequential model, compiled with categorical cross-entropy, a
        default-constructed SGD optimizer and the accuracy metric.
    """
    model = Sequential([
        Conv2D(32, kernel_size=(3, 3), activation='relu',
               input_shape=input_shape_mnist, padding='same'),
        Conv2D(64, (3, 3), activation='relu', padding='same'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        Conv2D(128, (3, 3), activation='relu', padding='same'),
        MaxPooling2D(pool_size=(2, 2)),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax'),
    ])

    model.compile(
        loss=keras.losses.categorical_crossentropy,
        optimizer=keras.optimizers.SGD(),
        metrics=['accuracy'],
    )
    return model

In [5]:
from keras import backend as K
from keras.utils.generic_utils import serialize_keras_object
from keras.utils.generic_utils import deserialize_keras_object
from keras.legacy import interfaces
from keras.optimizers import Optimizer
from keras.layers import Dense, Dropout, Activation, Flatten

def buildCifar():
    """Build and compile the convolutional classifier used for CIFAR-10.

    Two convolutional stages (32 then 64 filters, two 3x3 convolutions each,
    2x2 max pooling, dropout 0.25) feed a 512-unit dense layer with dropout
    0.5 and a final ``num_classes``-unit dense layer. Every activation is a
    separate ``Activation`` layer, so the last layer of the model is the
    softmax activation. The input shape is read from the module-level
    ``x_train_cifar``.

    Returns:
        The Sequential model, compiled with categorical cross-entropy, a
        default-constructed SGD optimizer and the accuracy metric.
    """
    layer_stack = [
        # Stage 1: 32 filters.
        Conv2D(32, (3, 3), padding='same', input_shape=x_train_cifar.shape[1:]),
        Activation('relu'),
        Conv2D(32, (3, 3)),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        # Stage 2: 64 filters.
        Conv2D(64, (3, 3), padding='same'),
        Activation('relu'),
        Conv2D(64, (3, 3)),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        # Classifier head.
        Flatten(),
        Dense(512),
        Activation('relu'),
        Dropout(0.5),
        Dense(num_classes),
        Activation('softmax'),
    ]

    model = Sequential()
    for stacked_layer in layer_stack:
        model.add(stacked_layer)

    model.compile(
        loss=keras.losses.categorical_crossentropy,
        optimizer=keras.optimizers.SGD(),
        metrics=['accuracy'],
    )
    return model

In [6]:
import logging
import numpy as np
logging.basicConfig(level=logging.DEBUG)

class OSGDTrainer():

    def __init__(self, model):
        self.model = model
        self.logger = logging.getLogger("OSGDTrainer")
    
        layer=-1
        grads = model.optimizer.get_gradients(model.total_loss, model.layers[layer].output)
        symb_inputs = (model._feed_inputs + model._feed_targets + model._feed_sample_weights)
        self.f_grads = K.function(symb_inputs, grads)
    
    def _get_grads(self, xs, ys):
        x, y, sample_weight = self.model._standardize_user_data(xs, ys)
        output_grad = self.f_grads(x + y + sample_weight)
        return output_grad

    def _get_distribution(self, xs, ys):
        n = xs.shape[0]
        k = 1000
        p = np.zeros(xs.shape[0])
        for i in range((n-1) // k + 1):
            fst = i*k
            snd = min(n, (i+1) * k)
            grads = self._get_grads(xs[fst:snd], ys[fst:snd])[0]
            v = np.linalg.norm(grads, axis=1)
            p[fst:snd] = v    
        return p / p.sum()

    def _test_on_validation(self, validation_data):
        valid_x, valid_y = validation_data
        loss, acc = self.model.test_on_batch(valid_x, valid_y)
        self.logger.info("========== Validation loss: " + str(loss) + " acc: " + str(acc))
        return [(loss, acc)]
      
    def train(self, train_x, train_y, batch_size=16, epochs=1, validation_data=None, update_distributions=False, distrib_update_every=1):
        res = []
        N = train_x.shape[0]
        steps_per_epoch = N // batch_size
        indicies = []
        for e_n in range(1, epochs+1):
          self.logger.info("==== Epoch " + str(e_n) + " started")
          for b_n in range(1, steps_per_epoch+1):
            if update_distributions and (b_n == 1 or b_n % distrib_update_every == 0):
              p = self._get_distribution(train_x, train_y)
              indicies = np.random.choice(N, batch_size, p=p)
            else:
              indicies = np.random.choice(N, batch_size)
            batch_x = np.take(train_x, indicies, axis=0)
            batch_y = np.take(train_y, indicies, axis=0)
            loss, acc = self.model.train_on_batch(batch_x, batch_y)
            #res += [(loss, acc)]
            if b_n % 10 == 0:
              self.logger.info(str(b_n) + "/" + str(steps_per_epoch) + " loss: " + str(loss) + " acc: " + str(acc))
              if validation_data is not None:
                res += self._test_on_validation(validation_data)
              
            
          self.logger.info(str(b_n) + "/" + str(steps_per_epoch) + " loss: " + str(loss) + " acc: " + str(acc))
         
          if validation_data is not None:
            self._test_on_validation(validation_data)
            
        return res

In [22]:
# MNIST baseline: update_distributions is not passed, so the trainer keeps its
# default of uniform batch sampling.
rais = OSGDTrainer(buildMnist())
res_sgd = rais.train(
    train_x=x_train_mnist,
    train_y=y_train_mnist,
    batch_size=128,
    epochs=1,
    validation_data=(x_test_mnist, y_test_mnist),
)

INFO:RAISTrainer:==== Epoch 1 started
INFO:RAISTrainer:10/468 loss: 2.2901115 acc: 0.15625
INFO:RAISTrainer:20/468 loss: 2.2738469 acc: 0.1953125
INFO:RAISTrainer:30/468 loss: 2.263488 acc: 0.21875
INFO:RAISTrainer:40/468 loss: 2.24787 acc: 0.1953125
INFO:RAISTrainer:50/468 loss: 2.2312279 acc: 0.265625
INFO:RAISTrainer:60/468 loss: 2.228036 acc: 0.265625
INFO:RAISTrainer:70/468 loss: 2.1669893 acc: 0.359375
INFO:RAISTrainer:80/468 loss: 2.1240468 acc: 0.3984375
INFO:RAISTrainer:90/468 loss: 2.1076782 acc: 0.3359375
INFO:RAISTrainer:100/468 loss: 2.0366845 acc: 0.46875
INFO:RAISTrainer:110/468 loss: 1.9870081 acc: 0.3984375
INFO:RAISTrainer:120/468 loss: 1.8370723 acc: 0.5
INFO:RAISTrainer:130/468 loss: 1.6971197 acc: 0.5234375
INFO:RAISTrainer:140/468 loss: 1.5350161 acc: 0.515625
INFO:RAISTrainer:150/468 loss: 1.3071474 acc: 0.6484375
INFO:RAISTrainer:160/468 loss: 1.2123736 acc: 0.671875
INFO:RAISTrainer:170/468 loss: 1.1926723 acc: 0.625
INFO:RAISTrainer:180/468 loss: 1.2217171 acc

In [23]:
# MNIST with update_distributions=True: batches are drawn from the
# gradient-norm distribution computed by the trainer.
rais = OSGDTrainer(buildMnist())
res_rais = rais.train(
    train_x=x_train_mnist,
    train_y=y_train_mnist,
    batch_size=128,
    epochs=1,
    update_distributions=True,
    validation_data=(x_test_mnist, y_test_mnist),
)

INFO:RAISTrainer:==== Epoch 1 started
INFO:RAISTrainer:10/468 loss: 2.2874026 acc: 0.1171875
INFO:RAISTrainer:20/468 loss: 2.2956975 acc: 0.1171875
INFO:RAISTrainer:30/468 loss: 2.2703202 acc: 0.1640625
INFO:RAISTrainer:40/468 loss: 2.2496877 acc: 0.1875
INFO:RAISTrainer:50/468 loss: 2.2480154 acc: 0.1875
INFO:RAISTrainer:60/468 loss: 2.2364697 acc: 0.2421875
INFO:RAISTrainer:70/468 loss: 2.2271852 acc: 0.2578125
INFO:RAISTrainer:80/468 loss: 2.2153335 acc: 0.2265625
INFO:RAISTrainer:90/468 loss: 2.1463177 acc: 0.3828125
INFO:RAISTrainer:100/468 loss: 2.1141958 acc: 0.34375
INFO:RAISTrainer:110/468 loss: 2.081684 acc: 0.359375
INFO:RAISTrainer:120/468 loss: 2.0190217 acc: 0.453125
INFO:RAISTrainer:130/468 loss: 1.9460832 acc: 0.453125
INFO:RAISTrainer:140/468 loss: 1.8108435 acc: 0.5078125
INFO:RAISTrainer:150/468 loss: 1.747921 acc: 0.4453125
INFO:RAISTrainer:160/468 loss: 1.7660316 acc: 0.3671875
INFO:RAISTrainer:170/468 loss: 1.4804865 acc: 0.578125
INFO:RAISTrainer:180/468 loss: 1.

In [8]:
# CIFAR-10 baseline: uniform batch sampling for two epochs.
# NOTE(review): this rebinds `res_sgd`, the name the MNIST baseline cell also
# assigns -- confirm the MNIST results are no longer needed at this point.
rais = OSGDTrainer(buildCifar())
res_sgd = rais.train(
    train_x=x_train_cifar,
    train_y=y_train_cifar,
    batch_size=256,
    epochs=2,
    validation_data=(x_test_cifar, y_test_cifar),
)

INFO:OSGDTrainer:==== Epoch 1 started
INFO:OSGDTrainer:10/195 loss: 2.3061678 acc: 0.09765625
INFO:OSGDTrainer:20/195 loss: 2.3072603 acc: 0.1015625
INFO:OSGDTrainer:30/195 loss: 2.3015347 acc: 0.109375
INFO:OSGDTrainer:40/195 loss: 2.3090572 acc: 0.09375
INFO:OSGDTrainer:50/195 loss: 2.3055403 acc: 0.1171875
INFO:OSGDTrainer:60/195 loss: 2.293264 acc: 0.12109375
INFO:OSGDTrainer:70/195 loss: 2.2954168 acc: 0.10546875
INFO:OSGDTrainer:80/195 loss: 2.302365 acc: 0.1015625
INFO:OSGDTrainer:90/195 loss: 2.2917593 acc: 0.12109375
INFO:OSGDTrainer:100/195 loss: 2.2959588 acc: 0.08984375
INFO:OSGDTrainer:110/195 loss: 2.2962341 acc: 0.1171875
INFO:OSGDTrainer:120/195 loss: 2.2979002 acc: 0.10546875
INFO:OSGDTrainer:130/195 loss: 2.277919 acc: 0.15625
INFO:OSGDTrainer:140/195 loss: 2.2843597 acc: 0.12890625
INFO:OSGDTrainer:150/195 loss: 2.2853818 acc: 0.1171875
INFO:OSGDTrainer:160/195 loss: 2.280778 acc: 0.1484375
INFO:OSGDTrainer:170/195 loss: 2.2971249 acc: 0.11328125
INFO:OSGDTrainer:180

In [6]:
# CIFAR-10 with update_distributions=True for two epochs.
# NOTE(review): the result is bound to `res_sgd` although importance sampling
# is switched on, which rebinds the name used by the CIFAR-10 baseline cell --
# looks like a copy-paste leftover; confirm before comparing the two runs.
rais = OSGDTrainer(buildCifar())
res_sgd = rais.train(
    train_x=x_train_cifar,
    train_y=y_train_cifar,
    batch_size=256,
    epochs=2,
    update_distributions=True,
    validation_data=(x_test_cifar, y_test_cifar),
)

INFO:RAISTrainer:==== Epoch 1 started
INFO:RAISTrainer:10/195 loss: 2.2937965 acc: 0.12109375
INFO:RAISTrainer:20/195 loss: 2.3033197 acc: 0.109375
INFO:RAISTrainer:30/195 loss: 2.3042238 acc: 0.1015625
INFO:RAISTrainer:40/195 loss: 2.2989793 acc: 0.11328125
INFO:RAISTrainer:50/195 loss: 2.2996209 acc: 0.09375
INFO:RAISTrainer:60/195 loss: 2.313193 acc: 0.12890625
INFO:RAISTrainer:70/195 loss: 2.2980049 acc: 0.12890625
INFO:RAISTrainer:80/195 loss: 2.3114603 acc: 0.08984375
INFO:RAISTrainer:90/195 loss: 2.3006597 acc: 0.12109375
INFO:RAISTrainer:100/195 loss: 2.295659 acc: 0.1171875
INFO:RAISTrainer:110/195 loss: 2.305141 acc: 0.09765625
INFO:RAISTrainer:120/195 loss: 2.295997 acc: 0.1015625
INFO:RAISTrainer:130/195 loss: 2.3058844 acc: 0.109375
INFO:RAISTrainer:140/195 loss: 2.2986233 acc: 0.11328125
INFO:RAISTrainer:150/195 loss: 2.2978609 acc: 0.1171875
INFO:RAISTrainer:160/195 loss: 2.295796 acc: 0.1171875
INFO:RAISTrainer:170/195 loss: 2.2946677 acc: 0.12109375
INFO:RAISTrainer:18

In [10]:
# CIFAR-10 with update_distributions=True and distrib_update_every=10, for two
# epochs.
# NOTE(review): the result is bound to `res_sgd` although importance sampling
# is switched on, which rebinds the name used by the CIFAR-10 baseline cell --
# looks like a copy-paste leftover; confirm before comparing the runs.
rais = OSGDTrainer(buildCifar())
res_sgd = rais.train(
    train_x=x_train_cifar,
    train_y=y_train_cifar,
    batch_size=256,
    epochs=2,
    update_distributions=True,
    distrib_update_every=10,
    validation_data=(x_test_cifar, y_test_cifar),
)

INFO:OSGDTrainer:==== Epoch 1 started
INFO:OSGDTrainer:10/195 loss: 2.3197238 acc: 0.09375
INFO:OSGDTrainer:20/195 loss: 2.3056889 acc: 0.0703125
INFO:OSGDTrainer:30/195 loss: 2.2938235 acc: 0.1171875
INFO:OSGDTrainer:40/195 loss: 2.3046622 acc: 0.07421875
INFO:OSGDTrainer:50/195 loss: 2.2998488 acc: 0.0703125
INFO:OSGDTrainer:60/195 loss: 2.2998717 acc: 0.09765625
INFO:OSGDTrainer:70/195 loss: 2.2989705 acc: 0.08984375
INFO:OSGDTrainer:80/195 loss: 2.302354 acc: 0.1015625
INFO:OSGDTrainer:90/195 loss: 2.2939446 acc: 0.11328125
INFO:OSGDTrainer:100/195 loss: 2.2895403 acc: 0.1328125
INFO:OSGDTrainer:110/195 loss: 2.2941947 acc: 0.1171875
INFO:OSGDTrainer:120/195 loss: 2.2897534 acc: 0.125
INFO:OSGDTrainer:130/195 loss: 2.2915628 acc: 0.08203125
INFO:OSGDTrainer:140/195 loss: 2.2900586 acc: 0.11328125
INFO:OSGDTrainer:150/195 loss: 2.2934353 acc: 0.0859375
INFO:OSGDTrainer:160/195 loss: 2.2780352 acc: 0.13671875
INFO:OSGDTrainer:170/195 loss: 2.2858396 acc: 0.09765625
INFO:OSGDTrainer:1

In [None]:
import logging
import numpy as np
logging.basicConfig(level=logging.DEBUG)

class RAISTrainer():

    def __init__(self, model):
        self.model = model
        self.logger = logging.getLogger("RAISTrainer")
    
        layer=-1
        grads = model.optimizer.get_gradients(model.total_loss, model.layers[layer].output)
        symb_inputs = (model._feed_inputs + model._feed_targets + model._feed_sample_weights)
        self.f_grads = K.function(symb_inputs, grads)
    
    def _get_grads(self, xs, ys):
        x, y, sample_weight = self.model._standardize_user_data(xs, ys)
        output_grad = self.f_grads(x + y + sample_weight)
        return output_grad

    def _get_distribution(self, xs, ys):
        n = xs.shape[0]
        k = 1000
        p = np.zeros(xs.shape[0])
        for i in range((n-1) // k + 1):
            fst = i*k
            snd = min(n, (i+1) * k)
            grads = self._get_grads(xs[fst:snd], ys[fst:snd])[0]
            v = np.linalg.norm(grads, axis=1)
            p[fst:snd] = v    
        return p / p.sum()

    def _test_on_validation(self, validation_data):
        valid_x, valid_y = validation_data
        loss, acc = self.model.test_on_batch(valid_x, valid_y)
        self.logger.info("========== Validation loss: " + str(loss) + " acc: " + str(acc))
        return [(loss, acc)]
      
    def train(self, train_x, train_y, batch_size=16, epochs=1, validation_data=None, update_distributions=False):
        res = []
        N = train_x.shape[0]
        steps_per_epoch = N // batch_size
        for e_n in range(1, epochs+1):
          self.logger.info("==== Epoch " + str(e_n) + " started")
          for b_n in range(1, steps_per_epoch+1):
            if update_distributions:
              p = self._get_distribution(train_x, train_y)
              indicies = np.random.choice(N, batch_size, p=p)
            else:
              indicies = np.random.choice(N, batch_size)
            batch_x = np.take(train_x, indicies, axis=0)
            batch_y = np.take(train_y, indicies, axis=0)
            loss, acc = self.model.train_on_batch(batch_x, batch_y)
            #res += [(loss, acc)]
            if b_n % 10 == 0:
              self.logger.info(str(b_n) + "/" + str(steps_per_epoch) + " loss: " + str(loss) + " acc: " + str(acc))
              if validation_data is not None:
                res += self._test_on_validation(validation_data)
              
          
          self.logger.info(str(b_n) + "/" + str(steps_per_epoch) + " loss: " + str(loss) + " acc: " + str(acc))
         
          if validation_data is not None:
            self._test_on_validation(validation_data)
            
        return res