In [1]:
import numpy as np
from keras_efficientmixnets import EfficientNetB2

Using TensorFlow backend.


In [2]:
eff = EfficientNetB2(mixed=True, activation='swish', typeBN="an", n_mixture=6, weights=None, pooling='avg', classes=2)

Instructions for updating:
Shapes are always computed; don't use the compute_shapes as it has no effect.


In [3]:
a = eff.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 260, 260, 3)  0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 130, 130, 32) 864         input_1[0][0]                    
__________________________________________________________________________________________________
attentive_normalization_1 (Atte (None, 130, 130, 32) 646         conv2d_1[0][0]                   
__________________________________________________________________________________________________
swish_1 (Swish)                 (None, 130, 130, 32) 0           attentive_normalization_1[0][0]  
__________________________________________________________________________________________________
depthwise_

In [4]:
x = np.array([np.ones((260, 260, 3)) for _ in range(5)] + [np.zeros((260, 260, 3)) for _ in range(5)])

In [5]:
y = np.array([[1., 0.] for _ in range(5)] + [[0., 1.] for _ in range(5)])

In [6]:
eff.compile(loss='binary_crossentropy', optimizer="adam")

In [7]:
eff.fit(x, y, epochs=10)

Epoch 1/10


ResourceExhaustedError: OOM when allocating tensor with shape[10,130,130,96] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node training/Adam/gradients/zeros_125-0-1-TransposeNCHWToNHWC-LayoutOptimizer}} = Transpose[T=DT_FLOAT, Tperm=DT_INT32, _device="/job:localhost/replica:0/task:0/device:GPU:0"](training/Adam/gradients/zeros_125, PermConstNCHWToNHWC-LayoutOptimizer)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

	 [[{{node loss/mul/_5251}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_42883_loss/mul", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.


In [None]:
y_pred = eff.predict(x)

In [None]:
np.sum(np.argmax(y_pred, axis=1) == np.argmax(y, axis=1)) / len(y_pred)

In [None]:
import keras.backend as K
from keras import initializers, layers


class ANInitializer(initializers.Initializer):
    """Initialization for gamma and beta weights according to BigGan paper 
    (A. Brock, J. Donahue, and K. Simonyan. Large scale gan
    training for high fidelity natural image synthesis. arXiv
    preprint arXiv:1809.11096, 2018.)
    
        This initialization is equal to :  scale * N(0, 1) + bias
         
        # Arguments:
          scale: rescaling factor
          bias: bias factor
          shape: shape of variable
          dtype: dtype of variable
          seed: random seed for reprocudibility
        # Returns:
          an initialization for the variable
          
    """
    def __init__(self, scale=0.1, bias=0., seed=1997):
        super(ANInitializer, self).__init__()
        self.scale = scale
        self.bias = bias
        self.seed = seed

    def __call__(self, shape, dtype=None):
        dtype = dtype or K.floatx()
        return self.scale * K.random_normal(shape=shape, mean=0.0, stddev=1., seed=self.seed) + self.bias




class AttentiveNormalization(layers.BatchNormalization):
    
    def __init__(self, n_mixture=5, momentum=0.99, epsilon=0.1, axis=-1, **kwargs):
        super(AttentiveNormalization, self).__init__(momentum=momentum, epsilon=epsilon, axis=axis, center=False, scale=False, **kwargs)

        if self.axis == -1:
            self.data_format = 'channels_last'
        else:
            self.data_format = 'channel_first'
            
        self.n_mixture = n_mixture
        
    def build(self, input_shape):
        if len(input_shape) != 4:
            raise ValueError('expected 4D input (got {}D input)'.format(input_shape))
            
        super(AttentiveNormalization, self).build(input_shape)
        
        dim = input_shape[self.axis]
        shape = (self.n_mixture, dim) # K x C 
        
        self.FC = layers.Dense(self.n_mixture, activation="sigmoid")
        self.FC.build(input_shape) # (N, C)
        
        self.GlobalAvgPooling = layers.GlobalAveragePooling2D(self.data_format)
        self.GlobalAvgPooling.build(input_shape)
        
        self._trainable_weights = self.FC.trainable_weights
        
        self.learnable_weights = self.add_weight(name='gamma2', 
                                      shape=shape,
                                      initializer=ANInitializer(scale=0.1, bias=1.),
                                      trainable=True)

        self.learnable_bias = self.add_weight(name='bias2', 
                                    shape=shape,
                                    initializer=ANInitializer(scale=0.1, bias=0.),
                                    trainable=True)
        

    def call(self, inputs):
        # input is a batch of shape : (N, H, W, C)
        avg = self.GlobalAvgPooling(inputs) # N x C 
        attention = self.FC(avg) # N x K 
        gamma_readjust = K.dot(attention, self.learnable_weights) # N x C
        beta_readjust  = K.dot(attention, self.learnable_bias)  # N x C
        
        out_BN = super(AttentiveNormalization, self).call(inputs) # rescale input, N x H x W x C

        # broadcast if needed
        if K.int_shape(inputs)[0] is None or K.int_shape(inputs)[0] > 1:
            gamma_readjust = gamma_readjust[:, None, None, :]
            beta_readjust  = beta_readjust[:, None, None, :]

        return gamma_readjust * out_BN + beta_readjust

    def compute_output_shape(self, input_shape):
        return input_shape

    def get_config(self):
        config = {
            'n_mixture' : self.n_mixture
        }
        base_config = super(AttentiveNormalization, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))




class BatchAttNorm(layers.BatchNormalization):
    def __init__(self, momentum=0.99, epsilon=0.001, axis=-1, **kwargs):
        super(BatchAttNorm, self).__init__(momentum=momentum, epsilon=epsilon, axis=axis, center=False, scale=False, **kwargs)
        
        if self.axis == -1:
            self.data_format = 'channels_last'
        else:
            self.data_format = 'channel_first'
        
    def build(self, input_shape):
        if len(input_shape) != 4:
            raise ValueError('expected 4D input (got {}D input)'.format(len(input_shape)))
                
        super(BatchAttNorm, self).build(input_shape)   
        
        dim = input_shape[self.axis]
        shape = (dim, )
        
        self.GlobalAvgPooling = layers.GlobalAveragePooling2D(self.data_format)
        self.GlobalAvgPooling.build(input_shape)
    
        self.weight = self.add_weight(name='weight', 
                                      shape=shape,
                                      initializer=initializers.Constant(1),
                                      trainable=True)

        self.bias = self.add_weight(name='bias', 
                                    shape=shape,
                                    initializer=initializers.Constant(0),
                                    trainable=True)

        self.weight_readjust = self.add_weight(name='weight_readjust', 
                                               shape=shape,
                                               initializer=initializers.Constant(0),
                                               trainable=True)
        
        self.bias_readjust = self.add_weight(name='bias_readjust', 
                                             shape=shape,
                                             initializer=initializers.Constant(-1),
                                             trainable=True)
        

    def call(self, inputs):
        avg = self.GlobalAvgPooling(inputs) 
        attention = K.sigmoid(avg * self.weight_readjust + self.bias_readjust)

        bn_weights = self.weight * attention          
        
        out_bn = super(BatchAttNorm, self).call(inputs)
        
        if K.int_shape(inputs)[0] is None or K.int_shape(inputs)[0] > 1:
            bn_weights = bn_weights[:, None, None, :]
            self.bias  = self.bias[None, None, None, :]
 
        return out_bn * bn_weights + self.bias

    def compute_output_shape(self, input_shape):
        return input_shape


In [None]:
X = K.variable(np.random.rand(1, 299, 299, 5))

In [None]:
a = AttentiveNormalization()

In [None]:
K.eval(a(X))

In [None]:
b = BatchAttNorm()

In [None]:
K.eval(b(X))

In [None]:
import os

In [None]:
p = Path("outputs", "logs_", "logs_2019-10-08 14:40:34", "model_instantiation.log")

In [None]:
open(p)

In [None]:
os.path.join("outputs", "logs_", "logs_2019-10-08 14:40:34", "model_instantiation.log")

In [None]:
open('outputs/logs_/logs_2019-10-08 14:40:34/model_instantiation.log')

In [None]:
import datetime

In [None]:
str(datetime.datetime.now()).split(".")[0][:-3].replace(":", "h")