In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
tf.keras.backend.clear_session() # For easy reset of notebook state.
from tensorflow import keras

In [2]:
from tensorflow.keras import layers

In [3]:
class Linear(layers.Layer):
    """Affine layer ``inputs @ w + b`` whose variables are created in ``__init__``.

    Args:
        units: Number of output features (second dimension of ``w``, length of ``b``).
        input_dim: Number of input features (first dimension of ``w``).
    """

    def __init__(self, units=32, input_dim=32):
        super(Linear, self).__init__()
        # Kernel values come from the random-normal initializer.
        kernel_init = tf.random_normal_initializer()
        kernel_values = kernel_init(shape=(input_dim, units), dtype='float32')
        self.w = tf.Variable(initial_value=kernel_values, trainable=True)
        # Bias values come from the zeros initializer.
        bias_init = tf.zeros_initializer()
        bias_values = bias_init(shape=(units,), dtype='float32')
        self.b = tf.Variable(initial_value=bias_values, trainable=True)

    def call(self, inputs):
        """Return the matrix product of ``inputs`` and ``w``, plus ``b``."""
        projected = tf.matmul(inputs, self.w)
        return projected + self.b

# Run a batch of two all-ones rows through a Linear layer with 2 inputs and 4 outputs.
x = tf.ones(shape=(2, 2))
linear_layer = Linear(units=4, input_dim=2)
y = linear_layer(x)
print(y)

tf.Tensor(
[[ 0.05653545  0.03444588 -0.01249578 -0.00676338]
 [ 0.05653545  0.03444588 -0.01249578 -0.00676338]], shape=(2, 4), dtype=float32)


In [4]:
# Sanity check: the layer's `weights` list is exactly [w, b], in that order.
assert linear_layer.weights == [linear_layer.w, linear_layer.b]

In [5]:
class Linear(layers.Layer):
    """Affine layer ``inputs @ w + b`` that creates its variables with ``add_weight``.

    Args:
        units: Number of output features (second dimension of ``w``, length of ``b``).
        input_dim: Number of input features (first dimension of ``w``).
    """

    def __init__(self, units=32, input_dim=32):
        super(Linear, self).__init__()
        kernel_shape = (input_dim, units)
        bias_shape = (units,)
        self.w = self.add_weight(
            shape=kernel_shape, initializer='random_normal', trainable=True)
        self.b = self.add_weight(
            shape=bias_shape, initializer='zeros', trainable=True)

    def call(self, inputs):
        """Return the matrix product of ``inputs`` and ``w``, plus ``b``."""
        projected = tf.matmul(inputs, self.w)
        return projected + self.b

# Same check as before, now against the add_weight-based Linear layer.
x = tf.ones(shape=(2, 2))
linear_layer = Linear(units=4, input_dim=2)
y = linear_layer(x)
print(y)

tf.Tensor(
[[-0.04208444 -0.11881118  0.03900589 -0.05673023]
 [-0.04208444 -0.11881118  0.03900589 -0.05673023]], shape=(2, 4), dtype=float32)


In [6]:
class ComputeSum(layers.Layer):
    """Keeps a running sum of its inputs, reduced over axis 0, in a non-trainable variable.

    Args:
        input_dim: Length of the accumulator vector ``total``.
    """

    def __init__(self, input_dim):
        super(ComputeSum, self).__init__()
        start_value = tf.zeros((input_dim,))
        # trainable=False: the accumulator is state, not a learned parameter.
        self.total = tf.Variable(initial_value=start_value, trainable=False)

    def call(self, inputs):
        """Add the axis-0 sum of ``inputs`` to ``total`` and return ``total``."""
        batch_sum = tf.reduce_sum(inputs, axis=0)
        self.total.assign_add(batch_sum)
        return self.total

x = tf.ones(shape=(2, 2))
my_sum = ComputeSum(2)
# Call the layer twice on the same input: the accumulator grows on each call.
for _ in range(2):
    y = my_sum(x)
    print(y.numpy())

[2. 2.]
[4. 4.]


In [7]:
# The accumulator variable is counted among the layer's weights...
print('weights:',len(my_sum.weights))
print('non-trainable weights:', len(my_sum.non_trainable_weights))

# ...but, having been created with trainable=False, it is not a trainable weight.
print('trainable_weights:', my_sum.trainable_weights)

weights: 1
non-trainable weights: 1
trainable_weights: []


In [8]:
class Linear(layers.Layer):
    """Affine layer ``inputs @ w + b`` with weights sized up front via ``input_dim``.

    Args:
        units: Number of output features (second dimension of ``w``, length of ``b``).
        input_dim: Number of input features (first dimension of ``w``).
    """

    def __init__(self, units = 32, input_dim = 32):
        super(Linear, self).__init__()
        self.w = self.add_weight(shape = (input_dim, units),
                                initializer = 'random_normal',
                                trainable = True)
        self.b = self.add_weight(shape = (units, ),
                                 initializer = 'zeros',
                                 trainable = True)

    def call(self, inputs):
        """Return the matrix product of ``inputs`` and ``w``, plus ``b``."""
        # Without this method the weights created above would never be used
        # in the forward pass.
        return tf.matmul(inputs, self.w) + self.b

In [15]:
class Linear(layers.Layer):
    """Affine layer ``inputs @ w + b`` whose weights are created in ``build``.

    Args:
        units: Number of output features.
    """

    def __init__(self, units=32):
        super(Linear, self).__init__()
        self.units = units

    def build(self, input_shape):
        """Create ``w`` and ``b``; the kernel's first dimension is ``input_shape[-1]``."""
        in_features = input_shape[-1]
        self.w = self.add_weight(
            shape=(in_features, self.units),
            initializer='random_normal',
            trainable=True)
        self.b = self.add_weight(
            shape=(self.units,),
            initializer='random_normal',
            trainable=True)

    def call(self, inputs):
        """Return the matrix product of ``inputs`` and ``w``, plus ``b``."""
        projected = tf.matmul(inputs, self.w)
        return projected + self.b

In [16]:
# At instantiation we do not yet know what inputs the layer is going to be called on.
linear_layer = Linear(units=32)

# The layer's weights are created dynamically the first time the layer is called.
y = linear_layer(x)

In [17]:
# Let's assume we are reusing the Linear class
# with a 'build' method that we defined above.

class MLPBlock(layers.Layer):
    """Three stacked ``Linear`` layers (32, 32, 1 units) with ReLU after the first two."""

    def __init__(self):
        super(MLPBlock, self).__init__()
        self.linear_1 = Linear(32)
        self.linear_2 = Linear(32)
        self.linear_3 = Linear(1)

    def call(self, inputs):
        """Apply linear_1 -> ReLU -> linear_2 -> ReLU -> linear_3 to ``inputs``."""
        hidden = tf.nn.relu(self.linear_1(inputs))
        hidden = tf.nn.relu(self.linear_2(hidden))
        return self.linear_3(hidden)
    
mlp = MLPBlock()
# Three nested Linear layers, each holding a `w` and a `b`, give six weights in total.
y = mlp(tf.ones(shape = (3, 64))) # The first call to the 'mlp' will create the weights
print('weights:', len(mlp.weights))
print('trainable weights:', len(mlp.trainable_weights))

weights: 6
trainable weights: 6


In [18]:
# A layer that creates an activity regularization loss
class ActivityRegularizationLayer(layers.Layer):
    """Pass-through layer that records ``rate * sum(inputs)`` as a loss on every call.

    Args:
        rate: Multiplier applied to the sum of the inputs.
    """

    def __init__(self, rate=1e-2):
        super(ActivityRegularizationLayer, self).__init__()
        self.rate = rate

    def call(self, inputs):
        """Register the activity penalty via ``add_loss`` and return ``inputs`` unchanged."""
        penalty = self.rate * tf.reduce_sum(inputs)
        self.add_loss(penalty)
        return inputs

In [19]:
class OuterLayer(layers.Layer):
    """Wraps an ``ActivityRegularizationLayer`` (rate 1e-2) as a nested sub-layer."""

    def __init__(self):
        super(OuterLayer, self).__init__()
        self.activity_reg = ActivityRegularizationLayer(rate=1e-2)

    def call(self, inputs):
        """Forward ``inputs`` through the nested regularization layer."""
        outputs = self.activity_reg(inputs)
        return outputs
    
layer = OuterLayer()
assert len(layer.losses) == 0 # No losses yet since the layer has never been called

# The shape is passed as one tuple: the second positional argument of
# `tf.zeros` is `dtype`, so `tf.zeros(1, 1)` would not create a (1, 1) tensor.
_ = layer(tf.zeros((1, 1)))
assert len(layer.losses) == 1 # We created one loss value

# 'layer.losses' gets reset at the start of each __call__
_ = layer(tf.zeros((1, 1)))
assert len(layer.losses) == 1 # This is the loss created during the call above

In [22]:
class OuterLayer(layers.Layer):
    """Wraps a 32-unit ``Dense`` layer whose kernel carries an L2 regularizer (1e-3)."""

    def __init__(self):
        super(OuterLayer, self).__init__()
        l2_penalty = tf.keras.regularizers.l2(1e-3)
        self.dense = layers.Dense(32, kernel_regularizer=l2_penalty)

    def call(self, inputs):
        """Forward ``inputs`` through the nested dense layer."""
        outputs = self.dense(inputs)
        return outputs
    
layer = OuterLayer()
_ = layer(tf.zeros(shape=(1, 1)))

# The single entry is '1e-3 * sum(layer.dense.kernel ** 2)',
# produced by the 'kernel_regularizer' configured on the dense layer.
print(layer.losses)

[<tf.Tensor: id=323, shape=(), dtype=float32, numpy=0.0022824414>]


In [25]:
# Instantiate an optimizer and a loss function.
optimizer = tf.keras.optimizers.SGD(learning_rate = 1e-3)
loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits = True)

# NOTE(review): `train_dataset` is not defined in the cells above (the recorded
# output of this cell is a NameError); define it before running this loop.
# Iterate over the batches of a dataset.
for x_batch_train, y_batch_train in train_dataset:
    with tf.GradientTape() as tape:
        logits = layer(x_batch_train) # Logits for this minibatch
        # Loss value for this minibatch
        loss_value = loss_fn(y_batch_train, logits)
        # Add extra losses created during this forward pass. They are read from
        # the same `layer` that produced the logits.
        loss_value += sum(layer.losses)

    # Differentiate with respect to, and then update, the weights of that same layer.
    grads = tape.gradient(loss_value, layer.trainable_weights)
    optimizer.apply_gradients(zip(grads, layer.trainable_weights))

NameError: name 'train_dataset' is not defined

In [26]:
class Linear(layers.Layer):
    """Lazily built affine layer that can report its constructor arguments.

    Args:
        units: Number of output features.
    """

    def __init__(self, units=32):
        super(Linear, self).__init__()
        self.units = units

    def build(self, input_shape):
        """Create ``w`` and ``b``; the kernel's first dimension is ``input_shape[-1]``."""
        in_features = input_shape[-1]
        self.w = self.add_weight(
            shape=(in_features, self.units),
            initializer='random_normal',
            trainable=True)
        self.b = self.add_weight(
            shape=(self.units,),
            initializer='random_normal',
            trainable=True)

    def call(self, inputs):
        """Return the matrix product of ``inputs`` and ``w``, plus ``b``."""
        projected = tf.matmul(inputs, self.w)
        return projected + self.b

    def get_config(self):
        """Return a dict holding only the ``units`` constructor argument."""
        return dict(units=self.units)
    
# Round trip: read the layer's config, then build a fresh layer from it.
layer = Linear(units=64)
config = layer.get_config()
print(config)
new_layer = Linear.from_config(config)

{'units': 64}


In [27]:
class Linear(layers.Layer):
    """Lazily built affine layer that forwards extra keyword arguments to the base ``Layer``.

    Args:
        units: Number of output features.
        **kwargs: Passed through unchanged to ``layers.Layer.__init__``.
    """

    def __init__(self, units=32, **kwargs):
        super(Linear, self).__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        """Create ``w`` and ``b``; the kernel's first dimension is ``input_shape[-1]``."""
        in_features = input_shape[-1]
        self.w = self.add_weight(
            shape=(in_features, self.units),
            initializer='random_normal',
            trainable=True)
        self.b = self.add_weight(
            shape=(self.units,),
            initializer='random_normal',
            trainable=True)

    def call(self, inputs):
        """Return the matrix product of ``inputs`` and ``w``, plus ``b``."""
        projected = tf.matmul(inputs, self.w)
        return projected + self.b

    def get_config(self):
        """Return the base layer's config with a ``units`` entry added."""
        config = super(Linear, self).get_config()
        config['units'] = self.units
        return config
    
# The config now also carries the entries contributed by the base class.
layer = Linear(units=64)
config = layer.get_config()
print(config)
new_layer = Linear.from_config(config)

{'name': 'linear_2', 'trainable': True, 'dtype': 'float32', 'units': 64}


In [28]:
def from_config(cls, config):
    """Instantiate ``cls`` by expanding the ``config`` mapping into keyword arguments."""
    instance = cls(**config)
    return instance

In [29]:
class CustomDropout(layers.Layer):
    """Dropout layer that is active only when ``call`` receives a truthy ``training``.

    Args:
        rate: Value forwarded to ``tf.nn.dropout`` as ``rate``.
        **kwargs: Passed through unchanged to ``layers.Layer.__init__``.
    """

    def __init__(self, rate, **kwargs):
        super(CustomDropout, self).__init__(**kwargs)
        self.rate = rate

    def call(self, inputs, training=None):
        """Return ``inputs`` with dropout applied if ``training``, else unchanged."""
        if not training:
            return inputs
        return tf.nn.dropout(inputs, rate=self.rate)

In [30]:
class ResNet(tf.keras.Model):
    """Model made of two ``ResNetBlock``s, global average pooling, and a dense classifier.

    NOTE(review): ``ResNetBlock`` and ``num_classes`` are not defined in the
    visible cells (the recorded output is a NameError for ``ResNetBlock``);
    both must exist before this class can be instantiated.
    """

    def __init__(self):
        super(ResNet, self).__init__()
        self.block_1 = ResNetBlock()
        self.block_2 = ResNetBlock()
        self.global_pool = layers.GlobalAveragePooling2D()
        # `Dense` is reached through the imported `layers` module; the bare
        # name `Dense` is never imported in this notebook.
        self.classifier = layers.Dense(num_classes)

    def call(self, inputs):
        """Apply block_1 -> block_2 -> global pooling -> classifier to ``inputs``."""
        x = self.block_1(inputs)
        x = self.block_2(x)
        x = self.global_pool(x)
        return self.classifier(x)
    
resnet = ResNet()
# Placeholder: `...` (Ellipsis) stands in for a real dataset.
dataset = ...
resnet.fit(dataset, epochs = 10)
# NOTE(review): `filepath` is not defined in the visible cells; supply a path before running.
resnet.save_weights(filepath)

NameError: name 'ResNetBlock' is not defined

In [31]:
class Sampling(layers.Layer):
    """Uses (z_mean, z_log_var) to sample z, the vector encoding a digit."""

    def call(self, inputs):
        """Return ``z_mean + exp(0.5 * z_log_var) * epsilon`` with random-normal ``epsilon``.

        ``inputs`` is unpacked as the pair ``(z_mean, z_log_var)``; ``epsilon``
        takes its shape from the first two dimensions of ``z_mean``.
        """
        z_mean, z_log_var = inputs
        n_rows = tf.shape(z_mean)[0]
        n_cols = tf.shape(z_mean)[1]
        noise = tf.keras.backend.random_normal(shape=(n_rows, n_cols))
        scale = tf.exp(0.5 * z_log_var)
        return z_mean + scale * noise
    
class Encoder(layers.Layer):
    """Maps MNIST digits to a triplet (z_mean, z_log_var, z).

    Args:
        latent_dim: Units of the two dense heads producing ``z_mean`` and ``z_log_var``.
        intermediate_dim: Units of the ReLU projection layer.
        name: Layer name forwarded to the base ``Layer``.
        **kwargs: Passed through unchanged to ``layers.Layer.__init__``.
    """

    def __init__(self, latent_dim=32, intermediate_dim=64, name='encoder', **kwargs):
        super(Encoder, self).__init__(name=name, **kwargs)
        self.dense_proj = layers.Dense(intermediate_dim, activation='relu')
        self.dense_mean = layers.Dense(latent_dim)
        self.dense_log_var = layers.Dense(latent_dim)
        self.sampling = Sampling()

    def call(self, inputs):
        """Project ``inputs``, compute both heads, and sample ``z`` from them."""
        hidden = self.dense_proj(inputs)
        z_mean = self.dense_mean(hidden)
        z_log_var = self.dense_log_var(hidden)
        sampled = self.sampling((z_mean, z_log_var))
        return z_mean, z_log_var, sampled
    
class Decoder(layers.Layer):
    """Converts z, the encoded digit vector, back into a readable digit.

    Args:
        original_dim: Units of the output layer.
        intermediate_dim: Units of the ReLU projection layer.
        name: Layer name forwarded to the base ``Layer``.
        **kwargs: Passed through unchanged to ``layers.Layer.__init__``.
    """

    def __init__(self,
                 original_dim,
                 intermediate_dim = 64,
                 name = 'decoder',
                 **kwargs):
        super(Decoder, self).__init__(name=name, **kwargs)
        self.dense_proj = layers.Dense(intermediate_dim, activation = 'relu')
        self.dense_output = layers.Dense(original_dim, activation = 'sigmoid')

    def call(self, inputs):
        """Project ``inputs`` and map the result to ``original_dim`` outputs."""
        x = self.dense_proj(inputs)
        return self.dense_output(x)


class VariationalAutoEncoder(tf.keras.Model):
    """Combines the encoder and decoder into an end-to-end model.

    Args:
        original_dim: Size handed to the decoder's output layer.
        intermediate_dim: Hidden size shared by the encoder and decoder.
        latent_dim: Size of the encoder's latent outputs.
        name: Model name forwarded to ``tf.keras.Model``.
        **kwargs: Passed through unchanged to ``tf.keras.Model.__init__``.
    """

    def __init__(self,
                 original_dim,
                 intermediate_dim = 64,
                 latent_dim = 32,
                 name = 'autoencoder',
                 **kwargs):
        super(VariationalAutoEncoder, self).__init__(name=name, **kwargs)
        self.original_dim = original_dim
        self.encoder = Encoder(latent_dim = latent_dim,
                               intermediate_dim = intermediate_dim)
        self.decoder = Decoder(original_dim, intermediate_dim = intermediate_dim)

    def call(self, inputs):
        """Encode ``inputs``, decode the sampled ``z``, and register the KL loss."""
        z_mean, z_log_var, z = self.encoder(inputs)
        reconstructed = self.decoder(z)
        # Add KL divergence regularization loss.
        kl_loss = -0.5*tf.reduce_mean(
            z_log_var - tf.square(z_mean) - tf.exp(z_log_var) + 1)
        self.add_loss(kl_loss)
        return reconstructed

In [None]:
# NOTE(review): 784 is presumably the flattened 28x28 MNIST image size; confirm.
original_dim = 784
# Positional arguments after `original_dim` are 64 and 32.
vae = VariationalAutoEncoder(original_dim, 64, 32)

optimizer = tf.keras.optimizers.Adam(learning_rate = 1e-3)
mse_loss_fn = tf.keras.losses.MeanSquaredError()

# Mean metric object, created here for later use.
loss_metric = tf.keras.metrics.Mean()

# Only the first element of the first returned pair is kept; the rest is discarded.
(x_train, _), _ = tf.keras.datasets.mnist.load_data()

In [3]:
# 2x2 all-ones input used by the Linear2 and Linear3 cells.
x = tf.ones((2,2))

In [32]:
class Linear2(layers.Layer):
    """Lazily built affine layer that prints a trace line from each lifecycle method.

    Args:
        units: Number of output features.
    """

    def __init__(self, units=32):
        super(Linear2, self).__init__()
        self.units = units
        print("init")

    def build(self, abc):
        """Create ``w`` and ``b``; ``abc[-1]`` sizes the kernel's first dimension."""
        in_features = abc[-1]
        self.w = self.add_weight(
            shape=(in_features, self.units),
            initializer='random_normal',
            trainable=True)
        self.b = self.add_weight(
            shape=(self.units,),
            initializer='random_normal',
            trainable=True)
        print("build")

    def call(self, defg):
        """Print a trace line, then return ``defg @ w + b``."""
        print("call")
        projected = tf.matmul(defg, self.w)
        return projected + self.b
        

In [33]:
# Constructing the layer runs only __init__, which prints "init".
linear_layer2 = Linear2(32)

init


In [34]:
# First call on the layer: the recorded output shows "build" followed by "call".
y=linear_layer2(x)

build
call


In [31]:
# Calling the already-built layer again: the recorded output shows only "call".
linear_layer2(x)

call


<tf.Tensor: id=158, shape=(2, 32), dtype=float32, numpy=
array([[-0.06849635,  0.00995546,  0.05816585,  0.03781942, -0.02526987,
         0.05539752, -0.17635791,  0.05470857,  0.17366175,  0.01917322,
         0.06387094, -0.01698533, -0.00506841, -0.00584116,  0.07189137,
        -0.06351052, -0.00188681, -0.08475505, -0.05147968,  0.0179968 ,
        -0.03531788,  0.12632455, -0.05915659, -0.05977134,  0.07300624,
        -0.20184529, -0.06021175, -0.1589274 , -0.0158653 ,  0.14214398,
        -0.03911573, -0.11371459],
       [-0.06849635,  0.00995546,  0.05816585,  0.03781942, -0.02526987,
         0.05539752, -0.17635791,  0.05470857,  0.17366175,  0.01917322,
         0.06387094, -0.01698533, -0.00506841, -0.00584116,  0.07189137,
        -0.06351052, -0.00188681, -0.08475505, -0.05147968,  0.0179968 ,
        -0.03531788,  0.12632455, -0.05915659, -0.05977134,  0.07300624,
        -0.20184529, -0.06021175, -0.1589274 , -0.0158653 ,  0.14214398,
        -0.03911573, -0.11371459

In [30]:
# Display the tensor stored from the earlier `y = linear_layer2(x)` call.
y

<tf.Tensor: id=154, shape=(2, 32), dtype=float32, numpy=
array([[-0.06849635,  0.00995546,  0.05816585,  0.03781942, -0.02526987,
         0.05539752, -0.17635791,  0.05470857,  0.17366175,  0.01917322,
         0.06387094, -0.01698533, -0.00506841, -0.00584116,  0.07189137,
        -0.06351052, -0.00188681, -0.08475505, -0.05147968,  0.0179968 ,
        -0.03531788,  0.12632455, -0.05915659, -0.05977134,  0.07300624,
        -0.20184529, -0.06021175, -0.1589274 , -0.0158653 ,  0.14214398,
        -0.03911573, -0.11371459],
       [-0.06849635,  0.00995546,  0.05816585,  0.03781942, -0.02526987,
         0.05539752, -0.17635791,  0.05470857,  0.17366175,  0.01917322,
         0.06387094, -0.01698533, -0.00506841, -0.00584116,  0.07189137,
        -0.06351052, -0.00188681, -0.08475505, -0.05147968,  0.0179968 ,
        -0.03531788,  0.12632455, -0.05915659, -0.05977134,  0.07300624,
        -0.20184529, -0.06021175, -0.1589274 , -0.0158653 ,  0.14214398,
        -0.03911573, -0.11371459

In [35]:
# Inspect the variables that build() created on the layer.
linear_layer2.weights

[<tf.Variable 'linear2_3/Variable:0' shape=(2, 32) dtype=float32, numpy=
 array([[-0.05283543, -0.0082053 , -0.06088096,  0.07135466, -0.08922812,
          0.00388604, -0.00522436,  0.04253149,  0.09767316, -0.06347967,
          0.04051628,  0.04747967, -0.03607544,  0.03302233, -0.05037155,
         -0.03650972, -0.02882522,  0.01401194,  0.0320702 , -0.0496694 ,
         -0.03895427,  0.07002649,  0.01673152, -0.00125052,  0.04609814,
         -0.10299792,  0.0235304 ,  0.0149266 , -0.02171146, -0.04270264,
          0.07031806, -0.03288479],
        [ 0.06132748,  0.02129406, -0.02074615,  0.06057013,  0.0153632 ,
         -0.0258691 ,  0.04322994,  0.10100406, -0.04173629,  0.12975429,
         -0.03897617, -0.00556219,  0.05746824,  0.0208741 ,  0.0259151 ,
         -0.00101555,  0.03396238, -0.12266488,  0.02647762, -0.02423989,
          0.01235323,  0.0094605 , -0.04054355,  0.01750218, -0.06966328,
         -0.00336607, -0.04003532,  0.0286801 , -0.01431572,  0.00417895,
   

In [23]:
class Linear3(layers.Layer):
    """Lazily built affine layer that prints a trace line from each lifecycle method.

    Args:
        units: Number of output features.
    """

    def __init__(self, units=32):
        super(Linear3, self).__init__()
        self.units = units
        print("init")

    def build(self, input_shape):
        """Create ``w`` and ``b``; ``input_shape[-1]`` sizes the kernel's first dimension."""
        in_features = input_shape[-1]
        self.w = self.add_weight(
            shape=(in_features, self.units),
            initializer='random_normal',
            trainable=True)
        self.b = self.add_weight(
            shape=(self.units,),
            initializer='random_normal',
            trainable=True)
        print("build")

    def call(self, defg):
        """Print a trace line, then return ``defg @ w + b``."""
        print("call")
        projected = tf.matmul(defg, self.w)
        return projected + self.b

In [24]:
# Instantiate and call in one cell: the recorded output is "init", "build", "call".
linear_layer3 = Linear3(32)
linear_layer3(x)

init
build
call


<tf.Tensor: id=118, shape=(2, 32), dtype=float32, numpy=
array([[ 0.05445231, -0.00421354, -0.00246931, -0.01939628,  0.02870491,
        -0.08737153,  0.00841595,  0.02016823,  0.22865051, -0.03089071,
        -0.05110834, -0.12780133, -0.00230058,  0.06311996,  0.1361354 ,
        -0.08656222, -0.03167488, -0.01516738,  0.06812147, -0.02753681,
        -0.061635  ,  0.07040352,  0.09777652, -0.10591459, -0.00030224,
         0.18461281, -0.00934733, -0.03347958, -0.01383944,  0.06318942,
         0.05514618,  0.05762808],
       [ 0.05445231, -0.00421354, -0.00246931, -0.01939628,  0.02870491,
        -0.08737153,  0.00841595,  0.02016823,  0.22865051, -0.03089071,
        -0.05110834, -0.12780133, -0.00230058,  0.06311996,  0.1361354 ,
        -0.08656222, -0.03167488, -0.01516738,  0.06812147, -0.02753681,
        -0.061635  ,  0.07040352,  0.09777652, -0.10591459, -0.00030224,
         0.18461281, -0.00934733, -0.03347958, -0.01383944,  0.06318942,
         0.05514618,  0.05762808