In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras.losses import mean_squared_error, huber_loss, MeanSquaredError
from keras.metrics import Accuracy

In [2]:
# to make this notebook's output stable across runs
np.random.seed(42)
tf.random.set_seed(42)

In [3]:
t = tf.constant(10)
t

<tf.Tensor: shape=(), dtype=int32, numpy=10>

In [4]:
shape = tf.constant([[[1,2,3],[1,2,3],[1,2,3]]])
shape.shape

TensorShape([1, 3, 3])

In [5]:
t = t + 5
t = tf.add(t, 5)  # tf.math.add(t,5)
# t.__add__(10)
t

<tf.Tensor: shape=(), dtype=int32, numpy=20>

In [6]:
a = tf.constant([[1,2,3]])
# a_n = a @ tf.transpose(a)
a_n = tf.matmul(a, tf.transpose(a))
a_n

<tf.Tensor: shape=(1, 1), dtype=int32, numpy=array([[14]])>

In [7]:
t = tf.constant([[1,2,3],[4,5,6],[7,8,9]])

In [8]:
print(np.mean(t, axis=1))
print(tf.reduce_mean(t, axis=1))

[2. 5. 8.]
tf.Tensor([2 5 8], shape=(3,), dtype=int32)


In [9]:
t = tf.constant([4., 5., 6.])
a = np.array([4., 5., 6.])

In [10]:
np.square(t)

array([16., 25., 36.], dtype=float32)

In [11]:
tf.square(a)

<tf.Tensor: shape=(3,), dtype=float64, numpy=array([16., 25., 36.])>

Tensorflow does not perform automatic type conversion to not hurt perfomance.
If type conversion is needed you can use tf.cast()

In [12]:
t2 = tf.constant(4., dtype=tf.float64)
tf.constant(2.0) + tf.cast(t2, dtype=tf.float32)

<tf.Tensor: shape=(), dtype=float32, numpy=6.0>

Because the tf.Tensor values so far are immutable we cannot change them which is problematic when we are working with weights that need to be adjusted by backpropagation.
We can use tf.Variable

In [13]:
v = tf.Variable([[1., 2., 3.],[4., 5., 6.]])

In [14]:
v

<tf.Variable 'Variable:0' shape=(2, 3) dtype=float32, numpy=
array([[1., 2., 3.],
       [4., 5., 6.]], dtype=float32)>

In [15]:
v.assign(v * 2)

<tf.Variable 'UnreadVariable' shape=(2, 3) dtype=float32, numpy=
array([[ 2.,  4.,  6.],
       [ 8., 10., 12.]], dtype=float32)>

In [16]:
v[0, 0].assign(42)

<tf.Variable 'UnreadVariable' shape=(2, 3) dtype=float32, numpy=
array([[42.,  4.,  6.],
       [ 8., 10., 12.]], dtype=float32)>

In [17]:
v[:, 2].assign([0, 1])

<tf.Variable 'UnreadVariable' shape=(2, 3) dtype=float32, numpy=
array([[42.,  4.,  0.],
       [ 8., 10.,  1.]], dtype=float32)>

In [18]:
arr = tf.constant([1,2,3,4,5,6,7,8,9])
print(tf.reduce_sum(arr))

tf.Tensor(45, shape=(), dtype=int32)


# Custom Loss Function

In [19]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [20]:
dataset = fetch_california_housing()
X, y = dataset.data, dataset.target
X_train_full, X_test, y_train_full, y_test = train_test_split(X, y, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train_full, y_train_full, random_state=42)

std_scaler = StandardScaler()
X_train_scaled = std_scaler.fit_transform(X_train)
X_test_scaled = std_scaler.transform(X_test)
X_val_scaled = std_scaler.transform(X_val)

In [21]:
def huber_fn(y_true, y_pred):
    error = y_true - y_pred
    mse = tf.square(error) / 2
    mae = tf.abs(error) - 0.5
    is_small_error = tf.abs(error) < 1
    return tf.where(is_small_error, mse, mae)

In [22]:
def create_huber(threshold=1.0):
    def huber_fn(y_true, y_pred):
        error = y_true - y_pred
        is_small_error = tf.abs(error) < threshold
        squared_loss = tf.square(error) / 2
        linear_loss  = threshold * tf.abs(error) - threshold**2 / 2
        return tf.where(is_small_error, squared_loss, linear_loss)
    return huber_fn

In [23]:
# model.compile(loss=huber_fn, optimizer="Nadam")
# model.fit(X_train, y_train, [...])

In [24]:
class HuberLoss(keras.losses.Loss):
    def __init__(self, threshold=1.0, **kwargs):
        self.threshold = threshold
        super().__init__(**kwargs)
    def call(self, y_true, y_pred):
        error = y_true - y_pred
        huber_mse = 0.5 * tf.square(error)
        huber_mae = self.threshold * (tf.abs(error) - 0.5 * self.threshold)
        is_smaller_error = tf.abs(error) < self.threshold
        return tf.where(is_smaller_error, huber_mse, huber_mae)
    def get_config(self):
        """
        This method used to store the parameters used in the training
        """
        base_config = super().get_config()
        return {**base_config, "threshold": self.threshold}

In [25]:
model = keras.models.Sequential()
model.add(keras.layers.Dense(30, activation="selu", kernel_initializer="lecun_normal",
                            input_shape=[8,]))
model.add(keras.layers.Dense(1))

In [26]:
model.compile(loss=HuberLoss(2.), optimizer="nadam", metrics=["accuracy"])

In [27]:
model.fit(X_train_scaled, y_train, epochs=2,
          validation_data=(X_val_scaled, y_val))

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x1f2c14d7e80>

In [28]:
model.save("my_model_with_a_custom_loss_class.h5")
model = keras.models.load_model("my_model_with_a_custom_loss_class.h5", 
                                custom_objects={"HuberLoss": HuberLoss})
history = model.fit(X_train_scaled, y_train, epochs=2, 
          validation_data=(X_val_scaled, y_val))

Epoch 1/2
Epoch 2/2


In [29]:
model.loss.threshold

2.0

In [30]:
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

In [31]:
def my_softplus(z):
    return tf.math.log(tf.exp(z) + 1.0)

def my_glorot_initializer(shape, dtype=tf.float32):
    stddev = tf.sqrt(2. / (shape[0] + shape[1]))
    return tf.random.normal(shape, stddev = stddev, dtype=dtype)

def my_l1_regularizer(weights):
    return tf.reduce_sum(tf.abs(weights * 0.1))

def my_positive_weights(weights):
    return tf.nn.relu(weights)

In [32]:
layer = keras.layers.Dense(1, activation=my_softplus, 
                           kernel_initializer=my_glorot_initializer, 
                           kernel_regularizer=my_l1_regularizer, 
                           kernel_constraint=my_positive_weights)

In [33]:
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

In [34]:
class MyL1Regularizer(keras.regularizers.Regularizer):
    def __init__(self, factor):
        self.factor = factor
    def __call__(self, weights):
        return tf.reduce_sum(tf.abs(weights * self.factor))
    def get_config(self):
        return {"factor": self.factor}

In [35]:
model = keras.models.Sequential([
    keras.layers.Dense(30, activation="selu", kernel_initializer="lecun_normal",
                      input_shape=[8,]),
    keras.layers.Dense(1, activation=my_softplus, 
                       kernel_initializer=my_glorot_initializer,
                       kernel_regularizer=MyL1Regularizer(0.01),
                       kernel_constraint=my_positive_weights)
])

In [36]:
model.compile(loss="mse", optimizer="nadam", metrics=["mae"])

In [37]:
model.fit(X_train_scaled, y_train, epochs=2, validation_data=(X_val_scaled, y_val))

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x1f2c395c730>

In [38]:
model.save("my_model_with_many_custom_parts.h5")

In [39]:
model = keras.models.load_model(
    "my_model_with_many_custom_parts.h5",
    custom_objects={
       "MyL1Regularizer": MyL1Regularizer,
       "my_positive_weights": my_positive_weights,
       "my_glorot_initializer": my_glorot_initializer,
       "my_softplus": my_softplus,
    }
)

# Custom Metrics

In [40]:
keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)

In [41]:
class CustomAccuracy(keras.metrics.Metric):
    def __init__(self,  delta = 1.0, **kwargs):
        super().__init__(**kwargs)
        self.delta = delta
        self.huber_fn = create_huber(delta)
        self.total = self.add_weight("total", initializer="zeros")
        self.count = self.add_weight("count", initializer="zeros")
        
    def update_state(self, y_true, y_pred, sample_weight=None):
        metric = self.huber_fn(y_true, y_pred)
        self.total.assign_add(tf.reduce_sum(metric))
        self.count.assign_add(tf.cast(tf.size(y_true), tf.float32))
        
    def result(self):
        return self.total / self.count
    
    def reset_states(self):
        self.total.assign(0.)
        self.count.assign(0.)

In [42]:
y_pred = model.predict(X_train_scaled)



# Custom Layers

To create custom layers without any weight such as Flatten, Relu we can wrap it in Lambda layer

In [43]:
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

In [44]:
exponential_layer = keras.layers.Lambda(lambda x: tf.exp(x))

In [45]:
exponential_layer([-1., 0., 1.])

<tf.Tensor: shape=(3,), dtype=float32, numpy=array([0.36787945, 1.        , 2.7182817 ], dtype=float32)>

In [46]:
model = keras.models.Sequential([
        keras.layers.Dense(30, activation="relu", input_shape=[8,]),
        keras.layers.Dense(1),
        exponential_layer
])

In [47]:
model.compile(loss="mse", optimizer="sgd")

model.fit(X_train_scaled, y_train, epochs=5,
         validation_data=(X_val_scaled, y_val))
model.evaluate(X_test_scaled, y_test)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


0.36233896017074585

To create a custom layer with weights we have to build a new subclass of keras.layers.Layer

In [48]:
class CustomLayerDense(keras.layers.Layer):
    def __init__(self, units, activation=None, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.activation = keras.activations.get(activation)
        
    def build(self, input_shape):
        self.kernel = self.add_weight(
            name="kernel", 
            shape=[input_shape[-1], self.units],
            initializer="glorot_normal"
        )
        self.bias = self.add_weight(
            name="bias",
            shape=[self.units], 
            initializer="zeros"
        )
        super().build(input_shape)
    def call(self, X):
        return self.activation(X @ self.kernel + self.bias)
    
    def compute_output_shape(self, input_shape):
        return tf.TensorShape(input_shape.as_list()[:-1] + [self.units])
    
    def get_config(self):
        """
        Method used to store the parameters
        """
        base_config = super().get_config()
        return {**base_config, "units": self.units,
                "activation": keras.activations.serialize(self.activation)}

In [49]:
keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)

In [50]:
model = keras.models.Sequential([
    CustomLayerDense(30, activation="relu", input_shape=[8,]),
    CustomLayerDense(1)
])

In [51]:
model.compile(loss="mse", optimizer="nadam")
model.fit(X_train_scaled, y_train, epochs=10, 
         validation_data=(X_val_scaled, y_val))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1f2c4c184f0>

In [52]:
class MyMultiLayer(keras.layers.Layer):
    def call(self, X):
        x1, x2 = X
        print("x1.shape: ", x1.shape ," x2.shape: ", x2.shape) # Debugging of custom layer
        return x1 + x2, x1 * x2
    def compute_output_shape(self, input_shape):
        input_shape1, input_shape2 = input_shape
        return [input_shape1, input_shape2]

In [53]:
inputs1 = keras.layers.Input(shape=[2])
inputs2 = keras.layers.Input(shape=[2])
outputs1, outputs2 = MyMultiLayer()((inputs1, inputs2))

x1.shape:  (None, 2)  x2.shape:  (None, 2)


# Custom Model

In [54]:
class ResidualBlock(keras.layers.Layer):
    def __init__(self, n_layers, n_neurons, **kwargs):
        super().__init__(**kwargs)
        self.hidden = [keras.layers.Dense(n_neurons, activation="relu", 
                                          kernel_initializer="he_normal") for _ in range(n_layers)]
    def call(self, inputs):
        Z = inputs
        print(Z.shape, inputs.shape)
        for layer in self.hidden:
            Z = layer(Z)
        return inputs + Z

In [55]:
class ResidualRegressor(keras.Model):
    def __init__(self, output_dim, **kwargs):
        super().__init__(**kwargs)
        self.hidden_1 = keras.layers.Dense(30, activation="elu", kernel_initializer="he_normal")
        self.block_1 = ResidualBlock(2, 30)
        self.block_2 = ResidualBlock(2, 30)
        
        self.out = keras.layers.Dense(output_dim)
    
    def call(self, inputs):
        Z = self.hidden_1(inputs)
        for _ in range(1 + 3):
            Z = self.block_1(Z)
        Z = self.block_2(Z)
        return self.out(Z)
        

In [56]:
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

In [57]:
model = ResidualRegressor(1)
model.compile(loss="mse", optimizer="nadam")
history = model.fit(X_train_scaled, y_train, epochs=5)
score = model.evaluate(X_val_scaled, y_val)
y_pred = model.predict(X_test_scaled)

Epoch 1/5
(None, 30) (None, 30)
(None, 30) (None, 30)
(None, 30) (None, 30)
(None, 30) (None, 30)
(None, 30) (None, 30)
(None, 30) (None, 30)
(None, 30) (None, 30)
(None, 30) (None, 30)
(None, 30) (None, 30)
(None, 30) (None, 30)
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
(None, 30) (None, 30)
(None, 30) (None, 30)
(None, 30) (None, 30)
(None, 30) (None, 30)
(None, 30) (None, 30)
(None, 30) (None, 30)
(None, 30) (None, 30)
(None, 30) (None, 30)
(None, 30) (None, 30)
(None, 30) (None, 30)


In [58]:
model.summary()

Model: "residual_regressor"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               multiple                  270       
                                                                 
 residual_block (ResidualBlo  multiple                 1860      
 ck)                                                             
                                                                 
 residual_block_1 (ResidualB  multiple                 1860      
 lock)                                                           
                                                                 
 dense_5 (Dense)             multiple                  31        
                                                                 
Total params: 4,021
Trainable params: 4,021
Non-trainable params: 0
_________________________________________________________________


# Custom Training Loops

In [59]:
keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)

In [60]:
model = keras.models.Sequential([
    keras.layers.Dense(30, activation="elu", 
                       kernel_initializer="he_normal", 
                       kernel_regularizer=keras.regularizers.l2(0.05)),
    keras.layers.Dense(1, kernel_regularizer=keras.regularizers.l2(0.05))
])

In [61]:
def random_batch(X, y, batch_size=32):
    indices = np.random.randint(len(X), size=batch_size)
    return X[indices], y[indices]

In [62]:
n_epochs = 5
batch_size = 32
n_steps = len(X_train) // batch_size
loss_fn = keras.losses.mean_squared_error
optimizer = keras.optimizers.SGD(learning_rate=1e-3)
acc_metric = keras.metrics.MeanAbsoluteError()

In [63]:
for epoch in range(1, n_epochs + 1):
    print(f"\nEpoch {epoch}/{n_epochs}")
    progbar = keras.utils.Progbar(len(y_train))
    for step in range(1, n_steps + 1):
        X_batch, y_batch = random_batch(X_train_scaled, y_train)
        with tf.GradientTape() as tape:
            y_pred = model(X_batch, training=True)
            loss = loss_fn(y_batch, y_pred)
        
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        
        acc_metric.update_state(y_batch, y_pred)
        acc = acc_metric.result()
        progbar.add(batch_size, values=[("acc", acc),("loss", loss)])
    
    acc_metric.reset_states()


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5

# Functions and Graphs

In [64]:
def cube(num):
    return num ** 3

cube(tf.constant(2))

<tf.Tensor: shape=(), dtype=int32, numpy=8>

In [65]:
tf_cube = tf.function(cube)
tf_cube

<tensorflow.python.eager.polymorphic_function.polymorphic_function.Function at 0x1f2c8a6d760>

In [66]:
tf_cube(2)

<tf.Tensor: shape=(), dtype=int32, numpy=8>

In [67]:
print(tf.constant(10))

tf.Tensor(10, shape=(), dtype=int32)


In [68]:
a = tf.constant(np.arange(10))
a[0]

<tf.Tensor: shape=(), dtype=int32, numpy=0>

# Exercise 12

In [69]:
class CustomLayerNormalization(keras.layers.Layer):
    def __init__(self, ep=1e-3, **kwargs):
        self.ep = ep
        super().__init__(**kwargs)
    
    def build(self, input_shape):
        self.alpha = self.add_weight(name="alpha", 
                                     shape=input_shape[-1:],
                                     initializer="ones")
        self.beta = self.add_weight(name="beta", 
                                    shape=input_shape[-1:],
                                    initializer="zeros")
    def call(self, inputs):
        mean, variance = tf.nn.moments(inputs, axes=-1, keepdims=True)
        return self.alpha * (inputs - mean) / (tf.sqrt(self.ep + variance)) + self.beta

In [70]:
X = X_train.astype(np.float32)

In [71]:
custom_layer_normalization = CustomLayerNormalization()
layer_normalization = keras.layers.LayerNormalization()

tf.reduce_mean(keras.losses.mean_absolute_error(
    layer_normalization(X), custom_layer_normalization(X)))

<tf.Tensor: shape=(), dtype=float32, numpy=3.9357733e-08>

In [72]:
(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()
X_test = X_test.astype(np.float32) / 255.
X_train_full = X_train_full.astype(np.float32) / 255.
X_train, X_val = X_train_full[5000:], X_train_full[:5000]
y_train, y_val = y_train_full[5000:], y_train_full[:5000]

val_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val))
val_dataset = val_dataset.batch(32)

In [73]:
keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)

In [74]:
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28,28]),
    keras.layers.Dense(100, activation="relu"),
    keras.layers.Dense(10, activation="softmax")
])

In [75]:
batch_size = 32
n_epochs = 5
n_steps = len(X_train) // batch_size
loss_fn = keras.losses.sparse_categorical_crossentropy
optimizer = keras.optimizers.Nadam(learning_rate=1e-2)
metrics = keras.metrics.SparseCategoricalAccuracy()
val_acc_metric = keras.metrics.SparseCategoricalAccuracy()

In [81]:
for epoch in range(1, n_epochs + 1):
    print(f"\nEpoch {epoch}/{n_epochs}")
    progbar = keras.utils.Progbar(n_steps)
    for step in range(1, n_steps + 1):
        X_batch, y_batch = random_batch(X_train, y_train)
        with tf.GradientTape() as tape:
            y_pred = model(X_batch)
            loss = loss_fn(y_batch, y_pred)
        
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        
        metrics.update_state(y_batch, y_pred)
        acc = metrics.result()
        progbar.add(1,values=[("acc", acc), ("loss", loss)])
    metrics.reset_states()
    
    for X_batch_val, y_batch_val in val_dataset:
        val_pred = model(X_batch_val, training=False)
        val_acc_metric.update_state(y_batch_val, val_pred)
    val_acc = val_acc_metric.result()
    val_loss = keras.losses.sparse_categorical_crossentropy(y_batch_val, val_pred)
    print(f"\rValidation acc: %.4f" % (float(val_acc),))

    val_acc_metric.reset_states()


Epoch 1/5
Validation acc: 0.8566

Epoch 2/5
Validation acc: 0.8656

Epoch 3/5
Validation acc: 0.8702

Epoch 4/5
Validation acc: 0.8618

Epoch 5/5
Validation acc: 0.8708
