In [1]:
import tensorflow as tf
import numpy as np

from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
t = tf.constant(
    [
        [1.0, 2.0, 3.0],
        [
            4.0,
            5.0,
            6.0,
        ],
    ]
)  # this is a matrix
t

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[1., 2., 3.],
       [4., 5., 6.]], dtype=float32)>

In [3]:
print(t.shape)
print(t.dtype)

(2, 3)
<dtype: 'float32'>


In [4]:
print(t[:, 1:])
print(t[:, 1, tf.newaxis])
print(t + 10)
print(tf.square(t))
t @ tf.transpose(t)

tf.Tensor(
[[2. 3.]
 [5. 6.]], shape=(2, 2), dtype=float32)
tf.Tensor(
[[2.]
 [5.]], shape=(2, 1), dtype=float32)
tf.Tensor(
[[11. 12. 13.]
 [14. 15. 16.]], shape=(2, 3), dtype=float32)
tf.Tensor(
[[ 1.  4.  9.]
 [16. 25. 36.]], shape=(2, 3), dtype=float32)


<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[14., 32.],
       [32., 77.]], dtype=float32)>

In [5]:
# tf.reduce_mean()
# tf.reduce_max()
# tf.reduce_sum()
# tf.math.log()
a = np.array([2.0, 4.0, 5.0])
print(tf.constant(a))
print(t.numpy())

tf.Tensor([2. 4. 5.], shape=(3,), dtype=float64)
[[1. 2. 3.]
 [4. 5. 6.]]


In [6]:
# Type converstion

# tf.constant(2.) + tf.constant(40)
# tf.constant(2.) + tf.constant(40, dtype=tf.float64)
t2 = tf.constant(40.0, dtype=tf.float64)
print(tf.constant(2.0) + tf.cast(t2, tf.float32))

tf.Tensor(42.0, shape=(), dtype=float32)


In [7]:
# Variables
v = tf.Variable([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
print(v.assign(2 * v))
print(v[0, 1].assign(42))
print(v[:, 2].assign([0.0]))
v.scatter_nd_update(indices=[[0, 0], [1, 2]], updates=[100.0, 200.0])

<tf.Variable 'UnreadVariable' shape=(2, 3) dtype=float32, numpy=
array([[ 2.,  4.,  6.],
       [ 8., 10., 12.]], dtype=float32)>
<tf.Variable 'UnreadVariable' shape=(2, 3) dtype=float32, numpy=
array([[ 2., 42.,  6.],
       [ 8., 10., 12.]], dtype=float32)>
<tf.Variable 'UnreadVariable' shape=(2, 3) dtype=float32, numpy=
array([[ 2., 42.,  0.],
       [ 8., 10.,  0.]], dtype=float32)>


<tf.Variable 'UnreadVariable' shape=(2, 3) dtype=float32, numpy=
array([[100.,  42.,   0.],
       [  8.,  10., 200.]], dtype=float32)>

In [8]:
# Customizing models and training algorithms
# Custom loss functions


@tf.keras.utils.register_keras_serializable()
def huber_fn(y_true, y_pred):
    error = y_true - y_pred
    is_small_error = tf.abs(error) < 1
    squared_loss = tf.square(error) / 2
    linear_loss = tf.abs(error) - 0.5
    return tf.where(is_small_error, squared_loss, linear_loss)


housing = fetch_california_housing()
x_train_full, x_test, y_train_full, y_test = train_test_split(
    housing.data, housing.target.reshape(-1, 1), random_state=42
)
x_train, x_valid, y_train, y_valid = train_test_split(
    x_train_full, y_train_full, random_state=42
)

scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_valid_scaled = scaler.transform(x_valid)
x_test_scaled = scaler.transform(x_test)


input_shape = x_train.shape[1:]
tf.keras.utils.set_random_seed(42)

model = tf.keras.Sequential(
    [
        tf.keras.layers.Input(shape=(input_shape)),
        tf.keras.layers.Dense(30, activation="relu", kernel_initializer="he_normal"),
        tf.keras.layers.Dense(1),
    ]
)

model.compile(loss=huber_fn, optimizer="nadam", metrics=["mae"])

model.fit(x_train_scaled, y_train, epochs=2, validation_data=(x_valid_scaled, y_valid))

model.save("my_model_with_huber_loss.keras")

Epoch 1/2
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.7494 - mae: 1.1371 - val_loss: 0.3474 - val_mae: 0.6522
Epoch 2/2
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 699us/step - loss: 0.2607 - mae: 0.5681 - val_loss: 0.2553 - val_mae: 0.5383


In [9]:
# saving loading models with custom objects
model = tf.keras.models.load_model("my_model_with_huber_loss.keras")

In [10]:
@tf.keras.utils.register_keras_serializable()
def create_huber(threshold=1.0):
    def huber_fn(y_true, y_pred):
        error = y_true - y_pred
        is_small_error = tf.abs(error) < threshold
        squared_loss = tf.square(error) / 2
        linear_loss = threshold * tf.abs(error) - threshold**2 / 2
        return tf.where(is_small_error, squared_loss, linear_loss)

    return huber_fn


model.compile(loss=huber_fn, optimizer="nadam", metrics=["mae"])
model.fit(x_train_scaled, y_train, epochs=2, validation_data=(x_valid_scaled, y_valid))
model.save("my_model_with_custom_huber_loss_threshold_2.keras")
# while loading load using with same threshold

Epoch 1/2
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.2176 - mae: 0.5075 - val_loss: 0.2200 - val_mae: 0.4935
Epoch 2/2
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 691us/step - loss: 0.1964 - mae: 0.4770 - val_loss: 0.1943 - val_mae: 0.4655


In [11]:
# to get the threshold while loading a saved model


@tf.keras.utils.register_keras_serializable()
class HuberLoss(tf.keras.losses.Loss):
    def __init__(self, threshold=1.0, **kwargs):
        self.threshold = threshold
        super().__init__(**kwargs)

    def call(self, y_true, y_pred):
        error = y_true - y_pred
        is_small_error = tf.abs(error) < self.threshold
        squared_loss = tf.square(error) / 2
        linear_loss = self.threshold * tf.abs(error) - self.threshold**2 / 2
        return tf.where(is_small_error, squared_loss, linear_loss)

    def get_config(self):
        base_config = super().get_config()
        return {**base_config, "threshold": self.threshold}


model.compile(loss=HuberLoss(2.0), optimizer="nadam", metrics=["mae"])
model.fit(x_train_scaled, y_train, epochs=2, validation_data=(x_valid_scaled, y_valid))

model.save("my_model_with_custom_huber_with_cofig_loss_threshold_2.keras")

Epoch 1/2
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 934us/step - loss: 0.2125 - mae: 0.4665 - val_loss: 0.2213 - val_mae: 0.4560
Epoch 2/2
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 676us/step - loss: 0.2042 - mae: 0.4583 - val_loss: 0.1889 - val_mae: 0.4373


In [12]:
model = tf.keras.models.load_model(
    "my_model_with_custom_huber_with_cofig_loss_threshold_2.keras"
)

In [13]:
model.fit(x_train_scaled, y_train, epochs=2, validation_data=(x_valid_scaled, y_valid))

Epoch 1/2
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 968us/step - loss: 0.1991 - mae: 0.4527 - val_loss: 0.1810 - val_mae: 0.4313
Epoch 2/2
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 686us/step - loss: 0.1950 - mae: 0.4479 - val_loss: 0.1829 - val_mae: 0.4294


<keras.src.callbacks.history.History at 0x32010bd10>

In [14]:
#  Custom activarion functions, Initalizers, Reglarizers and Constraints


def my_softplus(z):
    return tf.math.log(1.0 + tf.exp(z))


def my_glorot_initializer(shape, dtype=tf.float32):
    stddev = tf.sqrt(2.0 / (shape[0] + shape[1]))
    return tf.random.normal(shape, stddev=stddev, dtype=dtype)


def my_l1_regularizer(weights):
    return tf.reduce_sum(tf.abs(0.01, *weights))


def my_positive_weights(weights):
    return tf.where(weights < 0.0, tf.zeros_like(weights), weights)

In [15]:
layer = tf.keras.layers.Dense(
    1,
    activation=my_softplus,
    kernel_initializer=my_glorot_initializer,
    kernel_regularizer=my_l1_regularizer,
    kernel_constraint=my_positive_weights,
)

In [16]:
# custom metrics

tf.keras.utils.set_random_seed(42)
model = tf.keras.Sequential(
    [
        tf.keras.layers.Input(shape=input_shape),
        tf.keras.layers.Dense(30, activation="relu", kernel_initializer="he_normal"),
        tf.keras.layers.Dense(1),
    ]
)

model.compile(loss="mse", optimizer="nadam", metrics=[create_huber(2.0)])

model.fit(x_train_scaled, y_train, epochs=2, validation_data=(x_valid_scaled, y_valid))

Epoch 1/2
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 979us/step - huber_fn: 1.0788 - loss: 2.5942 - val_huber_fn: 0.5648 - val_loss: 21.3898
Epoch 2/2
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 673us/step - huber_fn: 0.3403 - loss: 0.7644 - val_huber_fn: 0.4283 - val_loss: 11.2129


<keras.src.callbacks.history.History at 0x315e97cb0>

In [17]:
precision = tf.keras.metrics.Precision()
precision([0, 1, 1, 1, 0, 1, 0, 1], [1, 1, 0, 1, 0, 1, 0, 1])
precision.result()
precision.variables
precision.reset_state()
# this is called streaming metric

In [18]:
# custom streaming metric


class HuberMetric(tf.keras.metrics.Metric):
    def __init__(self, threshold=1.0, **kwargs):
        super().__init__(**kwargs)  # handles base args (e.g., dtype)
        self.threshold = threshold
        self.huber_fn = create_huber(threshold)
        self.total = self.add_weights("total", initializer="zeros")
        self.count = self.add_weights("count", initializer="zeros")

    def update_state(self, y_true, y_pred, sample_weight=None):
        sample_metrics = self.huber_fn(y_true, y_pred)
        self.total.assign_add(tf.reduce_sum(sample_metrics))
        self.count.assign_add(tf.cast(tf.size(y_true), tf.float32))

    def result(self):
        return self.total / self.count

    def get_config(self):
        base_config = super().get_config()
        return {**base_config, "threshold": self.threshold}

In [19]:
# custom layers
# exponential function layer
exponential_layer = tf.keras.layers.Lambda(lambda x: tf.exp(x))
exponential_layer(tf.constant([-1.0, 0.0, 1.0]))

# adding exponential layer at the output of the regression model can be useful if the values to predict are positve and with very different scales

tf.keras.utils.set_random_seed(42)
model = tf.keras.Sequential(
    [
        tf.keras.layers.Input(shape=(input_shape)),
        tf.keras.layers.Dense(30, activation="relu"),
        tf.keras.layers.Dense(1),
        exponential_layer,
    ]
)

model.compile(loss="mse", optimizer="sgd")
model.fit(x_train_scaled, y_train, epochs=5, validation_data=(x_valid_scaled, y_valid))
model.evaluate(x_test_scaled, y_test)

Epoch 1/5
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 778us/step - loss: 1.2350 - val_loss: 0.4393
Epoch 2/5
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 567us/step - loss: 0.6130 - val_loss: 0.4048
Epoch 3/5
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 567us/step - loss: 0.5172 - val_loss: 0.3942
Epoch 4/5
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 563us/step - loss: 0.4498 - val_loss: 0.3699
Epoch 5/5
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 561us/step - loss: 0.5445 - val_loss: 0.3770
[1m162/162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 407us/step - loss: 0.3923


0.3950682282447815

In [20]:
# custom Dense layer


@tf.keras.utils.register_keras_serializable()
class MyDense(tf.keras.layers.Layer):
    def __init__(self, units, activation=None, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.activation = tf.keras.activations.get(activation)

    def build(self, batch_input_shape):
        self.kernel = self.add_weight(
            name="kernel",
            shape=[batch_input_shape[-1], self.units],
            initializer="glorot_normal",
        )
        self.bias = self.add_weight(
            name="bias", shape=[self.units], initializer="zeros"
        )

    def call(self, X):
        return self.activation(X @ self.kernel + self.bias)

    def get_config(self):
        base_config = super().get_config()
        return {
            **base_config,
            "units": self.units,
            "activation": tf.keras.activations.serialize(self.activation),
        }


tf.keras.utils.set_random_seed(42)
model = tf.keras.Sequential(
    [MyDense(30, activation="relu", input_shape=input_shape), MyDense(1)]
)

model.compile(loss="mse", optimizer="nadam")
model.fit(x_train_scaled, y_train, epochs=2, validation_data=(x_valid_scaled, y_valid))
model.evaluate(x_test_scaled, y_test)
model.save("my_custom_Dense_layer_model.keras")

Epoch 1/2


  super().__init__(**kwargs)


[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 839us/step - loss: 4.2221 - val_loss: 1.7553
Epoch 2/2
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 629us/step - loss: 0.7143 - val_loss: 0.6906
[1m162/162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 425us/step - loss: 0.5624


In [21]:
model = tf.keras.models.load_model("my_custom_Dense_layer_model.keras")
model.fit(x_train_scaled, y_train, epochs=2, validation_data=(x_valid_scaled, y_valid))

Epoch 1/2
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 886us/step - loss: 0.5586 - val_loss: 0.4317
Epoch 2/2
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 617us/step - loss: 0.4826 - val_loss: 0.4608


<keras.src.callbacks.history.History at 0x31722aea0>

In [22]:
# creating a layer with multiple inputs


class MyMultiLayer(tf.keras.layers.Layer):
    def call(self, X):
        X1, X2 = X
        return X1 + X2, X1 * X2, X1 / X2


inputs1 = tf.keras.layers.Input(shape=[2])
inputs2 = tf.keras.layers.Input(shape=[2])
MyMultiLayer()((inputs1, inputs2))

(<KerasTensor shape=(None, 2), dtype=float32, sparse=False, name=keras_tensor_26>,
 <KerasTensor shape=(None, 2), dtype=float32, sparse=False, name=keras_tensor_27>,
 <KerasTensor shape=(None, 2), dtype=float32, sparse=False, name=keras_tensor_28>)

In [23]:
X1, X2 = np.array([[3.0, 6.0], [2.0, 7.0]]), np.array([[6.0, 12.0], [4.0, 3.0]])
MyMultiLayer()((X1, X2))

(<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
 array([[ 9., 18.],
        [ 6., 10.]], dtype=float32)>,
 <tf.Tensor: shape=(2, 2), dtype=float32, numpy=
 array([[18., 72.],
        [ 8., 21.]], dtype=float32)>,
 <tf.Tensor: shape=(2, 2), dtype=float32, numpy=
 array([[0.5      , 0.5      ],
        [0.5      , 2.3333333]], dtype=float32)>)

In [24]:
# creating a layer that adds guassian noise during training and not during testing


class MyGaussianNoise(tf.keras.layers.Layer):
    def __init__(self, stddev, **kwargs):
        super().__init__(**kwargs)
        self.stddev = stddev

    def call(self, X, training=False):
        if training:
            noise = tf.random.normal(tf.shape(X), stddev=self.stddev)
            return X + noise
        else:
            return X


model = tf.keras.Sequential(
    [
        tf.keras.layers.Input(shape=input_shape),
        MyGaussianNoise(stddev=1.0),
        tf.keras.layers.Dense(30, activation="relu", kernel_initializer="he_normal"),
        tf.keras.layers.Dense(1),
    ]
)

model.compile(loss="mse", optimizer="nadam")
model.fit(x_train_scaled, y_train, epochs=2)
model.evaluate(x_test_scaled, y_test)

Epoch 1/2
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 491us/step - loss: 2.4080
Epoch 2/2
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 464us/step - loss: 1.3024
[1m162/162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 450us/step - loss: 1.0171


1.0278319120407104

In [29]:
# custom models
# creating residual block layer


@tf.keras.utils.register_keras_serializable()
class ResidualBlock(tf.keras.layers.Layer):
    def __init__(self, n_layers, n_neurons, **kwargs):
        super().__init__(**kwargs)
        self.hidden = [
            tf.keras.layers.Dense(
                n_neurons, activation="relu", kernel_initializer="he_normal"
            )
            for _ in range(n_layers)
        ]

    def call(self, inputs):
        Z = inputs
        for layer in self.hidden:
            Z = layer(Z)
        return inputs + Z


@tf.keras.utils.register_keras_serializable()
class ResidualRegressor(tf.keras.Model):
    def __init__(self, output_dim, **kwargs):
        super().__init__(**kwargs)
        self.output_dim = output_dim
        self.hidden1 = tf.keras.layers.Dense(
            30, activation="relu", kernel_initializer="he_normal"
        )

        self.block1 = ResidualBlock(2, 30)
        self.block2 = ResidualBlock(2, 30)

        self.out = tf.keras.layers.Dense(output_dim)

    def call(self, inputs):
        Z = self.hidden1(inputs)
        for _ in range(1 + 3):
            Z = self.block1(Z)
        Z = self.block2(Z)
        return self.out(Z)

    def get_config(self):
        base_config = super().get_config()
        return {**base_config, "output_dim": self.output_dim}


tf.keras.utils.set_random_seed(42)
model = ResidualRegressor(1)
model.compile(loss="mse", optimizer="nadam")
history = model.fit(
    x_train_scaled, y_train, epochs=2, validation_data=(x_valid_scaled, y_valid)
)
score = model.evaluate(x_test_scaled, y_test)
model.save("my_custom_model_with_residual.keras")

Epoch 1/2
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - loss: 123.5810 - val_loss: 2.0637
Epoch 2/2
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 866us/step - loss: 1.5416 - val_loss: 1.2041
[1m162/162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 470us/step - loss: 1.1640


In [None]:
# losses and metrics based on model internals


@tf.keras.utils.register_keras_serializable()
class ReconstructingRegressor(tf.keras.Model):
    def __init__(self, output_dim, **kwargs):
        super().__init__(**kwargs)
        self.hidden = [
            tf.keras.layers.Dense(30, activation="relu", kernel_initializer="he_normal")
            for _ in range(5)
        ]
        self.out = tf.keras.layers.Dense(output_dim)
        self.reconstruction_mean = tf.keras.metrics.Mean(name="reconstruction_error")

    def build(self, batch_input_shape):
        n_inputs = batch_input_shape[-1]
        self.reconstruct = tf.keras.layers.Dense(n_inputs)

    def call(self, inputs, training=None):
        Z = inputs
        for layer in self.hidden:
            Z = layer(Z)
        reconstruction = self.reconstruct(Z)
        recon_loss = tf.reduce_mean(tf.square(reconstruction - inputs))
        self.add_loss(0.05 * recon_loss)
        return self.out(Z)


tf.keras.utils.set_random_seed(42)
model = ReconstructingRegressor(1)
model.compile(loss="mse", optimizer="nadam")
history = model.fit(
    x_train_scaled, y_train, epochs=5, validation_data=(x_valid_scaled, y_valid)
)
model.evaluate(x_test_scaled, y_test)

Epoch 1/5
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - loss: 1.1051 - reconstruction_error: 0.0000e+00 - val_loss: 0.9605 - val_reconstruction_error: 0.0000e+00
Epoch 2/5
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 896us/step - loss: 0.5082 - reconstruction_error: 0.0000e+00 - val_loss: 8.8663 - val_reconstruction_error: 0.0000e+00
Epoch 3/5
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 883us/step - loss: 0.4392 - reconstruction_error: 0.0000e+00 - val_loss: 3.4861 - val_reconstruction_error: 0.0000e+00
Epoch 4/5
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 897us/step - loss: 0.3985 - reconstruction_error: 0.0000e+00 - val_loss: 6.6116 - val_reconstruction_error: 0.0000e+00
Epoch 5/5
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 976us/step - loss: 0.3805 - reconstruction_error: 0.0000e+00 - val_loss: 2.3522 - val_reconstruction_error: 0.0000e+00
[1m162/162[0m [32m━━━━━━

[0.3726007342338562, 0.0]

In [None]:
# computing gradients using autodiff


def f(w1, w2):
    return 3 * w1**2 + 2 * w1 * w2


w1, w2 = 5, 3
eps = 1e-6
print((f(w1 + eps, w2) - f(w1, w2)) / eps)
print(f(w1, w2 + eps) - f(w1, w2) / eps)

36.000003007075065
-104999894.99999


In [None]:
w1, w2 = tf.Variable(5.0), tf.Variable(3.0)
with tf.GradientTape() as tape:
    z = f(w1, w2)

gradients = tape.gradient(z, [w1, w2])
print(gradients)

[<tf.Tensor: shape=(), dtype=float32, numpy=36.0>, <tf.Tensor: shape=(), dtype=float32, numpy=10.0>]


In [None]:
tf.keras.utils.set_random_seed(42)
l2_reg = tf.keras.regularizers.l2(0.05)
model = tf.keras.models.Sequential(
    [
        tf.keras.layers.Dense(
            30,
            activation="relu",
            kernel_initializer="he_normal",
            kernel_regularizer=l2_reg,
        ),
        tf.keras.layers.Dense(1, kernel_regularizer=l2_reg),
    ]
)


def random_batch(X, y, batch_size=32):
    idx = np.random.randint(len(X), size=batch_size)
    return X[idx], y[idx]

In [None]:
class LayerNormatization(tf.keras.layers.Layer):
    def __init__(self, eps=0.001, **kwargs):
        super().__init__(**kwargs)
        self.eps = eps

    def build(self, batch_input_shape):
        self.apha = self.add_weight(
            name="alpha", shape=batch_input_shape[-1:], initializer="ones"
        )

        self.beta = self.add_weight(
            name="beta", shape=batch_input_shape[-1:], initializer="zeros"
        )

    def call(self, X):
        mean, variance = tf.nn.moments(X, axes=-1, keepdims=True)
        return self.apha * (X - mean) / (tf.sqrt(variance + self.eps)) + self.beta

    def get_config(self):
        base_config = super().get_config()
        return {**base_config, "eps": self.eps}


X = x_train.astype(np.float32)

custom_layer_norm = LayerNormatization()
keras_layer_norm = tf.keras.layers.LayerNormalization()

tf.reduce_mean(
    tf.keras.losses.MeanAbsoluteError()(keras_layer_norm(X), custom_layer_norm(X))
)

<tf.Tensor: shape=(), dtype=float32, numpy=3.3358560358465184e-08>

In [None]:
tf.keras.utils.set_random_seed(42)
random_alpha = np.random.rand(X.shape[-1])
random_beta = np.random.rand(X.shape[-1])

custom_layer_norm.set_weights([random_alpha, random_beta])
keras_layer_norm.set_weights([random_alpha, random_beta])

tf.reduce_mean(
    tf.keras.losses.MeanAbsoluteError()(keras_layer_norm(X), custom_layer_norm(X))
)

<tf.Tensor: shape=(), dtype=float32, numpy=1.691972784101381e-08>