# Custom Losses, Metrics, and Gradients

In [2]:
import tensorflow as tf
import numpy as np

# Define a simple regression model with auxiliary outputs
inputs = tf.keras.Input(shape=(10,))
hidden = tf.keras.layers.Dense(5, activation="relu")(inputs)
outputs = tf.keras.layers.Dense(1)(hidden)

In [3]:
# Define the model
model = tf.keras.Model(inputs, outputs)


In [7]:
import tensorflow as tf

# Define a simple regression model
inputs = tf.keras.Input(shape=(10,))
hidden = tf.keras.layers.Dense(5, activation="relu", name="hidden_layer")(inputs)
outputs = tf.keras.layers.Dense(1, name="output_layer")(hidden)

In [8]:
# Define the model
model = tf.keras.Model(inputs, outputs)

# Define a custom loss function as a Keras layer
class ReconstructionLossLayer(tf.keras.layers.Layer):
    def call(self, inputs):
        return tf.reduce_mean(tf.square(inputs))

In [9]:
# Create an instance of the custom loss layer
reconstruction_loss_layer = ReconstructionLossLayer()

# Add the reconstruction loss using the hidden layer
def reconstruction_loss(hidden):
    return reconstruction_loss_layer(hidden)


In [11]:
# Wrap hidden layer computation for loss calculation
hidden_output = model.get_layer("hidden_layer").output
recon_loss = reconstruction_loss(hidden_output)




In [15]:
import tensorflow as tf
from tensorflow.keras.layers import Layer

# Define a custom loss layer
class ReconstructionLossLayer(Layer):
    def __init__(self, **kwargs):
        super(ReconstructionLossLayer, self).__init__(**kwargs)

    def call(self, inputs):
        return tf.reduce_mean(tf.square(inputs))

# Define the model
inputs = tf.keras.Input(shape=(10,))
hidden = tf.keras.layers.Dense(5, activation="relu", name="hidden_layer")(inputs)
outputs = tf.keras.layers.Dense(1, name="output_layer")(hidden)

# Instantiate the reconstruction loss layer
reconstruction_loss_layer = ReconstructionLossLayer()

# Use the hidden layer's output for reconstruction loss
reconstruction_loss = reconstruction_loss_layer(hidden)

# Create the model
model = tf.keras.Model(inputs, outputs)

# Define a combined loss function
def combined_loss(y_true, y_pred):
    mse_loss = tf.reduce_mean(tf.square(y_true - y_pred))  # MSE loss
    total_loss = mse_loss + 0.01 * reconstruction_loss  # Add reconstruction loss
    return total_loss

# Compile the model
model.compile(optimizer="adam", loss=combined_loss, metrics=["mae"])

# Generate synthetic data
X_train = tf.random.normal((100, 10))
y_train = tf.random.normal((100, 1))




In [18]:
import tensorflow as tf
from tensorflow.keras.layers import Layer

# Define a custom layer for reconstruction loss
class ReconstructionLossLayer(Layer):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def call(self, inputs):
        # Compute the reconstruction loss
        return tf.reduce_mean(tf.square(inputs))

# Define the model
inputs = tf.keras.Input(shape=(10,), name="input_layer")
hidden = tf.keras.layers.Dense(5, activation="relu", name="hidden_layer")(inputs)
reconstruction_loss = ReconstructionLossLayer()(hidden)  # Use custom layer
outputs = tf.keras.layers.Dense(1, name="output_layer")(hidden)

# Create the model
model = tf.keras.Model(inputs=inputs, outputs=outputs)



# Compile the model
model.compile(optimizer="adam", loss="mse", metrics=["mae"])

# Generate synthetic data
X_train = tf.random.normal((100, 10))  # 100 samples, 10 features
y_train = tf.random.normal((100, 1))   # 100 samples, 1 output

# Train the model
model.fit(X_train, y_train, epochs=5, batch_size=16)


Epoch 1/5
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 1.3795 - mae: 0.9855  
Epoch 2/5
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 1.1823 - mae: 0.8909  
Epoch 3/5
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 1.1175 - mae: 0.8605 
Epoch 4/5
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 1.2197 - mae: 0.9133 
Epoch 5/5
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 1.2481 - mae: 0.9134 


<keras.src.callbacks.history.History at 0x7bd3fc1aea10>

# Customizable Loss with Parameters
To allow custom thresholds in Huber loss:

In [20]:
def create_huber(threshold=1.0):
    def huber_fn(y_true, y_pred):
        error = y_true - y_pred
        is_small_error = tf.abs(error) < threshold
        squared_loss = tf.square(error) / 2
        linear_loss = threshold * tf.abs(error) - threshold ** 2 / 2
        return tf.where(is_small_error, squared_loss, linear_loss)
    return huber_fn


In [21]:
model.compile(loss=create_huber(2.0), optimizer="nadam")


# Subclassing for Persistent Custom Loss
To save threshold values within the model:

In [23]:
class HuberLoss(tf.keras.losses.Loss):
    def __init__(self, threshold=1.0, **kwargs):
        self.threshold = threshold
        super().__init__(**kwargs)

    def call(self, y_true, y_pred):
        error = y_true - y_pred
        is_small_error = tf.abs(error) < self.threshold
        squared_loss = tf.square(error) / 2
        linear_loss = self.threshold * tf.abs(error) - self.threshold**2 / 2
        return tf.where(is_small_error, squared_loss, linear_loss)

    def get_config(self):
        base_config = super().get_config()
        return {**base_config, "threshold": self.threshold}


#  Custom Functions
You can create custom implementations for:

Activation Functions: Define how neurons activate.

In [25]:
def my_softplus(z):
    return tf.math.log(1.0 + tf.exp(z))


Initializers: Customize weight initialization.

In [26]:
def my_glorot_initializer(shape, dtype=tf.float32):
    stddev = tf.sqrt(2. / (shape[0] + shape[1]))
    return tf.random.normal(shape, stddev=stddev, dtype=dtype)


Regularizers: Add penalties to model weights to prevent overfitting

In [27]:
def my_l1_regularizer(weights):
    return tf.reduce_sum(tf.abs(0.01 * weights))


Constraints: Enforce specific conditions on weights (e.g., non-negativity)

In [28]:
def my_positive_weights(weights):
    return tf.where(weights < 0., tf.zeros_like(weights), weights)


Usage in Layers: These functions are seamlessly integrated into layers:

In [29]:
layer = tf.keras.layers.Dense(1,
                              activation=my_softplus,
                              kernel_initializer=my_glorot_initializer,
                              kernel_regularizer=my_l1_regularizer,
                              kernel_constraint=my_positive_weights)


2. Custom Classes
For functions with hyperparameters, use specialized Keras classes like:

Custom Regularizers:

In [30]:
class MyL1Regularizer(tf.keras.regularizers.Regularizer):
    def __init__(self, factor):
        self.factor = factor

    def __call__(self, weights):
        return tf.reduce_sum(tf.abs(self.factor * weights))

    def get_config(self):
        return {"factor": self.factor}


#  Custom Metrics
Metrics differ from losses:

Losses: Differentiable, for optimization.
Metrics: Not necessarily differentiable, for evaluation.
For example, a stateful metric like precision:

In [31]:
precision = tf.keras.metrics.Precision()
precision([0, 1, 1, 0], [1, 1, 0, 0])
precision.result().numpy()  # Returns precision after multiple batches.


0.5

Custom Metric Example:

Stateful metric for mean Huber loss:

In [32]:
class HuberMetric(tf.keras.metrics.Metric):
    def __init__(self, threshold=1.0, **kwargs):
        super().__init__(**kwargs)
        self.threshold = threshold
        self.total = self.add_weight("total", initializer="zeros")
        self.count = self.add_weight("count", initializer="zeros")

    def update_state(self, y_true, y_pred, sample_weight=None):
        sample_metrics = tf.where(
            tf.abs(y_true - y_pred) < self.threshold,
            0.5 * tf.square(y_true - y_pred),
            self.threshold * tf.abs(y_true - y_pred) - 0.5 * self.threshold**2)
        self.total.assign_add(tf.reduce_sum(sample_metrics))
        self.count.assign_add(tf.cast(tf.size(y_true), tf.float32))

    def result(self):
        return self.total / self.count

    def get_config(self):
        return {"threshold": self.threshold}


# Custom Layers
To build new layers:

Subclass tf.keras.layers.Layer.
Define:
__init__(): Layer hyperparameters.
build(): Create weights.
call(): Layer's forward pass logic.

In [33]:
class MyDense(tf.keras.layers.Layer):
    def __init__(self, units, activation=None, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.activation = tf.keras.activations.get(activation)

    def build(self, input_shape):
        self.kernel = self.add_weight(
            name="kernel", shape=[input_shape[-1], self.units], initializer="glorot_normal")
        self.bias = self.add_weight(
            name="bias", shape=[self.units], initializer="zeros")

    def call(self, X):
        return self.activation(X @ self.kernel + self.bias)

    def get_config(self):
        return {"units": self.units, "activation": self.activation}


# Computing Gradients with Autodiff
Automatic Gradient Calculation
Use tf.GradientTape for automatic differentiation.

In [34]:
# Define a toy function
def f(w1, w2):
    return w1**2 + w2**3

# Compute gradients
w1 = tf.Variable(2.0)
w2 = tf.Variable(3.0)

with tf.GradientTape() as tape:
    loss = f(w1, w2)

gradients = tape.gradient(loss, [w1, w2])
print("Gradients:", gradients)


Gradients: [<tf.Tensor: shape=(), dtype=float32, numpy=4.0>, <tf.Tensor: shape=(), dtype=float32, numpy=27.0>]


# Persistent Gradient Calculation
For multiple gradient calculations, use persistent=True.

In [35]:
with tf.GradientTape(persistent=True) as tape:
    x = tf.Variable(5.0)
    y = x**2 + 3*x

grad_x = tape.gradient(y, x)  # First calculation
grad_x_again = tape.gradient(grad_x, x)  # Second calculation
del tape  # Release resources


# Customizing What to Track
Use tape.watch() to track non-variable tensors.

In [36]:
x = tf.constant(3.0)

with tf.GradientTape() as tape:
    tape.watch(x)
    y = x**3

grad = tape.gradient(y, x)
print("Gradient:", grad)


Gradient: tf.Tensor(27.0, shape=(), dtype=float32)


# Advanced Gradient Techniques
Gradient of Summed Losses
By default, TensorFlow computes gradients of summed losses. Use tape.jacobian() for individual gradients.

In [37]:
with tf.GradientTape() as tape:
    x = tf.constant([1.0, 2.0])
    y = x**2

jacobian = tape.jacobian(y, x)
print("Jacobian:", jacobian)


Jacobian: None


# Stopping Gradients
Use tf.stop_gradient() to prevent backpropagation.

In [38]:
x = tf.Variable(3.0)

with tf.GradientTape() as tape:
    y = tf.stop_gradient(x) * x

grad = tape.gradient(y, x)
print("Gradient:", grad)  # Should be 0 due to stop_gradient


Gradient: tf.Tensor(3.0, shape=(), dtype=float32)


# Handling Numerical Stability
Common Issues
Operations like square roots or exponentials can cause instability for extreme values, resulting in infinities or NaNs.

Solutions
Modify operations to ensure stability.

In [39]:
# Stable computation of log(1 + exp(z))
def stable_log_exp(z):
    return tf.math.log1p(tf.math.exp(-tf.abs(z))) + tf.maximum(z, 0)

z = tf.constant([100.0, -100.0])
result = stable_log_exp(z)
print("Stable Output:", result)


Stable Output: tf.Tensor([100.   0.], shape=(2,), dtype=float32)
