From cf0ce13479fe15e34f4d70d616b131b0efefa4e1 Mon Sep 17 00:00:00 2001
From: Monasri29-hub
Date: Mon, 20 Oct 2025 20:13:12 +0530
Subject: [PATCH 1/2] fix: avoid log(0) in KL divergence by filtering zero entries

Fixes #12233

- Filter out zero entries from y_true before computing logarithm
- Add doctests demonstrating correct behavior with zeros
- Mathematically correct per information theory conventions
---
 machine_learning/loss_functions.py | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/machine_learning/loss_functions.py b/machine_learning/loss_functions.py
index 0bd9aa8b5401..8781e08bf502 100644
--- a/machine_learning/loss_functions.py
+++ b/machine_learning/loss_functions.py
@@ -628,7 +628,6 @@ def smooth_l1_loss(y_true: np.ndarray, y_pred: np.ndarray, beta: float = 1.0) ->
     loss = np.where(diff < beta, 0.5 * diff**2 / beta, diff - 0.5 * beta)
     return np.mean(loss)
 
-
 def kullback_leibler_divergence(y_true: np.ndarray, y_pred: np.ndarray) -> float:
     """
     Calculate the Kullback-Leibler divergence (KL divergence) loss between true labels
@@ -653,16 +652,29 @@ def kullback_leibler_divergence(y_true: np.ndarray, y_pred: np.ndarray) -> float
     >>> predicted_probs = np.array([0.3, 0.3, 0.4, 0.5])
     >>> kullback_leibler_divergence(true_labels, predicted_probs)
     Traceback (most recent call last):
-    ...
+        ...
     ValueError: Input arrays must have the same length.
+    >>> true_labels = np.array([0.0, 0.3, 0.7])
+    >>> predicted_probs = np.array([0.1, 0.3, 0.6])
+    >>> float(kullback_leibler_divergence(true_labels, predicted_probs))
+    0.10790547587908085
+    >>> true_labels = np.array([0.0, 0.0, 1.0])
+    >>> predicted_probs = np.array([0.2, 0.3, 0.5])
+    >>> float(kullback_leibler_divergence(true_labels, predicted_probs))
+    0.6931471805599453
     """
     if len(y_true) != len(y_pred):
         raise ValueError("Input arrays must have the same length.")
 
-    kl_loss = y_true * np.log(y_true / y_pred)
+    # Filter out entries where y_true is 0 to avoid log(0)
+    # By definition of KL divergence: 0 * log(0/q) = 0
+    mask = y_true != 0
+    kl_loss = y_true[mask] * np.log(y_true[mask] / y_pred[mask])
     return np.sum(kl_loss)
 
 
+
+
 if __name__ == "__main__":
     import doctest
 

From 008d408922eb95f234df41083d944bda8c48adc1 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 20 Oct 2025 15:42:06 +0000
Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 machine_learning/loss_functions.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/machine_learning/loss_functions.py b/machine_learning/loss_functions.py
index 8781e08bf502..59c92ae627a0 100644
--- a/machine_learning/loss_functions.py
+++ b/machine_learning/loss_functions.py
@@ -628,6 +628,7 @@ def smooth_l1_loss(y_true: np.ndarray, y_pred: np.ndarray, beta: float = 1.0) ->
     loss = np.where(diff < beta, 0.5 * diff**2 / beta, diff - 0.5 * beta)
     return np.mean(loss)
 
+
 def kullback_leibler_divergence(y_true: np.ndarray, y_pred: np.ndarray) -> float:
     """
     Calculate the Kullback-Leibler divergence (KL divergence) loss between true labels
@@ -673,8 +674,6 @@ def kullback_leibler_divergence(y_true: np.ndarray, y_pred: np.ndarray) -> float
     return np.sum(kl_loss)
 
 
-
-
 if __name__ == "__main__":
     import doctest
 
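
The sketch below is a minimal, standalone restatement of the masked KL computation that the patch introduces, included so the new doctest values can be re-checked outside the repository. It is not part of the diff; the helper name kl_divergence_masked is an illustrative assumption, and only NumPy is required.

import numpy as np


def kl_divergence_masked(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """Sum p * log(p / q) over entries where p != 0, treating 0 * log(0 / q) as 0."""
    if len(y_true) != len(y_pred):
        raise ValueError("Input arrays must have the same length.")
    mask = y_true != 0  # skip zero entries so np.log never receives 0 / q
    return float(np.sum(y_true[mask] * np.log(y_true[mask] / y_pred[mask])))


if __name__ == "__main__":
    # Reproduces the doctest values added by the patch:
    # 0.3 * log(0.3 / 0.3) + 0.7 * log(0.7 / 0.6) ~= 0.10790547587908085
    print(kl_divergence_masked(np.array([0.0, 0.3, 0.7]), np.array([0.1, 0.3, 0.6])))
    # 1.0 * log(1.0 / 0.5) = ln(2) ~= 0.6931471805599453
    print(kl_divergence_masked(np.array([0.0, 0.0, 1.0]), np.array([0.2, 0.3, 0.5])))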