Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Kling-Gupta Efficiency #172

Merged
merged 11 commits into from
Feb 8, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
102 changes: 102 additions & 0 deletions python/metrics/src/hydrotools/metrics/_validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
"""
=============================
Method Validation for Metrics
=============================
Various methods and objects for validating metric inputs.

Functions
---------
- raise_for_non_vector
- raise_for_inconsistent_shapes

Classes
-------
- InconsistentShapesError
- NonVectorError

"""

import numpy as np
import numpy.typing as npt
from typing import List, Tuple

class InconsistentShapesError(Exception):
    """Raised when two array-like values do not share the same shape.

    Parameters
    ----------
    *args: object
        Positional arguments forwarded to ``Exception``.
    array_shape_1: tuple of int, required keyword
        Shape of the first (reference) array.
    array_shape_2: tuple of int, required keyword
        Shape of the offending array.
    """

    def __init__(self,
        *args: object,
        # Tuple[int, ...] (variable length), not Tuple[int] which would mean a 1-tuple
        array_shape_1: Tuple[int, ...],
        array_shape_2: Tuple[int, ...]
    ) -> None:
        # Store both shapes so __str__ can build an informative message.
        self._array_shape_1 = array_shape_1
        self._array_shape_2 = array_shape_2
        super().__init__(*args)

    def __str__(self) -> str:
        a1s = self._array_shape_1
        a2s = self._array_shape_2
        message = f"Arrays are not the same shape. The array shapes are {a1s} and {a2s}."
        return message

class NonVectorError(Exception):
    """Raised when an array-like value is not 1-dimensional.

    Parameters
    ----------
    *args: object
        Positional arguments forwarded to ``Exception``.
    arr: array-like, required keyword
        The offending array; echoed back in the error message.
    """

    def __init__(self, *args: object, arr: npt.ArrayLike) -> None:
        super().__init__(*args)
        # Keep a reference to the bad input so __str__ can display it.
        self._arr = arr

    def __str__(self) -> str:
        return f"This array is not 1-dimensional\n {self._arr}"

def raise_for_non_vector(
    *arrays: npt.ArrayLike
) -> None:
    """
    Validate that all arrays are 1-dimensional.

    Parameters
    ----------
    arrays: arbitrary number of array-like values, required
        Arrays to check.

    Raises
    ------
    NonVectorError:
        Raises if one of the arrays is not 1-dimensional.
    """
    # NOTE: with *args the annotation is the per-argument type (ArrayLike),
    # not List[ArrayLike].
    # Check each array; asarray avoids copying inputs that are already ndarrays
    for x in arrays:
        if np.asarray(x).ndim != 1:
            raise NonVectorError(arr=x)

def raise_for_inconsistent_shapes(
    *arrays: npt.ArrayLike
) -> None:
    """
    Validate that all arrays are the same shape.

    Parameters
    ----------
    arrays: arbitrary number of array-like values, required
        Arrays to check.

    Raises
    ------
    InconsistentShapesError:
        Raises if all of the arrays are not the same shape.
    """
    # Nothing to compare when fewer than two arrays are given
    # (also avoids an IndexError on an empty call).
    if len(arrays) < 2:
        return

    # Use the first array's shape as the reference
    reference_shape = np.asarray(arrays[0]).shape

    # Compare every remaining shape against the reference. Direct tuple
    # comparison also catches a differing number of dimensions, which a
    # zip-based element-wise check would silently truncate away
    # (e.g. (4,) vs (4, 2)).
    for y in arrays[1:]:
        y_shape = np.asarray(y).shape
        if y_shape != reference_shape:
            raise InconsistentShapesError(
                array_shape_1=reference_shape,
                array_shape_2=y_shape
            )
2 changes: 1 addition & 1 deletion python/metrics/src/hydrotools/metrics/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.1.3"
__version__ = "1.2.0"
81 changes: 76 additions & 5 deletions python/metrics/src/hydrotools/metrics/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,15 @@
- equitable_threat_score
- mean_squared_error
- nash_sutcliffe_efficiency
- kling_gupta_efficiency

"""

import numpy as np
import numpy.typing as npt
import pandas as pd
from typing import Union, Mapping, MutableMapping
from . import _validation as validate

def mean_squared_error(
y_true: npt.ArrayLike,
Expand All @@ -39,9 +41,9 @@ def mean_squared_error(

Parameters
----------
y_true: array-like of shape (n_samples,) or (n_samples, n_outputs)
y_true: array-like of shape (n_samples,), required
Ground truth (correct) target values, also called observations, measurements, or observed values.
y_pred: pandas.Series, required
y_pred: array-like of shape (n_samples,), required
Estimated target values, also called simulations or modeled values.
root: bool, default False
When True, return the root mean squared error.
Expand All @@ -66,14 +68,14 @@ def nash_sutcliffe_efficiency(
log: bool = False,
normalized: bool = False
) -> float:
"""Compute the NashSutcliffe model efficiency coefficient (NSE), also called the
"""Compute the Nash-Sutcliffe model efficiency coefficient (NSE), also called the
mean squared error skill score or the R^2 (coefficient of determination) regression score.

Parameters
----------
y_true: array-like of shape (n_samples,) or (n_samples, n_outputs)
y_true: array-like of shape (n_samples,), required
Ground truth (correct) target values, also called observations, measurements, or observed values.
y_pred: pandas.Series, required
y_pred: array-like of shape (n_samples,), required
Estimated target values, also called simulations or modeled values.
log: bool, default False
Apply numpy.log (natural logarithm) to y_true and y_pred
Expand All @@ -98,6 +100,12 @@ def nash_sutcliffe_efficiency(
Conference Abstracts (p. 237).

"""
# Raise if not 1-D arrays
validate.raise_for_non_vector(y_true, y_pred)

# Raise if not same shape
validate.raise_for_inconsistent_shapes(y_true, y_pred)

# Optionally transform components
if log:
y_true = np.log(y_true)
Expand All @@ -112,6 +120,69 @@ def nash_sutcliffe_efficiency(
return 1.0 / (1.0 + numerator/denominator)
return 1.0 - numerator/denominator

def kling_gupta_efficiency(
y_true: npt.ArrayLike,
y_pred: npt.ArrayLike,
r_scale: float = 1.0,
a_scale: float = 1.0,
b_scale: float = 1.0
) -> float:
"""Compute the Kling-Gupta model efficiency coefficient (KGE).
Copy link
Member

@aaraney aaraney Feb 7, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should any software assumptions be stated here? Something like: "This function assumes two sorted arrays of equal length." I would also want to add something that conveys that y_true[0] "lines up with" y_pred[0]. I can't find an appropriate phrase, though.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be worth mentioning what the shapes of the arrays are assumed to be, i.e. this method should only be applied to 1-dimensional arrays.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This made me realize that the docstring descriptions for these don't make any sense. I copied them from sklearn and forgot to update them for this case.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated docstrings and added new input array validation methods for NSE and KGE. These could certainly be way more rigorous and exhaustive, but it's a start.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note: the parameters descriptions explicitly state the expected shape ((n_samples,)).


Parameters
----------
y_true: array-like of shape (n_samples,), required
Ground truth (correct) target values, also called observations, measurements, or observed values.
y_pred: array-like of shape (n_samples,), required
Estimated target values, also called simulations or modeled values.
r_scale: float, optional, default 1.0
Linear correlation (r) scaling factor. Used to re-scale the Euclidean space by
emphasizing different KGE components.
a_scale: float, optional, default 1.0
Relative variability (alpha) scaling factor. Used to re-scale the Euclidean space by
emphasizing different KGE components.
b_scale: float, optional, default 1.0
Relative mean (beta) scaling factor. Used to re-scale the Euclidean space by
emphasizing different KGE components.

Returns
-------
score: float
Kling-Gupta efficiency.

References
----------
Gupta, H. V., Kling, H., Yilmaz, K. K., & Martinez, G. F. (2009). Decomposition of
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member

@aaraney aaraney Feb 7, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

on a side note, it would be "cool" if you could get the citation for the paper in bibtex or some other format. I am thinking a decorator that wraps each metric function and contains the bibtex formatted citation. You could access it like:

from hydrotools.metrics import metrics

print(metrics.kling_gupta_efficiency.citation(format="bibtex"))

@article{Gupta2009,
  doi = {10.1016/j.jhydrol.2009.08.003},
  url = {https://doi.org/10.1016/j.jhydrol.2009.08.003},
  year = {2009},
  month = oct,
  publisher = {Elsevier {BV}},
  volume = {377},
  number = {1-2},
  pages = {80--91},
  author = {Hoshin V. Gupta and Harald Kling and Koray K. Yilmaz and Guillermo F. Martinez},
  title = {Decomposition of the mean squared error and {NSE} performance criteria: Implications for improving hydrological modelling},
  journal = {Journal of Hydrology}
}

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That would be cool. Let's discuss that at next hydrotools meeting!

the mean squared error and NSE performance criteria: Implications for improving
hydrological modelling. Journal of hydrology, 377(1-2), 80-91.
https://doi.org/10.1016/j.jhydrol.2009.08.003

"""
# Raise if not 1-D arrays
validate.raise_for_non_vector(y_true, y_pred)

# Raise if not same shape
validate.raise_for_inconsistent_shapes(y_true, y_pred)

# Pearson correlation coefficient
r = np.corrcoef(y_pred, y_true)[0,1]

# Relative variability
a = np.std(y_pred) / np.std(y_true)

# Relative mean
b = np.mean(y_pred) / np.mean(y_true)

# Scaled Euclidean distance
EDs = np.sqrt(
(r_scale * (r - 1.0)) ** 2.0 +
(a_scale * (a - 1.0)) ** 2.0 +
(b_scale * (b - 1.0)) ** 2.0
)

# Return KGE
return 1.0 - EDs

def compute_contingency_table(
observed: pd.Series,
simulated: pd.Series,
Expand Down
28 changes: 27 additions & 1 deletion python/metrics/tests/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,4 +316,30 @@ def test_convert_mapping_values():
converted = metrics.convert_mapping_values(char_series)

assert converted.dtype == np.float64


def test_kling_gupta_efficiency():
    """KGE should match known closed-form values for scaled copies of y_true."""
    # Default (inverse case)
    assert np.isclose(metrics.kling_gupta_efficiency(y_true, y_pred), -1.0)

    observed = np.array(y_true)

    # Pure rescalings of the truth: r == 1, alpha and beta equal the factor.
    cases = [
        (0.5, 1.0 - np.sqrt(0.5)),   # half-scale
        (2.0, 1.0 - np.sqrt(2.0)),   # double-scale
        (1.0, 1.0),                  # identity
    ]
    for factor, expected in cases:
        score = metrics.kling_gupta_efficiency(y_true, observed * factor)
        assert np.isclose(score, expected)

    # Scaling parameters re-weight the KGE components.
    score = metrics.kling_gupta_efficiency(y_true, observed * 0.25,
        r_scale=0.5, a_scale=0.25, b_scale=0.25)
    assert np.isclose(score, 1.0 - np.sqrt(9.0 / 128.0))
22 changes: 22 additions & 0 deletions python/metrics/tests/test_validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import pytest
import numpy as np
from hydrotools.metrics import _validation

def test_raise_for_non_vector():
    """1-D inputs pass validation; 2-D inputs raise NonVectorError."""
    matrix = np.array([[1, 2, 3, 4], [1, 1, 1, 4]])
    vector = [1, 2, 3, 4]

    # A plain 1-D sequence is accepted without raising.
    _validation.raise_for_non_vector(vector)

    # A 2-D array is rejected.
    with pytest.raises(_validation.NonVectorError):
        _validation.raise_for_non_vector(matrix)

def test_raise_for_inconsistent_shapes():
    """Same-shape inputs pass; mismatched shapes raise InconsistentShapesError."""
    matrix = np.array([[1, 2, 3, 4], [1, 1, 1, 4]])
    vector_a = [1, 2, 3, 4]
    vector_b = [5, 6, 7, 8]

    # Two vectors of equal length are accepted without raising.
    _validation.raise_for_inconsistent_shapes(vector_a, vector_b)

    # A (2, 4) matrix paired with a length-4 vector is rejected.
    with pytest.raises(_validation.InconsistentShapesError):
        _validation.raise_for_inconsistent_shapes(matrix, vector_a)