metrics/probabilities: add quantile score (#530)
* metrics/probabilities: add quantile score

Add Quantile Score and Quantile Skill Score for evaluating probabilistic
forecasts.

* metrics/probabilistic: update _MAP variables

* metrics/probabilistic: add quantile_score test

* metrics/tests: add more quantile score test cases

* metrics/tests: add quantile skill score test

* quantile_score: revise to use indicator func form

Revise the quantile_score() function to use the indicator function form,
which, while not as intuitive as explicitly listing out the if-else
calculation, is more robust in handling either a single `obs, fx,
fx_prob` input or an array of `obs, fx, fx_prob` inputs. To counteract
the non-intuitive nature of the indicator function form, this commit
also adds notes in the function docstring explaining how the function
behaves when `obs > fx` and when `obs < fx`.
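
For illustration (not part of this commit), a minimal sketch of the two
equivalent formulations, showing why the indicator form vectorizes over
arrays while the if-else form does not:

    import numpy as np

    def qs_if_else(obs, fx, p):
        # explicit branching: readable, but only handles scalar inputs
        if obs > fx:
            return (obs - fx) * (1 - p)
        return (fx - obs) * p

    def qs_indicator(obs, fx, p):
        # indicator-function form: handles scalars and arrays alike
        return (fx - obs) * (p - np.where(obs > fx, 1.0, 0.0))

    # the two forms agree on scalar inputs (both give 8.0 here)
    assert np.isclose(qs_if_else(100, 80, 0.6), qs_indicator(100, 80, 0.6))

    # the indicator form also accepts arrays directly
    obs, fx, p = np.array([4, 5]), np.array([5, 4]), np.array([0.5, 0.5])
    print(qs_indicator(obs, fx, p))  # [0.5 0.5]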

* docs/api: add quantile score and skill score

* tests/calculator: fix test that relied on ordering of results

Fix a test that relied on checking the ordering of the probabilistic
metric results since the addition of the quantile score and skill score
bumped the indexing.

* probabilistic: remove extra `-` from docstring

* docstring: correct unit in examples

* docs/whatsnew: add quantile score API change

* improve docstring and add references

* use :math:`x` in quantile skill score docstring

* docstring: add line breaks in multi-line equation

* docstring: try align and eqnarray for multi-line eqn

* get equations rendering properly (maybe...)

* quantile skill score: improve math formatting

* flake8: ignore E501 and W605 in QS docstring

Ignore E501 (line too long) in QS docstring since the specific line is
an equation and breaking it up over multiple lines doesn't seem worth
it.

Ignore W605 (invalid escape sequence '\{' and '\}') in QS docstring
since those are necessary strings for the LaTeX equations (i.e. without
the escape character, the curly brackets will not be rendered in the
readthedocs output).
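
For context, a hypothetical example (strings invented here, not the
project's code) of how per-line flake8 suppressions work: a trailing
`# noqa: <codes>` comment disables only the listed checks on that line.

    # E501 suppressed: the equation reads better on a single long line
    equation = "QS = 1/n * sum((fx_i - obs_i) * (p - 1{obs_i > fx_i}))"  # noqa: E501

    # W605 suppressed: \{ and \} are invalid Python escapes but required LaTeX
    latex = "1\{obs > fx\}"  # noqa: W605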

* Add additional tests for quantile skill score

Add additional tests, including for edge cases such as negative
infinity.

* add Wilks 3rd edition reference

* cite Bouallegue et al. (2015) for quantile skill score

* quantile_skill_score: explicitly handle divide by zero warnings
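
A minimal sketch (plain NumPy, not the project's code) of the failure mode
being handled: naive division turns a zero reference score into `nan` plus
a RuntimeWarning, so the skill score must branch explicitly.

    import numpy as np

    qs_fx, qs_ref = 0.0, 0.0

    # naive division: 0 / 0 -> nan (and a RuntimeWarning unless suppressed)
    with np.errstate(invalid="ignore", divide="ignore"):
        naive = 1.0 - np.float64(qs_fx) / np.float64(qs_ref)
    print(naive)  # nan

    # explicit handling, mirroring the branches in quantile_skill_score below
    if qs_fx == qs_ref:
        skill = 0.0            # equal (possibly zero) scores -> no skill
    elif qs_ref == 0.0:
        skill = float("-inf")  # perfect reference, imperfect forecast
    else:
        skill = 1.0 - qs_fx / qs_ref
    print(skill)  # 0.0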
dplarson committed Aug 12, 2020
1 parent 13046c8 commit e8cfb4e
Showing 5 changed files with 177 additions and 11 deletions.
2 changes: 2 additions & 0 deletions docs/source/api.rst
@@ -569,6 +569,8 @@ Functions to compute forecast probabilistic performance metrics:

   metrics.probabilistic.brier_score
   metrics.probabilistic.brier_skill_score
   metrics.probabilistic.quantile_score
   metrics.probabilistic.quantile_skill_score
   metrics.probabilistic.brier_decomposition
   metrics.probabilistic.reliability
   metrics.probabilistic.resolution
4 changes: 3 additions & 1 deletion docs/source/whatsnew/1.0.0rc3.rst
@@ -8,7 +8,9 @@ This is the third 1.0 release candidate.

API Changes
~~~~~~~~~~~

* Added quantile score and skill score metrics:
  :py:func:`solarforecastarbiter.metrics.probabilistic.quantile_score` and
  :py:func:`solarforecastarbiter.metrics.probabilistic.quantile_skill_score`
  (:issue:`520`) (:pull:`530`)


Enhancements
150 changes: 143 additions & 7 deletions solarforecastarbiter/metrics/probabilistic.py
@@ -88,6 +88,140 @@ def brier_skill_score(obs, fx, fx_prob, ref, ref_prob):
    return skill


def quantile_score(obs, fx, fx_prob):
    """Quantile Score (QS).

    .. math::

        \\text{QS} = \\frac{1}{n} \\sum_{i=1}^n (fx_i - obs_i) * (p - 1\{obs_i > fx_i\})

    where :math:`n` is the number of forecasts, :math:`obs_i` is an
    observation, :math:`fx_i` is a forecast, :math:`1\{obs_i > fx_i\}` is an
    indicator function (1 if :math:`obs_i > fx_i`, 0 otherwise) and :math:`p`
    is the probability that :math:`obs_i <= fx_i`. [1]_ [2]_

    If :math:`obs > fx`, then we have:

    .. math::

        (fx - obs) < 0 \\\\
        (p - 1\{obs > fx\}) = (p - 1) <= 0 \\\\
        (fx - obs) * (p - 1) >= 0

    If instead :math:`obs < fx`, then we have:

    .. math::

        (fx - obs) > 0 \\\\
        (p - 1\{obs > fx\}) = (p - 0) >= 0 \\\\
        (fx - obs) * p >= 0

    Therefore, the quantile score is non-negative regardless of the relative
    magnitudes of obs and fx.

    Parameters
    ----------
    obs : (n,) array_like
        Observations (physical unit).
    fx : (n,) array_like
        Forecasts (physical units) of the right-hand-side of a CDF interval,
        e.g., fx = 10 MW is interpreted as forecasting <= 10 MW.
    fx_prob : (n,) array_like
        Probability [%] associated with the forecasts.

    Returns
    -------
    qs : float
        The Quantile Score, with the same units as the observations.

    Notes
    -----
    Quantile score is meant to be computed for a single probability of
    :math:`n` samples.

    Examples
    --------
    >>> obs = 100  # observation [MW]
    >>> fx = 80  # forecast [MW]
    >>> fx_prob = 60  # probability [%]
    >>> quantile_score(obs, fx, fx_prob)  # score [MW]
    8.0

    References
    ----------
    .. [1] Koenker and Bassett, Jr. (1978) "Regression Quantiles",
       Econometrica 46 (1), pp. 33-50. doi: 10.2307/1913643
    .. [2] Wilks (2011) "Forecast Verification". In "Statistical Methods in
       the Atmospheric Sciences" (3rd edition). Academic Press.
       ISBN: 9780123850225
    """  # NOQA: E501,W605

    # Prob(obs <= fx) = p
    p = fx_prob / 100.0
    qs = np.mean((fx - obs) * (p - np.where(obs > fx, 1.0, 0.0)))
    return qs


def quantile_skill_score(obs, fx, fx_prob, ref, ref_prob):
    """Quantile Skill Score (QSS).

    .. math::

        \\text{QSS} = 1 - \\text{QS}_{\\text{fx}} / \\text{QS}_{\\text{ref}}

    where :math:`\\text{QS}_{\\text{fx}}` is the Quantile Score of the
    evaluated forecast and :math:`\\text{QS}_{\\text{ref}}` is the Quantile
    Score of a reference forecast. [1]_

    Parameters
    ----------
    obs : (n,) array_like
        Observations (physical unit).
    fx : (n,) array_like
        Forecasts (physical units) of the right-hand-side of a CDF interval,
        e.g., fx = 10 MW is interpreted as forecasting <= 10 MW.
    fx_prob : (n,) array_like
        Probability [%] associated with the forecasts.
    ref : (n,) array_like
        Reference forecast (physical units) of the right-hand-side of a CDF
        interval.
    ref_prob : (n,) array_like
        Probability [%] associated with the reference forecast.

    Returns
    -------
    skill : float
        The Quantile Skill Score [unitless].

    See Also
    --------
    :py:func:`solarforecastarbiter.metrics.probabilistic.quantile_score`

    Notes
    -----
    This function returns 0 if QS_fx == QS_ref (including the case where
    both are 0), and negative infinity if QS_ref == 0 while QS_fx > 0.

    References
    ----------
    .. [1] Bouallegue, Pinson and Friederichs (2015) "Quantile forecast
       discrimination ability and value", Quarterly Journal of the Royal
       Meteorological Society 141, pp. 3415-3424. doi: 10.1002/qj.2624
    """

    qs_fx = quantile_score(obs, fx, fx_prob)
    qs_ref = quantile_score(obs, ref, ref_prob)

    # avoid 0 / 0 --> nan
    if qs_fx == qs_ref:
        return 0.0
    elif qs_ref == 0.0:
        # avoid divide by 0
        # typically caused by deadbands and short time periods
        return np.NINF
    else:
        return 1.0 - qs_fx / qs_ref


def _unique_forecasts(f):
    """Convert forecast probabilities to a set of unique values.
@@ -224,8 +358,8 @@ def reliability(obs, fx, fx_prob):
        The reliability of the forecast [unitless], where a perfectly reliable
        forecast has value of 0.

-    See Also:
-    ---------
+    See Also
+    --------
    brier_decomposition : 3-component decomposition of the Brier Score
    """
@@ -261,8 +395,8 @@ def resolution(obs, fx, fx_prob):
        The resolution of the forecast [unitless], where higher values are
        better.

-    See Also:
-    ---------
+    See Also
+    --------
    brier_decomposition : 3-component decomposition of the Brier Score
    """
@@ -294,8 +428,8 @@ def uncertainty(obs, fx, fx_prob):
        The uncertainty [unitless], where lower values indicate the event being
        forecasted occurs rarely.

-    See Also:
-    ---------
+    See Also
+    --------
    brier_decomposition : 3-component decomposition of the Brier Score
    """
@@ -402,14 +536,16 @@ def continuous_ranked_probability_score(obs, fx, fx_prob):
    'rel': (reliability, 'REL'),
    'res': (resolution, 'RES'),
    'unc': (uncertainty, 'UNC'),
    'qs': (quantile_score, 'QS'),
    'qss': (quantile_skill_score, 'QSS'),
    # 'sh': (sharpness, 'SH'),  # TODO
    'crps': (continuous_ranked_probability_score, 'CRPS'),
}

__all__ = [m[0].__name__ for m in _MAP.values()]

# Functions that require a reference forecast
-_REQ_REF_FX = ['bss']
+_REQ_REF_FX = ['bss', 'qss']

# Functions that require normalized factor
_REQ_NORM = []
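Putting the new metrics together, a short usage sketch (not part of the
diff; values hand-checked against the test cases below):

    import numpy as np
    from solarforecastarbiter.metrics import probabilistic as prob

    obs = np.array([100.0, 100.0])    # observations [MW]
    fx = np.array([80.0, 80.0])       # forecast values [MW]
    fx_prob = np.array([60.0, 60.0])  # Prob(obs <= fx) [%]
    ref = np.array([80.0, 80.0])      # reference forecast (median) [MW]
    ref_prob = np.array([50.0, 50.0])

    qs = prob.quantile_score(obs, fx, fx_prob)
    # each sample: (80 - 100) * (0.6 - 1) = 8.0, so the mean is 8.0
    print(qs)  # 8.0

    qss = prob.quantile_skill_score(obs, fx, fx_prob, ref, ref_prob)
    # QS_ref = (80 - 100) * (0.5 - 1) = 10.0, so QSS = 1 - 8/10 = 0.2
    print(qss)  # ~0.2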
6 changes: 3 additions & 3 deletions solarforecastarbiter/metrics/tests/test_calculator.py
@@ -372,9 +372,9 @@ def test_calculate_metrics_with_probablistic(single_observation,
    expected = {
        0: ('total', 'bs', '0', 0.8308500000000001),
        1: ('total', 'bss', '0', -0.010366947374821578),
-       5: ('year', 'bs', '2019', 0.8308500000000001),
-       9: ('year', 'unc', '2019', 0.08999999999999998),
-       10: ('month', 'bs', 'Aug', 0.8308500000000001)
+       7: ('year', 'bs', '2019', 0.8308500000000001),
+       11: ('year', 'unc', '2019', 0.08999999999999998),
+       14: ('month', 'bs', 'Aug', 0.8308500000000001)
    }
    attr_order = ('category', 'metric', 'index', 'value')
    for k, expected_attrs in expected.items():
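One order-independent alternative, sketched with a hypothetical helper (the
result objects are assumed to expose `category`, `metric`, `index`, and
`value`, matching the attribute order above): key expectations by content so
future metric additions cannot bump the positions.

    def results_by_key(results):
        """Index metric results by (category, metric, index), not position."""
        return {(r.category, r.metric, r.index): r.value for r in results}

    def check_expected(results):
        lookup = results_by_key(results)
        # assertions no longer depend on where each result lands in the list
        assert lookup[('total', 'bs', '0')] == 0.8308500000000001
        assert lookup[('year', 'unc', '2019')] == 0.08999999999999998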
26 changes: 26 additions & 0 deletions solarforecastarbiter/metrics/tests/test_probabilistic.py
@@ -42,6 +42,32 @@ def test_brier_skill_score(fx, fx_prob, ref, ref_prob, obs, value):
    assert prob.brier_skill_score(obs, fx, fx_prob, ref, ref_prob) == value


@pytest.mark.parametrize("obs,fx,fx_prob,value", [
    (4, 5, 50, 0.5),
    (5, 4, 50, 0.5),
    (2, 10, 80, 6.4),
    (2, 3, 80, 0.8),
    (2, 100, 100, 98),
    (100, 80, 50, 10.0),
    (100, 80, 60, 8.0),
    (np.array([4, 5]), np.array([5, 4]), np.array([50, 50]), 0.5),
])
def test_quantile_score(obs, fx, fx_prob, value):
    assert prob.quantile_score(obs, fx, fx_prob) == value


@pytest.mark.parametrize("obs,fx,fx_prob,ref,ref_prob,value", [
    (4, 5, 50, 3, 50, 1 - 0.5 / 0.5),
    (100, 80, 60, 80, 50, 1 - 8 / 10),
    (2, 1, 80, 10, 80, 1 - 0.2 / 6.4),
    (2, 3, 80, 10, 80, 1 - 0.8 / 6.4),
    (2, 3, 80, 100, 100, 1 - 0.8 / 98),
    (4, 5, 50, 4, 100, np.NINF),
])
def test_quantile_skill_score(obs, fx, fx_prob, ref, ref_prob, value):
    assert prob.quantile_skill_score(obs, fx, fx_prob, ref, ref_prob) == value


@pytest.mark.parametrize("f,value", [
    ([0.1234], [0.1]),
    ([0.1689], [0.2]),
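As a sanity check (not part of the commit), the hand arithmetic behind a few
of the parametrized cases above:

    # (obs=2, fx=10, fx_prob=80): obs < fx, so QS = (fx - obs) * p
    assert abs((10 - 2) * 0.8 - 6.4) < 1e-12

    # (obs=100, fx=80, fx_prob=60): obs > fx, so QS = (fx - obs) * (p - 1)
    assert abs((80 - 100) * (0.6 - 1.0) - 8.0) < 1e-12

    # (obs=2, fx=1, fx_prob=80, ref=10, ref_prob=80):
    # QS_fx = (1 - 2) * (0.8 - 1) = 0.2, QS_ref = 6.4, QSS = 1 - 0.2 / 6.4
    assert abs((1 - 0.2 / 6.4) - 0.96875) < 1e-12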
