metrics: add CRPSS (#605)
* metrics: add CRPSS

Add a skill score version of the CRPS metric, which shows the relative
improvement (in terms of CRPS) of a forecast over a reference forecast
(see the numeric sketch after these notes).

* metrics: add test for negative CRPSS value

Add a test for the situation where the CRPS of the forecast is worse than
that of the reference forecast, making the CRPSS negative.

* docs/api: list CRPSS metric (for prob fx)

* docs/whatsnew/1.0.0rc4: add CRPS skill score mention

* tests: flake8 corrections

* tests/calculator: handle CRPSS requiring ref dist fx

Revisions to handle CRPSS requiring reference forecasts that are full
distribution forecasts (not just single probability forecasts). Still a
work in progress, but these changes move things forward.

* CRPS: add checks to ensure forecasts have proper dimensions

Add checks to ensure that the forecasts given to the CRPS function have
the correct dimensions. If there is an issue, raise a ValueError.

* CRPS: more revisions to improve robustness

Revisions to further improve robustness of inputs to CRPS and CRPSS.
Particularly for the reference forecasts.

* calculator tests: revise for CRPSS changes

* CRPSS: modify to avoid divide by zero

Modify the CRPSS calculation to avoid divide by zero (same pattern as in
`deterministic.forecast_skill`).
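
As a numeric sketch of the behaviors described in these notes (illustrative
values only; the guard mirrors the function body added in the diff below):

    import numpy as np

    def crpss_sketch(crps_fx, crps_ref):
        # Same guard structure as crps_skill_score below (minus input checks).
        if crps_fx == crps_ref:
            return 0.0
        elif crps_ref == 0.0:
            return -np.inf  # perfect reference; avoid dividing by zero
        return 1 - crps_fx / crps_ref

    print(crpss_sketch(10.0, 20.0))  # 0.5: forecast halves the reference CRPS
    print(crpss_sketch(20.0, 10.0))  # -1.0: forecast worse than the reference
    print(crpss_sketch(5.0, 0.0))    # -inf: reference is perfect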
dplarson committed Oct 29, 2020
1 parent c943b9b commit e12a37b
Showing 5 changed files with 159 additions and 19 deletions.
1 change: 1 addition & 0 deletions docs/source/api.rst
@@ -580,6 +580,7 @@ Functions to compute forecast probabilistic performance metrics:
metrics.probabilistic.uncertainty
metrics.probabilistic.sharpness
metrics.probabilistic.continuous_ranked_probability_score
metrics.probabilistic.crps_skill_score

Event
-----
2 changes: 2 additions & 0 deletions docs/source/whatsnew/1.0.0rc4.rst
@@ -36,6 +36,8 @@ API Changes
from the BSRN network (:issue:`541`) (:pull:`604`)
* Removed ``preprocessing.VALIDATION_RESULT_TOTAL_STRING`` and
``preprocessing.UNDEFINED_DATA_STRING``.
* Added CRPS skill score metric :py:func:`metrics.probabilistic.crps_skill_score`
(:issue:`494`) (:pull:`605`)


Enhancements
73 changes: 70 additions & 3 deletions solarforecastarbiter/metrics/probabilistic.py
@@ -494,6 +494,14 @@ def continuous_ranked_probability_score(obs, fx, fx_prob):
crps : float
The Continuous Ranked Probability Score [unitless].
Raises
------
ValueError
If the forecasts have incorrect dimensions: either a) the forecasts are
for a single sample (n=1) with d CDF intervals but are given as a 1D
array with d values or b) the forecasts are given as 2D arrays (n,d)
but do not contain at least 2 CDF intervals (i.e. d < 2).
Examples
--------
@@ -514,7 +522,14 @@ def continuous_ranked_probability_score(obs, fx, fx_prob):
"""

# match observations to fx shape: (n,) => (n, d)
obs = np.tile(obs, (fx.shape[1], 1)).T
if np.ndim(fx) < 2:
raise ValueError("forecasts must be 2D arrays (expected (n,d), got"
f"{np.shape(fx)})")
elif np.shape(fx)[1] < 2:
raise ValueError("forecasts must have d >= 2 CDF intervals "
f"(expected >= 2, got {np.shape(fx)[1]})")
else:
obs = np.tile(obs, (fx.shape[1], 1)).T

# event: 0=did not happen, 1=did happen
o = np.where(obs <= fx, 1.0, 0.0)
@@ -529,6 +544,57 @@ def continuous_ranked_probability_score(obs, fx, fx_prob):
return crps
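
To make the new shape checks and the tiling step concrete, a small sketch
with hypothetical values:

    import numpy as np

    fx = np.array([[10, 20, 30], [10, 20, 30]])  # (n, d) = (2, 3): accepted
    # np.array([10, 20, 30])  is (3,): 1D input, raises ValueError
    # np.array([[10], [20]])  is (2, 1): d < 2, raises ValueError

    # The tiling above broadcasts obs from (n,) to (n, d) for comparison:
    obs = np.array([8, 9])
    obs_2d = np.tile(obs, (fx.shape[1], 1)).T
    # array([[8, 8, 8],
    #        [9, 9, 9]])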


def crps_skill_score(obs, fx, fx_prob, ref, ref_prob):
"""CRPS skill score.
CRPSS = 1 - CRPS_fx / CRPS_ref
where CRPS_fx is the CRPS of the evaluated forecast and CRPS_ref is the
CRPS of a reference forecast.
Parameters
----------
obs : (n,) array_like
Observations (physical unit).
fx : (n, d) array_like
Forecasts (physical units) of the right-hand-side of a CDF with d
intervals (d >= 2), e.g., fx = [10 MW, 20 MW, 30 MW] is interpreted as
<= 10 MW, <= 20 MW, <= 30 MW.
fx_prob : (n, d) array_like
Probability [%] associated with the forecasts.
ref : (n, d) array_like
Reference forecasts (physical units) of the right-hand-side of a CDF
with d intervals (d >= 2), e.g., ref = [10 MW, 20 MW, 30 MW] is
interpreted as <= 10 MW, <= 20 MW, <= 30 MW.
ref_prob : (n, d) array_like
Probability [%] associated with the reference forecast.
Returns
-------
skill : float
The CRPS skill score [unitless].
See Also
--------
:py:func:`solarforecastarbiter.metrics.probabilistic.continuous_ranked_probability_score`
"""

if np.isscalar(ref):
return np.nan
else:
crps_fx = continuous_ranked_probability_score(obs, fx, fx_prob)
crps_ref = continuous_ranked_probability_score(obs, ref, ref_prob)

if crps_fx == crps_ref:
return 0.0
elif crps_ref == 0.0:
# avoid divide by zero
return np.NINF
else:
return 1 - crps_fx / crps_ref


# Add new metrics to this map to map shorthand to function
_MAP = {
'bs': (brier_score, 'BS'),
@@ -540,18 +606,19 @@ def continuous_ranked_probability_score(obs, fx, fx_prob):
'qss': (quantile_skill_score, 'QSS'),
# 'sh': (sharpness, 'SH'), # TODO
'crps': (continuous_ranked_probability_score, 'CRPS'),
'crpss': (crps_skill_score, 'CRPSS'),
}

__all__ = [m[0].__name__ for m in _MAP.values()]

# Functions that require a reference forecast
_REQ_REF_FX = ['bss', 'qss']
_REQ_REF_FX = ['bss', 'qss', 'crpss']

# Functions that require normalized factor
_REQ_NORM = []

# Functions that require full distribution forecasts (as 2dim)
_REQ_DIST = ['crps']
_REQ_DIST = ['crps', 'crpss']

# TODO: Functions that require two forecasts (e.g., sharpness)
# _REQ_FX_FX = ['sh']
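
Since 'crpss' appears in both _REQ_REF_FX and _REQ_DIST, the calculator must
supply a reference forecast that is itself a full-distribution forecast. A
minimal sketch of that intersection (illustrative; not necessarily the
calculator's exact logic):

    _REQ_REF_FX = ['bss', 'qss', 'crpss']  # metrics needing a reference forecast
    _REQ_DIST = ['crps', 'crpss']          # metrics needing 2D distribution forecasts
    print(set(_REQ_REF_FX) & set(_REQ_DIST))  # {'crpss'}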
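
A usage sketch of the new function, built from the first parametrized CRPSS
test case in the test diff below (the signature and import path come from
this commit):

    import numpy as np
    from solarforecastarbiter.metrics import probabilistic as prob

    obs = np.array([8, 8])                              # (n,) observations
    fx = np.array([[10, 20, 30], [10, 20, 30]])         # (n, d) CDF values
    fx_prob = np.array([[0, 100, 100], [0, 100, 100]])  # probabilities [%]
    ref = np.array([[10, 20, 30], [10, 20, 30]])
    ref_prob = np.array([[0, 0, 100], [0, 0, 100]])

    skill = prob.crps_skill_score(obs, fx, fx_prob, ref, ref_prob)
    print(skill)  # 0.5, i.e. 1 - 10/20 per the test's expected value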
28 changes: 15 additions & 13 deletions solarforecastarbiter/metrics/tests/test_calculator.py
@@ -303,10 +303,9 @@ def test_calculate_metrics_with_probablistic(single_observation,
PROB_NO_REF)
assert len(result) == 1
assert isinstance(result[0], datamodel.MetricResult)
verify_metric_result(result[0],
proc_prfx_obs,
LIST_OF_CATEGORIES,
probabilistic._REQ_DIST)
verify_metric_result(
result[0], proc_prfx_obs, LIST_OF_CATEGORIES,
set(probabilistic._REQ_DIST) - set(probabilistic._REQ_REF_FX))

# With reference
ref_fx_values = fx_values + .5
@@ -336,11 +335,11 @@ def test_calculate_metrics_with_probablistic(single_observation,

expected = {
0: ('total', 'crps', '0', 17.247),
1: ('year', 'crps', '2019', 17.247),
2: ('season', 'crps', 'JJA', 17.247),
3: ('month', 'crps', 'Aug', 17.247),
4: ('hour', 'crps', '0', 19.801000000000002),
5: ('hour', 'crps', '1', 19.405)
2: ('year', 'crps', '2019', 17.247),
4: ('season', 'crps', 'JJA', 17.247),
6: ('month', 'crps', 'Aug', 17.247),
8: ('hour', 'crps', '0', 19.801000000000002),
9: ('hour', 'crps', '1', 19.405)
}
attr_order = ('category', 'metric', 'index', 'value')
for k, expected_attrs in expected.items():
@@ -384,9 +383,10 @@ def test_calculate_metrics_with_probablistic(single_observation,

# Distribution and constant values
all_results = calculator.calculate_metrics(
[proc_prfx_obs] + cv_proc_ref_prfx_obs, LIST_OF_CATEGORIES,
[proc_ref_prfx_obs] + cv_proc_ref_prfx_obs, LIST_OF_CATEGORIES,
# reverse to ensure order output is independent
PROBABILISTIC_METRICS[::-1])

assert all_results[0] == dist_results[0]
assert len(all_results[1:]) == len(cv_results)
for a, b in zip(all_results[1:], cv_results):
@@ -589,7 +589,8 @@ def test_calculate_probabilistic_metrics_missing_ref(
pd.DataFrame(np.random.randn(10, 3), index=create_dt_index(10)),
pd.Series(np.random.randn(10), index=create_dt_index(10)))
result = calculator.calculate_probabilistic_metrics(
proc_fxobs, LIST_OF_CATEGORIES, probabilistic._REQ_REF_FX
proc_fxobs, LIST_OF_CATEGORIES,
set(probabilistic._REQ_REF_FX) - set(probabilistic._REQ_DIST)
)
assert isinstance(result, datamodel.MetricResult)
assert result.values == ()
@@ -606,7 +607,8 @@ def test_calculate_probabilistic_metrics_no_reference_data(
pd.Series(np.random.randn(10), index=create_dt_index(10)),
ref_values=None)
result = calculator.calculate_probabilistic_metrics(
proc_fxobs, LIST_OF_CATEGORIES, probabilistic._REQ_REF_FX
proc_fxobs, LIST_OF_CATEGORIES,
set(probabilistic._REQ_REF_FX) - set(probabilistic._REQ_DIST)
)
assert isinstance(result, datamodel.MetricResult)
assert result.values == ()
@@ -955,7 +957,7 @@ def test_apply_deterministic_bad_metric_func():
('unc', [1, 1, 1], [100, 100, 100], [1, 1, 1], None, None, 0.),
# CRPS single forecast
('crps', [[1]], [[100]], [[0]], None, None, 0.),
('crps', [[1, 1]], [[100, 100]], [[0, 0]], None, None, 0.),
# CRPS multiple forecasts
('crps', [[1, 1, 1], [2, 2, 2], [3, 3, 3]],
[[100, 100, 100], [100, 100, 100], [100, 100, 100]],
74 changes: 71 additions & 3 deletions solarforecastarbiter/metrics/tests/test_probabilistic.py
@@ -218,10 +218,10 @@ def test_sharpness(fx_lower, fx_upper, value):


@pytest.mark.parametrize("fx,fx_prob,obs,value", [
# 1 sample, 1 CDF interval
# 1 sample, 2 CDF intervals
(
np.array([[10]]),
np.array([[100]]),
np.array([[10, 20]]),
np.array([[100, 100]]),
np.array([8]),
0.0,
),
@@ -283,7 +283,75 @@ def test_sharpness(fx_lower, fx_upper, value):
np.array([8, 8]),
(1.0 ** 2) * 10 + (0.5 ** 2) * 10,
),
# fail: only 1 CDF interval
pytest.param(
np.array([[10], [20]]),
np.array([[100], [100]]),
np.array([8, 8]),
0.0,
marks=pytest.mark.xfail(strict=True, raises=ValueError),
),
# fail: forecast as 1D array (instead of 2D)
pytest.param(
np.array([10, 20]),
np.array([100, 100]),
np.array([8, 8]),
0.0,
marks=pytest.mark.xfail(strict=True, raises=ValueError),
),
])
def test_crps(fx, fx_prob, obs, value):
crps = prob.continuous_ranked_probability_score(obs, fx, fx_prob)
assert crps == value


@pytest.mark.parametrize("fx,fx_prob,ref,ref_prob,obs,value", [
# 2 samples, 3 CDF intervals
(
np.array([[10, 20, 30], [10, 20, 30]]), # fx
np.array([[0, 100, 100], [0, 100, 100]]), # fx_prob
np.array([[10, 20, 30], [10, 20, 30]]), # ref
np.array([[0, 0, 100], [0, 0, 100]]), # ref_prob
np.array([8, 8]), # obs
1 - 10 / 20,
),
(
np.array([[10, 20, 30], [10, 20, 30]]), # fx
np.array([[0, 0, 100], [0, 0, 100]]), # fx_prob
np.array([[10, 20, 30], [10, 20, 30]]), # ref
np.array([[0, 100, 100], [0, 100, 100]]), # ref_prob
np.array([8, 8]), # obs
1 - 20 / 10,
),
(
np.array([[10, 20, 30], [10, 20, 30]]), # fx
np.array([[0, 100, 100], [0, 100, 100]]), # fx_prob
np.array([[10, 20, 30], [10, 20, 30]]), # ref
np.array([[0, 100, 100], [0, 100, 100]]), # ref_prob
np.array([8, 8]), # obs
0.0
),
# 2 samples, 2 CDF intervals
(
np.array([[10, 20], [10, 20]]), # fx
np.array([[0, 100], [0, 100]]), # fx_prob
np.array([[10, 20], [10, 20]]), # ref
np.array([[100, 100], [100, 100]]), # ref_prob
np.array([8, 8]), # obs
np.NINF,
),
])
def test_crps_skill_score(obs, fx, fx_prob, ref, ref_prob, value):
crpss = prob.crps_skill_score(obs, fx, fx_prob, ref, ref_prob)
assert crpss == value
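
The first two expected values above reduce to simple arithmetic on the
per-case CRPS totals (an illustrative check, not part of the diff):

    assert 1 - 10 / 20 == 0.5    # forecast better than reference
    assert 1 - 20 / 10 == -1.0   # forecast worse than reference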
