From ea60a16d0c47bd7172411ebfecfe03fe4e379954 Mon Sep 17 00:00:00 2001 From: Will Holmgren Date: Tue, 1 Sep 2020 09:44:49 -0700 Subject: [PATCH] relative_euclidean_distance. more pearson tests (#549) * relative_euclidean_distance. more pearson tests * docs * forgot to add to _MAP * test from review --- docs/source/api.rst | 1 + docs/source/whatsnew/1.0.0rc3.rst | 2 + solarforecastarbiter/metrics/deterministic.py | 80 +++++++++++++++++++ .../metrics/tests/test_deterministic.py | 44 ++++++++-- 4 files changed, 121 insertions(+), 6 deletions(-) diff --git a/docs/source/api.rst b/docs/source/api.rst index f954b4889..be06a0036 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -535,6 +535,7 @@ Functions to compute forecast deterministic performance metrics: metrics.deterministic.forecast_skill metrics.deterministic.pearson_correlation_coeff metrics.deterministic.coeff_determination + metrics.deterministic.relative_euclidean_distance metrics.deterministic.kolmogorov_smirnov_integral metrics.deterministic.over metrics.deterministic.combined_performance_index diff --git a/docs/source/whatsnew/1.0.0rc3.rst b/docs/source/whatsnew/1.0.0rc3.rst index 791e91a3b..eacfd54b5 100644 --- a/docs/source/whatsnew/1.0.0rc3.rst +++ b/docs/source/whatsnew/1.0.0rc3.rst @@ -15,6 +15,8 @@ API Changes Enhancements ~~~~~~~~~~~~ +* Add :py:func:`solarforecastarbiter.metrics.deterministic.relative_euclidean_distance` + (:issue:`542`, :pull:`549`) Bug fixes diff --git a/solarforecastarbiter/metrics/deterministic.py b/solarforecastarbiter/metrics/deterministic.py index f928f1994..e0829d48d 100644 --- a/solarforecastarbiter/metrics/deterministic.py +++ b/solarforecastarbiter/metrics/deterministic.py @@ -408,6 +408,85 @@ def centered_root_mean_square(obs, fx): )) +def _careful_ratio(obs_stat, fx_stat): + if obs_stat == fx_stat: + ratio = 1. + elif obs_stat == 0.: + ratio = np.Inf + else: + ratio = fx_stat / obs_stat + return ratio + + +def relative_euclidean_distance(obs, fx): + r"""Relative Euclidean distance (D): + + .. math:: \text{D} = \sqrt{ + \left( \frac{\overline{\text{fx}} - \overline{\text{obs}} } + { \overline{\text{obs}} } \right) ^ 2 + + \left( \frac{\sigma_{\text{fx}} - \sigma_{\text{obs}} } + { \sigma_{\text{obs}} } \right) ^ 2 + + \left( \textrm{corr} - 1 \right) ^ 2 + } + + where: + + * :math:`\overline{\text{fx}}` is the forecast mean + * :math:`\overline{\text{obs}}` is the observation mean + * :math:`\sigma_{\text{fx}}` is the forecast standard deviation + * :math:`\sigma_{\text{obs}}` is the observation standard deviation + * :math:`\textrm{corr}` is the + :py:func:`Pearson correlation coefficient ` + + Described in [1]_ + + Parameters + ---------- + obs : (n,) array-like + Observed values. + fx : (n,) array-like + Forecasted values. + + Returns + ------- + d : float + The relative Euclidean distance of the forecast. + + Examples + -------- + >>> relative_euclidean_distance(np.array([0, 1]), np.array([1, 2])) + 2.0 + # observation mean is 0, forecast mean is not 0. d --> inf + >>> relative_euclidean_distance(np.array([-1, 1]), np.array([2, 3])) + np.Inf + # both forecast and observation mean are 0. d is finite + >>> relative_euclidean_distance(np.array([-2, 2]), np.array([3, -3])) + 2.0615528128088303 + # variance of observation or forecast is 0. d --> nan + >>> relative_euclidean_distance(np.array([1, 1]), np.array([2, 3]) + np.NaN + + References + ---------- + .. [1] Wu et al. Journal of Geophysical Research : Atmospheres 117, + D12202, doi: 10.1029/2011JD016971 (2012) + """ + obs_mean = np.mean(obs) + obs_stdev = np.std(obs) + fx_mean = np.mean(fx) + fx_stdev = np.std(fx) + + # compute as a ratio so we can handle obs_mean = 0 + mean_ratio = _careful_ratio(obs_mean, fx_mean) + stdev_ratio = _careful_ratio(obs_stdev, fx_stdev) + + return np.sqrt( + (mean_ratio - 1) ** 2 + + (stdev_ratio - 1) ** 2 + + (pearson_correlation_coeff(obs, fx) - 1) ** 2 + ) + + def kolmogorov_smirnov_integral(obs, fx, normed=False): """Kolmogorov-Smirnov Test Integral (KSI). @@ -926,6 +1005,7 @@ def cost(obs, fx, cost_params, error_fnc=error): 'r': (pearson_correlation_coeff, 'r'), 'r^2': (coeff_determination, 'R^2'), 'crmse': (centered_root_mean_square, 'CRMSE'), + 'd': (relative_euclidean_distance, 'Rel. Euc. Dist.'), 'ksi': (kolmogorov_smirnov_integral, 'KSI'), 'over': (over, 'OVER'), 'cpi': (combined_performance_index, 'CPI'), diff --git a/solarforecastarbiter/metrics/tests/test_deterministic.py b/solarforecastarbiter/metrics/tests/test_deterministic.py index 2a7ebfc10..f391e1379 100644 --- a/solarforecastarbiter/metrics/tests/test_deterministic.py +++ b/solarforecastarbiter/metrics/tests/test_deterministic.py @@ -1,5 +1,6 @@ import datetime as dt from functools import partial +from contextlib import nullcontext as does_not_raise import pandas as pd @@ -7,6 +8,7 @@ import pytest import numpy as np from numpy.testing import assert_allclose +from scipy.stats import PearsonRConstantInputWarning from solarforecastarbiter import datamodel @@ -147,16 +149,23 @@ def test_r(obs, fx, value): assert r == value -@pytest.mark.parametrize("obs,fx", [ +@pytest.mark.parametrize("obs,fx,context", [ # len(obs) < 2 or len(fx) < 2 - (np.array([0]), np.array([1])), + (np.array([0]), np.array([1]), does_not_raise()), # len(obs) != len(fx) - (np.array([0, 1, 2]), np.array([0, 1, 2, 3])), - (np.array([2, 3, 4]), np.array([2, 3, 5, 6])), + (np.array([0, 1, 2]), np.array([0, 1, 2, 3]), does_not_raise()), + (np.array([2, 3, 4]), np.array([2, 3, 5, 6]), does_not_raise()), + + # obs or fx have the same values + (np.array([1, 1, 1]), np.array([1, 2, 3]), + pytest.warns(PearsonRConstantInputWarning)), + (np.array([1, 2, 3]), np.array([1, 1, 1]), + pytest.warns(PearsonRConstantInputWarning)), ]) -def test_r_nan(obs, fx): - r = deterministic.pearson_correlation_coeff(obs, fx) +def test_r_nan(obs, fx, context): + with context: + r = deterministic.pearson_correlation_coeff(obs, fx) assert np.isnan(r) @@ -179,6 +188,29 @@ def test_crmse(obs, fx, value): assert crmse == value +@pytest.mark.parametrize("obs,fx,value,context", [ + (np.array([0, 1]), np.array([0, 1]), 0.0, does_not_raise()), + (np.array([0, 1]), np.array([1, 2]), 2.0, does_not_raise()), + (np.array([1, 2]), np.array([0, 1]), 0.6666666666666667, does_not_raise()), + (np.array([-1, 1]), np.array([2, 3]), np.Inf, does_not_raise()), + (np.array([-2, 2]), np.array([3, -3]), 2.0615528128088303, + does_not_raise()), + (np.array([1, 1]), np.array([2, 3]), np.nan, + pytest.warns(PearsonRConstantInputWarning)), # corr = nan + (np.array([2, 3]), np.array([1, 1]), np.nan, + pytest.warns(PearsonRConstantInputWarning)), # corr = nan + (np.array([0, 0]), np.array([1, 2]), np.nan, + pytest.warns(PearsonRConstantInputWarning)), # corr = nan +]) +def test_relative_euclidean_distance(obs, fx, value, context): + with context: + out = deterministic.relative_euclidean_distance(obs, fx) + if np.isnan(value): + assert np.isnan(out) + else: + assert out == value + + @pytest.mark.parametrize("obs,fx,value", [ ([0, 1], [0, 1], 0.0), ([1, 2], [1, 2], 0.0),