diff --git a/docs/source/api.rst b/docs/source/api.rst index be06a0036..701cc2563 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -754,6 +754,7 @@ Time series plotting. plotting.timeseries.make_basic_timeseries plotting.timeseries.generate_forecast_figure plotting.timeseries.generate_observation_figure + plotting.timeseries.generate_probabilistic_forecast_figure Utils ----- diff --git a/docs/source/whatsnew/1.0.0rc3.rst b/docs/source/whatsnew/1.0.0rc3.rst index eacfd54b5..d0dab27b4 100644 --- a/docs/source/whatsnew/1.0.0rc3.rst +++ b/docs/source/whatsnew/1.0.0rc3.rst @@ -18,6 +18,12 @@ Enhancements * Add :py:func:`solarforecastarbiter.metrics.deterministic.relative_euclidean_distance` (:issue:`542`, :pull:`549`) +* Add :py:func:`solarforecastarbiter.plotting.timeseries.generate_probabilistic_forecast_figure` + to create timeseries plots for probabilistic forecasts. Note that this new + function generates a Plotly figure, whereas the other functions in this + module generate Bokeh figures. 
+ (:issue:`550`, :pull:`551`) + Bug fixes ~~~~~~~~~ diff --git a/solarforecastarbiter/plotting/tests/test_plotting_utils.py b/solarforecastarbiter/plotting/tests/test_plotting_utils.py index f0dc488e2..41a29ab08 100644 --- a/solarforecastarbiter/plotting/tests/test_plotting_utils.py +++ b/solarforecastarbiter/plotting/tests/test_plotting_utils.py @@ -3,6 +3,8 @@ import pytest +from matplotlib import cm +from matplotlib.colors import Normalize from solarforecastarbiter.plotting import utils @@ -73,3 +75,34 @@ def test_line_or_step(label, method): def test_line_or_step_plotly(label): out = utils.line_or_step_plotly(label) assert isinstance(out, dict) + + +color_map = cm.get_cmap('viridis') +color_scaler = cm.ScalarMappable( + Normalize(vmin=0, vmax=1), + color_map, +) + + +@pytest.mark.parametrize('percentile,expected', [ + (100, '#fde725'), + (90, '#bddf26'), + (50, '#21918c'), + (20, '#414487'), + (5, '#471365'), + (1, '#450457'), +]) +def test_distribution_fill_color(percentile, expected): + assert utils.distribution_fill_color(color_scaler, percentile) == expected + + +@pytest.mark.parametrize('cvs,expected', [ + ([5, 20, 50, 80, 95], True), + ([5.0, 20.0, 50.0, 80.0, 95.0], True), + ([5.0, 50.0, 80.0, 95.0], False), + ([5, 50, 8, 9], False), + ([5, 10, 20, 30, 50, 60, 70, 75, 80, 90], False), + ([5], False), +]) +def test_percentiles_are_symmetric(cvs, expected): + assert utils.percentiles_are_symmetric(cvs) == expected diff --git a/solarforecastarbiter/plotting/tests/test_timeseries.py b/solarforecastarbiter/plotting/tests/test_timeseries.py index 0ffd2676b..41198a40b 100644 --- a/solarforecastarbiter/plotting/tests/test_timeseries.py +++ b/solarforecastarbiter/plotting/tests/test_timeseries.py @@ -1,7 +1,9 @@ import datetime as dt +import json import bokeh +import numpy as np import pandas as pd import pytest @@ -163,3 +165,97 @@ def test_generate_observation_figure_empty(ghi_observation_metadata, rc): assert 
timeseries.generate_observation_figure(ghi_observation_metadata, pd.DataFrame(), return_components=rc) is None + + +@pytest.fixture +def prob_forecast_random_data(): + def f(forecast): + frequency = pd.tseries.frequencies.to_offset(forecast.interval_length) + start = pd.Timestamp('2020-01-01T00:00Z') + end = pd.Timestamp('2020-01-03T00:00Z') + idx = pd.date_range(start, end, freq=frequency) + df = pd.DataFrame(index=idx) + for cv in [c.constant_value for c in forecast.constant_values]: + df[str(cv)] = np.random.rand(idx.size) + return df + return f + + +def test_generate_probabilistic_forecast_figure_x_forecast( + prob_forecasts, prob_forecast_random_data): + values = prob_forecast_random_data(prob_forecasts) + fig = timeseries.generate_probabilistic_forecast_figure( + prob_forecasts, values) + assert fig['layout']['title']['text'] == 'DA GHI 2020-01-01 00:00 to 2020-01-03 00:00 UTC' # NOQA: E501 + assert fig['layout']['xaxis']['title']['text'] == 'Time (UTC)' + assert fig['layout']['yaxis']['title']['text'] == 'Probability (%)' + fig_data = fig['data'] + assert len(fig_data) == 1 + assert len(fig_data[0]['x']) == values.index.size + assert len(fig_data[0]['y']) == values.index.size + assert fig_data[0]['showlegend'] + + +def test_generate_probabilistic_forecast_figure_y_forecast( + prob_forecasts_y, + prob_forecast_constant_value_y_factory, + prob_forecast_random_data, + ): + new_constant_values = [prob_forecast_constant_value_y_factory(5.0)] + prob_forecast = prob_forecasts_y.replace( + constant_values=new_constant_values) + values = prob_forecast_random_data(prob_forecast) + fig = timeseries.generate_probabilistic_forecast_figure( + prob_forecasts_y, values) + assert fig['layout']['title']['text'] == 'DA GHI 2020-01-01 00:00 to 2020-01-03 00:00 UTC' # NOQA: E501 + assert fig['layout']['xaxis']['title']['text'] == 'Time (UTC)' + assert fig['layout']['yaxis']['title']['text'] == 'GHI (W/m^2)' + fig_data = fig['data'] + assert len(fig_data) == 1 + assert 
len(fig_data[0]['x']) == values.index.size + assert len(fig_data[0]['y']) == values.index.size + assert not fig_data[0]['showlegend'] + + +@pytest.fixture +def prob_forecast_constant_value_y_factory( + prob_forecast_constant_value_y_text, + _prob_forecast_constant_value_from_dict): + def f(new_constant_value): + fx_dict = json.loads(prob_forecast_constant_value_y_text) + fx_dict['constant_value'] = new_constant_value + return _prob_forecast_constant_value_from_dict(fx_dict) + return f + + +def test_generate_probabilistic_forecast_figure_y_forecast_symmetric( + prob_forecasts_y, + prob_forecast_constant_value_y_factory, + prob_forecast_random_data, + ): + new_constant_values = [prob_forecast_constant_value_y_factory(x) + for x in [5.0, 10.0, 50.0, 90.0, 95.0]] + prob_forecast = prob_forecasts_y.replace( + constant_values=new_constant_values) + values = prob_forecast_random_data(prob_forecast) + fig = timeseries.generate_probabilistic_forecast_figure( + prob_forecasts_y, values) + assert fig['layout']['title']['text'] == 'DA GHI 2020-01-01 00:00 to 2020-01-03 00:00 UTC' # NOQA: E501 + assert fig['layout']['xaxis']['title']['text'] == 'Time (UTC)' + assert fig['layout']['yaxis']['title']['text'] == 'GHI (W/m^2)' + fig_data = fig['data'] + assert len(fig_data) == 5 + for trace in fig_data: + assert len(trace['x']) == values.index.size + assert len(trace['y']) == values.index.size + assert fig_data[0]['fill'] is None + for trace in fig_data[1:]: + assert trace['fill'] == 'tonexty' + + +def test_generate_probabilistic_forecast_figure_empty_values( + prob_forecasts_y, prob_forecast_random_data): + values = pd.DataFrame() + fig = timeseries.generate_probabilistic_forecast_figure( + prob_forecasts_y, values) + assert fig is None diff --git a/solarforecastarbiter/plotting/timeseries.py b/solarforecastarbiter/plotting/timeseries.py index fa93b0804..efcc95e57 100644 --- a/solarforecastarbiter/plotting/timeseries.py +++ b/solarforecastarbiter/plotting/timeseries.py @@ -8,6 +8,9 
@@ from bokeh.models import ColumnDataSource, Label, HoverTool from bokeh.plotting import figure from bokeh import palettes +from matplotlib import cm +from matplotlib.colors import Normalize +import plotly.graph_objects as go import pandas as pd import pytz @@ -336,3 +339,169 @@ def generate_observation_figure(observation, data, limit=pd.Timedelta('3d')): layout = _make_layout(figs) logger.info('Figure generated succesfully') return layout + + +PLOTLY_MARGINS = {'l': 50, 'r': 50, 'b': 50, 't': 100, 'pad': 4} +PLOTLY_LAYOUT_DEFAULTS = { + 'autosize': True, + 'height': 300, + 'margin': PLOTLY_MARGINS, + 'plot_bgcolor': '#FFF', + 'font': {'size': 14} +} + + +def _plot_probabilsitic_distribution_axis_y(fig, forecast, data): + """ + Plot all probabilistic forecast values for axis='y' by adding traces to + fig. + + Parameters + ---------- + fig: plotly.graph_objects.Figure + forecast: :py:class`solarforecastarbiter.datamodel.ProbabilisticForecast` + data: pd.DataFrame + """ + color_map = cm.get_cmap('viridis') + color_scaler = cm.ScalarMappable( + Normalize(vmin=0, vmax=1), + color_map, + ) + + units = forecast.units + + percentiles_are_symmetric = plot_utils.percentiles_are_symmetric( + data.columns.values.astype('float')) + + # may not work for constant values that don't convert nicely from str/float + constant_values = data.columns.astype('float').sort_values() + for i, constant_value in enumerate(constant_values): + if i == 0: + fill = None + else: + fill = 'tonexty' + + if percentiles_are_symmetric: + if constant_value <= 50 and i != 0: + fill_value = constant_values[i - 1] + else: + fill_value = constant_value + fill_value = 2 * abs(fill_value - 50) + else: + fill_value = 100 - constant_value + + fill_color = plot_utils.distribution_fill_color( + color_scaler, fill_value) + + plot_kwargs = plot_utils.line_or_step_plotly(forecast.interval_label) + + forecast_name = f'Prob(f <= x) = {str(constant_value)}%' + + go_ = go.Scatter( + x=data.index, + 
y=data[str(constant_value)], + name=f'{str(constant_value)} %', + hovertemplate=( + f'{forecast_name}
' + 'Value: %{y} '+f'{units}
' + 'Time: %{x}
'), + connectgaps=False, + showlegend=False, + mode='lines', + fill=fill, + fillcolor=fill_color, + line=dict( + color=fill_color, + ), + **plot_kwargs, + ) + fig.add_trace(go_) + + +def _plot_probabilsitic_distribution_axis_x(fig, forecast, data): + """ + Plot all probabilistic forecast values for axis='x' by adding traces to + fig. + + Parameters + ---------- + fig: plotly.graph_objects.Figure + forecast: :py:class`solarforecastarbiter.datamodel.ProbabilisticForecast` + data: pd.DataFrame + """ + palette = iter(PALETTE * 3) + + units = forecast.units + + for constant_value in data.columns: + line_color = next(palette) + + plot_kwargs = plot_utils.line_or_step_plotly(forecast.interval_label) + + forecast_name = f'Prob(x <= {str(constant_value)} {units})' + go_ = go.Scatter( + x=data.index, + y=data[str(constant_value)], + name=forecast_name, + hovertemplate=( + f'{forecast_name}
' + 'Value: %{y} %
' + 'Time: %{x}
'), + connectgaps=False, + showlegend=True, + mode='lines', + line=dict( + color=line_color, + ), + **plot_kwargs, + ) + fig.add_trace(go_) + + +def generate_probabilistic_forecast_figure( + forecast, data, limit=pd.Timedelta('3d')): + """ + Creates a plotly figure spec from api response for a probabilistic forecast + group. + + Parameters + ---------- + forecast : datamodel.ProbabilisticForecast + data : pandas.DataFrame + DataFrame with forecast values in each column, column names as the + constant values and a datetime index. + limit : pandas.Timedelta or None + + Returns + ------- + None + When the data is empty. + figure: Plotly.graph_objects.Figure + Plotly json specification for the plot. + """ + logger.info('Starting probabilistic forecast figure generation...') + if len(data.index) == 0: + return None + + fig = go.Figure() + if 'x' in forecast.axis: + ylabel = 'Probability (%)' + _plot_probabilsitic_distribution_axis_x(fig, forecast, data) + else: + ylabel = plot_utils.format_variable_name(forecast.variable) + _plot_probabilsitic_distribution_axis_y(fig, forecast, data) + fig.update_xaxes(title_text=f'Time (UTC)', showgrid=True, + gridwidth=1, gridcolor='#CCC', showline=True, + linewidth=1, linecolor='black', ticks='outside') + fig.update_yaxes(title_text=ylabel, showgrid=True, + gridwidth=1, gridcolor='#CCC', showline=True, + linewidth=1, linecolor='black', ticks='outside', + fixedrange=True) + first = data.index[0] + last = data.index[-1] + fig.update_layout( + title=build_figure_title(forecast.name, first, last), + legend=dict(font=dict(size=10)), + **PLOTLY_LAYOUT_DEFAULTS, + ) + return fig diff --git a/solarforecastarbiter/plotting/utils.py b/solarforecastarbiter/plotting/utils.py index eae84a87a..73831f0ef 100644 --- a/solarforecastarbiter/plotting/utils.py +++ b/solarforecastarbiter/plotting/utils.py @@ -1,4 +1,5 @@ import pandas as pd +from matplotlib.colors import rgb2hex from solarforecastarbiter.datamodel import ALLOWED_VARIABLES, COMMON_NAMES 
@@ -98,3 +99,44 @@ def line_or_step_plotly(interval_label): '"event", or "ending"') return plot_kwargs + + +def distribution_fill_color(color_scaler, percentile): + """Returns a hex code for shading percentiles. + Parameters + ---------- + color_scaler: matplotlib.cm.ScalarMappable + + percentile: float + + Returns + ------- + str + Hex value of the color to use for shading. + """ + normalized_value = percentile / 100 + return rgb2hex(color_scaler.to_rgba(normalized_value)) + + +def percentiles_are_symmetric(constant_values): + """Determines if a list of percentile constant values are symmetric about + the 50th percentile. + + Parameters + ---------- + constant_values: list + List of float constant values + Returns + ------- + bool + True if percentiles are symmetric about 50. + """ + constant_values = sorted(constant_values) + lower_bounds = [cv for cv in constant_values if cv < 50] + upper_bounds = [cv for cv in constant_values if cv > 50][::-1] + if len(upper_bounds) != len(lower_bounds): + return False + for l, u in zip(lower_bounds, upper_bounds): + if abs(50 - l) != abs(50 - u): + return False + return True diff --git a/solarforecastarbiter/reports/figures/plotly_figures.py b/solarforecastarbiter/reports/figures/plotly_figures.py index bcc941692..47888ebdc 100644 --- a/solarforecastarbiter/reports/figures/plotly_figures.py +++ b/solarforecastarbiter/reports/figures/plotly_figures.py @@ -16,12 +16,12 @@ import plotly.graph_objects as go import numpy as np from matplotlib import cm -from matplotlib.colors import Normalize, rgb2hex +from matplotlib.colors import Normalize from solarforecastarbiter import datamodel from solarforecastarbiter.metrics.event import _event2count -from solarforecastarbiter.plotting.utils import line_or_step_plotly +import solarforecastarbiter.plotting.utils as plot_utils logger = logging.getLogger(__name__) @@ -327,7 +327,8 @@ def _plot_obs_timeseries(fig, timeseries_value_df, timeseries_meta_df): metadata = _extract_metadata_from_df( 
timeseries_meta_df, obs_hash, 'observation_hash') pair_idcs = timeseries_value_df['pair_index'] == metadata['pair_index'] - plot_kwargs = line_or_step_plotly(metadata['interval_label']) + plot_kwargs = plot_utils.line_or_step_plotly( + metadata['interval_label']) data = _fill_timeseries( timeseries_value_df[pair_idcs], metadata['interval_length'], @@ -365,7 +366,7 @@ def _plot_fx_timeseries(fig, timeseries_value_df, timeseries_meta_df, axis): # probably treat axis == None and axis == y separately in the future. # currently no need for a separate axis == x treatment either, so # removed an if statement on the axis. - plot_kwargs = line_or_step_plotly( + plot_kwargs = plot_utils.line_or_step_plotly( metadata['interval_label']) data = _fill_timeseries( timeseries_value_df[pair_idcs], @@ -407,11 +408,8 @@ def _plot_fx_distribution_timeseries( color_map, ) - def _get_fill_color(percentile): - normalized_value = percentile / 100 - return rgb2hex(color_scaler.to_rgba(normalized_value)) - - symmetric_percentiles = _percentiles_are_symmetric(cv_metadata) + symmetric_percentiles = plot_utils.percentiles_are_symmetric( + cv_metadata['constant_value'].tolist()) # Plot confidence intervals for idx, cv in cv_metadata.iterrows(): pair_idcs = timeseries_value_df['pair_index'] == cv['pair_index'] @@ -454,9 +452,10 @@ def _get_fill_color(percentile): # bright colors appear at 0 and dark at 100 when plotted. 
fill_value = 100 - cv['constant_value'] - fill_color = _get_fill_color(fill_value) + fill_color = plot_utils.distribution_fill_color( + color_scaler, fill_value) - plot_kwargs = line_or_step_plotly(cv['interval_label']) + plot_kwargs = plot_utils.line_or_step_plotly(cv['interval_label']) go_ = go.Scatter( x=data.index, @@ -1180,25 +1179,3 @@ def timeseries_plots(report): for pfxob in pfxobs) return (ts_fig.to_json(), scat_fig.to_json(), ts_prob_fig_json, includes_distribution) - - -def _percentiles_are_symmetric(cv_df): - """Determines if a set of percentiles are symmetric around the 50th - percentile. - - Parameters - ---------- - cv_df: pandas.DataFrame - A dataframe containing metadata of all of the constant values for the - distribution. - Returns - ------- - bool - """ - constant_values = cv_df['constant_value'].sort_values() - lower_bounds = constant_values[constant_values < 50] - upper_bounds = constant_values[constant_values > 50][::-1] - for l, u in zip(lower_bounds, upper_bounds): - if abs(50 - l) != abs(50 - u): - return False - return True