Merge pull request #551 from lboeman/probabilistic-timeseries-plots

Probabilistic Forecast timeseries in plotting module
SolarArbiter · Sep 4, 2020 · 6200ec0 · 6200ec0
2 parents ea60a16 + 282276d
commit 6200ec0
Show file tree

Hide file tree

Showing 7 changed files with 357 additions and 33 deletions.
diff --git a/docs/source/api.rst b/docs/source/api.rst
@@ -754,6 +754,7 @@ Time series plotting.
    plotting.timeseries.make_basic_timeseries
    plotting.timeseries.generate_forecast_figure
    plotting.timeseries.generate_observation_figure
+   plotting.timeseries.generate_probabilistic_forecast_figure
 
 Utils
 -----

diff --git a/docs/source/whatsnew/1.0.0rc3.rst b/docs/source/whatsnew/1.0.0rc3.rst
@@ -18,6 +18,12 @@ Enhancements
 * Add :py:func:`solarforecastarbiter.metrics.deterministic.relative_euclidean_distance`
   (:issue:`542`, :pull:`549`)
 
+* Add :py:func:`solarforecastarbiter.plotting.timeseries.generate_probabilistic_forecast_figure`
+  to create timeseries plots for probabilistic forecasts. Note that this
+  function generates a Plotly figure, whereas the other functions in this
+  module generate Bokeh figures.
+  (:issue:`550`, :pull:`551`)
+
 
 Bug fixes
 ~~~~~~~~~

diff --git a/solarforecastarbiter/plotting/tests/test_plotting_utils.py b/solarforecastarbiter/plotting/tests/test_plotting_utils.py
@@ -3,6 +3,8 @@
 import pytest
 
 
+from matplotlib import cm
+from matplotlib.colors import Normalize
 from solarforecastarbiter.plotting import utils
 
 
@@ -73,3 +75,34 @@ def test_line_or_step(label, method):
 def test_line_or_step_plotly(label):
     out = utils.line_or_step_plotly(label)
     assert isinstance(out, dict)
+
+
+color_map = cm.get_cmap('viridis')
+color_scaler = cm.ScalarMappable(
+    Normalize(vmin=0, vmax=1),
+    color_map,
+)
+
+
+@pytest.mark.parametrize('percentile,expected', [
+    (100, '#fde725'),
+    (90, '#bddf26'),
+    (50, '#21918c'),
+    (20, '#414487'),
+    (5, '#471365'),
+    (1, '#450457'),
+])
+def test_distribution_fill_color(percentile, expected):
+    assert utils.distribution_fill_color(color_scaler, percentile) == expected
+
+
+@pytest.mark.parametrize('cvs,expected', [
+    ([5, 20, 50, 80, 95], True),
+    ([5.0, 20.0, 50.0, 80.0, 95.0], True),
+    ([5.0, 50.0, 80.0, 95.0], False),
+    ([5, 50, 8, 9], False),
+    ([5, 10, 20, 30, 50, 60, 70, 75, 80, 90], False),
+    ([5], False),
+])
+def test_percentiles_are_symmetric(cvs, expected):
+    assert utils.percentiles_are_symmetric(cvs) == expected
diff --git a/solarforecastarbiter/plotting/tests/test_timeseries.py b/solarforecastarbiter/plotting/tests/test_timeseries.py
@@ -1,7 +1,9 @@
 import datetime as dt
+import json
 
 
 import bokeh
+import numpy as np
 import pandas as pd
 import pytest
 
@@ -163,3 +165,97 @@ def test_generate_observation_figure_empty(ghi_observation_metadata, rc):
     assert timeseries.generate_observation_figure(ghi_observation_metadata,
                                                   pd.DataFrame(),
                                                   return_components=rc) is None
+
+
+@pytest.fixture
+def prob_forecast_random_data():
+    def f(forecast):
+        frequency = pd.tseries.frequencies.to_offset(forecast.interval_length)
+        start = pd.Timestamp('2020-01-01T00:00Z')
+        end = pd.Timestamp('2020-01-03T00:00Z')
+        idx = pd.date_range(start, end, freq=frequency)
+        df = pd.DataFrame(index=idx)
+        for cv in [c.constant_value for c in forecast.constant_values]:
+            df[str(cv)] = np.random.rand(idx.size)
+        return df
+    return f
+
+
+def test_generate_probabilistic_forecast_figure_x_forecast(
+        prob_forecasts, prob_forecast_random_data):
+    values = prob_forecast_random_data(prob_forecasts)
+    fig = timeseries.generate_probabilistic_forecast_figure(
+        prob_forecasts, values)
+    assert fig['layout']['title']['text'] == 'DA GHI 2020-01-01 00:00 to 2020-01-03 00:00 UTC'  # NOQA: E501
+    assert fig['layout']['xaxis']['title']['text'] == 'Time (UTC)'
+    assert fig['layout']['yaxis']['title']['text'] == 'Probability (%)'
+    fig_data = fig['data']
+    assert len(fig_data) == 1
+    assert len(fig_data[0]['x']) == values.index.size
+    assert len(fig_data[0]['y']) == values.index.size
+    assert fig_data[0]['showlegend']
+
+
+def test_generate_probabilistic_forecast_figure_y_forecast(
+        prob_forecasts_y,
+        prob_forecast_constant_value_y_factory,
+        prob_forecast_random_data,
+        ):
+    new_constant_values = [prob_forecast_constant_value_y_factory(5.0)]
+    prob_forecast = prob_forecasts_y.replace(
+        constant_values=new_constant_values)
+    values = prob_forecast_random_data(prob_forecast)
+    fig = timeseries.generate_probabilistic_forecast_figure(
+        prob_forecasts_y, values)
+    assert fig['layout']['title']['text'] == 'DA GHI 2020-01-01 00:00 to 2020-01-03 00:00 UTC'  # NOQA: E501
+    assert fig['layout']['xaxis']['title']['text'] == 'Time (UTC)'
+    assert fig['layout']['yaxis']['title']['text'] == 'GHI (W/m^2)'
+    fig_data = fig['data']
+    assert len(fig_data) == 1
+    assert len(fig_data[0]['x']) == values.index.size
+    assert len(fig_data[0]['y']) == values.index.size
+    assert not fig_data[0]['showlegend']
+
+
+@pytest.fixture
+def prob_forecast_constant_value_y_factory(
+        prob_forecast_constant_value_y_text,
+        _prob_forecast_constant_value_from_dict):
+    def f(new_constant_value):
+        fx_dict = json.loads(prob_forecast_constant_value_y_text)
+        fx_dict['constant_value'] = new_constant_value
+        return _prob_forecast_constant_value_from_dict(fx_dict)
+    return f
+
+
+def test_generate_probabilistic_forecast_figure_y_forecast_symmetric(
+        prob_forecasts_y,
+        prob_forecast_constant_value_y_factory,
+        prob_forecast_random_data,
+        ):
+    new_constant_values = [prob_forecast_constant_value_y_factory(x)
+                           for x in [5.0, 10.0, 50.0, 90.0, 95.0]]
+    prob_forecast = prob_forecasts_y.replace(
+        constant_values=new_constant_values)
+    values = prob_forecast_random_data(prob_forecast)
+    fig = timeseries.generate_probabilistic_forecast_figure(
+        prob_forecasts_y, values)
+    assert fig['layout']['title']['text'] == 'DA GHI 2020-01-01 00:00 to 2020-01-03 00:00 UTC'  # NOQA: E501
+    assert fig['layout']['xaxis']['title']['text'] == 'Time (UTC)'
+    assert fig['layout']['yaxis']['title']['text'] == 'GHI (W/m^2)'
+    fig_data = fig['data']
+    assert len(fig_data) == 5
+    for trace in fig_data:
+        assert len(trace['x']) == values.index.size
+        assert len(trace['y']) == values.index.size
+    assert fig_data[0]['fill'] is None
+    for trace in fig_data[1:]:
+        assert trace['fill'] == 'tonexty'
+
+
+def test_generate_probabilistic_forecast_figure_empty_values(
+        prob_forecasts_y, prob_forecast_random_data):
+    values = pd.DataFrame()
+    fig = timeseries.generate_probabilistic_forecast_figure(
+        prob_forecasts_y, values)
+    assert fig is None
diff --git a/solarforecastarbiter/plotting/timeseries.py b/solarforecastarbiter/plotting/timeseries.py
@@ -8,6 +8,9 @@
 from bokeh.models import ColumnDataSource, Label, HoverTool
 from bokeh.plotting import figure
 from bokeh import palettes
+from matplotlib import cm
+from matplotlib.colors import Normalize
+import plotly.graph_objects as go
 import pandas as pd
 import pytz
 
@@ -336,3 +339,169 @@ def generate_observation_figure(observation, data, limit=pd.Timedelta('3d')):
     layout = _make_layout(figs)
     logger.info('Figure generated succesfully')
     return layout
+
+
+PLOTLY_MARGINS = {'l': 50, 'r': 50, 'b': 50, 't': 100, 'pad': 4}
+PLOTLY_LAYOUT_DEFAULTS = {
+    'autosize': True,
+    'height': 300,
+    'margin': PLOTLY_MARGINS,
+    'plot_bgcolor': '#FFF',
+    'font': {'size': 14}
+}
+
+
+def _plot_probabilsitic_distribution_axis_y(fig, forecast, data):
+    """
+    Plot all probabilistic forecast values for axis='y' by adding traces to
+    fig.
+
+    Parameters
+    ----------
+    fig: plotly.graph_objects.Figure
+    forecast: :py:class`solarforecastarbiter.datamodel.ProbabilisticForecast`
+    data: pd.DataFrame
+    """
+    color_map = cm.get_cmap('viridis')
+    color_scaler = cm.ScalarMappable(
+        Normalize(vmin=0, vmax=1),
+        color_map,
+    )
+
+    units = forecast.units
+
+    percentiles_are_symmetric = plot_utils.percentiles_are_symmetric(
+        data.columns.values.astype('float'))
+
+    # may not work for constant values that don't convert nicely from str/float
+    constant_values = data.columns.astype('float').sort_values()
+    for i, constant_value in enumerate(constant_values):
+        if i == 0:
+            fill = None
+        else:
+            fill = 'tonexty'
+
+        if percentiles_are_symmetric:
+            if constant_value <= 50 and i != 0:
+                fill_value = constant_values[i - 1]
+            else:
+                fill_value = constant_value
+            fill_value = 2 * abs(fill_value - 50)
+        else:
+            fill_value = 100 - constant_value
+
+        fill_color = plot_utils.distribution_fill_color(
+            color_scaler, fill_value)
+
+        plot_kwargs = plot_utils.line_or_step_plotly(forecast.interval_label)
+
+        forecast_name = f'Prob(f <= x) = {str(constant_value)}%'
+
+        go_ = go.Scatter(
+            x=data.index,
+            y=data[str(constant_value)],
+            name=f'{str(constant_value)} %',
+            hovertemplate=(
+                f'<b>{forecast_name}</b><br>'
+                '<b>Value</b>: %{y} '+f'{units}<br>'
+                '<b>Time</b>: %{x}<br>'),
+            connectgaps=False,
+            showlegend=False,
+            mode='lines',
+            fill=fill,
+            fillcolor=fill_color,
+            line=dict(
+                color=fill_color,
+            ),
+            **plot_kwargs,
+        )
+        fig.add_trace(go_)
+
+
+def _plot_probabilsitic_distribution_axis_x(fig, forecast, data):
+    """
+    Plot all probabilistic forecast values for axis='x' by adding traces to
+    fig.
+
+    Parameters
+    ----------
+    fig: plotly.graph_objects.Figure
+    forecast: :py:class`solarforecastarbiter.datamodel.ProbabilisticForecast`
+    data: pd.DataFrame
+    """
+    palette = iter(PALETTE * 3)
+
+    units = forecast.units
+
+    for constant_value in data.columns:
+        line_color = next(palette)
+
+        plot_kwargs = plot_utils.line_or_step_plotly(forecast.interval_label)
+
+        forecast_name = f'Prob(x <= {str(constant_value)} {units})'
+        go_ = go.Scatter(
+            x=data.index,
+            y=data[str(constant_value)],
+            name=forecast_name,
+            hovertemplate=(
+                f'<b>{forecast_name}</b><br>'
+                '<b>Value</b>: %{y} %<br>'
+                '<b>Time</b>: %{x}<br>'),
+            connectgaps=False,
+            showlegend=True,
+            mode='lines',
+            line=dict(
+                color=line_color,
+            ),
+            **plot_kwargs,
+        )
+        fig.add_trace(go_)
+
+
+def generate_probabilistic_forecast_figure(
+        forecast, data, limit=pd.Timedelta('3d')):
+    """
+    Creates a plotly figure spec from api response for a probabilistic forecast
+    group.
+
+    Parameters
+    ----------
+    forecast : datamodel.ProbabilisticForecast
+    data : pandas.DataFrame
+        DataFrame with forecast values in each column, column names as the
+        constant values and a datetime index.
+    limit : pandas.Timedelta or None
+
+    Returns
+    -------
+    None
+        When the data is empty.
+    figure: Plotly.graph_objects.Figure
+        Plotly json specification for the plot.
+    """
+    logger.info('Starting probabilistic forecast figure generation...')
+    if len(data.index) == 0:
+        return None
+
+    fig = go.Figure()
+    if 'x' in forecast.axis:
+        ylabel = 'Probability (%)'
+        _plot_probabilsitic_distribution_axis_x(fig, forecast, data)
+    else:
+        ylabel = plot_utils.format_variable_name(forecast.variable)
+        _plot_probabilsitic_distribution_axis_y(fig, forecast, data)
+    fig.update_xaxes(title_text=f'Time (UTC)', showgrid=True,
+                     gridwidth=1, gridcolor='#CCC', showline=True,
+                     linewidth=1, linecolor='black', ticks='outside')
+    fig.update_yaxes(title_text=ylabel, showgrid=True,
+                     gridwidth=1, gridcolor='#CCC', showline=True,
+                     linewidth=1, linecolor='black', ticks='outside',
+                     fixedrange=True)
+    first = data.index[0]
+    last = data.index[-1]
+    fig.update_layout(
+        title=build_figure_title(forecast.name, first, last),
+        legend=dict(font=dict(size=10)),
+        **PLOTLY_LAYOUT_DEFAULTS,
+    )
+    return fig