Skip to content

Commit

Permalink
Merged in bugfix/empty-revenue_costs-data (pull request #93)
Browse files Browse the repository at this point in the history
Empty revenue/costs data should not cause UI error

* Revenue/costs should be an empty DataFrame when power or price is empty.

* Upgrade timely-beliefs version.
Add test for dtype in empty BeliefsDataFrame.
Remove print statement.

* Construct costs/revenues DataFrame with outer join (see PR #93).
Simplify get_revenues_costs_data.

* Point to dev dependency.

* Flake8.

* Upgrade to released timely-beliefs dependency.

* Fix logic for determining belief horizon upon multiplication.
Clarify docstring of multiplication util function.
Add test for multiplication util function.

* Add comment.

Approved-by: Nicolas Höning
  • Loading branch information
Flix6x committed Nov 3, 2020
1 parent 52bc856 commit 6a0e251
Show file tree
Hide file tree
Showing 5 changed files with 109 additions and 62 deletions.
61 changes: 16 additions & 45 deletions bvp/data/queries/analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@
import pandas as pd
import timely_beliefs as tb

from bvp.data.queries.utils import simplify_index, new_dataframe_aligned_with
from bvp.data.queries.utils import (
simplify_index,
multiply_dataframe_with_deterministic_beliefs,
)
from bvp.utils import calculations, time_utils
from bvp.data.services.resources import Resource, find_closest_weather_sensor
from bvp.data.models.assets import Asset
Expand Down Expand Up @@ -323,64 +326,32 @@ def get_revenues_costs_data(
- weighted absolute percentage error
"""
power_hour_factor = time_utils.resolution_to_hour_factor(session["resolution"])
rev_cost_data = new_dataframe_aligned_with(power_data)
rev_cost_forecasts = new_dataframe_aligned_with(

rev_cost_data = multiply_dataframe_with_deterministic_beliefs(
power_data,
columns=["event_value", "yhat_upper", "yhat_lower"],
prices_data,
result_source="Calculated from power and price data",
multiplication_factor=power_hour_factor * unit_factor,
)

if power_data.empty or prices_data.empty:
metrics["realised_revenues_costs"] = np.NaN
else:
rev_cost_data = new_dataframe_aligned_with(
power_data,
column_values=dict(
event_value=power_data["event_value"]
* power_hour_factor
* prices_data["event_value"]
* unit_factor
),
)
if (
"belief_horizon" in power_data.columns
and "belief_horizon" in prices_data.columns
):
rev_cost_data["belief_horizon"] = pd.DataFrame(
[power_data["belief_horizon"], prices_data["belief_horizon"]]
).min()
if "source" in power_data.columns and "source" in prices_data.columns:
rev_cost_data["source"] = "Calculated from power and price data"
metrics["realised_revenues_costs"] = np.nansum(
rev_cost_data["event_value"].values
)

if (
power_data.empty
or prices_data.empty
or power_forecast_data.empty
or prices_forecast_data.empty
or not (power_data["event_value"].size == prices_data["event_value"].size)
or not (
power_forecast_data["event_value"].size
== prices_forecast_data["event_value"].size
)
):
rev_cost_forecasts = multiply_dataframe_with_deterministic_beliefs(
power_forecast_data,
prices_forecast_data,
result_source="Calculated from power and price data",
multiplication_factor=power_hour_factor * unit_factor,
)
if power_forecast_data.empty or prices_forecast_data.empty:
metrics["expected_revenues_costs"] = np.NaN
metrics["mae_revenues_costs"] = np.NaN
metrics["mape_revenues_costs"] = np.NaN
metrics["wape_revenues_costs"] = np.NaN
else:
rev_cost_forecasts = new_dataframe_aligned_with(
power_data,
columns=["event_value", "yhat_upper", "yhat_lower"],
)
if not (power_forecast_data.empty and prices_forecast_data.empty):
rev_cost_forecasts["event_value"] = (
power_forecast_data["event_value"]
* power_hour_factor
* prices_forecast_data["event_value"]
* unit_factor
)
metrics["expected_revenues_costs"] = np.nansum(
rev_cost_forecasts["event_value"]
)
Expand Down
51 changes: 36 additions & 15 deletions bvp/data/queries/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from datetime import datetime, timedelta

import pandas as pd
import numpy as np
import timely_beliefs as tb

from sqlalchemy.orm import Query, Session
Expand Down Expand Up @@ -158,21 +157,43 @@ def simplify_index(
return bdf


def multiply_dataframe_with_deterministic_beliefs(
    df1: pd.DataFrame,
    df2: pd.DataFrame,
    multiplication_factor: float = 1,
    result_source: Optional[str] = None,
) -> pd.DataFrame:
    """Return a DataFrame whose event_value is the product of df1's and df2's.

    The result index is the outer join of the two input indices; rows present
    in only one of the inputs get an event_value of np.nan. If both inputs
    carry a belief_horizon column, the result keeps the row-wise minimum of
    the two horizons. The source columns of df1 and df2 are not used; pass a
    result_source string to label the result with a source column instead.

    :param df1: DataFrame with an "event_value" column and optional
                "belief_horizon" and "source" columns
    :param df2: DataFrame with an "event_value" column and optional
                "belief_horizon" and "source" columns
    :param multiplication_factor: extra scalar applied to every product
    :param result_source: string label for the source of the resulting DataFrame
    :returns: DataFrame with an "event_value" column,
              an additional "belief_horizon" column if both inputs contain one,
              and an additional "source" column if result_source is set.
    """
    # Index-aligned multiplication: pandas produces NaN for any event_start
    # that is missing from either input, which gives us the outer join.
    product = df1["event_value"].mul(df2["event_value"]).mul(multiplication_factor)
    result = product.to_frame(name="event_value")

    if "belief_horizon" in df1.columns and "belief_horizon" in df2.columns:
        # Keep only the smaller horizon per row; a row missing from one input
        # contributes NaT, which min(axis=1) skips.
        horizons = pd.concat([df1["belief_horizon"], df2["belief_horizon"]], axis=1)
        result["belief_horizon"] = horizons.min(axis=1)

    if result_source:
        result["source"] = result_source  # also for rows with nan event_value
    return result
56 changes: 56 additions & 0 deletions bvp/data/tests/test_queries.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import pytest
import pytz
import timely_beliefs as tb

from bvp.data.models.assets import Asset, Power
from bvp.data.queries.utils import multiply_dataframe_with_deterministic_beliefs


@pytest.mark.parametrize(
Expand All @@ -24,6 +27,11 @@
48,
),
(None, datetime(2015, 1, 1, 12, tzinfo=pytz.utc), 48),
(
datetime(1957, 1, 1, tzinfo=pytz.utc),
datetime(1957, 1, 2, tzinfo=pytz.utc),
0,
),
],
)
def test_collect_power(db, app, query_start, query_end, num_values):
Expand All @@ -37,6 +45,9 @@ def test_collect_power(db, app, query_start, query_end, num_values):
assert (
bdf.index.names[0] == "event_start"
) # first index level of collect function should be event_start, so that df.loc[] refers to event_start
assert pd.api.types.is_timedelta64_dtype(
bdf.index.get_level_values("belief_horizon")
) # dtype of belief_horizon is timedelta64[ns], so the minimum horizon on an empty BeliefsDataFrame is NaT instead of NaN
assert len(bdf) == num_values
for v1, v2 in zip(bdf.values, data):
assert abs(v1[0] - v2.value) < 10 ** -6
Expand Down Expand Up @@ -80,3 +91,48 @@ def test_collect_power_resampled(
)
print(bdf)
assert len(bdf) == num_values


def test_multiplication():
    """Check multiply_dataframe_with_deterministic_beliefs on two overlapping frames.

    df1 covers 10:00-15:00 and df2 covers 13:00-18:00 (hourly, left-closed),
    so the result index should be the outer join 10:00-18:00: event values are
    the product (30 * 10 = 300) where both frames have data and NaN elsewhere,
    and the belief horizon per row is the minimum of the available horizons.
    """
    # NOTE(review): each frame passes a single data row against a 5-element
    # index — presumably pandas broadcasts the row; confirm against the
    # pandas version pinned by this project.
    df1 = pd.DataFrame(
        [[30.0, timedelta(hours=3)]],
        index=pd.date_range(
            "2000-01-01 10:00", "2000-01-01 15:00", freq="1h", closed="left"
        ),
        columns=["event_value", "belief_horizon"],
    )
    df2 = pd.DataFrame(
        [[10.0, timedelta(hours=1)]],
        index=pd.date_range(
            "2000-01-01 13:00", "2000-01-01 18:00", freq="1h", closed="left"
        ),
        columns=["event_value", "belief_horizon"],
    )
    df = multiply_dataframe_with_deterministic_beliefs(df1, df2)
    # Build the expected frame piecewise: left-only rows, overlap rows,
    # right-only rows.
    df_compare = pd.concat(
        [
            # 10:00-13:00: only df1 has data -> NaN value, df1's horizon
            pd.DataFrame(
                [[np.nan, timedelta(hours=3)]],
                index=pd.date_range(
                    "2000-01-01 10:00", "2000-01-01 13:00", freq="1h", closed="left"
                ),
                columns=["event_value", "belief_horizon"],
            ),
            # 13:00-15:00: both have data -> 30 * 10 = 300, minimum horizon (1h)
            pd.DataFrame(
                [[300.0, timedelta(hours=1)]],
                index=pd.date_range(
                    "2000-01-01 13:00", "2000-01-01 15:00", freq="1h", closed="left"
                ),
                columns=["event_value", "belief_horizon"],
            ),
            # 15:00-18:00: only df2 has data -> NaN value, df2's horizon
            pd.DataFrame(
                [[np.nan, timedelta(hours=1)]],
                index=pd.date_range(
                    "2000-01-01 15:00", "2000-01-01 18:00", freq="1h", closed="left"
                ),
                columns=["event_value", "belief_horizon"],
            ),
        ],
        axis=0,
    )
    pd.testing.assert_frame_equal(df, df_compare)
1 change: 0 additions & 1 deletion bvp/ui/utils/plotting_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,6 @@ def create_graph( # noqa: C901

if data.empty:
current_app.logger.warning("No data to show for %s" % title)
print(data)

# Format y floats
if (
Expand Down
2 changes: 1 addition & 1 deletion requirements/app.in
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ pyomo>=5.6
forecastiopy
pysolar
timetomodel>=0.6.8
timely-beliefs>=0.1.0
timely-beliefs>=0.1.2
python-dotenv
Flask-SSLify
Flask_JSON
Expand Down

0 comments on commit 6a0e251

Please sign in to comment.