Skip to content

Commit

Permalink
Merged in bugfix/empty-revenue_costs-data (pull request #93)
Browse files Browse the repository at this point in the history
Empty revenue/costs data should not cause UI error

* Revenue/costs should be an empty DataFrame when power or price is empty.

* Upgrade timely-beliefs version.
Add test for dtype in empty BeliefsDataFrame.
Remove print statement.

* Construct costs/revenues DataFrame with outer join (see PR #93).
Simplify get_revenues_costs_data.

* Point to dev dependency.

* Flake8.

* Upgrade to released timely-beliefs dependency.

* Fix logic for determining belief horizon upon multiplication.
Clarify docstring of multiplication util function.
Add test for multiplication util function.

* Add comment.

Approved-by: Nicolas Höning
  • Loading branch information
Flix6x committed Nov 3, 2020
1 parent 52bc856 commit 6a0e251
Show file tree
Hide file tree
Showing 5 changed files with 109 additions and 62 deletions.
61 changes: 16 additions & 45 deletions bvp/data/queries/analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@
import pandas as pd
import timely_beliefs as tb

from bvp.data.queries.utils import simplify_index, new_dataframe_aligned_with
from bvp.data.queries.utils import (
simplify_index,
multiply_dataframe_with_deterministic_beliefs,
)
from bvp.utils import calculations, time_utils
from bvp.data.services.resources import Resource, find_closest_weather_sensor
from bvp.data.models.assets import Asset
Expand Down Expand Up @@ -323,64 +326,32 @@ def get_revenues_costs_data(
- weighted absolute percentage error
"""
power_hour_factor = time_utils.resolution_to_hour_factor(session["resolution"])
rev_cost_data = new_dataframe_aligned_with(power_data)
rev_cost_forecasts = new_dataframe_aligned_with(

rev_cost_data = multiply_dataframe_with_deterministic_beliefs(
power_data,
columns=["event_value", "yhat_upper", "yhat_lower"],
prices_data,
result_source="Calculated from power and price data",
multiplication_factor=power_hour_factor * unit_factor,
)

if power_data.empty or prices_data.empty:
metrics["realised_revenues_costs"] = np.NaN
else:
rev_cost_data = new_dataframe_aligned_with(
power_data,
column_values=dict(
event_value=power_data["event_value"]
* power_hour_factor
* prices_data["event_value"]
* unit_factor
),
)
if (
"belief_horizon" in power_data.columns
and "belief_horizon" in prices_data.columns
):
rev_cost_data["belief_horizon"] = pd.DataFrame(
[power_data["belief_horizon"], prices_data["belief_horizon"]]
).min()
if "source" in power_data.columns and "source" in prices_data.columns:
rev_cost_data["source"] = "Calculated from power and price data"
metrics["realised_revenues_costs"] = np.nansum(
rev_cost_data["event_value"].values
)

if (
power_data.empty
or prices_data.empty
or power_forecast_data.empty
or prices_forecast_data.empty
or not (power_data["event_value"].size == prices_data["event_value"].size)
or not (
power_forecast_data["event_value"].size
== prices_forecast_data["event_value"].size
)
):
rev_cost_forecasts = multiply_dataframe_with_deterministic_beliefs(
power_forecast_data,
prices_forecast_data,
result_source="Calculated from power and price data",
multiplication_factor=power_hour_factor * unit_factor,
)
if power_forecast_data.empty or prices_forecast_data.empty:
metrics["expected_revenues_costs"] = np.NaN
metrics["mae_revenues_costs"] = np.NaN
metrics["mape_revenues_costs"] = np.NaN
metrics["wape_revenues_costs"] = np.NaN
else:
rev_cost_forecasts = new_dataframe_aligned_with(
power_data,
columns=["event_value", "yhat_upper", "yhat_lower"],
)
if not (power_forecast_data.empty and prices_forecast_data.empty):
rev_cost_forecasts["event_value"] = (
power_forecast_data["event_value"]
* power_hour_factor
* prices_forecast_data["event_value"]
* unit_factor
)
metrics["expected_revenues_costs"] = np.nansum(
rev_cost_forecasts["event_value"]
)
Expand Down
51 changes: 36 additions & 15 deletions bvp/data/queries/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from datetime import datetime, timedelta

import pandas as pd
import numpy as np
import timely_beliefs as tb

from sqlalchemy.orm import Query, Session
Expand Down Expand Up @@ -158,21 +157,43 @@ def simplify_index(
return bdf


def multiply_dataframe_with_deterministic_beliefs(
    df1: pd.DataFrame,
    df2: pd.DataFrame,
    multiplication_factor: float = 1,
    result_source: Optional[str] = None,
) -> pd.DataFrame:
    """Return a DataFrame whose event_value is the product of df1's and df2's.

    The result index is the outer join of the two input indices; rows present
    in only one of the inputs get an event_value of np.nan. If both inputs
    carry a belief_horizon column, the result keeps the row-wise minimum of
    the two horizons. The source columns of df1 and df2 are not used; pass a
    result_source string to label the result with a source column instead.

    :param df1: DataFrame with an "event_value" column and optional
                "belief_horizon" and "source" columns
    :param df2: DataFrame with an "event_value" column and optional
                "belief_horizon" and "source" columns
    :param multiplication_factor: extra scalar applied to every product
    :param result_source: string label for the source of the resulting DataFrame
    :returns: DataFrame with an "event_value" column,
              an additional "belief_horizon" column if both inputs contain one,
              and an additional "source" column if result_source is set.
    """
    # Index-aligned multiplication: pandas produces NaN for any event_start
    # that is missing from either input, which gives us the outer join.
    product = df1["event_value"].mul(df2["event_value"]).mul(multiplication_factor)
    result = product.to_frame(name="event_value")

    if "belief_horizon" in df1.columns and "belief_horizon" in df2.columns:
        # Keep only the smaller horizon per row; a row missing from one input
        # contributes NaT, which min(axis=1) skips.
        horizons = pd.concat([df1["belief_horizon"], df2["belief_horizon"]], axis=1)
        result["belief_horizon"] = horizons.min(axis=1)

    if result_source:
        result["source"] = result_source  # also for rows with nan event_value
    return result
56 changes: 56 additions & 0 deletions bvp/data/tests/test_queries.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import pytest
import pytz
import timely_beliefs as tb

from bvp.data.models.assets import Asset, Power
from bvp.data.queries.utils import multiply_dataframe_with_deterministic_beliefs


@pytest.mark.parametrize(
Expand All @@ -24,6 +27,11 @@
48,
),
(None, datetime(2015, 1, 1, 12, tzinfo=pytz.utc), 48),
(
datetime(1957, 1, 1, tzinfo=pytz.utc),
datetime(1957, 1, 2, tzinfo=pytz.utc),
0,
),
],
)
def test_collect_power(db, app, query_start, query_end, num_values):
Expand All @@ -37,6 +45,9 @@ def test_collect_power(db, app, query_start, query_end, num_values):
assert (
bdf.index.names[0] == "event_start"
) # first index level of collect function should be event_start, so that df.loc[] refers to event_start
assert pd.api.types.is_timedelta64_dtype(
bdf.index.get_level_values("belief_horizon")
) # dtype of belief_horizon is timedelta64[ns], so the minimum horizon on an empty BeliefsDataFrame is NaT instead of NaN
assert len(bdf) == num_values
for v1, v2 in zip(bdf.values, data):
assert abs(v1[0] - v2.value) < 10 ** -6
Expand Down Expand Up @@ -80,3 +91,48 @@ def test_collect_power_resampled(
)
print(bdf)
assert len(bdf) == num_values


def test_multiplication():
    """Check multiply_dataframe_with_deterministic_beliefs on two overlapping frames.

    df1 covers 10:00-15:00 and df2 covers 13:00-18:00 (hourly, left-closed),
    so the result index should be the outer join 10:00-18:00: event values are
    the product (30 * 10 = 300) where both frames have data and NaN elsewhere,
    and the belief horizon per row is the minimum of the available horizons.
    """
    # NOTE(review): each frame passes a single data row against a 5-element
    # index — presumably pandas broadcasts the row; confirm against the
    # pandas version pinned by this project.
    df1 = pd.DataFrame(
        [[30.0, timedelta(hours=3)]],
        index=pd.date_range(
            "2000-01-01 10:00", "2000-01-01 15:00", freq="1h", closed="left"
        ),
        columns=["event_value", "belief_horizon"],
    )
    df2 = pd.DataFrame(
        [[10.0, timedelta(hours=1)]],
        index=pd.date_range(
            "2000-01-01 13:00", "2000-01-01 18:00", freq="1h", closed="left"
        ),
        columns=["event_value", "belief_horizon"],
    )
    df = multiply_dataframe_with_deterministic_beliefs(df1, df2)
    # Build the expected frame piecewise: left-only rows, overlap rows,
    # right-only rows.
    df_compare = pd.concat(
        [
            # 10:00-13:00: only df1 has data -> NaN value, df1's horizon
            pd.DataFrame(
                [[np.nan, timedelta(hours=3)]],
                index=pd.date_range(
                    "2000-01-01 10:00", "2000-01-01 13:00", freq="1h", closed="left"
                ),
                columns=["event_value", "belief_horizon"],
            ),
            # 13:00-15:00: both have data -> 30 * 10 = 300, minimum horizon (1h)
            pd.DataFrame(
                [[300.0, timedelta(hours=1)]],
                index=pd.date_range(
                    "2000-01-01 13:00", "2000-01-01 15:00", freq="1h", closed="left"
                ),
                columns=["event_value", "belief_horizon"],
            ),
            # 15:00-18:00: only df2 has data -> NaN value, df2's horizon
            pd.DataFrame(
                [[np.nan, timedelta(hours=1)]],
                index=pd.date_range(
                    "2000-01-01 15:00", "2000-01-01 18:00", freq="1h", closed="left"
                ),
                columns=["event_value", "belief_horizon"],
            ),
        ],
        axis=0,
    )
    pd.testing.assert_frame_equal(df, df_compare)
1 change: 0 additions & 1 deletion bvp/ui/utils/plotting_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,6 @@ def create_graph( # noqa: C901

if data.empty:
current_app.logger.warning("No data to show for %s" % title)
print(data)

# Format y floats
if (
Expand Down
2 changes: 1 addition & 1 deletion requirements/app.in
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ pyomo>=5.6
forecastiopy
pysolar
timetomodel>=0.6.8
timely-beliefs>=0.1.0
timely-beliefs>=0.1.2
python-dotenv
Flask-SSLify
Flask_JSON
Expand Down

0 comments on commit 6a0e251

Please sign in to comment.