Skip to content

Commit

Permalink
feature: upsampling to zero resolution
Browse files Browse the repository at this point in the history
Signed-off-by: F.N. Claessen <felix@seita.nl>
  • Loading branch information
Flix6x committed Mar 12, 2024
1 parent b4d7fce commit 3a78a54
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 2 deletions.
8 changes: 7 additions & 1 deletion timely_beliefs/beliefs/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1419,6 +1419,7 @@ def resample_events(
distribution: str | None = None,
keep_only_most_recent_belief: bool = False,
keep_nan_values: bool = False,
boundary_policy: str = "first",
) -> "BeliefsDataFrame":
"""Aggregate over multiple events (downsample) or split events into multiple sub-events (upsample).
Expand Down Expand Up @@ -1469,6 +1470,8 @@ def resample_events(
:param keep_only_most_recent_belief: If True, assign the most recent belief time to each event after resampling.
Only applies in case of multiple beliefs per event.
:param keep_nan_values: If True, place back resampled NaN values. Drops NaN values by default.
:param boundary_policy: When upsampling to instantaneous events,
take the 'max', 'min' or 'first' value at event boundaries.
"""

if self.empty:
Expand Down Expand Up @@ -1522,7 +1525,10 @@ def resample_events(
level=[belief_timing_col, "source", "cumulative_probability"]
)
df = belief_utils.upsample_beliefs_data_frame(
df, event_resolution, keep_nan_values
df=df,
event_resolution=event_resolution,
keep_nan_values=keep_nan_values,
boundary_policy=boundary_policy,
)
df = df.set_index(
[belief_timing_col, "source", "cumulative_probability"], append=True
Expand Down
33 changes: 32 additions & 1 deletion timely_beliefs/beliefs/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1054,22 +1054,53 @@ def meta_repr(
)


def convert_to_instantaneous(
    df: "classes.BeliefsDataFrame",
    boundary_policy: str,
) -> pd.DataFrame:
    """Convert non-instantaneous events to instantaneous events.

    Each event is represented by two instantaneous events: one at its event start
    and one at its event end (event start + df.event_resolution). Where the end of
    one event coincides with the start of another (for the same belief_time, source
    and cumulative_probability), the boundary policy decides which value is kept.

    Expects event_start as the sole index, and belief_time, source, cumulative_probability and event_value as columns.

    :param df: frame to convert (df.event_resolution must be set); it is not modified
    :param boundary_policy: 'min', 'max' or 'first'.
                            'first' takes the value of the event starting at the boundary,
                            falling back on the value of the event ending there if the former is missing (NaN).
    :returns: frame indexed by event_start, sorted, with event_resolution set to zero
    :raises ValueError: if boundary_policy is not one of the supported policies
    """
    # Validate up front: the policy name is used for attribute lookup below,
    # so a typo would otherwise surface as an obscure AttributeError (or silently
    # run an unrelated DataFrame method).
    supported_policies = ("first", "min", "max")
    if boundary_policy not in supported_policies:
        raise ValueError(
            f"Unsupported boundary_policy {boundary_policy!r}; expected one of {supported_policies}."
        )

    index_levels = ["event_start", "belief_time", "source", "cumulative_probability"]

    # Shifted copy: every row moved from its event start to its event end
    ends = df.copy()
    ends.index = ends.index + df.event_resolution

    starts = df.reset_index().set_index(index_levels)
    ends = ends.reset_index().set_index(index_levels)

    # Side by side: first column holds values of events starting at each instant,
    # second column holds values of events ending at each instant (NaN where absent)
    both = pd.concat([starts, ends], axis=1)
    if boundary_policy == "first":
        # DataFrame.bfill replaces the deprecated fillna(method='bfill')
        s = both.bfill(axis=1).iloc[:, 0]
    else:
        s = getattr(both, boundary_policy)(axis=1).rename("event_value")

    result = s.sort_index().reset_index().set_index("event_start")
    result.event_resolution = timedelta(0)
    return result


def upsample_beliefs_data_frame(
df: "classes.BeliefsDataFrame" | pd.DataFrame,
event_resolution: timedelta,
keep_nan_values: bool = False,
boundary_policy: str = "first",
) -> "classes.BeliefsDataFrame":
"""Because simply doing df.resample().ffill() does not correctly resample the last event in the data frame.
:param df: In case of a regular pd.DataFrame, make sure to set df.event_resolution before passing it to this function.
:param event_resolution: Resolution to upsample to.
:param keep_nan_values: If True, place back resampled NaN values. Drops NaN values by default.
:param boundary_policy: When upsampling to instantaneous events,
take the 'max', 'min' or 'first' value at event boundaries.
"""
if df.empty:
df.event_resolution = event_resolution
return df
if event_resolution == timedelta(0):
raise NotImplementedError("Cannot upsample to zero event resolution.")
return convert_to_instantaneous(
df=df,
boundary_policy=boundary_policy,
)
from_event_resolution = df.event_resolution
if from_event_resolution == timedelta(0):
raise NotImplementedError("Cannot upsample from zero event resolution.")
Expand Down
41 changes: 41 additions & 0 deletions timely_beliefs/tests/test_df_resampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,24 @@ def df_4323(
return df_wxyz(time_slot_sensor, 4, 3, 2, 3, start)


@pytest.fixture(scope="function", autouse=True)
def df_4111(
    time_slot_sensor: Sensor,
    test_source_a: BeliefSource,
    test_source_b: BeliefSource,
    df_wxyz: Callable[
        [Sensor, int, int, int, int, Optional[datetime]], BeliefsDataFrame
    ],
) -> BeliefsDataFrame:
    """Small deterministic BeliefsDataFrame for resampling tests.

    Built through the df_wxyz factory for the time slot sensor, with the numbers 4, 1, 1, 1:
    4 events, each with 1 belief by 1 source, described by 1 deterministic value.
    The first event starts on 2000-01-03 at 09:00 UTC.
    Note that the event resolution of the sensor is 15 minutes.
    """
    naive_start = datetime(2000, 1, 3, 9)
    utc = pytz.timezone("utc")
    return df_wxyz(time_slot_sensor, 4, 1, 1, 1, utc.localize(naive_start))


@pytest.fixture(scope="function", autouse=True)
def df_instantaneous_8111(
instantaneous_sensor: Sensor,
Expand Down Expand Up @@ -411,3 +429,26 @@ def test_downsample_instantaneous(df_instantaneous_8111):
assert df_resampled_2.event_resolution == downsampled_event_resolution
# frequency updated
assert df_resampled_2.event_frequency == downsampled_event_resolution


def test_upsample_to_instantaneous(df_4111, test_source_a: BeliefSource):
    """Upsampling deterministic time slot events to zero resolution yields instantaneous events.

    Each of the 4 original events is expected to show up twice in the result:
    once at its start and once 15 minutes later, carrying the same value.
    """
    zero_resolution = timedelta(minutes=0)
    resampled = df_4111.resample_events(zero_resolution)

    # resolution updated
    assert resampled.event_resolution == zero_resolution

    # two instants per original event
    boundary_instants = (
        "2000-01-03T09:00+00",
        "2000-01-03T09:15+00",
        "2000-01-03T10:00+00",
        "2000-01-03T10:15+00",
        "2000-01-03T11:00+00",
        "2000-01-03T11:15+00",
        "2000-01-03T12:00+00",
        "2000-01-03T12:15+00",
    )
    expected_index = pd.DatetimeIndex(
        [pd.Timestamp(instant) for instant in boundary_instants],
        name="event_start",
    )
    actual_index = resampled.index.get_level_values(level="event_start")
    pd.testing.assert_index_equal(actual_index, expected_index)

    # each value is repeated at both boundaries of its event
    actual_values = resampled["event_value"].values.tolist()
    assert actual_values == [0, 0, 1000, 1000, 2000, 2000, 3000, 3000]

0 comments on commit 3a78a54

Please sign in to comment.