From da0098a31d76a9e154c76c7599a79e6d1db7592e Mon Sep 17 00:00:00 2001 From: lschilders Date: Fri, 6 Mar 2026 17:21:31 +0100 Subject: [PATCH 1/5] feature: add apply() to AvailableAt and tz-aware option --- .../openstef-core/src/openstef_core/types.py | 114 +++++++++++++---- .../openstef-core/tests/unit/test_types.py | 116 ++++++++++++++++-- 2 files changed, 191 insertions(+), 39 deletions(-) diff --git a/packages/openstef-core/src/openstef_core/types.py b/packages/openstef-core/src/openstef_core/types.py index fa01686e6..1b557d3de 100644 --- a/packages/openstef-core/src/openstef_core/types.py +++ b/packages/openstef-core/src/openstef_core/types.py @@ -10,11 +10,12 @@ """ import re -from datetime import timedelta +from datetime import datetime, time, timedelta from enum import StrEnum from functools import total_ordering from typing import Any, Literal, Self, override +import pytz from pydantic import GetCoreSchemaHandler, TypeAdapter from pydantic_core import CoreSchema, core_schema @@ -119,52 +120,77 @@ def to_hours(self) -> float: class AvailableAt(PydanticStringPrimitive): """Represents a time point available relative to a reference day. - Uses a specialized string format 'DnTHHMM' where: - - n is the day offset (negative indicates prior days) - - HHMM is the time of day + Uses a specialized string format ``DnTHHMM`` where: - For example, 'D-1T0600' means "6:00 AM on the previous day". - The legacy 'DnTHH:MM' format (with colon) is also accepted by from_string(). + - *n* is the day offset (negative indicates prior days) + - *HHMM* is the time of day - Example: - Creating and using availability times: + For example, ``D-1T0600`` means "6:00 AM on the previous day". + The legacy ``DnTHH:MM`` format (with colon) is also accepted by + ``from_string()``. + + An optional pytz timezone can be attached to make the availability time + timezone-aware. Only ``pytz.BaseTzInfo`` subclasses are accepted + (e.g. ``pytz.timezone("Europe/Amsterdam")``, ``pytz.UTC``). + Example: >>> from datetime import timedelta - >>> # Available at 6 AM on the previous day - >>> at = AvailableAt(timedelta(hours=18)) # 18 hours before day end + >>> at = AvailableAt(timedelta(hours=18)) >>> str(at) 'D-1T0600' - >>> # Available at midnight of the current day - >>> AvailableAt.from_string('D0T00:00').lag_from_day - datetime.timedelta(0) + >>> at.apply(datetime(2026, 3, 6)) + datetime.datetime(2026, 3, 5, 6, 0) """ - def __init__(self, lag_from_day: timedelta): - """Initializes with a lag from the reference day start.""" + def __init__(self, lag_from_day: timedelta, *, tzinfo: pytz.BaseTzInfo | None = None): + """Initializes with a lag from the reference day start. + + Args: + lag_from_day: Timedelta representing how far before the reference day + the data becomes available. + tzinfo: Optional pytz timezone for the availability time + (e.g. ``pytz.timezone("Europe/Amsterdam")``, ``pytz.UTC``). + """ self.lag_from_day = lag_from_day + self.tzinfo = tzinfo + + @property + def day_offset(self) -> int: + """Day offset from the reference day (negative or zero).""" + return -int(self.lag_from_day / timedelta(days=1)) - 1 + + @property + def time_of_day(self) -> time: + """Time of day when data becomes available (optionally tz-aware).""" + offset = timedelta(hours=24) - (self.lag_from_day % timedelta(days=1)) + return time( + hour=offset.seconds // 3600, + minute=(offset.seconds // 60) % 60, + tzinfo=self.tzinfo, + ) def __str__(self) -> str: - """Converts to string in 'DnTHHMM' format (Windows-safe, no colon). + """Converts to string in ``DnTHHMM`` format (Windows-safe, no colon). Returns: - String representation in 'DnTHHMM' format. + String representation in ``DnTHHMM`` format. """ - lag_days = -int(self.lag_from_day / timedelta(days=1)) - 1 - time = timedelta(hours=24) - (self.lag_from_day % timedelta(days=1)) - return f"D{lag_days}T{time.seconds // 3600:02}{(time.seconds // 60) % 60:02}" + t = self.time_of_day + return f"D{self.day_offset}T{t.hour:02}{t.minute:02}" @classmethod - def from_string(cls, s: str) -> Self: - """Creates an instance from a string in 'DnTHHMM' or 'DnTHH:MM' format. + def from_string(cls, s: str, *, tzinfo: pytz.BaseTzInfo | None = None) -> Self: + """Creates an instance from a string in ``DnTHHMM`` or ``DnTHH:MM`` format. Args: - s: String in 'DnTHHMM' or 'DnTHH:MM' format to parse. + s: String in ``DnTHHMM`` or ``DnTHH:MM`` format to parse. + tzinfo: Optional pytz timezone to attach to the parsed instance. Returns: AvailableAt instance parsed from the string. Raises: - ValueError: If the string format is invalid. + ValueError: If the string format is invalid or day offset is positive. """ match = re.match(r"D(-?\d+)T(\d{2}):?(\d{2})", s) if not match: @@ -173,12 +199,46 @@ def from_string(cls, s: str) -> Self: days_part, hours_part, minutes_part = match.groups() + # Day offset must be negative or zero (available before/at the reference day) + if int(days_part) > 0: + msg = f"Day offset must be negative or zero, got {days_part}" + raise ValueError(msg) + # Calculate lag_from_day lag_days = -int(days_part) - 1 - time = timedelta(hours=int(hours_part), minutes=int(minutes_part)) - lag_from_day = timedelta(days=lag_days) + (timedelta(hours=24) - time) + time_offset = timedelta(hours=int(hours_part), minutes=int(minutes_part)) + lag_from_day = timedelta(days=lag_days) + (timedelta(hours=24) - time_offset) + + return cls(lag_from_day=lag_from_day, tzinfo=tzinfo) + + def apply(self, date: datetime, *, output_tz: pytz.BaseTzInfo | None = None) -> datetime: + """Apply this availability offset to a reference date. + + Computes the actual datetime when data becomes available, given a + reference date. Works with both timezone-aware and naive datetimes. + + Timezone resolution: ``output_tz`` > ``self.tzinfo`` > ``date.tzinfo`` > ``None``. + Timezones must be pytz objects (``pytz.timezone()``, ``pytz.UTC``) — + ``tz.localize()`` is used for DST correctness. + + Args: + date: The reference date to apply the availability offset to. + output_tz: Explicit pytz timezone for the result. When set, overrides + ``self.tzinfo`` and ``date.tzinfo``. + + Returns: + The datetime when data is available. + """ + result_date = (date + timedelta(days=self.day_offset)).date() + naive_time = self.time_of_day.replace(tzinfo=None) + naive_result = datetime.combine(result_date, naive_time) + + tz = output_tz if output_tz is not None else (self.tzinfo if self.tzinfo is not None else date.tzinfo) + + if tz is None: + return naive_result - return cls(lag_from_day=lag_from_day) + return tz.localize(naive_result) # type: ignore[union-attr] @classmethod @override diff --git a/packages/openstef-core/tests/unit/test_types.py b/packages/openstef-core/tests/unit/test_types.py index 0cbb40077..34b02f910 100644 --- a/packages/openstef-core/tests/unit/test_types.py +++ b/packages/openstef-core/tests/unit/test_types.py @@ -2,9 +2,10 @@ # # SPDX-License-Identifier: MPL-2.0 -from datetime import timedelta +from datetime import datetime, time, timedelta import pytest +import pytz from openstef_core.types import AvailableAt, LeadTime @@ -53,11 +54,8 @@ def test_lead_time_from_string_roundtrip(input_delta: timedelta): ], ) def test_available_at_str(lag_from_day: timedelta, expected_string: str): - # Arrange - available_at = AvailableAt(lag_from_day=lag_from_day) - # Act - result = str(available_at) + result = str(AvailableAt(lag_from_day=lag_from_day)) # Assert assert result == expected_string @@ -66,17 +64,111 @@ def test_available_at_str(lag_from_day: timedelta, expected_string: str): @pytest.mark.parametrize( "available_at", [ - pytest.param(AvailableAt(lag_from_day=timedelta(hours=18)), id="D-1T06:00"), - pytest.param(AvailableAt(lag_from_day=timedelta(hours=12 + 24)), id="D-2T12:00"), + pytest.param(AvailableAt(lag_from_day=timedelta(hours=18)), id="D-1T0600"), + pytest.param(AvailableAt(lag_from_day=timedelta(hours=12 + 24)), id="D-2T1200"), ], ) def test_available_at_from_string_roundtrip(available_at: AvailableAt): - # Arrange - original = available_at + # Act + reconstructed = AvailableAt.from_string(str(available_at)) + + # Assert + assert reconstructed.lag_from_day == available_at.lag_from_day + + +def test_available_at_from_string_rejects_positive_days_part(): + with pytest.raises(ValueError, match="Day offset must be negative or zero"): + AvailableAt.from_string("D1T0600") + + +_AMS = pytz.timezone("Europe/Amsterdam") + +@pytest.mark.parametrize( + ("available_at", "reference_date", "output_tz", "expected", "expected_tz"), + [ + pytest.param( + AvailableAt(lag_from_day=timedelta(hours=18)), + datetime(2026, 3, 6), # noqa: DTZ001 + None, + datetime(2026, 3, 5, 6, 0), # noqa: DTZ001 + None, + id="naive", + ), + pytest.param( + AvailableAt(lag_from_day=timedelta(hours=36)), + datetime(2026, 3, 6), # noqa: DTZ001 + None, + datetime(2026, 3, 4, 12, 0), # noqa: DTZ001 + None, + id="naive_D-2", + ), + pytest.param( + AvailableAt(lag_from_day=timedelta(hours=18), tzinfo=pytz.UTC), + datetime(2026, 3, 6, tzinfo=pytz.UTC), + None, + datetime(2026, 3, 5, 6, 0, tzinfo=pytz.UTC), + "UTC", + id="utc", + ), + pytest.param( + AvailableAt(lag_from_day=timedelta(hours=18), tzinfo=_AMS), + datetime(2026, 3, 6), # noqa: DTZ001 + None, + _AMS.localize(datetime(2026, 3, 5, 6, 0)), # noqa: DTZ001 + "Europe/Amsterdam", + id="named_tz", + ), + pytest.param( + AvailableAt(lag_from_day=timedelta(hours=18), tzinfo=_AMS), + datetime(2026, 3, 6, tzinfo=pytz.UTC), + None, + _AMS.localize(datetime(2026, 3, 5, 6, 0)), # noqa: DTZ001 + "Europe/Amsterdam", + id="own_tz_over_date_tz", + ), + pytest.param( + AvailableAt(lag_from_day=timedelta(hours=18)), + datetime(2026, 3, 6, tzinfo=pytz.UTC), + None, + datetime(2026, 3, 5, 6, 0, tzinfo=pytz.UTC), + "UTC", + id="fallback_to_date_tz", + ), + pytest.param( + AvailableAt(lag_from_day=timedelta(hours=18), tzinfo=pytz.UTC), + datetime(2026, 3, 6, tzinfo=pytz.UTC), + _AMS, + _AMS.localize(datetime(2026, 3, 5, 6, 0)), # noqa: DTZ001 + "Europe/Amsterdam", + id="output_tz_overrides", + ), + ], +) +def test_available_at_apply( + available_at: AvailableAt, + reference_date: datetime, + output_tz: pytz.BaseTzInfo | None, + expected: datetime, + expected_tz: str | None, +): # Act - str_repr = str(original) - reconstructed = AvailableAt.from_string(str_repr) + result = available_at.apply(reference_date, output_tz=output_tz) # Assert - assert reconstructed.lag_from_day == original.lag_from_day + assert result == expected + if expected_tz is None: + assert result.tzinfo is None + else: + assert str(result.tzinfo) == expected_tz + + +def test_available_at_day_offset(): + assert AvailableAt(lag_from_day=timedelta(hours=18)).day_offset == -1 + assert AvailableAt(lag_from_day=timedelta(hours=36)).day_offset == -2 + + +def test_available_at_time_of_day(): + assert AvailableAt(lag_from_day=timedelta(hours=18)).time_of_day == time(6, 0) + assert AvailableAt(lag_from_day=timedelta(hours=36)).time_of_day == time(12, 0) + assert AvailableAt(lag_from_day=timedelta(hours=18, minutes=30)).time_of_day == time(5, 30) From 796ff0ce780860521700164d94e88320bf008bf5 Mon Sep 17 00:00:00 2001 From: lschilders Date: Mon, 9 Mar 2026 10:30:27 +0100 Subject: [PATCH 2/5] feature: refactor AvailableAt to use day_offset and time_of_day rather than lag_from_day --- .../datasets/timeseries_dataset.py | 22 +++- .../openstef-core/src/openstef_core/types.py | 118 +++++++----------- .../unit/datasets/test_timeseries_dataset.py | 34 ++++- .../test_versioned_timeseries_dataset.py | 4 +- .../openstef-core/tests/unit/test_types.py | 116 +++++++++-------- 5 files changed, 163 insertions(+), 131 deletions(-) diff --git a/packages/openstef-core/src/openstef_core/datasets/timeseries_dataset.py b/packages/openstef-core/src/openstef_core/datasets/timeseries_dataset.py index 5fa85e8e9..2a142c1ad 100644 --- a/packages/openstef-core/src/openstef_core/datasets/timeseries_dataset.py +++ b/packages/openstef-core/src/openstef_core/datasets/timeseries_dataset.py @@ -266,7 +266,27 @@ def filter_by_available_at(self, available_at: AvailableAt) -> Self: if available_at_series is None: return self - cutoff = self.index.floor("D") - pd.Timedelta(available_at.lag_from_day) + source_tz = available_at.tzinfo + + if source_tz is not None and self.index.tz is not None: + # DST-correct: interpret the date in AvailableAt's timezone + index_in_tz = self.index.tz_convert(source_tz) + else: + index_in_tz = self.index + + # Step 1: cutoff date = reference date + day_offset (mirrors apply()) + cutoff_day = index_in_tz.floor("D") + pd.Timedelta(days=available_at.day_offset) + + # Step 2: set the time of day (mirrors datetime.combine in apply()) + cutoff = cutoff_day + pd.Timedelta( + hours=available_at.time_of_day.hour, + minutes=available_at.time_of_day.minute, + ) + + # Convert back to the data's timezone for comparison + if source_tz is not None and self.index.tz is not None: + cutoff = cutoff.tz_convert(self.index.tz) + data_filtered = self.data[available_at_series <= cutoff] return self._copy_with_data(data=data_filtered) diff --git a/packages/openstef-core/src/openstef_core/types.py b/packages/openstef-core/src/openstef_core/types.py index 1b557d3de..d312d7c0a 100644 --- a/packages/openstef-core/src/openstef_core/types.py +++ b/packages/openstef-core/src/openstef_core/types.py @@ -11,6 +11,7 @@ import re from datetime import datetime, time, timedelta +from datetime import tzinfo as dt_tzinfo from enum import StrEnum from functools import total_ordering from typing import Any, Literal, Self, override @@ -122,69 +123,62 @@ class AvailableAt(PydanticStringPrimitive): Uses a specialized string format ``DnTHHMM`` where: - - *n* is the day offset (negative indicates prior days) + - *n* is the day offset (negative or zero) - *HHMM* is the time of day For example, ``D-1T0600`` means "6:00 AM on the previous day". The legacy ``DnTHH:MM`` format (with colon) is also accepted by ``from_string()``. - An optional pytz timezone can be attached to make the availability time - timezone-aware. Only ``pytz.BaseTzInfo`` subclasses are accepted - (e.g. ``pytz.timezone("Europe/Amsterdam")``, ``pytz.UTC``). + An optional timezone can be attached so that the time-of-day is + interpreted in that timezone (important for DST correctness). + Both pytz and stdlib ``datetime.timezone`` objects are supported. Example: - >>> from datetime import timedelta - >>> at = AvailableAt(timedelta(hours=18)) + >>> from datetime import time + >>> at = AvailableAt(day_offset=-1, time_of_day=time(6, 0)) >>> str(at) 'D-1T0600' >>> at.apply(datetime(2026, 3, 6)) datetime.datetime(2026, 3, 5, 6, 0) """ - def __init__(self, lag_from_day: timedelta, *, tzinfo: pytz.BaseTzInfo | None = None): - """Initializes with a lag from the reference day start. + def __init__(self, day_offset: int, time_of_day: time, *, tzinfo: dt_tzinfo | None = None): + """Initialise with a day offset and time of day. Args: - lag_from_day: Timedelta representing how far before the reference day - the data becomes available. - tzinfo: Optional pytz timezone for the availability time - (e.g. ``pytz.timezone("Europe/Amsterdam")``, ``pytz.UTC``). + day_offset: Day offset from the reference day (must be ≤ 0). + ``-1`` means "the previous day", ``0`` means "the same day". + time_of_day: Clock time when data becomes available. + tzinfo: Optional timezone for the availability time + (e.g. ``pytz.timezone("Europe/Amsterdam")``, ``pytz.UTC``, + or ``datetime.timezone.utc``). + + Raises: + ValueError: If day_offset is positive. """ - self.lag_from_day = lag_from_day + if day_offset > 0: + msg = f"Day offset must be negative or zero, got {day_offset}" + raise ValueError(msg) + self.day_offset = day_offset + self.time_of_day = time_of_day self.tzinfo = tzinfo - @property - def day_offset(self) -> int: - """Day offset from the reference day (negative or zero).""" - return -int(self.lag_from_day / timedelta(days=1)) - 1 - - @property - def time_of_day(self) -> time: - """Time of day when data becomes available (optionally tz-aware).""" - offset = timedelta(hours=24) - (self.lag_from_day % timedelta(days=1)) - return time( - hour=offset.seconds // 3600, - minute=(offset.seconds // 60) % 60, - tzinfo=self.tzinfo, - ) - def __str__(self) -> str: """Converts to string in ``DnTHHMM`` format (Windows-safe, no colon). Returns: String representation in ``DnTHHMM`` format. """ - t = self.time_of_day - return f"D{self.day_offset}T{t.hour:02}{t.minute:02}" + return f"D{self.day_offset}T{self.time_of_day.hour:02}{self.time_of_day.minute:02}" @classmethod - def from_string(cls, s: str, *, tzinfo: pytz.BaseTzInfo | None = None) -> Self: + def from_string(cls, s: str, *, tzinfo: dt_tzinfo | None = None) -> Self: """Creates an instance from a string in ``DnTHHMM`` or ``DnTHH:MM`` format. Args: s: String in ``DnTHHMM`` or ``DnTHH:MM`` format to parse. - tzinfo: Optional pytz timezone to attach to the parsed instance. + tzinfo: Optional timezone to attach to the parsed instance. Returns: AvailableAt instance parsed from the string. @@ -204,58 +198,42 @@ def from_string(cls, s: str, *, tzinfo: pytz.BaseTzInfo | None = None) -> Self: msg = f"Day offset must be negative or zero, got {days_part}" raise ValueError(msg) - # Calculate lag_from_day - lag_days = -int(days_part) - 1 - time_offset = timedelta(hours=int(hours_part), minutes=int(minutes_part)) - lag_from_day = timedelta(days=lag_days) + (timedelta(hours=24) - time_offset) - - return cls(lag_from_day=lag_from_day, tzinfo=tzinfo) + return cls( + day_offset=int(days_part), + time_of_day=time(hour=int(hours_part), minute=int(minutes_part)), + tzinfo=tzinfo, + ) - def apply(self, date: datetime, *, output_tz: pytz.BaseTzInfo | None = None) -> datetime: + def apply(self, date: datetime) -> datetime: """Apply this availability offset to a reference date. - Computes the actual datetime when data becomes available, given a - reference date. Works with both timezone-aware and naive datetimes. - - Timezone resolution: ``output_tz`` > ``self.tzinfo`` > ``date.tzinfo`` > ``None``. - Timezones must be pytz objects (``pytz.timezone()``, ``pytz.UTC``) — - ``tz.localize()`` is used for DST correctness. + The time-of-day is interpreted in ``self.tzinfo`` (falls back to + ``date.tzinfo``). The result is returned in the reference date's + timezone, or naive when the reference date is naive. + Both pytz and stdlib ``datetime.timezone`` are supported. Args: date: The reference date to apply the availability offset to. - output_tz: Explicit pytz timezone for the result. When set, overrides - ``self.tzinfo`` and ``date.tzinfo``. Returns: - The datetime when data is available. + The datetime when data is available, in the reference date's + timezone (or naive when the reference date is naive). """ result_date = (date + timedelta(days=self.day_offset)).date() - naive_time = self.time_of_day.replace(tzinfo=None) - naive_result = datetime.combine(result_date, naive_time) + naive_result = datetime.combine(result_date, self.time_of_day) - tz = output_tz if output_tz is not None else (self.tzinfo if self.tzinfo is not None else date.tzinfo) - - if tz is None: + source_tz = self.tzinfo or date.tzinfo + if source_tz is None: return naive_result - return tz.localize(naive_result) # type: ignore[union-attr] - - @classmethod - @override - def validate(cls, v: Self | str | timedelta, _info: Any = None) -> Self: - """Validates and converts various input types to AvailableAt. - - Args: - v: Value to validate (AvailableAt, string, or timedelta). - _info: Additional validation info (unused). + if isinstance(source_tz, pytz.BaseTzInfo): + aware = source_tz.localize(naive_result) + else: + aware = naive_result.replace(tzinfo=source_tz) - Returns: - Validated AvailableAt instance. - """ - if isinstance(v, timedelta): - return cls(lag_from_day=v) - - return super().validate(v, _info) + if date.tzinfo is not None: + return aware.astimezone(date.tzinfo) + return naive_result class Quantile(float): diff --git a/packages/openstef-core/tests/unit/datasets/test_timeseries_dataset.py b/packages/openstef-core/tests/unit/datasets/test_timeseries_dataset.py index 8a72ca53c..e438a72d6 100644 --- a/packages/openstef-core/tests/unit/datasets/test_timeseries_dataset.py +++ b/packages/openstef-core/tests/unit/datasets/test_timeseries_dataset.py @@ -4,12 +4,13 @@ """Tests for TimeSeriesDataset parquet serialization.""" -from datetime import datetime, timedelta +from datetime import datetime, time, timedelta from pathlib import Path from typing import cast import pandas as pd import pytest +import pytz from openstef_core.datasets.timeseries_dataset import TimeSeriesDataset from openstef_core.testing import create_timeseries_dataset @@ -117,8 +118,8 @@ def test_filter_by_available_before( @pytest.mark.parametrize( ("available_at", "expected_values"), [ - (AvailableAt(timedelta(hours=-13)), [10, 20, 30]), - (AvailableAt(timedelta(hours=-15)), [10, 20, 30, 40, 55, 50]), + (AvailableAt(day_offset=0, time_of_day=time(13, 0)), [10, 20, 30]), + (AvailableAt(day_offset=0, time_of_day=time(15, 0)), [10, 20, 30, 40, 55, 50]), ], ) def test_filter_by_available_at( @@ -131,6 +132,33 @@ def test_filter_by_available_at( assert list(filtered.data["value1"]) == expected_values +def test_filter_by_available_at_dst_aware(): + """Cutoff shifts by 1h across CET→CEST transition (2026-03-29).""" + + available_at = AvailableAt(day_offset=-1, time_of_day=time(6, 0), tzinfo=pytz.timezone("Europe/Amsterdam")) + + dataset = TimeSeriesDataset( + data=pd.DataFrame( + data={ + "available_at": pd.to_datetime([ + "2026-03-28T04:30:00+00:00", # before cutoff 05:00 UTC → kept + "2026-03-29T04:30:00+00:00", # after cutoff 04:00 UTC → filtered + ]), + "value": [1, 2], + }, + index=pd.to_datetime([ + "2026-03-29T12:00:00+00:00", # cutoff = Mar 28 06:00 CET = 05:00 UTC + "2026-03-30T12:00:00+00:00", # cutoff = Mar 29 06:00 CEST = 04:00 UTC + ]), + ), + sample_interval=timedelta(hours=24), + ) + + filtered = dataset.filter_by_available_at(available_at) + + assert list(filtered.data["value"]) == [1] + + @pytest.mark.parametrize( ("lead_time", "expected_values"), [ diff --git a/packages/openstef-core/tests/unit/datasets/test_versioned_timeseries_dataset.py b/packages/openstef-core/tests/unit/datasets/test_versioned_timeseries_dataset.py index fae83f8b1..d9053e279 100644 --- a/packages/openstef-core/tests/unit/datasets/test_versioned_timeseries_dataset.py +++ b/packages/openstef-core/tests/unit/datasets/test_versioned_timeseries_dataset.py @@ -4,7 +4,7 @@ """Tests for VersionedTimeSeriesDataset parquet serialization.""" -from datetime import timedelta +from datetime import time, timedelta from pathlib import Path import numpy as np @@ -170,7 +170,7 @@ def test_filter_by_available_before(versioned_dataset: VersionedTimeSeriesDatase def test_filter_by_available_at(versioned_dataset: VersionedTimeSeriesDataset): """Filter dataset using relative availability definition.""" # Arrange - available_at = AvailableAt(timedelta(hours=-13)) + available_at = AvailableAt(day_offset=0, time_of_day=time(13, 0)) # Act filtered = versioned_dataset.filter_by_available_at(available_at) diff --git a/packages/openstef-core/tests/unit/test_types.py b/packages/openstef-core/tests/unit/test_types.py index 34b02f910..d8ddd6ba7 100644 --- a/packages/openstef-core/tests/unit/test_types.py +++ b/packages/openstef-core/tests/unit/test_types.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: MPL-2.0 -from datetime import datetime, time, timedelta +from datetime import UTC, datetime, time, timedelta, timezone import pytest import pytz @@ -47,15 +47,15 @@ def test_lead_time_from_string_roundtrip(input_delta: timedelta): @pytest.mark.parametrize( - ("lag_from_day", "expected_string"), + ("available_at", "expected_string"), [ - pytest.param(timedelta(hours=18), "D-1T0600", id="D-1T0600"), - pytest.param(timedelta(hours=12 + 24), "D-2T1200", id="D-2T1200"), + pytest.param(AvailableAt(day_offset=-1, time_of_day=time(6, 0)), "D-1T0600", id="D-1T0600"), + pytest.param(AvailableAt(day_offset=-2, time_of_day=time(12, 0)), "D-2T1200", id="D-2T1200"), ], ) -def test_available_at_str(lag_from_day: timedelta, expected_string: str): +def test_available_at_str(available_at: AvailableAt, expected_string: str): # Act - result = str(AvailableAt(lag_from_day=lag_from_day)) + result = str(available_at) # Assert assert result == expected_string @@ -64,8 +64,8 @@ def test_available_at_str(lag_from_day: timedelta, expected_string: str): @pytest.mark.parametrize( "available_at", [ - pytest.param(AvailableAt(lag_from_day=timedelta(hours=18)), id="D-1T0600"), - pytest.param(AvailableAt(lag_from_day=timedelta(hours=12 + 24)), id="D-2T1200"), + pytest.param(AvailableAt(day_offset=-1, time_of_day=time(6, 0)), id="D-1T0600"), + pytest.param(AvailableAt(day_offset=-2, time_of_day=time(12, 0)), id="D-2T1200"), ], ) def test_available_at_from_string_roundtrip(available_at: AvailableAt): @@ -73,7 +73,8 @@ def test_available_at_from_string_roundtrip(available_at: AvailableAt): reconstructed = AvailableAt.from_string(str(available_at)) # Assert - assert reconstructed.lag_from_day == available_at.lag_from_day + assert reconstructed.day_offset == available_at.day_offset + assert reconstructed.time_of_day == available_at.time_of_day def test_available_at_from_string_rejects_positive_days_part(): @@ -81,94 +82,99 @@ def test_available_at_from_string_rejects_positive_days_part(): AvailableAt.from_string("D1T0600") +def test_available_at_rejects_positive_day_offset(): + with pytest.raises(ValueError, match="Day offset must be negative or zero"): + AvailableAt(day_offset=1, time_of_day=time(6, 0)) + + _AMS = pytz.timezone("Europe/Amsterdam") @pytest.mark.parametrize( - ("available_at", "reference_date", "output_tz", "expected", "expected_tz"), + ("available_at", "reference_date", "expected"), [ pytest.param( - AvailableAt(lag_from_day=timedelta(hours=18)), + AvailableAt(day_offset=-1, time_of_day=time(6, 0)), datetime(2026, 3, 6), # noqa: DTZ001 - None, datetime(2026, 3, 5, 6, 0), # noqa: DTZ001 - None, id="naive", ), pytest.param( - AvailableAt(lag_from_day=timedelta(hours=36)), - datetime(2026, 3, 6), # noqa: DTZ001 - None, + AvailableAt(day_offset=-2, time_of_day=time(12, 0)), + datetime(2026, 3, 6, 13), # noqa: DTZ001 datetime(2026, 3, 4, 12, 0), # noqa: DTZ001 - None, id="naive_D-2", ), pytest.param( - AvailableAt(lag_from_day=timedelta(hours=18), tzinfo=pytz.UTC), - datetime(2026, 3, 6, tzinfo=pytz.UTC), - None, - datetime(2026, 3, 5, 6, 0, tzinfo=pytz.UTC), - "UTC", - id="utc", + AvailableAt(day_offset=-1, time_of_day=time(6, 0), tzinfo=pytz.UTC), + datetime(2026, 3, 6, tzinfo=UTC), + datetime(2026, 3, 5, 6, 0, tzinfo=UTC), + id="utc_to_utc", ), pytest.param( - AvailableAt(lag_from_day=timedelta(hours=18), tzinfo=_AMS), + AvailableAt(day_offset=-1, time_of_day=time(6, 0), tzinfo=_AMS), datetime(2026, 3, 6), # noqa: DTZ001 - None, - _AMS.localize(datetime(2026, 3, 5, 6, 0)), # noqa: DTZ001 - "Europe/Amsterdam", - id="named_tz", + datetime(2026, 3, 5, 6, 0), # noqa: DTZ001 + id="ams_tz_naive_ref_returns_naive", ), pytest.param( - AvailableAt(lag_from_day=timedelta(hours=18), tzinfo=_AMS), - datetime(2026, 3, 6, tzinfo=pytz.UTC), - None, - _AMS.localize(datetime(2026, 3, 5, 6, 0)), # noqa: DTZ001 - "Europe/Amsterdam", - id="own_tz_over_date_tz", + AvailableAt(day_offset=-1, time_of_day=time(6, 0), tzinfo=UTC), + _AMS.localize(datetime(2026, 3, 6)), # noqa: DTZ001 + # 06:00 UTC = 07:00 CET (March 5 is still winter time, UTC+1) + _AMS.localize(datetime(2026, 3, 5, 7, 0)), # noqa: DTZ001 + id="utc_stdlib_tz_ams_ref_returns_ams", ), pytest.param( - AvailableAt(lag_from_day=timedelta(hours=18)), - datetime(2026, 3, 6, tzinfo=pytz.UTC), - None, - datetime(2026, 3, 5, 6, 0, tzinfo=pytz.UTC), - "UTC", + AvailableAt(day_offset=-1, time_of_day=time(6, 0)), + datetime(2026, 3, 6, tzinfo=UTC), + datetime(2026, 3, 5, 6, 0, tzinfo=UTC), id="fallback_to_date_tz", ), pytest.param( - AvailableAt(lag_from_day=timedelta(hours=18), tzinfo=pytz.UTC), - datetime(2026, 3, 6, tzinfo=pytz.UTC), - _AMS, - _AMS.localize(datetime(2026, 3, 5, 6, 0)), # noqa: DTZ001 - "Europe/Amsterdam", - id="output_tz_overrides", + AvailableAt(day_offset=-1, time_of_day=time(6, 0)), + datetime(2026, 3, 6, tzinfo=timezone(timedelta(hours=1))), + datetime(2026, 3, 5, 6, 0, tzinfo=timezone(timedelta(hours=1))), + id="fallback_to_stdlib_fixed_offset_tz", + ), + # DST transition: clocks spring forward on 2026-03-29 in Europe/Amsterdam + # 06:00 CEST (UTC+2) on Mar 29 = 04:00 UTC + pytest.param( + AvailableAt(day_offset=-1, time_of_day=time(6, 0), tzinfo=_AMS), + datetime(2026, 3, 30, tzinfo=UTC), + datetime(2026, 3, 29, 4, 0, tzinfo=UTC), + id="ams_to_utc_after_dst_switch", + ), + # Day before DST: 06:00 CET (UTC+1) on Mar 28 = 05:00 UTC + pytest.param( + AvailableAt(day_offset=-1, time_of_day=time(6, 0), tzinfo=_AMS), + datetime(2026, 3, 29, tzinfo=UTC), + datetime(2026, 3, 28, 5, 0, tzinfo=UTC), + id="ams_to_utc_before_dst_switch", ), ], ) def test_available_at_apply( available_at: AvailableAt, reference_date: datetime, - output_tz: pytz.BaseTzInfo | None, expected: datetime, - expected_tz: str | None, ): # Act - result = available_at.apply(reference_date, output_tz=output_tz) + result = available_at.apply(reference_date) # Assert assert result == expected - if expected_tz is None: + if reference_date.tzinfo is None: assert result.tzinfo is None else: - assert str(result.tzinfo) == expected_tz + assert result.tzinfo == reference_date.tzinfo def test_available_at_day_offset(): - assert AvailableAt(lag_from_day=timedelta(hours=18)).day_offset == -1 - assert AvailableAt(lag_from_day=timedelta(hours=36)).day_offset == -2 + assert AvailableAt(day_offset=-1, time_of_day=time(6, 0)).day_offset == -1 + assert AvailableAt(day_offset=-2, time_of_day=time(12, 0)).day_offset == -2 def test_available_at_time_of_day(): - assert AvailableAt(lag_from_day=timedelta(hours=18)).time_of_day == time(6, 0) - assert AvailableAt(lag_from_day=timedelta(hours=36)).time_of_day == time(12, 0) - assert AvailableAt(lag_from_day=timedelta(hours=18, minutes=30)).time_of_day == time(5, 30) + assert AvailableAt(day_offset=-1, time_of_day=time(6, 0)).time_of_day == time(6, 0) + assert AvailableAt(day_offset=-2, time_of_day=time(12, 0)).time_of_day == time(12, 0) + assert AvailableAt(day_offset=-1, time_of_day=time(5, 30)).time_of_day == time(5, 30) From a8797bf31a414ebddddfb91725fdce62d7235dc4 Mon Sep 17 00:00:00 2001 From: lschilders Date: Mon, 9 Mar 2026 14:37:00 +0100 Subject: [PATCH 3/5] feature: (de)serialization support for tz-aware AvailableAt --- .../datasets/timeseries_dataset.py | 22 +---- .../openstef-core/src/openstef_core/types.py | 95 ++++++++++++++----- .../openstef-core/tests/unit/test_types.py | 83 +++++++++++++++- 3 files changed, 151 insertions(+), 49 deletions(-) diff --git a/packages/openstef-core/src/openstef_core/datasets/timeseries_dataset.py b/packages/openstef-core/src/openstef_core/datasets/timeseries_dataset.py index 2a142c1ad..bdc2544da 100644 --- a/packages/openstef-core/src/openstef_core/datasets/timeseries_dataset.py +++ b/packages/openstef-core/src/openstef_core/datasets/timeseries_dataset.py @@ -266,27 +266,7 @@ def filter_by_available_at(self, available_at: AvailableAt) -> Self: if available_at_series is None: return self - source_tz = available_at.tzinfo - - if source_tz is not None and self.index.tz is not None: - # DST-correct: interpret the date in AvailableAt's timezone - index_in_tz = self.index.tz_convert(source_tz) - else: - index_in_tz = self.index - - # Step 1: cutoff date = reference date + day_offset (mirrors apply()) - cutoff_day = index_in_tz.floor("D") + pd.Timedelta(days=available_at.day_offset) - - # Step 2: set the time of day (mirrors datetime.combine in apply()) - cutoff = cutoff_day + pd.Timedelta( - hours=available_at.time_of_day.hour, - minutes=available_at.time_of_day.minute, - ) - - # Convert back to the data's timezone for comparison - if source_tz is not None and self.index.tz is not None: - cutoff = cutoff.tz_convert(self.index.tz) - + cutoff = available_at.apply_index(self.index) data_filtered = self.data[available_at_series <= cutoff] return self._copy_with_data(data=data_filtered) diff --git a/packages/openstef-core/src/openstef_core/types.py b/packages/openstef-core/src/openstef_core/types.py index d312d7c0a..8c67f9da3 100644 --- a/packages/openstef-core/src/openstef_core/types.py +++ b/packages/openstef-core/src/openstef_core/types.py @@ -9,13 +9,16 @@ key domain types like lead times, availability timestamps, and quantile values. """ +from __future__ import annotations + import re from datetime import datetime, time, timedelta -from datetime import tzinfo as dt_tzinfo +from datetime import timezone as dt_timezone from enum import StrEnum from functools import total_ordering from typing import Any, Literal, Self, override +import pandas as pd import pytz from pydantic import GetCoreSchemaHandler, TypeAdapter from pydantic_core import CoreSchema, core_schema @@ -126,25 +129,30 @@ class AvailableAt(PydanticStringPrimitive): - *n* is the day offset (negative or zero) - *HHMM* is the time of day - For example, ``D-1T0600`` means "6:00 AM on the previous day". + An optional timezone suffix ``[Region/City]`` (RFC 9557 bracket + notation) makes the availability time timezone-aware. Both pytz + and stdlib ``datetime.timezone`` objects are accepted; they + round-trip through the IANA name via ``str(tz)`` / + ``pytz.timezone(name)``. + + For example, ``D-1T0600[Europe/Amsterdam]`` means "6:00 + Europe/Amsterdam on the previous day". The legacy ``DnTHH:MM`` format (with colon) is also accepted by ``from_string()``. - An optional timezone can be attached so that the time-of-day is - interpreted in that timezone (important for DST correctness). - Both pytz and stdlib ``datetime.timezone`` objects are supported. - Example: >>> from datetime import time + >>> import pytz >>> at = AvailableAt(day_offset=-1, time_of_day=time(6, 0)) >>> str(at) 'D-1T0600' - >>> at.apply(datetime(2026, 3, 6)) - datetime.datetime(2026, 3, 5, 6, 0) + >>> tz_at = AvailableAt(day_offset=-1, time_of_day=time(6, 0), tzinfo=pytz.timezone('Europe/Amsterdam')) + >>> str(tz_at) + 'D-1T0600[Europe/Amsterdam]' """ - def __init__(self, day_offset: int, time_of_day: time, *, tzinfo: dt_tzinfo | None = None): - """Initialise with a day offset and time of day. + def __init__(self, day_offset: int, time_of_day: time, *, tzinfo: pytz.BaseTzInfo | dt_timezone | None = None): + """Initialise with a day offset, time of day, and optional timezone. Args: day_offset: Day offset from the reference day (must be ≤ 0). @@ -165,20 +173,25 @@ def __init__(self, day_offset: int, time_of_day: time, *, tzinfo: dt_tzinfo | No self.tzinfo = tzinfo def __str__(self) -> str: - """Converts to string in ``DnTHHMM`` format (Windows-safe, no colon). + """Converts to string in ``DnTHHMM`` or ``DnTHHMM[tz]`` format. Returns: - String representation in ``DnTHHMM`` format. + String representation, with optional ``[timezone]`` suffix. """ - return f"D{self.day_offset}T{self.time_of_day.hour:02}{self.time_of_day.minute:02}" + base = f"D{self.day_offset}T{self.time_of_day.hour:02}{self.time_of_day.minute:02}" + if self.tzinfo is not None: + return f"{base}[{self.tzinfo}]" + return base @classmethod - def from_string(cls, s: str, *, tzinfo: dt_tzinfo | None = None) -> Self: - """Creates an instance from a string in ``DnTHHMM`` or ``DnTHH:MM`` format. + def from_string(cls, s: str) -> Self: + """Creates an instance from a string in ``DnTHHMM[tz]`` format. + + Accepts an optional ``[Region/City]`` timezone suffix. The + legacy colon format ``DnTHH:MM`` is also accepted. Args: - s: String in ``DnTHHMM`` or ``DnTHH:MM`` format to parse. - tzinfo: Optional timezone to attach to the parsed instance. + s: String to parse. Returns: AvailableAt instance parsed from the string. @@ -186,31 +199,36 @@ def from_string(cls, s: str, *, tzinfo: dt_tzinfo | None = None) -> Self: Raises: ValueError: If the string format is invalid or day offset is positive. """ - match = re.match(r"D(-?\d+)T(\d{2}):?(\d{2})", s) + match = re.match(r"D(-?\d+)T(\d{2}):?(\d{2})(?:(Z)|\[([^\]]+)\])?$", s) if not match: error_message = f"Cannot convert {s} to {cls.__name__}" raise ValueError(error_message) - days_part, hours_part, minutes_part = match.groups() + days_part, hours_part, minutes_part, z_part, tz_part = match.groups() - # Day offset must be negative or zero (available before/at the reference day) if int(days_part) > 0: msg = f"Day offset must be negative or zero, got {days_part}" raise ValueError(msg) + if z_part: + resolved_tz = pytz.UTC + elif tz_part: + resolved_tz = pytz.timezone(tz_part) + else: + resolved_tz = None + return cls( day_offset=int(days_part), time_of_day=time(hour=int(hours_part), minute=int(minutes_part)), - tzinfo=tzinfo, + tzinfo=resolved_tz, ) def apply(self, date: datetime) -> datetime: """Apply this availability offset to a reference date. The time-of-day is interpreted in ``self.tzinfo`` (falls back to - ``date.tzinfo``). The result is returned in the reference date's + ``date.tzinfo``). The result is returned in the reference date's timezone, or naive when the reference date is naive. - Both pytz and stdlib ``datetime.timezone`` are supported. Args: date: The reference date to apply the availability offset to. @@ -235,6 +253,35 @@ def apply(self, date: datetime) -> datetime: return aware.astimezone(date.tzinfo) return naive_result + def apply_index(self, index: pd.DatetimeIndex) -> pd.DatetimeIndex: + """Vectorized version of :meth:`apply` for a pandas DatetimeIndex. + + Same timezone logic as :meth:`apply`: the time-of-day is + interpreted in ``self.tzinfo`` (falls back to ``index.tz``), + then converted back to the index's timezone. + + Args: + index: DatetimeIndex of reference dates. + + Returns: + DatetimeIndex of cutoff timestamps, in the same timezone as *index*. + """ + source_tz = self.tzinfo + data_tz = index.tz + + work_index = index.tz_convert(source_tz) if source_tz is not None and data_tz is not None else index + + cutoff = work_index.floor("D") + pd.Timedelta( + days=self.day_offset, + hours=self.time_of_day.hour, + minutes=self.time_of_day.minute, + ) + + if source_tz is not None and data_tz is not None: + cutoff = cutoff.tz_convert(data_tz) + + return cutoff + class Quantile(float): """A float subclass representing a quantile value between 0 and 1. @@ -297,7 +344,7 @@ def format(self) -> str: return f"quantile_P{value:.1f}" @staticmethod - def parse(quantile_str: str) -> "Quantile": + def parse(quantile_str: str) -> Quantile: """Static method to parse a quantile string back to a Quantile object. Args: diff --git a/packages/openstef-core/tests/unit/test_types.py b/packages/openstef-core/tests/unit/test_types.py index d8ddd6ba7..c2636872c 100644 --- a/packages/openstef-core/tests/unit/test_types.py +++ b/packages/openstef-core/tests/unit/test_types.py @@ -49,8 +49,23 @@ def test_lead_time_from_string_roundtrip(input_delta: timedelta): @pytest.mark.parametrize( ("available_at", "expected_string"), [ - pytest.param(AvailableAt(day_offset=-1, time_of_day=time(6, 0)), "D-1T0600", id="D-1T0600"), - pytest.param(AvailableAt(day_offset=-2, time_of_day=time(12, 0)), "D-2T1200", id="D-2T1200"), + pytest.param(AvailableAt(day_offset=-1, time_of_day=time(6, 0)), "D-1T0600", id="no_tz"), + pytest.param(AvailableAt(day_offset=-2, time_of_day=time(12, 0)), "D-2T1200", id="no_tz_D-2"), + pytest.param( + AvailableAt(day_offset=-1, time_of_day=time(6, 0), tzinfo=pytz.UTC), + "D-1T0600[UTC]", + id="pytz_utc", + ), + pytest.param( + AvailableAt(day_offset=-1, time_of_day=time(6, 0), tzinfo=pytz.timezone("Europe/Amsterdam")), + "D-1T0600[Europe/Amsterdam]", + id="named_tz", + ), + pytest.param( + AvailableAt(day_offset=-1, time_of_day=time(6, 0), tzinfo=UTC), + "D-1T0600[UTC]", + id="stdlib_utc", + ), ], ) def test_available_at_str(available_at: AvailableAt, expected_string: str): @@ -64,8 +79,16 @@ def test_available_at_str(available_at: AvailableAt, expected_string: str): @pytest.mark.parametrize( "available_at", [ - pytest.param(AvailableAt(day_offset=-1, time_of_day=time(6, 0)), id="D-1T0600"), - pytest.param(AvailableAt(day_offset=-2, time_of_day=time(12, 0)), id="D-2T1200"), + pytest.param(AvailableAt(day_offset=-1, time_of_day=time(6, 0)), id="no_tz"), + pytest.param(AvailableAt(day_offset=-2, time_of_day=time(12, 0)), id="no_tz_D-2"), + pytest.param( + AvailableAt(day_offset=-1, time_of_day=time(6, 0), tzinfo=pytz.UTC), + id="pytz_utc", + ), + pytest.param( + AvailableAt(day_offset=-1, time_of_day=time(6, 0), tzinfo=pytz.timezone("Europe/Amsterdam")), + id="named_tz", + ), ], ) def test_available_at_from_string_roundtrip(available_at: AvailableAt): @@ -75,6 +98,35 @@ def test_available_at_from_string_roundtrip(available_at: AvailableAt): # Assert assert reconstructed.day_offset == available_at.day_offset assert reconstructed.time_of_day == available_at.time_of_day + if available_at.tzinfo is None: + assert reconstructed.tzinfo is None + else: + assert reconstructed.tzinfo is not None + # Both should resolve to the same zone + assert str(reconstructed.tzinfo) == str(available_at.tzinfo) + + +def test_available_at_from_string_legacy_colon_format(): + """The legacy DnTHH:MM format (with colon) should still be accepted.""" + at = AvailableAt.from_string("D-1T06:00") + assert at.day_offset == -1 + assert at.time_of_day == time(6, 0) + assert at.tzinfo is None + + +@pytest.mark.parametrize( + "tz_str", + [ + pytest.param("UTC", id="pytz_utc"), + pytest.param("Europe/Amsterdam", id="named_tz"), + ], +) +def test_available_at_from_string_legacy_colon_with_tz(tz_str: str): + """Legacy colon format combined with timezone suffix.""" + at = AvailableAt.from_string(f"D-1T06:00[{tz_str}]") + assert at.day_offset == -1 + assert at.time_of_day == time(6, 0) + assert str(at.tzinfo) == tz_str def test_available_at_from_string_rejects_positive_days_part(): @@ -87,6 +139,29 @@ def test_available_at_rejects_positive_day_offset(): AvailableAt(day_offset=1, time_of_day=time(6, 0)) +def test_available_at_from_string_rejects_invalid(): + with pytest.raises(ValueError, match="Cannot convert"): + AvailableAt.from_string("INVALID") + + +def test_available_at_from_string_rejects_trailing_garbage(): + with pytest.raises(ValueError, match="Cannot convert"): + AvailableAt.from_string("D-1T0600INVALID") + + +def test_available_at_from_string_z_suffix(): + """'Z' suffix should parse as UTC.""" + at = AvailableAt.from_string("D-1T0600Z") + assert at.day_offset == -1 + assert at.time_of_day == time(6, 0) + assert at.tzinfo == pytz.UTC + + +def test_available_at_from_string_rejects_invalid_tz(): + with pytest.raises(pytz.UnknownTimeZoneError): + AvailableAt.from_string("D-1T0600[INVALID]") + + _AMS = pytz.timezone("Europe/Amsterdam") From e272e90ad24a32a8928aabeb3427982c5c2a9759 Mon Sep 17 00:00:00 2001 From: lschilders Date: Mon, 9 Mar 2026 14:41:58 +0100 Subject: [PATCH 4/5] feature: improved docstring --- packages/openstef-core/src/openstef_core/types.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/openstef-core/src/openstef_core/types.py b/packages/openstef-core/src/openstef_core/types.py index 8c67f9da3..f820bc5b4 100644 --- a/packages/openstef-core/src/openstef_core/types.py +++ b/packages/openstef-core/src/openstef_core/types.py @@ -143,12 +143,12 @@ class AvailableAt(PydanticStringPrimitive): Example: >>> from datetime import time >>> import pytz - >>> at = AvailableAt(day_offset=-1, time_of_day=time(6, 0)) - >>> str(at) - 'D-1T0600' >>> tz_at = AvailableAt(day_offset=-1, time_of_day=time(6, 0), tzinfo=pytz.timezone('Europe/Amsterdam')) >>> str(tz_at) 'D-1T0600[Europe/Amsterdam]' + >>> at = AvailableAt.from_string("D-1T0600") + >>> at.day_offset, at.time_of_day + (-1, datetime.time(6, 0)) """ def __init__(self, day_offset: int, time_of_day: time, *, tzinfo: pytz.BaseTzInfo | dt_timezone | None = None): From bcf2469c5cc5e0cfa0ebd2347ae7128b9eea6384 Mon Sep 17 00:00:00 2001 From: lschilders Date: Mon, 9 Mar 2026 14:47:21 +0100 Subject: [PATCH 5/5] feature: add tests for apply_index Signed-off-by: lschilders --- .../openstef-core/tests/unit/test_types.py | 65 +++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/packages/openstef-core/tests/unit/test_types.py b/packages/openstef-core/tests/unit/test_types.py index c2636872c..667bfc700 100644 --- a/packages/openstef-core/tests/unit/test_types.py +++ b/packages/openstef-core/tests/unit/test_types.py @@ -4,6 +4,7 @@ from datetime import UTC, datetime, time, timedelta, timezone +import pandas as pd import pytest import pytz @@ -253,3 +254,67 @@ def test_available_at_time_of_day(): assert AvailableAt(day_offset=-1, time_of_day=time(6, 0)).time_of_day == time(6, 0) assert AvailableAt(day_offset=-2, time_of_day=time(12, 0)).time_of_day == time(12, 0) assert AvailableAt(day_offset=-1, time_of_day=time(5, 30)).time_of_day == time(5, 30) + + +def test_available_at_apply_index_naive(): + """apply_index on a naive DatetimeIndex returns correct naive cutoffs.""" + + index = pd.DatetimeIndex([ + datetime(2026, 3, 6), # noqa: DTZ001 + datetime(2026, 3, 7), # noqa: DTZ001 + ]) + at = AvailableAt(day_offset=-1, time_of_day=time(6, 0)) + + result = at.apply_index(index) + + expected = pd.DatetimeIndex([ + datetime(2026, 3, 5, 6, 0), # noqa: DTZ001 + datetime(2026, 3, 6, 6, 0), # noqa: DTZ001 + ]) + pd.testing.assert_index_equal(result, expected) + + +def test_available_at_apply_index_utc(): + """apply_index on a UTC index with no self.tzinfo falls back to index tz.""" + + index = pd.to_datetime(["2026-03-06T00:00:00+00:00", "2026-03-07T00:00:00+00:00"]) + at = AvailableAt(day_offset=-1, time_of_day=time(6, 0)) + + result = at.apply_index(index) + + expected = pd.to_datetime(["2026-03-05T06:00:00+00:00", "2026-03-06T06:00:00+00:00"]) + pd.testing.assert_index_equal(result, expected) + + +def test_available_at_apply_index_cross_tz_dst(): + """apply_index with AMS tzinfo on UTC index shifts correctly across DST.""" + # Index in UTC, AvailableAt in Europe/Amsterdam + index = pd.to_datetime([ + "2026-03-29T12:00:00+00:00", # cutoff = Mar 28 06:00 CET = 05:00 UTC + "2026-03-30T12:00:00+00:00", # cutoff = Mar 29 06:00 CEST = 04:00 UTC + ]) + at = AvailableAt(day_offset=-1, time_of_day=time(6, 0), tzinfo=_AMS) + + result = at.apply_index(index) + + expected = pd.to_datetime([ + "2026-03-28T05:00:00+00:00", + "2026-03-29T04:00:00+00:00", + ]) + pd.testing.assert_index_equal(result, expected) + assert result.tz == index.tz + + +def test_available_at_apply_index_matches_apply(): + """apply_index results should match element-wise apply() calls.""" + index = pd.to_datetime([ + "2026-03-28T12:00:00+00:00", + "2026-03-29T12:00:00+00:00", + "2026-03-30T12:00:00+00:00", + ]) + at = AvailableAt(day_offset=-1, time_of_day=time(6, 0), tzinfo=_AMS) + + vectorized = at.apply_index(index) + scalar = pd.DatetimeIndex([at.apply(ts.to_pydatetime()) for ts in index]) + + pd.testing.assert_index_equal(vectorized, scalar)