Skip to content

Commit

Permalink
Calendar mapping bug (#70)
Browse files Browse the repository at this point in the history
* Move to ruff for formatting and import sorting

* Add test and fix calendar mapping issue

* Pin pandas to <2.2 for now

* Update changelog

* Add changes to linters/formatters to changelog
  • Loading branch information
BSchilperoort committed Feb 29, 2024
1 parent c0ef954 commit 4c336c0
Show file tree
Hide file tree
Showing 6 changed files with 53 additions and 46 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/).

## [Unreleased]

### Changed
- Moved to ruff formatter instead of black ([#70](https://github.com/AI4S2S/lilio/pull/70))
- Do import sorting with ruff instead of isort ([#70](https://github.com/AI4S2S/lilio/pull/70))

### Fixed
- Fixed issue with calendar generation when the (rightmost) target period crossed into the new year ([#70](https://github.com/AI4S2S/lilio/pull/70)).

## 0.4.2 (2024-01-19)
### Changed
- Consistent output type of train-test split as input ([#62](https://github.com/AI4S2S/lilio/pull/62)).
Expand Down
2 changes: 1 addition & 1 deletion lilio/calendar.py
Original file line number Diff line number Diff line change
Expand Up @@ -488,7 +488,7 @@ def _set_year_range_from_timestamps(self):

# ensure that the input data could always cover the advent calendar
# last date check
if self._map_year(max_year).iloc[0].right > self._last_timestamp:
while self._map_year(max_year).iloc[0].right > self._last_timestamp:
max_year -= 1
# first date check
while self._map_year(min_year).iloc[-1].right <= self._first_timestamp:
Expand Down
4 changes: 2 additions & 2 deletions lilio/resampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def _check_valid_resampling_methods(method: ResamplingMethod):


def _mark_target_period(
input_data: Union[pd.DataFrame, xr.Dataset]
input_data: Union[pd.DataFrame, xr.Dataset],
) -> Union[pd.DataFrame, xr.Dataset]:
"""Mark interval periods that fall within the given number of target periods.
Expand Down Expand Up @@ -77,7 +77,7 @@ def _mark_target_period(


def _resample_bins_constructor(
intervals: Union[pd.Series, pd.DataFrame]
intervals: Union[pd.Series, pd.DataFrame],
) -> pd.DataFrame:
"""Restructures the interval object into a tidy DataFrame.
Expand Down
6 changes: 3 additions & 3 deletions lilio/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@


def check_timeseries(
data: Union[pd.Series, pd.DataFrame, xr.DataArray, xr.Dataset]
data: Union[pd.Series, pd.DataFrame, xr.DataArray, xr.Dataset],
) -> None:
"""Check if input data contains valid time data.
Expand Down Expand Up @@ -96,7 +96,7 @@ def check_empty_intervals(indices_list: list[np.ndarray]) -> None:


def infer_input_data_freq(
data: Union[pd.Series, pd.DataFrame, xr.DataArray, xr.Dataset]
data: Union[pd.Series, pd.DataFrame, xr.DataArray, xr.Dataset],
) -> pd.Timedelta:
"""Infer the frequency of the input data, for comparison with the calendar freq.
Expand Down Expand Up @@ -207,7 +207,7 @@ def convert_interval_to_bounds(data: xr.Dataset) -> xr.Dataset:


def check_reserved_names(
input_data: Union[pd.Series, pd.DataFrame, xr.DataArray, xr.Dataset]
input_data: Union[pd.Series, pd.DataFrame, xr.DataArray, xr.Dataset],
) -> None:
"""Check if reserved names are already in the input data. E.g. "anchor_year"."""
reserved_names_pd = ["anchor_year", "i_interval", "is_target"]
Expand Down
56 changes: 16 additions & 40 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ classifiers = [
dependencies = [
"netcdf4",
"numpy",
"pandas",
"pandas<2.2",
"matplotlib",
"xarray",
"scikit-learn",
Expand All @@ -69,8 +69,6 @@ dev = [
"bump-my-version",
"hatch",
"ruff",
"isort",
"black",
"mypy",
"pytest",
"pytest-cov",
Expand All @@ -96,10 +94,9 @@ features = ["dev", "bokeh"]
lint = [
"ruff check .",
"mypy .",
"isort --check-only --diff .",
"black --check --diff .",
"ruff format . --check",
]
format = ["isort .", "black .", "lint",]
format = ["ruff format .", "ruff check . --fix", "lint",]
test = ["pytest ./lilio/ ./tests/ --doctest-modules",]
coverage = [
"pytest --cov --cov-report term --cov-report xml --junitxml=xunit-result.xml tests/",
Expand All @@ -120,19 +117,19 @@ testpaths = ["tests"]
ignore_missing_imports = true
python_version = "3.10"

[tool.black]
[tool.ruff]
target-version = "py39"
line-length = 88
target-version = ['py39', 'py310', 'py311']
include = '\.pyi?$'
exclude = ["docs", "build"]

[tool.ruff]
[tool.ruff.lint]
select = [
"E", # pycodestyle
"F", # pyflakes
"B", # flake8-bugbear
"D", # pydocstyle
"C90", # mccabe complexity
# "I", # isort (autosort not working correctly, disabled for now).
"I", # isort (autosort not working correctly, disabled for now).
"N", # PEP8-naming
"UP", # pyupgrade (upgrade syntax to current syntax)
"PLE", # Pylint error https://github.com/charliermarsh/ruff#error-ple
Expand All @@ -148,41 +145,20 @@ extend-select = [
ignore = [
"PLR2004", # magic value used in comparsion (i.e. `if ndays == 28: month_is_feb`).
]
# Allow autofix for all enabled rules (when `--fix`) is provided.
fixable = ["A", "B", "C90", "D", "E", "F", "TID252", "UP"]
unfixable = []
line-length = 88
exclude = ["docs", "build"]
# Allow unused variables when underscore-prefixed.
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
target-version = "py39"
pydocstyle.convention = "google"
mccabe.max-complexity = 10

[tool.ruff.per-file-ignores]
[tool.ruff.lint.per-file-ignores]
"tests/**" = ["D"]

[tool.ruff.pydocstyle]
convention = "google"

[tool.ruff.mccabe]
max-complexity = 10

# Configuration for when ruff's import sorting is fixed.
# [tool.ruff.isort]
# known-first-party = ["lilio"]
# force-single-line = true
# lines-after-imports = 2
# no-lines-before = ["future","standard-library","third-party","first-party","local-folder"]

[tool.isort]
py_version=39
skip = [".gitignore", ".dockerignore"]
skip_glob = ["docs/*"]
force_single_line = true
lines_after_imports = 2
no_lines_before = ["FUTURE","STDLIB","THIRDPARTY","FIRSTPARTY","LOCALFOLDER"]
known_first_party = ["lilio"]
src_paths = ["lilio", "tests"]
line_length = 120
[tool.ruff.lint.isort]
known-first-party = ["lilio"]
force-single-line = true
lines-after-imports = 2
no-lines-before = ["future","standard-library","third-party","first-party","local-folder"]

[tool.coverage.run]
branch = true
Expand Down
24 changes: 24 additions & 0 deletions tests/test_calendar.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,30 @@ def test_allow_overlap(self, allow_overlap, expected_anchors):
cal.map_years(2020, 2022)
assert np.array_equal(expected_anchors, cal.get_intervals().index.values)

def test_extra_year_edgecase(self):
"""Weird things can happen when a calendar interval crosses over the new year.
2 years have to be subtracted from the last datapoint's year.
"""
cal = Calendar("12-25")
cal.add_intervals("target", length="1M")
dates_inclusive = pd.date_range(start="2007-01-01", end="2010-01-01", freq="1d")
dates_exclusive = pd.date_range(start="2007-01-01", end="2009-12-31", freq="1d")
test_data_incl = pd.Series(
data=np.zeros(len(dates_inclusive)), index=dates_inclusive
)
test_data_excl = pd.Series(
data=np.zeros(len(dates_exclusive)), index=dates_exclusive
)

cal.map_to_data(test_data_incl)
n_years_inclusive = len(cal.get_intervals())
cal.map_to_data(test_data_excl)
n_years_exclusive = len(cal.get_intervals())

assert n_years_exclusive == 2
assert n_years_inclusive == 2


class TestAnchorKwarg:
correct_inputs = [ # format: (anchor, anchor_fmt, anchor_str)
Expand Down

0 comments on commit 4c336c0

Please sign in to comment.