Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Fixed training pipeline type hints. #541

Merged
merged 2 commits into from
Jun 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions openstef/model/regressors/flatliner.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ class FlatlinerRegressor(OpenstfRegressor, RegressorMixin):
def __init__(self, quantiles=None):
"""Initialize FlatlinerRegressor.

The model always predicts 0.0, regardless of the input features. The model is
meant to be used for flatliner locations that still expect a prediction while
preserving the prediction interface.
The model always predicts 0.0, regardless of the input features. The model is meant to be used for flatliner
locations that still expect a prediction while preserving the prediction interface.

"""
super().__init__()
self.quantiles = quantiles
Expand Down
2 changes: 1 addition & 1 deletion openstef/model_selection/model_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def split_data_train_validation_test(
validation_fraction: float = 0.15,
back_test: bool = False,
stratification_min_max: bool = True,
) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
"""Split input data into train, test and validation set.

Function for splitting data with features in a train, test and
Expand Down
13 changes: 8 additions & 5 deletions openstef/pipeline/train_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# SPDX-License-Identifier: MPL-2.0
import logging
import os
from typing import Optional, Union
from typing import Optional, Union, Tuple

import pandas as pd
import structlog
Expand Down Expand Up @@ -155,7 +155,7 @@ def train_model_pipeline_core(
input_data: pd.DataFrame,
old_model: OpenstfRegressor = None,
horizons: list[float] = DEFAULT_TRAIN_HORIZONS_HOURS,
) -> Union[
) -> Tuple[
OpenstfRegressor,
Report,
ModelSpecificationDataClass,
Expand Down Expand Up @@ -246,7 +246,9 @@ def train_pipeline_common(
test_fraction: float = 0.0,
backtest: bool = False,
test_data_predefined: pd.DataFrame = pd.DataFrame(),
) -> tuple[OpenstfRegressor, Report, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
) -> tuple[
OpenstfRegressor, Report, pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame
]:
"""Common pipeline shared with operational training and backtest training.

Args:
Expand Down Expand Up @@ -314,7 +316,8 @@ def train_pipeline_common(

def train_pipeline_step_load_model(
pj: PredictionJobDataClass, serializer: MLflowSerializer
) -> tuple[OpenstfRegressor, ModelSpecificationDataClass, Union[int, float]]:
) -> Tuple[OpenstfRegressor, ModelSpecificationDataClass, Union[int, float]]:
old_model: Optional[OpenstfRegressor]
try:
old_model, model_specs = serializer.load_model(experiment_name=str(pj.id))
old_model_age = old_model.age # Age attribute is openstef specific
Expand Down Expand Up @@ -509,7 +512,7 @@ def train_pipeline_step_split_data(
test_fraction: float,
backtest: bool = False,
test_data_predefined: pd.DataFrame = pd.DataFrame(),
) -> Union[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
"""The default way to perform train, val, test split.

Args:
Expand Down