-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
cff3403
commit 1a59fc5
Showing
4 changed files
with
243 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,2 @@ | ||
from models import GenericBlock, TrendBlock, SeasonalBlock, NBeats | ||
from .nbeats import GenericBlock, TrendBlock, SeasonalBlock, NBeats | ||
from .utilities import prep_time_series, prep_multiple_time_series |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
""" | ||
Helper functions for using the NBeats model | ||
""" | ||
|
||
from pandas import DataFrame, Series | ||
import numpy as np | ||
|
||
class InvalidArgumentError(Exception):
    """Raised when a user-supplied argument fails validation."""
|
||
def prep_time_series(data,
                     lookback: int = 7,
                     horizon: int = 1) -> (np.ndarray, np.ndarray):
    """
    Slice a univariate time series into training windows and their labels.

    Every run of ``lookback * horizon + horizon`` consecutive observations
    becomes one sample: the first ``lookback * horizon`` values form the
    training window and the trailing ``horizon`` values form the label.

    E.g. if lookback = 2 and horizon = 2
        Input:  [1, 2, 3, 4, 5, 6, 7]
        Output: ([[1, 2, 3, 4], [2, 3, 4, 5]], [[5, 6], [6, 7]])

    inputs:
        data: univariate time series you want to create windows for. Can be
            a pandas DataFrame/Series, a numpy array or any sequence numpy
            can convert (list, tuple, ...). A two-dimensional input with
            exactly one column is flattened to one dimension.
        lookback: multiple of the forecast horizon used as the length of the
            training window (must be >= 1)
        horizon: how far out into the future you want to predict
            (must be >= 1)

    returns:
        a tuple ``(windows, labels)`` of numpy arrays where, with
        ``n = max(len(data) - lookback * horizon - horizon + 1, 0)``,
        ``windows`` has shape ``(n, lookback * horizon)`` and ``labels`` has
        shape ``(n, horizon)``. A series too short to hold a single window
        yields empty arrays (n = 0) rather than an error.

    raises:
        InvalidArgumentError: if ``data`` has more than one column, or if
            ``lookback`` or ``horizon`` is smaller than 1.
    """
    if lookback < 1 or horizon < 1:
        raise InvalidArgumentError(
            "lookback and horizon must both be at least 1")

    # convert data into a numpy array, if necessary
    if isinstance(data, (DataFrame, Series)):
        series = data.values
    else:
        series = np.asarray(data)

    if series.ndim > 1:
        if series.shape[1] > 1:
            raise InvalidArgumentError(
                "Input should be a univariate time series with only a "
                "single column")
        if series.ndim == 2 and series.shape[1] == 1:
            # Drop the singleton column so the outputs are 2-D as documented
            # instead of carrying a trailing axis of length one.
            series = series[:, 0]

    # size of training window
    backcast_size = lookback * horizon

    # total length of data for training window + horizon
    window_size = backcast_size + horizon

    # number of complete windows that fit in the series (never negative)
    n_windows = max(len(series) - window_size + 1, 0)

    # Row i holds the indexes i, i + 1, ..., i + window_size - 1; the
    # (n_windows, 1) column of start positions broadcasts against the
    # (window_size,) row of offsets.
    window_indexes = (np.arange(n_windows)[:, np.newaxis]
                      + np.arange(window_size))

    windowed_array = series[window_indexes]

    return windowed_array[:, :backcast_size], windowed_array[:, backcast_size:]
|
||
def prep_multiple_time_series(data,
                              label_col: str,
                              data_col: str,
                              lookback: int = 7,
                              horizon: int = 1):
    """
    Creates training windows for time series that are stacked on top of each
    other

    Example (lookback = 2, horizon = 1):
        inputs:  [['ar', 1],
                  ['ar', 2],
                  ['ar', 3],
                  ['br', 5],
                  ['br', 6],
                  ['br', 7]]
        outputs: ([[1, 2],      [[3],
                   [5, 6]],      [7]])
    It treats the values associated with 'ar' and 'br' as separate time
    series, so no window ever spans two different labels.

    Arguments:
        data: pandas DataFrame that has at least two columns, one that are
            labels for each unique time series in your dataset, and another
            that are the time series values
        label_col: the name of the column that labels each time series
        data_col: the column that contains the time series values
        lookback: what multiple of your horizon you want your training data
            to be, eg -- a horizon of 2 and lookback of 5 creates a training
            window of 10
        horizon: how far into the future you want to predict

    Returns:
        a tuple ``(windows, labels)`` of numpy arrays: the per-series results
        of ``prep_time_series`` stacked vertically, in order of each label's
        first appearance. ``windows`` has ``lookback * horizon`` columns and
        ``labels`` has ``horizon`` columns. If ``data`` contains no rows,
        both arrays are empty (zero rows).
    """
    # will be used to contain each unique time series inside the dataset
    ts_windows = []
    ts_vals = []

    # look the label column up once rather than on every iteration
    label_values = data[label_col]

    # create windows + labels for each timeseries in the dataset
    for label in label_values.unique():
        series = data.loc[label_values == label, data_col].values
        windows, labels = prep_time_series(series, lookback, horizon)
        ts_windows.append(windows)
        ts_vals.append(labels)

    if not ts_windows:
        # np.vstack raises ValueError on an empty list; return correctly
        # shaped empty arrays so callers can handle "no data" uniformly.
        return (np.empty((0, lookback * horizon)),
                np.empty((0, horizon)))

    return np.vstack(ts_windows), np.vstack(ts_vals)