In [94]:
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
path = str(Path.cwd().parent)
print(path)
sys.path.insert(1, path)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
c:\Users\Joaquín Amat\Documents\GitHub\skforecast


In [95]:
import pandas as pd
import numpy as np
import os
from skforecast.ForecasterAutoreg import ForecasterAutoreg
from skforecast.datasets import fetch_dataset
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

In [96]:
# Download data
# ==============================================================================
data = fetch_dataset(
    name="h2o", raw=True, kwargs_read_csv={"names": ["y", "datetime"], "header": 0}
)

# Data preprocessing
# ==============================================================================
data['datetime'] = pd.to_datetime(data['datetime'], format='%Y-%m-%d')
data = data.set_index('datetime')
data = data.asfreq('MS')
data = data['y']
data = data.sort_index()

# Split train-test
# ==============================================================================
steps = 36
data_train = data[:-steps]
data_test  = data[-steps:]

h2o
---
Monthly expenditure ($AUD) on corticosteroid drugs that the Australian health
system had between 1991 and 2008.
Hyndman R (2023). fpp3: Data for Forecasting: Principles and Practice(3rd
Edition). http://pkg.robjhyndman.com/fpp3package/,https://github.com/robjhyndman
/fpp3package, http://OTexts.com/fpp3.
Shape of the dataset: (204, 2)


In [107]:
forecaster = ForecasterAutoreg(
                 regressor = LGBMRegressor(random_state=123, verbose=-1),
                 lags            = 15,
                 differentiation = 1,
                 transformer_y  = StandardScaler()
             )

forecaster.fit(y=data_train)
predictions = forecaster.predict(steps=36)

y_test = forecaster.transformer_y.transform(data_test.to_numpy().reshape(-1, 1)).flatten()
y_true = forecaster.transformer_y.transform(predictions.to_numpy().reshape(-1, 1)).flatten()
y_test = forecaster.differentiator.transform(y_test)[forecaster.differentiation:]
y_true = forecaster.differentiator.transform(y_true)[forecaster.differentiation:]
residuals = y_test - y_true
residuals = np.sort(residuals)

forecaster.set_out_sample_residuals(
    y_true=data_test,
    y_pred = predictions
)
out_sample_residuals_ = np.sort(forecaster.out_sample_residuals_)
np.testing.assert_array_almost_equal(residuals, out_sample_residuals_)


from sklearn.linear_model import LinearRegression

def test_forecaster_set_outsample_residuals_when_transformer_y_and_diferentiation():
    data_train = pd.Series(np.random.normal(loc=0, scale=1, size=100), index=range(100))
    data_test  = pd.Series(np.random.normal(loc=0, scale=1, size=36), index=range(100, 136))
    forecaster = ForecasterAutoreg(
                     regressor       = LinearRegression(),
                     lags            = 5,
                     differentiation = 1,
                     transformer_y   = StandardScaler()
                 )

    forecaster.fit(y=data_train)
    predictions = forecaster.predict(steps=36)
    forecaster.set_out_sample_residuals(
        y_true = data_test,
        y_pred = predictions
    )

    y_test = forecaster.transformer_y.transform(data_test.to_numpy().reshape(-1, 1)).flatten()
    y_true = forecaster.transformer_y.transform(predictions.to_numpy().reshape(-1, 1)).flatten()
    y_test = forecaster.differentiator.transform(y_test)[forecaster.differentiation:]
    y_true = forecaster.differentiator.transform(y_true)[forecaster.differentiation:]
    residuals = y_test - y_true
    residuals = np.sort(residuals)
    out_sample_residuals_ = np.sort(forecaster.out_sample_residuals_)

    np.testing.assert_array_almost_equal(residuals, out_sample_residuals_)

test_forecaster_set_outsample_residuals_when_transformer_y_and_diferentiation()



In [106]:
data_train = pd.Series(np.random.normal(loc=0, scale=1, size=100), index=range(100))
data_test  = pd.Series(np.random.normal(loc=0, scale=1, size=36), index=range(100, 136))
forecaster = ForecasterAutoreg(
                    regressor       = LinearRegression(),
                    lags            = 5,
                    differentiation = 1,
                    transformer_y   = StandardScaler()
                )

forecaster.fit(y=data_train)
predictions = forecaster.predict(steps=36)
forecaster.set_out_sample_residuals(
    y_true = data_test,
    y_pred = predictions
)

y_test = forecaster.transformer_y.transform(data_test.to_numpy().reshape(-1, 1)).flatten()
y_true = forecaster.transformer_y.transform(predictions.to_numpy().reshape(-1, 1)).flatten()
y_test = forecaster.differentiator.transform(y_test)[forecaster.differentiation:]
y_true = forecaster.differentiator.transform(y_true)[forecaster.differentiation:]
residuals = y_test - y_true
residuals = np.sort(residuals)
out_sample_residuals_ = np.sort(forecaster.out_sample_residuals_)

predictions



100    1.083226
101    0.908652
102    1.245104
103    1.019528
104    1.071558
105    1.039518
106    1.137090
107    1.115335
108    1.123438
109    1.117930
110    1.149029
111    1.159826
112    1.170058
113    1.175110
114    1.189643
115    1.202465
116    1.214292
117    1.223804
118    1.235226
119    1.247087
120    1.258880
121    1.269863
122    1.281043
123    1.292468
124    1.304007
125    1.315339
126    1.326626
127    1.337968
128    1.349378
129    1.360756
130    1.372102
131    1.383448
132    1.394818
133    1.406189
134    1.417551
135    1.428908
Name: pred, dtype: float64

In [110]:
forecaster.binner.n_bins_[0]

np.int64(10)

In [118]:
2510//forecaster.binner.n_bins_[0]

np.int64(251)

In [83]:
def print_directory_tree(startpath, indent_level=0, ignore_folders=None):
    if ignore_folders is None:
        ignore_folders = ['__pycache__', 'tests']  # Default folders to ignore

    for item in os.listdir(startpath):
        path = os.path.join(startpath, item)

        # Skip ignored folders
        if os.path.isdir(path) and item in ignore_folders:
            continue

        print(' ' * indent_level + '|-- ' + item)  # Print the item with indent

        # Recursively call for subdirectories
        if os.path.isdir(path):
            print_directory_tree(path, indent_level + 4, ignore_folders)

# Example usage: Set the path of the directory you want to print
directory_path = '/home/ubuntu/varios/skforecast/skforecast'
print_directory_tree(directory_path , ignore_folders=['__pycache__', 'tests', 'htmlcov'])

FileNotFoundError: [WinError 3] The system cannot find the path specified: '/home/ubuntu/varios/skforecast/skforecast'

    
    |-- skforecast
        |-- autoreg
            |-- ForecasterRecursive.py                   
            |-- ForecasterMultiseriesRecursive.py
            |-- ForecasterSarimax.py
            |-- ForecasterEquivalentDate.py             
            |-- __init__.py
        |-- direct
            |-- ForecasterDirect.py
            |-- ForecasterMultivariateDirect.py
            |-- __init__.py
        |-- deep_learning
            |-- ForecasterRnn.py
        |-- base
            |-- ForecasterBase.py
        |-- __init__.py