In [None]:
 #!pip install skforecast

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns



In [None]:

# The dataset is taken from the open book
# "Forecasting: Principles and Practice" (Rob J Hyndman and George Athanasopoulos)
# https://otexts.com/fpp3/

# CSV file hosted in the skforecast GitHub repository.
url = 'https://raw.githubusercontent.com/JoaquinAmatRodrigo/skforecast/master/data/h2o_exog.csv'

# Comma is the default delimiter of read_csv, so it is not passed explicitly.
df = pd.read_csv(url)
display(df)

##  **Split dei dati e analisi**

N.B. Controllare sempre che la data sia nel formato ISO `YYYY-MM-DD` (ad esempio: 1992-04-01).

In [None]:

# Hold out the last 36 rows as the test set; all earlier rows form the training set.
f_train = df.iloc[:-36]
f_test = df.iloc[-36:]

# Target column ('y') of each partition.
f_train_y, f_test_y = f_train['y'], f_test['y']

# Plot both partitions on a shared axis.
fig, ax = plt.subplots(figsize=(12, 6))
f_train_y.plot(ax=ax, label='train')
f_test_y.plot(ax=ax, label='test')
ax.legend()
plt.show()

# **Forecasting**

Per applicare un modello di machine learning a una serie storica, quest'ultima deve essere trasformata in una matrice di **feature** in cui ogni valore è correlato alla finestra temporale che lo precede.





https://joaquinamatrodrigo.github.io/skforecast/0.4.3/quick-start/introduction-forecasting.html

In [None]:

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler


from skforecast.ForecasterAutoreg import ForecasterAutoreg

# Forecast horizon: the 36 test months plus 60 further, unknown months.
steps = 36 + 60

# Training: a Ridge regressor, with a StandardScaler passed as the transformer for y.
forecaster = ForecasterAutoreg(
    regressor=Ridge(),
    lags=30,  # the time window that defines the rows of the feature matrix
    transformer_y=StandardScaler(),
)
forecaster.fit(y=f_train_y)

# Prediction over the whole horizon.
f_pred = forecaster.predict(steps=steps)

# Plot the training data, the test data and the predictions on a shared axis.
fig, ax = plt.subplots(figsize=(12, 6))
f_train_y.plot(ax=ax, label='train')
f_test_y.plot(ax=ax, label='test')
f_pred.plot(ax=ax, label='predictions')
ax.legend();


In [None]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score


# Score only the leading part of the forecast that overlaps the test set.
# The slice length is taken from the test series itself rather than from a
# hard-coded count of extra steps, so y_true and y_pred stay aligned even if
# the forecast horizon is changed.
mse_all = mean_squared_error(f_test_y, f_pred[:len(f_test_y)])
r2_all  = r2_score(f_test_y, f_pred[:len(f_test_y)])
print(" MSE:", mse_all, "  R2:", r2_all)

 MSE: 0.010635861456363768   R2: 0.7507708719389445


# **Ottimizzazione degli iperparametri**

In [None]:
from skforecast.model_selection import grid_search_forecaster

# Candidate values for the regressor hyperparameter `alpha`.
param_grid = {'alpha': [0.0001, 0.001, 0.01, 0.1, 1., 10.]}

# Candidate lag-window sizes: [10, 20, 30, 40].
lags_grid = list(range(10, 41, 10))

# The first half of the training series is used as the initial training size.
# NOTE(review): return_best=True presumably updates `forecaster` in place with
# the best configuration found — confirm against the skforecast documentation.
grid_search = grid_search_forecaster(
    forecaster=forecaster,
    y=f_train_y,
    param_grid=param_grid,
    lags_grid=lags_grid,
    steps=steps,
    metric='mean_absolute_error',
    initial_train_size=len(f_train) // 2,
    return_best=True,
    verbose=False,
)


In [None]:

# Training with fixed hyperparameters: Ridge alpha=0.001 and 20 lags.
fa_autoreg = ForecasterAutoreg(
    regressor=Ridge(alpha=0.001),
    lags=20,
    transformer_y=StandardScaler(),
)
fa_autoreg.fit(y=f_train_y)

# Prediction over the same horizon (`steps`).
fa_pred = fa_autoreg.predict(steps=steps)

# Plot the training data, the test data and the predictions on a shared axis.
fig, ax = plt.subplots(figsize=(12, 6))
f_train_y.plot(ax=ax, label='train')
f_test_y.plot(ax=ax, label='test')
fa_pred.plot(ax=ax, label='predictions')
ax.legend();

In [None]:
# Score only the leading part of the forecast that overlaps the test set.
# The slice length is taken from the test series itself rather than from a
# hard-coded count of extra steps, so y_true and y_pred stay aligned even if
# the forecast horizon is changed.
mse_all = mean_squared_error(f_test_y, fa_pred[:len(f_test_y)])
r2_all  = r2_score(f_test_y, fa_pred[:len(f_test_y)])
print(" MSE:", mse_all, "  R2:", r2_all)

 MSE: 0.009775302911010137   R2: 0.7709362583332594
