In [None]:
!pip install skforecast

In [None]:
import numpy as np
import pandas as pd

# Gráficos
# ==============================================================================
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
plt.rcParams['lines.linewidth'] = 1.5
%matplotlib inline


# Modelado y Forecasting
# ==============================================================================
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

from skforecast.ForecasterAutoreg import ForecasterAutoreg
from skforecast.ForecasterAutoregCustom import ForecasterAutoregCustom
from skforecast.ForecasterAutoregDirect import ForecasterAutoregDirect
from skforecast.model_selection import grid_search_forecaster
from skforecast.model_selection import backtesting_forecaster
from sklearn.model_selection import GridSearchCV
from skforecast.utils import save_forecaster
from skforecast.utils import load_forecaster

In [None]:
# Warnings configuration
# ==============================================================================
import warnings
# Warning suppression is intentionally left disabled; uncomment the next line
# to silence every warning raised in the notebook.
# warnings.filterwarnings('ignore')

In [None]:
# Data loading
# ==============================================================================
# Read the CSV published in the project's GitHub repository; fields are
# separated by ';'.
file = 'https://raw.githubusercontent.com/JorgeMendiProject/TFM/main/DatasetInterpolado.csv'
dataset = pd.read_csv(file, sep=';')

# Column names, dtypes and non-null counts of the raw frame.
dataset.info()

In [None]:
# Convert the 'Fecha' column from day/month/year strings to datetime values.
dataset = dataset.assign(
    Fecha=pd.to_datetime(dataset['Fecha'], format='%d/%m/%Y')
)

# Confirm the new dtype of 'Fecha'.
dataset.info()

In [None]:
# Index preparation
# ==============================================================================
# 'Fecha' becomes the index, the target column 'TasaTPIB' is renamed to 'y',
# and the frame is put on a sorted, month-end ('M') frequency grid.
# 'Fecha' is no longer a column after set_index, so only the target needs
# renaming.
dataset = (
    dataset
    .set_index('Fecha')
    .rename(columns={'TasaTPIB': 'y'})
    .sort_index()
    .asfreq('M')
)

# The message announces a number of rows, so count them with .sum()
# (.mean() would print the fraction of rows instead).
print(f'Número de filas con missing values: {dataset.isnull().any(axis=1).sum()}', '\n')

# Check that the time index is complete
# ==============================================================================
# Compare the index element-wise against a full date range built from its own
# first date, last date and frequency; the cell displays True when they match.
(dataset.index == pd.date_range(
                    start = dataset.index.min(),
                    end   = dataset.index.max(),
                    freq  = dataset.index.freq)
).all()


In [None]:
# Train-test split
# ==============================================================================
# The last `steps` observations are held out as the test set.
steps = 12
dataset_train = dataset.iloc[:-steps]
dataset_test  = dataset.iloc[-steps:]

print(f"Fechas train : {dataset_train.index.min()} --- {dataset_train.index.max()}  (n={len(dataset_train)})")
print(f"Fechas test  : {dataset_test.index.min()} --- {dataset_test.index.max()}  (n={len(dataset_test)})", '\n')

# Draw both partitions of the target on a single axis.
fig, ax = plt.subplots(figsize=(9, 4))
for _label, _part in (('train', dataset_train), ('test', dataset_test)):
    _part['y'].plot(ax=ax, label=_label)
ax.legend();

In [None]:
# LINEAR REGRESSOR
# Create and train the forecaster
# ==============================================================================
# Autoregressive forecaster backed by a linear regression, using 24 lags of
# the target as predictors; fitted on the training partition only.
forecaster = ForecasterAutoreg(regressor=LinearRegression(), lags=24)
forecaster.fit(y=dataset_train['y'])
forecaster

In [None]:
# Predictions
# ==============================================================================
# Forecast as many steps ahead as there are rows in the test partition
# (12, matching the split made earlier in the notebook).
steps = 12
predicciones = forecaster.predict(steps=steps)
predicciones

In [None]:
# Predictions plot
# ==============================================================================
# Overlay train data, test data and the forecast on one axis.
fig, ax = plt.subplots(figsize=(9, 4))
for _label, _series in (
    ('train', dataset_train['y']),
    ('test', dataset_test['y']),
    ('predicciones', predicciones),
):
    _series.plot(ax=ax, label=_label)
ax.legend();

In [None]:
# Test error of the linear regressor
# ==============================================================================
# Mean squared error between the held-out target values and the forecast.
error_mse = mean_squared_error(y_true=dataset_test['y'], y_pred=predicciones)

print(f"Error de test (mse): {error_mse}", '\n')

In [None]:
# Create and train the forecaster with the selected hyperparameters
# ==============================================================================
# Only lags 1, 12 and 24 are used as predictors.
# NOTE(review): this lag set is hard-coded; the search that selected it is not
# visible in this part of the notebook.
forecaster = ForecasterAutoreg(regressor=LinearRegression(), lags=[1, 12, 24])

forecaster.fit(y=dataset_train['y'])


In [None]:
# Predictions
# ==============================================================================
# Uses whatever value `steps` currently holds in the notebook namespace.
predicciones = forecaster.predict(steps=steps)
print(predicciones)

# Plot
# ==============================================================================
fig, ax = plt.subplots(figsize=(9, 4))
for _label, _series in (
    ('train', dataset_train['y']),
    ('test', dataset_test['y']),
    ('predicciones', predicciones),
):
    _series.plot(ax=ax, label=_label)
ax.legend();

# Test error
# ==============================================================================
error_mse = mean_squared_error(y_true=dataset_test['y'], y_pred=predicciones)

print(f"Error de test (mse) {error_mse}")

In [None]:
# Backtesting
# ==============================================================================
# Each fold forecasts 3 steps; 4 folds cover the last 12 observations,
# i.e. the final year of the monthly series.
steps = 3
n_backtesting = steps * 4

# The forecaster is refitted before every fold on a training window that is
# allowed to grow (fixed_train_size=False).
metrica, predicciones_backtest = backtesting_forecaster(
    forecaster=forecaster,
    y=dataset['y'],
    steps=steps,
    metric='mean_squared_error',
    initial_train_size=len(dataset) - n_backtesting,
    fixed_train_size=False,
    refit=True,
    verbose=True,
)

print(f"Error de backtest: {metrica}",'\n')


In [None]:
# Backtesting plot
# ==============================================================================
# Compare the held-out test values with the backtest predictions.
fig, ax = plt.subplots(figsize=(9, 4))
for _label, _data in (
    ('test', dataset_test['y']),
    ('predicciones_Backtest', predicciones_backtest),
):
    _data.plot(ax=ax, label=_label)
ax.legend();


In [None]:
# Predictor importance
# ==============================================================================
# Ask the fitted forecaster for the importance it assigns to each predictor
# and display the result.
# NOTE(review): the variable name is misspelt ("impotancia"); it is kept
# unchanged in case later cells refer to it.
impotancia = forecaster.get_feature_importance()
impotancia

In [None]:
# Training with exogenous variables
# ==============================================================================
# Columns of `dataset` passed to the forecaster as exogenous predictors.
variables_exog = ['EuriborM', 'ParoT', 'SalarioMedio', 'IPC', 'CTotalConsumo']

# Derive the end of the training period from the existing train partition
# instead of a hard-coded date string. This keeps the fit aligned with
# `dataset_test`, whose exogenous values are later used for prediction, even
# if the data set is extended or the split size changes.
fin_train = dataset_train.index.max()

# Label-based slicing is inclusive, so both selections end on `fin_train`.
forecaster.fit(
    y    = dataset.loc[:fin_train, 'y'],
    exog = dataset.loc[:fin_train, variables_exog]
)
forecaster

In [None]:
# Predictions with exogenous variables
# ==============================================================================
# Exogenous values for the forecast horizon: the first 12 rows of the test
# partition, restricted to the exogenous columns.
exog_siguientes_12meses = dataset_test.loc[:, variables_exog].iloc[:12]

predicciones = forecaster.predict(steps=12, exog=exog_siguientes_12meses)
predicciones

In [None]:
# Backtesting with exogenous variables
# ==============================================================================
# 4 folds of 3 steps each: the last 12 observations (the final year of the
# monthly series) are reserved for the backtest.
steps = 3
n_backtesting = steps * 4

# Same set-up as a plain backtest (refit on every fold, growing training
# window), with the exogenous columns supplied alongside the target.
metrica, predicciones_backtest = backtesting_forecaster(
    forecaster=forecaster,
    y=dataset['y'],
    exog=dataset[variables_exog],
    steps=steps,
    metric='mean_squared_error',
    initial_train_size=len(dataset) - n_backtesting,
    fixed_train_size=False,
    refit=True,
    verbose=True,
)

print(f"Error de backtest: {metrica}",'\n')
predicciones_backtest

In [None]:
# Backtesting plot
# ==============================================================================
# Compare the held-out test values with the backtest predictions obtained
# with exogenous variables.
fig, ax = plt.subplots(figsize=(9, 4))
for _label, _data in (
    ('test', dataset_test['y']),
    ('predicciones_backtest', predicciones_backtest),
):
    _data.plot(ax=ax, label=_label)
ax.legend();