In [1]:
# Forecast: time trend + seasonal dummy variables
#
# =============================================================================

#
# Data loading
#
import importlib.util
import sys
import functions  #  type: ignore


# Load the series through the project-local `functions` module and preview the
# first rows. The displayed output is indexed by `date` and has a single
# `yt_true` column.
df_orig = functions.load_data()
df_orig.head()

Unnamed: 0_level_0,yt_true
date,Unnamed: 1_level_1
1946-01-01,890
1946-02-01,992
1946-03-01,979
1946-04-01,959
1946-05-01,1110


In [3]:
#
# Linear trend component
#
# The displayed result gains a `trend` column that counts 0, 1, 2, ... per row.
# NOTE(review): `df_orig` is rebound to the helper's result, so after this cell
# the name no longer refers to the data as loaded — confirm that re-running the
# cell on an already-augmented frame is harmless.
df_orig = functions.add_linear_trend_component(df_orig)
df_orig.head()

Unnamed: 0_level_0,yt_true,trend
date,Unnamed: 1_level_1,Unnamed: 2_level_1
1946-01-01,890,0
1946-02-01,992,1
1946-03-01,979,2
1946-04-01,959,3
1946-05-01,1110,4


In [4]:
# Seasonal components
#
# The displayed result gains a `month` column holding the month number of each
# row's date (1 for January, 2 for February, ... in the rows shown).
# NOTE(review): `df_orig` is rebound to the helper's result here as well —
# confirm that re-running the cell on an already-augmented frame is harmless.
df_orig = functions.add_month_component(df_orig)
df_orig.head(15)

Unnamed: 0_level_0,yt_true,trend,month
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1946-01-01,890,0,1
1946-02-01,992,1,2
1946-03-01,979,2,3
1946-04-01,959,3,4
1946-05-01,1110,4,5
1946-06-01,1546,5,6
1946-07-01,1539,6,7
1946-08-01,3401,7,8
1946-09-01,2092,8,9
1946-10-01,1436,9,10


In [5]:
# Data partitioning
#
# Features are the `trend` and `month` columns; the target is `yt_true`.
# The project helper hands back six objects, in this order: the complete
# feature/target pair, the training pair and the test pair.
# NOTE(review): how the helper chooses the train/test boundary is not visible
# from this cell — check `functions.train_test_split`.
partitions = functions.train_test_split(
    df=df_orig,
    x_columns=["trend", "month"],
    y_column="yt_true",
)
X_complete, y_complete, X_train, y_train, X_test, y_test = partitions

In [6]:
# Especificación del Modelo
#
from sklearn.pipeline import Pipeline  #  type: ignore
from sklearn.preprocessing import (  #  type: ignore
    PolynomialFeatures,
    OneHotEncoder,
    MinMaxScaler,
)
from sklearn.linear_model import LinearRegression  #  type: ignore
from sklearn.compose import ColumnTransformer  #  type: ignore


def create_pipeline(degree):
    """Build an unfitted regression pipeline: polynomial trend plus month dummies.

    Parameters
    ----------
    degree
        Forwarded unchanged as ``PolynomialFeatures(degree=...)`` for the
        ``"trend"`` column.

    Returns
    -------
    sklearn.pipeline.Pipeline
        A two-step pipeline: ``"preprocessor"`` (a ``ColumnTransformer`` with
        a ``"trend"`` branch and a ``"seasonal"`` branch) followed by
        ``"regressor"`` (a ``LinearRegression``).
    """
    # Trend branch: expand the "trend" column into polynomial terms, then
    # rescale every term with a default MinMaxScaler.
    # NOTE(review): PolynomialFeatures keeps its default include_bias=True, so
    # a constant column is generated alongside the regressor's own intercept —
    # confirm this redundancy is intended.
    trend_steps = [
        ("poly", PolynomialFeatures(degree=degree)),
        ("scaler", MinMaxScaler()),
    ]
    trend_branch = ("trend", Pipeline(trend_steps), ["trend"])

    # Seasonal branch: one-hot encode the "month" column.
    seasonal_branch = ("seasonal", OneHotEncoder(), ["month"])

    # Only the columns named in the two branches are passed on to the
    # regressor (ColumnTransformer is used with its default settings).
    preprocessor = ColumnTransformer([trend_branch, seasonal_branch])

    return Pipeline(
        [
            ("preprocessor", preprocessor),
            ("regressor", LinearRegression()),
        ]
    )