# Introduction #

Run this cell to set everything up!

In [None]:
# Setup feedback system
from learntools.core import binder
binder.bind(globals())
from learntools.time_series.ex5 import *

# Setup notebook
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.linear_model import LinearRegression
from statsmodels.tsa.deterministic import (CalendarFourier,
                                           CalendarSeasonality,
                                           CalendarTimeTrend)
from statsmodels.tsa.tsatools import lagmat


# Set Matplotlib defaults
# Matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8-*" and
# later releases dropped the old names. Prefer the new name when this
# installation provides it and fall back to the original name otherwise.
_whitegrid_style = "seaborn-v0_8-whitegrid"
if _whitegrid_style not in plt.style.available:
    _whitegrid_style = "seaborn-whitegrid"
plt.style.use(_whitegrid_style)
plt.rc("figure", autolayout=True, figsize=(11, 5))
plt.rc(
    "axes",
    labelweight="bold",
    labelsize="large",
    titleweight="bold",
    titlesize=16,
    titlepad=10,
)


# Utility functions
def add_trend(X, freq="D", order=1):
    """Return ``X`` joined with calendar time-trend columns.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature frame. Its first index entry is used as the trend's base
        period and its full index is passed to ``in_sample``.
    freq : str, default "D"
        Frequency forwarded to ``CalendarTimeTrend``.
    order : int, default 1
        Trend order forwarded to ``CalendarTimeTrend``.

    Returns
    -------
    pandas.DataFrame
        ``X`` with the trend columns joined on its index.
    """
    trend_term = CalendarTimeTrend(
        freq=freq,
        order=order,
        constant=False,  # no intercept column is requested from the term
        base_period=X.index[0],
    )
    trend_columns = trend_term.in_sample(X.index)
    return X.join(trend_columns)


def add_seasonal(X, freq="D", period="W"):
    """Return ``X`` joined with calendar seasonal columns.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature frame; its index is passed to ``CalendarSeasonality.in_sample``.
    freq : str, default "D"
        Observation frequency forwarded to ``CalendarSeasonality``.
    period : str, default "W"
        Seasonal period forwarded to ``CalendarSeasonality``.

    Returns
    -------
    pandas.DataFrame
        ``X`` with the seasonal columns joined on its index.
    """
    # Forward the caller's arguments rather than fixed "D"/"W" literals so
    # that non-default frequencies and periods actually take effect.
    seasonality = CalendarSeasonality(freq=freq, period=period)
    X = X.join(seasonality.in_sample(X.index))
    return X


def add_fourier(X, freq="A", order=52):
    """Return ``X`` joined with calendar Fourier columns.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature frame; its index is passed to ``CalendarFourier.in_sample``.
    freq : str, default "A"
        Frequency forwarded to ``CalendarFourier``.
    order : int, default 52
        Fourier order forwarded to ``CalendarFourier``.

    Returns
    -------
    pandas.DataFrame
        ``X`` with the Fourier columns joined on its index.
    """
    fourier_columns = CalendarFourier(freq=freq, order=order).in_sample(X.index)
    return X.join(fourier_columns)


def add_lags(X, y, lags=(1,)):
    """Join lagged copies of ``y`` onto ``X`` and align ``y`` with the result.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature frame indexed like ``y``.
    y : pandas.Series
        Target series from which the lag columns are built.
    lags : sequence of int, default (1,)
        Lag numbers to include. Each must be at least 1.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.Series)
        ``X`` with the selected lag columns joined and every row containing
        a missing value dropped, and ``y`` restricted to the surviving index.

    Raises
    ------
    ValueError
        If ``lags`` is empty or contains a value smaller than 1.
    """
    lags = list(lags)
    # A lag below 1 would become a zero/negative position in the ``iloc``
    # lookup below and silently pick an unrelated lag column.
    if not lags or min(lags) < 1:
        raise ValueError("lags must be a non-empty sequence of positive integers")

    # The positional lookup relies on lagmat ordering its columns by lag
    # 1..maxlag, so lag k sits at position k - 1.
    X_lag = lagmat(y, maxlag=max(lags), use_pandas=True, trim="both")
    X_lag = X_lag.iloc[:, [lag - 1 for lag in lags]]
    # Rows of X without a matching lag row get NaN from the join and are
    # removed here, together with any row that already had a missing value.
    X = X.join(X_lag).dropna()
    y = y.loc[X.index]
    return X, y


# Load 1C data
data_dir = Path("../input/ts-course-data")
df_train = pd.read_csv(data_dir / "1c_train.csv", parse_dates=["date"])

# Sum item_cnt_day over all rows sharing a date, switch the index to daily
# periods, and keep the result as a single Series.
ts_cnt = (
    df_train
    .pivot_table(index="date", values="item_cnt_day", aggfunc="sum")
    .to_period("D")
)["item_cnt_day"]

# Hold out the final 90 observations for validation; train on the rest.
ts_valid = ts_cnt.iloc[-90:]
ts_train = ts_cnt.drop(ts_valid.index)

-------------------------------------------------------------------------------

How would you define the forecasting goal in the following scenarios?
1. TODO
2. TODO
3. TODO

# 1) Defining the Forecasting Goal

After you've thought about your answer, run this cell for the solution.

In [None]:
# View the solution (Run this cell to receive credit!)
q_1.check()

-------------------------------------------------------------------------------

Run this cell to create the features for the next question.

In [None]:
def make_deterministic_features(ts):
    """Build the deterministic feature frame for the index of ``ts``.

    Starts from an empty frame on ``ts.index`` and applies, in order,
    ``add_trend`` (order 2), ``add_seasonal`` (defaults) and
    ``add_fourier`` (order 52).

    Parameters
    ----------
    ts : pandas.Series or pandas.DataFrame
        Only its index is used.

    Returns
    -------
    pandas.DataFrame
        The combined trend, seasonal and Fourier columns.
    """
    return (
        pd.DataFrame(index=ts.index)
        .pipe(add_trend, order=2)
        .pipe(add_seasonal)
        .pipe(add_fourier, order=52)
    )


# Deterministic features over the whole series, split along the
# train/validation indexes created in the setup cell.
X = make_deterministic_features(ts_cnt)
X_train, X_valid = X.loc[ts_train.index], X.loc[ts_valid.index]

# Target series, split along the same indexes.
y = ts_cnt.copy()
y_train, y_valid = y.loc[ts_train.index], y.loc[ts_valid.index]

# 2) Deterministic Forecast

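Make a forecast on the validation set using only deterministic features. Store the fitted values for the training period in `y_pred` and the forecast for the validation period in `y_forecast`; the optional plotting cell below uses both names.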

In [None]:
# YOUR CODE HERE
#_UNCOMMENT_IF(PROD)_
#____

# Check your answer
q_2.check()

In [None]:
# Lines below will give you a hint or solution code
#_COMMENT_IF(PROD)_
q_2.hint()
#_COMMENT_IF(PROD)_
q_2.solution()

In [None]:
#%%RM_IF(PROD)%%
# Fit a linear regression on the deterministic training features.
model = LinearRegression().fit(X_train, y_train)

# Fitted values over the training index and forecast over the validation index.
y_pred = pd.Series(model.predict(X_train), index=y_train.index)
y_forecast = pd.Series(model.predict(X_valid), index=y_valid.index)

# Root-mean-squared forecast error; nanmean skips any NaN residuals.
residuals = y_valid - y_forecast
rmse = np.sqrt(np.nanmean(residuals ** 2))
print(f"Validation RMSE: {rmse}")

q_2.assert_check_passed()

Run this cell if you'd like to see a plot of your forecast.

In [None]:
# Observed series as dots, then the fitted values ("C0") and the forecast
# ("C3") drawn on the same axes.
ax = ts_cnt.plot(style=".", color="0.25")
y_pred.plot(ax=ax, color="C0")
y_forecast.plot(ax=ax, color="C3");

-------------------------------------------------------------------------------

Run this cell to set up the training data for this question.

In [None]:
# Deterministic features for the full series; the recursive forecast later
# slices this frame by date.
X_det = make_deterministic_features(ts_cnt)

# Training rows of the features and target, extended with lags 1, 2 and 5.
X_train, y_train = add_lags(
    X_det.loc[ts_train.index],
    ts_cnt.loc[ts_train.index],
    lags=[1, 2, 5],
)

# 3) Recursive Forecast

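Make a forecast on the validation set with lag features using the recursive method. Store the fitted values for the training period in `y_pred` and the forecast for the validation period in `y_fore`; the optional plotting cell below uses both names.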

In [None]:
# YOUR CODE HERE
#_UNCOMMENT_IF(PROD)_
#____

# Check your answer
q_3.check()

In [None]:
# Lines below will give you a hint or solution code
#_COMMENT_IF(PROD)_
q_3.hint()
#_COMMENT_IF(PROD)_
q_3.solution()

In [None]:
#%%RM_IF(PROD)%%
# Fit once on the lagged training data; this same fitted model produces every
# step of the recursive forecast below (it is never refit inside the loop).
model = LinearRegression().fit(X_train, y_train)
# Fitted values on the training rows, captured before the loop rebinds X_train.
y_pred = pd.Series(model.predict(X_train), index=X_train.index)

# Walk the validation dates in index order, forecasting one date per pass.
# NOTE(review): X_train and y_train are rebound and extended here, so re-running
# this cell without first re-running the setup cell above would fit the model
# on data that already contains forecasts.
for date in y_valid.index:
    # Deterministic features for every row up to and including `date`.
    X_train = X_det.loc[:date, :]
    # Placeholder entry for `date` so add_lags produces a lag row for it.
    y_train[date] = np.nan
    # Rebuild the lag columns from y_train, which by now holds the forecasts
    # written on earlier passes; the lags for `date` are therefore taken from
    # those forecasts wherever they reach into the validation period.
    # NOTE(review): add_lags trims leading rows (trim="both" plus dropna), so
    # y_train appears to lose its earliest rows on every pass -- confirm this
    # is acceptable for the series length in use.
    X_train, y_train = add_lags(X_train, y_train, lags=[1, 2, 5])
    # Overwrite the placeholder with the model's prediction for `date`.
    y_train[date] = model.predict(X_train.loc[[date], :])[0]

# Keep only the entries written for the validation dates.
y_fore = y_train[ts_valid.index]
q_3.assert_check_passed()

Run this cell if you'd like to see the forecast.

In [None]:
# Observed series as dots, then the fitted values ("C0") and the recursive
# forecast ("C3") drawn on the same axes.
ax = ts_cnt.plot(style=".", color="0.25")
y_pred.plot(ax=ax, color="C0")
y_fore.plot(ax=ax, color="C3");

-------------------------------------------------------------------------------

# 4) Validate with Backtest

Instead of using a validation set, use the backtesting method to test your model.

# Keep Going #