# Introduction #

Run this cell to set everything up!

In [None]:
# Setup feedback system
from learntools.core import binder
binder.bind(globals())
from learntools.time_series.ex3 import *

# Setup notebook
import holidays
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from fbprophet import Prophet
from pathlib import Path
from scipy.signal import periodogram
from sklearn.linear_model import LinearRegression
from statsmodels.tsa.deterministic import CalendarTimeTrend, CalendarFourier, CalendarSeasonality


# Set Matplotlib defaults
# The bundled seaborn style sheets were renamed with a "seaborn-v0_8-" prefix in
# Matplotlib 3.6 and the old names were removed in 3.8, so use whichever name
# this Matplotlib installation actually provides.
plt.style.use(
    "seaborn-v0_8-whitegrid"
    if "seaborn-v0_8-whitegrid" in plt.style.available
    else "seaborn-whitegrid"
)
plt.rc("figure", autolayout=True, figsize=(11, 5))
plt.rc(
    "axes",
    labelweight="bold",
    labelsize="large",
    titleweight="bold",
    titlesize=16,
    titlepad=10,
)


def plot_periodogram(ts, freq=None, ax=None):
    """Plot the periodogram of ``ts`` on a log-scaled frequency axis.

    Parameters
    ----------
    ts : array-like
        The series whose spectral density is estimated; passed directly to
        ``scipy.signal.periodogram``.
    freq : float, optional
        Sampling frequency forwarded as ``fs``. Defaults to 365.2425, i.e. the
        number of days in a mean year, so that a frequency of 1 corresponds to
        one cycle per year for daily observations.
    ax : matplotlib Axes, optional
        Axes to draw on. A new figure and axes are created when omitted.

    Returns
    -------
    The axes that were drawn on.
    """
    if freq is None:
        # Same value the former pd.Timedelta("1 Y") / pd.Timedelta("1 D")
        # produced (365 days 05:49:12). The ambiguous "Y" unit is rejected by
        # pandas >= 2.0, so the ratio is written out as a constant instead.
        freq = 365.2425
    frequencies, spec_density = periodogram(ts, fs=freq)
    if ax is None:
        _, ax = plt.subplots()
    ax.step(frequencies, spec_density, color="purple")
    ax.set_xscale("log")
    # Tick positions are in cycles per year and are labelled by period.
    ax.set_xticks([1, 4, 6, 12, 26, 52, 104])
    ax.set_xticklabels(
        [
            "Yearly",
            "Quarterly",
            "Bimonthly",
            "Monthly",
            "Biweekly",
            "Weekly",
            "Semiweekly",
        ],
        rotation=30,
    )
    ax.ticklabel_format(axis="y", style="sci", scilimits=(0, 0))
    ax.set_ylabel("Density")
    return ax


# Read the 1C training data, parsing the "date" column as datetimes
data_dir = Path("../input/ts-course-data")
df_train = pd.read_csv(data_dir / "1c_train.csv", parse_dates=["date"])

# Sum item sales per date into one series and index it by daily periods
ts = (
    df_train
    .pivot_table(index="date", values="item_cnt_day", aggfunc="sum")
    ["item_cnt_day"]
    .to_period("D")
)

-------------------------------------------------------------------------------

Examine the following seasonal plots. First, a plot of monthly sales over each year:

In [None]:
# One line per year, with month of the year along the x-axis
y = ts.to_frame().assign(
    month=lambda frame: frame.index.month,
    year=lambda frame: frame.index.year.astype("category"),
)
sns.lineplot(data=y, x="month", y="item_cnt_day", hue="year");

And now a plot of daily sales over each week:

In [None]:
# One line per week of the year, with day of the week along the x-axis
y = ts.to_frame().assign(
    day=lambda frame: frame.index.dayofweek,
    week=lambda frame: frame.index.weekofyear,
)
sns.lineplot(data=y, x="day", y="item_cnt_day", hue="week", ci=None);

# 1) Seasonal Plots

What kind of seasonality do you see in the series of item sales?

In [None]:
# View the solution (Run this cell to receive credit!)
q_1.check()

-------------------------------------------------------------------------------

# 2) Create Seasonal Indicators

Now create seasonal indicators for a weekly period. (Hint: the *frequency* is given in days.)

In [None]:
# YOUR CODE HERE
#_UNCOMMENT_IF(PROD)_
#____

# Check your answer
q_2.check()

In [None]:
# Lines below will give you a hint or solution code
#_COMMENT_IF(PROD)_
q_2.hint()
#_COMMENT_IF(PROD)_
q_2.solution()

In [None]:
#%%RM_IF(PROD)%%
# Start from an empty frame that shares the sales series' index.
X_season = pd.DataFrame(index=ts.index)

# Seasonal indicators for a weekly period on daily observations
# (freq="D" is the observation frequency, period="W" the seasonal period).
seasonality = CalendarSeasonality(freq="D", period="W")
X_season = X_season.join(seasonality.in_sample(X_season.index))

# Fails the notebook run if the feedback system rejects the solution above.
q_2.assert_check_passed()

In [None]:
# Bare last expression so the notebook renders the frame with its rich display
X_season

-------------------------------------------------------------------------------

Now look at the periodogram of the sales series:

In [None]:
# Periodogram of the raw sales series, using the default yearly frequency base
ax = plot_periodogram(ts)
ax.set_title("Item Sales Frequency Components");

# 3) Create Seasonal Fourier Features

Based on the periodogram above, create Fourier features for an annual (yearly) seasonality with the appropriate order.

In [None]:
# YOUR CODE HERE
#_UNCOMMENT_IF(PROD)_
#____

# Check your answer
q_3.check()

In [None]:
# Lines below will give you a hint or solution code
#_COMMENT_IF(PROD)_
q_3.hint()
#_COMMENT_IF(PROD)_
q_3.solution()

In [None]:
#%%RM_IF(PROD)%%
# Start from an empty frame that shares the sales series' index.
X_fourier = pd.DataFrame(index=ts.index)

# Fourier terms for an annual period (freq="A"), using an order of 52.
fourier = CalendarFourier(freq="A", order=52)
X_fourier = X_fourier.join(fourier.in_sample(X_fourier.index))

# Fails the notebook run if the feedback system rejects the solution above.
q_3.assert_check_passed()

In [None]:
# Bare last expression so the notebook renders the frame with its rich display
X_fourier

-------------------------------------------------------------------------------

# 4) Create Holiday Indicators

Join holiday indicators for Russia (`"RU"` country code) to your feature set.

In [None]:
# YOUR CODE HERE
#_UNCOMMENT_IF(PROD)_
#____

# Check your answer
q_4.check()

In [None]:
# Lines below will give you a hint or solution code
#_COMMENT_IF(PROD)_
q_4.hint()
#_COMMENT_IF(PROD)_
q_4.solution()

In [None]:
#%%RM_IF(PROD)%%
# Solution 1
# Build the indicators from the `holidays` package.
X_holiday = pd.DataFrame(index=ts.index)

ru_holidays = holidays.RU(years=ts.index.year.unique()).items()  # returns pairs (date, name)
date, name = list(zip(*ru_holidays))  # unzip pairs into (dates, names)

# Join the holiday names onto the daily index (DataFrame.join defaults to a
# left join), then one-hot encode the resulting "holiday" column.
X_holiday = X_holiday.join(pd.Series(
    name,
    index=pd.PeriodIndex(date, freq="D"),
    name="holiday",
))
# NOTE(review): drop_first=True removes one holiday's indicator column, while
# days without a holiday presumably already encode as all zeros -- confirm
# that dropping a holiday is intended.
X_holiday = pd.get_dummies(X_holiday, drop_first=True)


# Solution 2
# NOTE: this reassigns X_holiday, so the result of Solution 1 above is
# discarded and only this version reaches the check below.
# Work on a timestamp index for the join; converted back to daily periods at
# the end.
X_holiday = pd.DataFrame(index=ts.index).to_timestamp()

prophet = Prophet()
prophet.add_country_holidays("RU")
X_holiday = X_holiday.join(
    prophet.construct_holiday_dataframe(
        pd.period_range(start=ts.index[0], end=ts.index[-1], freq="D")
    ).set_index("ds"))
X_holiday = pd.get_dummies(X_holiday, drop_first=True).to_period("D")


# Fails the notebook run if the feedback system rejects the solution above.
q_4.assert_check_passed()

<mark>maybe wrap this into a function for easier use</mark>


In [None]:
# Bare last expression so the notebook renders the frame with its rich display
X_holiday

-------------------------------------------------------------------------------

# 4) Fit Seasonal Model

Create a seasonal model using scikit-learn's `LinearRegression`.


In [None]:
# YOUR CODE HERE
#_UNCOMMENT_IF(PROD)_
#____

# Check your answer
q_4.check()

In [None]:
# Lines below will give you a hint or solution code
#_COMMENT_IF(PROD)_
q_4.hint()
#_COMMENT_IF(PROD)_
q_4.solution()

In [None]:
#%%RM_IF(PROD)%%
def create_features(ts, trend_order=1, fourier_order=12):
    """Rebuild the trend and seasonal features from the earlier questions.

    Parameters
    ----------
    ts : pandas object
        Only its index is used; the returned frame shares that index.
    trend_order : int, default 1
        Order passed to ``CalendarTimeTrend``.
    fourier_order : int, default 12
        Order passed to the annual ``CalendarFourier`` terms.

    Returns
    -------
    pandas.DataFrame
        Trend, weekly-indicator and annual Fourier columns joined side by side.
    """
    index = ts.index
    terms = [
        CalendarTimeTrend(freq="D", order=trend_order, base_period=index[0]),
        CalendarSeasonality(freq="D", period="W"),
        CalendarFourier(freq="A", order=fourier_order),
    ]
    # TODO: holiday indicators (the Prophet-based "RU" holiday frame) are not
    # joined here yet.
    X = pd.DataFrame(index=index)
    for term in terms:
        X = X.join(term.in_sample(index))
    return X


# Build the feature matrix; fourier_order=110 overrides the function's default of 12.
X = create_features(ts, fourier_order=110)

# Ordinary least squares fit of the sales series on the features.
seasonal_model = LinearRegression()
seasonal_model.fit(X, ts)


# Fails the notebook run if the feedback system rejects the solution above.
q_4.assert_check_passed()

<mark>move `create_features` to learntools import, maybe; need for checking code anyway</mark>

Run the cell below to see the result!

In [None]:
# Plot the fitted trend-plus-seasonality curve over the observed sales.
# The fitted line is labelled "Seasonal": it comes from the seasonal model,
# not from a trend-only fit.
seasonal_fit = pd.Series(seasonal_model.predict(X).reshape(-1), index=ts.index)
ax = ts.plot(color="0.25", style=".", title="Total Sales", ylabel="items sold")
seasonal_fit.plot(ax=ax, label="Seasonal")
ax.legend();

The weekly and yearly seasonal features appear to be fitting the data well. But what about the holiday indicators?

-------------------------------------------------------------------------------

Run this cell to see the detrended and deseasonalized series.

In [None]:
# Residual series: observed sales minus the fitted trend and seasonality
deseasonalized = ts.sub(seasonal_fit)
ax = deseasonalized.plot()

# 5) Examine Deseasonalized Series

Does there appear to be any regular or periodic behavior still present? How could you check?

In [None]:
# View the solution (Run this cell to receive credit!)
q_5.check()

Look now at the periodogram of the deseasonalized series.

In [None]:
# Stack both periodograms on shared axes so their spectra are directly comparable
fig, (ax1, ax2) = plt.subplots(
    nrows=2, ncols=1, sharex=True, sharey=True, figsize=(10, 7)
)
plot_periodogram(ts, ax=ax1).set_title("Item Sales Frequency Components")
plot_periodogram(deseasonalized, ax=ax2).set_title("Deseasonalized")
plt.show()

Based on this periodogram, does it seem like your seasonal features captured all (or almost all) of the seasonality in this series? Does it confirm your observations of the deseasonalized series?

You might try varying the number of Fourier components to see what effect that has on the periodogram of the deseasonalized series.

# Keep Going #