# Introduction #

Run this cell to set everything up!

In [None]:
# Setup feedback system
# NOTE(review): bind(globals()) presumably exposes this notebook's variables to
# the learntools checkers -- confirm against the learntools documentation.
from learntools.core import binder
binder.bind(globals())
# Presumably the source of the q_1 ... q_4 question objects used by the check
# cells below; no other visible cell defines them.
from learntools.time_series.ex2 import *

# Setup notebook
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from fbprophet import Prophet
from sklearn.linear_model import LinearRegression
from statsmodels.tsa.deterministic import CalendarTimeTrend

# Set Matplotlib defaults
# Matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*" and
# later releases dropped the old names. Prefer the new name when this install
# provides it and fall back to the legacy name on older installs.
whitegrid_style = (
    "seaborn-v0_8-whitegrid"
    if "seaborn-v0_8-whitegrid" in plt.style.available
    else "seaborn-whitegrid"
)
plt.style.use(whitegrid_style)
plt.rc("figure", autolayout=True, figsize=(11, 5))
plt.rc(
    "axes",
    labelweight="bold",
    labelsize="large",
    titleweight="bold",
    titlesize=16,
    titlepad=10,
)


# Load 1C data
data_dir = Path("../input/ts-course-data")
df_train = pd.read_csv(data_dir / "1c_train.csv", parse_dates=["date"])

# Aggregate item sales into a single time series: sum `item_cnt_day` per date
# and select that column, so `ts` is bound once, directly to a Series.
ts = df_train.pivot_table(
    index="date", values="item_cnt_day", aggfunc="sum"
)["item_cnt_day"]

# Plot
ts.plot(color="0.25", title="Total Sales", ylabel="items sold");

-------------------------------------------------------------------------------

# 1) Make Trend Plot

Visualize the trend by creating a 365-day moving-average plot with a centered window.

In [None]:
# YOUR CODE HERE: Plot a centered 365-day moving average of `ts`
#_UNCOMMENT_IF(PROD)_
#ax = ____

# Check your answer
q_1.check()

In [None]:
# The lines below will give you a hint or the solution code
#_COMMENT_IF(PROD)_
q_1.hint()
#_COMMENT_IF(PROD)_
q_1.solution()

In [None]:
#%%RM_IF(PROD)%%
# Reference solution: centered 365-observation rolling mean of the sales series.
# The result is bound to `ax` because the exercise template asks for
# `ax = ____`; the original solution plotted without binding that name.
ax = ts.rolling(365, center=True).mean().plot()

q_1.assert_check_passed()

How does this moving-average compare to the trend component created by Prophet? Run the next cell if you'd like to see it again.

In [None]:
# Two-column frame handed to Prophet: `ds` holds the dates, `y` the sales.
df = pd.DataFrame({"ds": ts.index, "y": ts})

# Fit with multiplicative seasonality, then predict over the same dates.
prophet = Prophet(seasonality_mode="multiplicative")
prophet.fit(df)
y_pred = prophet.predict(df)

# Plot the components of the fitted model.
prophet.plot_components(y_pred);

--------------------------------------------------------------------------------

# 1) Create a Trend Feature

Create a linear trend feature using the `CalendarTimeTrend` class from `statsmodels`.

In [None]:
# Start from an empty feature frame (no columns) indexed by daily periods
X = pd.DataFrame(index=ts.index).to_period("D")

# YOUR CODE HERE: Instantiate CalendarTimeTrend
#_UNCOMMENT_IF(PROD)_
#trend = ____
# YOUR CODE HERE: Create the feature and join to X
#_UNCOMMENT_IF(PROD)_
#X = ____


# Check your answer
q_1.check()

In [None]:
# The lines below will give you a hint or the solution code
#_COMMENT_IF(PROD)_
q_1.hint()
#_COMMENT_IF(PROD)_
q_1.solution()

In [None]:
#%%RM_IF(PROD)%%
# NOTE(review): q_1 is also the question id checked in the moving-average
# section earlier in this notebook -- confirm which question this belongs to.

# Empty feature frame on a daily PeriodIndex built from the sales dates.
X = pd.DataFrame(index=ts.index.to_period("D"))

# Order-1 (linear) calendar trend with no constant column, anchored at the
# first period of the index.
trend = CalendarTimeTrend(
    freq="D",
    order=1,
    constant=False,
    base_period=X.index[0],
)
X = X.join(trend.in_sample(X.index))


q_1.assert_check_passed()

Run the next cell if you'd like to see the result.

In [None]:
# Show only the first rows, using the notebook's rich table display rather
# than printing the whole frame as plain text.
X.head()

--------------------------------------------------------------------------------

# 2) Fit Linear Trend Model

Using the `LinearRegression` model from scikit-learn, fit a linear trend model to `ts` on the feature set `X` you just created.

In [None]:
# YOUR CODE HERE: Create the trend model (a scikit-learn LinearRegression)
#_UNCOMMENT_IF(PROD)_
#trend_model = ____
# YOUR CODE HERE: Fit the model to ts using X
#_UNCOMMENT_IF(PROD)_
#____

# Check your answer
q_2.check()

In [None]:
# The lines below will give you a hint or the solution code
#_COMMENT_IF(PROD)_
q_2.hint()
#_COMMENT_IF(PROD)_
q_2.solution()

In [None]:
#%%RM_IF(PROD)%%
# Regress the sales series on the trend feature(s) in X. Creation and fitting
# are chained, the same form the quadratic-trend plot cell uses.
trend_model = LinearRegression().fit(X, ts)

q_2.assert_check_passed()

You can see a plot of the result by running the next cell.

In [None]:
# In-sample predictions of the linear model, re-indexed by the original dates.
trend_fit = pd.Series(data=trend_model.predict(X), index=ts.index)

# Raw sales as points, with the fitted trend drawn as a thick line on the same axes.
ax = ts.plot(style=".", color="0.25", title="Total Sales", ylabel="items sold")
trend_fit.plot(ax=ax, label="Linear Trend", linewidth=3)
ax.legend();

--------------------------------------------------------------------------------

# 2) Create a Quadratic Trend Feature

Now create a trend feature of order 2.

In [None]:
# YOUR CODE HERE: Create an order-2 trend feature and store the result in `X_2`
# (the cells that follow read `X_2`)
#_UNCOMMENT_IF(PROD)_
#____

# Check your answer
q_2.check()

In [None]:
# The lines below will give you a hint or the solution code
#_COMMENT_IF(PROD)_
q_2.hint()
#_COMMENT_IF(PROD)_
q_2.solution()

In [None]:
#%%RM_IF(PROD)%%
# NOTE(review): q_2 is also the question id checked in the linear-model
# section earlier in this notebook -- confirm which question this belongs to.

# Empty feature frame on a daily PeriodIndex built from the sales dates.
X_2 = pd.DataFrame(index=ts.index.to_period("D"))

# Order-2 calendar trend with no constant column, anchored at the first period.
trend_2 = CalendarTimeTrend(
    freq="D",
    order=2,
    constant=False,
    base_period=X_2.index[0],
)
X_2 = X_2.join(trend_2.in_sample(X_2.index))

q_2.assert_check_passed()

Run the next cell if you'd like to see the result.

In [None]:
# Show only the first rows, using the notebook's rich table display rather
# than printing the whole frame as plain text.
X_2.head()

And this cell will show you a plot of the quadratic trend.


In [None]:
# Fit a second linear regression, this time on the order-2 trend features.
trend_model_2 = LinearRegression()
trend_model_2.fit(X_2, ts)

# In-sample predictions, re-indexed by the original dates.
trend_fit_2 = pd.Series(data=trend_model_2.predict(X_2), index=ts.index)

# Raw sales as points, with the fitted trend drawn as a thick line on the same axes.
ax = ts.plot(style=".", color="0.25", title="Total Sales", ylabel="items sold")
trend_fit_2.plot(ax=ax, label="Quadratic Trend", linewidth=3)
ax.legend();

# 3) Detrend Series

Now, using either of the trend predictions you made, detrend the item sales series and plot the result.

In [None]:
# YOUR CODE HERE: Detrend `ts` with one of your trend predictions and plot the result
#_UNCOMMENT_IF(PROD)_
#detrended = ____

# Check your answer
q_3.check()

In [None]:
# The lines below will give you a hint or the solution code
#_COMMENT_IF(PROD)_
q_3.hint()
#_COMMENT_IF(PROD)_
q_3.solution()

In [None]:
#%%RM_IF(PROD)%%
# Both solutions are drawn on the same axes, so each line gets a label and the
# axes get a legend; without them the two series cannot be told apart.

# Solution 1: subtract the linear trend
detrended = ts - trend_fit
ax = detrended.plot(label="Linear trend removed")

# Solution 2: subtract the quadratic trend
detrended_2 = ts - trend_fit_2
detrended_2.plot(ax=ax, label="Quadratic trend removed")

ax.set(title="Detrended Sales", ylabel="items sold")
ax.legend();

q_3.assert_check_passed()

Does it appear that the trend model you chose was able to effectively detrend the sales series? You might like to try detrending with the other trend model and comparing the results.

----

# 4) Create Change and Percent Change Series

Create series for the change in sales and the percent change in sales.

In [None]:
# YOUR CODE HERE: Compute the change and the percent change between consecutive values of `ts`
#_UNCOMMENT_IF(PROD)_
#ts_change = ____
#_UNCOMMENT_IF(PROD)_
#ts_pct_change = ____

# Check your answer
q_4.check()

In [None]:
# The lines below will give you a hint or the solution code
#_COMMENT_IF(PROD)_
q_4.hint()
#_COMMENT_IF(PROD)_
q_4.solution()

In [None]:
#%%RM_IF(PROD)%%
# Absolute change between consecutive observations.
ts_change = ts.diff(periods=1)
# Relative change between consecutive observations, scaled from a fraction to percent.
ts_pct_change = 100 * ts.pct_change(periods=1)

q_4.assert_check_passed()

Run the next cell if you'd like to see the results.

In [None]:
# Two stacked panels sharing the date axis: absolute change on top,
# percent change underneath.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, sharex=True, figsize=(10, 7))
ts_change.plot(ax=ax1, title="Change in Sales")
ts_pct_change.plot(ax=ax2, title="Percent Change in Sales");

# (Optional) Look at Log-Sales

Sometimes a logarithmic transform is effective at stabilizing the changes in a series. The next cell plots the change in sales before and after a log-transform (`log1p`). What effect did this transform have on the series of differences?

In [None]:
# Compare differences of the raw series (top) with differences of the
# log-transformed series (bottom). np.log1p computes log(1 + x).
# numpy is imported with the other dependencies in the setup cell.
fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(10, 7))
ts_change.plot(ax=ax1)
ax1.set_title("Change in Sales")
np.log1p(ts).diff().plot(ax=ax2)
# Trailing semicolon keeps the Text(...) repr out of the cell output, matching
# the other plotting cells.
ax2.set_title("Change in Log-Sales");

# (Optional) Look at Splines

The trends Prophet creates are actually a little more flexible than what we can easily do with linear regression. You can get the same effect, however, using "splines". The *MARS* algorithm in the `pyearth` library is easy to use.

In [None]:
# Imported locally because only this optional section needs pyearth.
from pyearth import Earth

# Use a separate name so the fitted linear `trend_model` from the earlier
# question is not overwritten; cells that call `trend_model.predict` would
# otherwise break when re-run after this one.
spline_model = Earth()
# etc.

# Keep Going #