In [None]:
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
import numpy as np

from sklearn.linear_model import LinearRegression

#### **Import data**

In [None]:
# Load the book sales CSV with its "Date" column parsed as dates and used as
# the index, then discard the "Paperback" column (the cells visible below work
# with the "Hardcover" series only).
df = pd.read_csv(
    "store-sales-time-series-forecasting/book_sales.csv",
    parse_dates=["Date"],
    index_col="Date",
).drop(columns=["Paperback"])
df.head()

#### **Linear regression**

Two kinds of features are useful for this problem: a time-step index (the position of each observation in the series) and lag features (previous values of the target).

In [None]:
# Time-step feature: consecutive integers 0, 1, 2, ... assigned in row order.
df["Time"] = range(len(df))

In [None]:
# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8-*"
# and later releases dropped the old names, so a hard-coded "seaborn-whitegrid"
# raises on current installations. Use whichever spelling is available here,
# preferring the new one; if neither exists, keep Matplotlib's current style.
for style_name in ("seaborn-v0_8-whitegrid", "seaborn-whitegrid"):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break

# Figure-level defaults shared by every plot in the notebook.
plt.rc(
    "figure",
    autolayout = True,
    figsize = (11, 4),
    titlesize = 18,
    titleweight = "bold"
)

# Axes-level defaults: bold labels and titles, with a little padding under
# the title.
plt.rc(
    "axes",
    labelweight="bold",
    labelsize="large",
    titleweight="bold",
    titlesize=16,
    titlepad=10,
)

# Time plot: the raw Hardcover series as a light-grey line, with seaborn's
# scatter points and fitted regression line (95% confidence band) drawn on
# the same axes.
fig, ax = plt.subplots()
ax.plot("Time", "Hardcover", data=df, color="0.7")
sns.regplot(
    x="Time",
    y="Hardcover",
    data=df,
    ci=95,
    scatter_kws={"color": "0.25"},
    ax=ax,
)
ax.set_title("Time Plot of Hardcover Sales")
ax.grid(False);

In [None]:
# Ordinary least squares of Hardcover on the time step alone.
lr = LinearRegression()

# scikit-learn expects a 2-D feature matrix, hence the reshape to one column.
time_feature = df["Time"].to_numpy().reshape(-1, 1)
hardcover_sales = df["Hardcover"].to_numpy()
lr.fit(time_feature, hardcover_sales)

# Display the slope for Time and the intercept.
(lr.coef_[0], lr.intercept_)

In [None]:
# Inspect the raw target values as a NumPy array.
df["Hardcover"].to_numpy()

In [None]:
# Lag feature: each row receives the Hardcover value of the row before it.
# The first row has no predecessor, so its Lag_1 is NaN.
df["Lag_1"] = df["Hardcover"].shift(periods=1)
df.head()

In [None]:
# Lag plot: each Hardcover value against the previous row's value (Lag_1).
# Author's note (translated from Italian): since a correlation is observed
# between the variable and its lag, that lag should be taken into account in
# the analysis. We are accounting for a serial dependence: one day's sales
# will be lower than the following day's.
# NOTE(review): the last sentence is the author's reading; a positive lag
# relationship would rather mean that a day's sales tend to resemble the
# previous day's. Confirm against the rendered plot.

fig, ax = plt.subplots()
lag_pairs = df[["Lag_1", "Hardcover"]]
sns.regplot(
    x="Lag_1",
    y="Hardcover",
    data=lag_pairs,
    ci=95,
    scatter_kws={"color": "0.25"},
    ax=ax,
)
ax.set_title("Lag Plot of Hardcover Sales")
ax.grid(False);

In [None]:
# Ordinary least squares of Hardcover on its one-step lag.
lr = LinearRegression()

# Row 0 is skipped because its Lag_1 is NaN (nothing precedes the first row).
lagged_rows = df.iloc[1:]
lr.fit(
    lagged_rows["Lag_1"].to_numpy().reshape(-1, 1),
    lagged_rows["Hardcover"].to_numpy(),
)

# Display the slope for Lag_1 and the intercept.
(lr.coef_[0], lr.intercept_)

In [None]:
# Ordinary least squares of Hardcover on both features: Time and Lag_1.
lr = LinearRegression()

# Row 0 is skipped because its Lag_1 is NaN (nothing precedes the first row).
lr.fit(X = df[["Time", "Lag_1"]].values[1:,:], y = df["Hardcover"].values[1:])

# The model has two features, so show the whole coefficient vector
# (ordered [Time, Lag_1], matching the column order above) and the intercept.
# Showing only coef_[0] would hide the Lag_1 coefficient.
lr.coef_, lr.intercept_