In [2]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

### Feature Engineering Conclusion
*Before feature engineering there were 4 columns; now there are 19 in total, covering the following categories:*
| Category           | Purpose             |
| ------------------ | ------------------- |
| Time features      | Seasonality         |
| Lag features       | Memory              |
| Rolling features   | Trend & volatility  |
| Aggregate features | Store/item behavior |

In [3]:
# Location of the feature-engineered dataset, relative to this notebook.
FEATURED_DATA_PATH = "../data/processed/featured_data.csv"

# Parse "date" while reading so it can be compared against dates later on.
df = pd.read_csv(
    FEATURED_DATA_PATH,
    parse_dates=["date"],
)

In [4]:
# Show the dtype of every column; used here to confirm that "date" was parsed
# as a datetime and that the engineered features are numeric.
df.dtypes

date               datetime64[ns]
store                       int64
item                        int64
sales                       int64
year                        int64
month                       int64
week                        int64
day                         int64
dayofweek                   int64
is_weekend                  int64
sales_lag_1               float64
sales_lag_7               float64
sales_lag_14              float64
sales_lag_28              float64
rolling_mean_7            float64
rolling_mean_14           float64
rolling_mean_28           float64
store_avg_sales           float64
item_avg_sales            float64
dtype: object

## Data Split - Train & Validation

In [5]:
# Chronological hold-out: everything before the split date is used for
# training, everything on or after it for validation.
split_date = "2017-01-01"

in_train = df["date"] < split_date
in_val = df["date"] >= split_date

train_df = df.loc[in_train]
val_df = df.loc[in_val]

In [6]:

def create_sequences(series, window=28):
    """Build sliding-window samples for next-step forecasting.

    Sample ``i`` uses ``series[i : i + window]`` as input and
    ``series[i + window]`` as the target.

    Parameters
    ----------
    series : array-like
        Ordered observations (list, NumPy array or pandas Series). Indexing is
        always positional, so a pandas Series with an arbitrary index is safe.
    window : int, default 28
        Number of consecutive observations in each input sample.

    Returns
    -------
    X : np.ndarray
        Shape ``(n_samples, window)`` (plus any trailing dimensions of
        ``series``), where ``n_samples = max(len(series) - window, 0)``.
    y : np.ndarray
        Shape ``(n_samples,)`` (plus trailing dimensions): the value that
        immediately follows each window.

    Raises
    ------
    ValueError
        If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError(f"window must be a positive integer, got {window}")

    # np.asarray drops any pandas index, so integer keys below are positions
    # and never label lookups.
    values = np.asarray(series)
    n_samples = max(len(values) - window, 0)

    # Row i of `positions` is [i, i+1, ..., i+window-1]; fancy indexing then
    # gathers every window at once. With n_samples == 0 this still yields an
    # array of shape (0, window), so downstream reshapes keep working.
    positions = np.arange(n_samples)[:, None] + np.arange(window)[None, :]
    X = values[positions]
    y = values[window:]
    return X, y

# Number of past observations fed to the model for each prediction.
SEQ_WINDOW = 28


def _sequences_per_series(frame, window):
    """Build sliding-window samples separately for every (store, item) series.

    The data holds one sales history per store/item pair. Sliding a window
    over the flat ``sales`` column would create samples that start in one
    series and end in another, so each pair is windowed on its own (in date
    order) and the results are concatenated.

    Returns ``(X, y)`` with ``X`` of shape ``(n_samples, window)`` and ``y`` of
    shape ``(n_samples,)``; both are empty if no series is longer than
    ``window``.
    """
    ordered = frame.sort_values(["store", "item", "date"])
    X_parts, y_parts = [], []
    for _, group in ordered.groupby(["store", "item"], sort=False):
        values = group["sales"].values
        if len(values) <= window:
            # Not enough history for even one (window, target) pair.
            continue
        X_group, y_group = create_sequences(values, window)
        X_parts.append(X_group)
        y_parts.append(y_group)

    if not X_parts:
        return np.empty((0, window)), np.empty((0,))
    return np.concatenate(X_parts), np.concatenate(y_parts)


# Flat sales arrays kept for any later cell that still refers to them.
train_series = train_df["sales"].values
val_series   = val_df["sales"].values

# NOTE(review): sample counts differ from a flat sliding window because the
# first SEQ_WINDOW rows of *each* store/item series have no full history;
# verify any later cell that aligns predictions with val_df rows.
X_train_seq, y_train_seq = _sequences_per_series(train_df, SEQ_WINDOW)
X_val_seq, y_val_seq     = _sequences_per_series(val_df, SEQ_WINDOW)

# reshape for LSTM [samples, timesteps, features]
X_train_seq = X_train_seq.reshape(-1, SEQ_WINDOW, 1)
X_val_seq   = X_val_seq.reshape(-1, SEQ_WINDOW, 1)
