In [None]:
# Data Import and Loading.
import pandas as pd
import numpy as np

# Read the merged retail sales table and convert the Date column to datetime
# so that the .dt accessor can be used on it.
# NOTE(review): the preview output shows an "Unnamed: 0" column, which looks like
# an index written out by an earlier to_csv call - confirm whether it is wanted.
df = pd.read_csv("../data/merged_retail_sales.csv")
df = df.assign(Date=pd.to_datetime(df['Date']))

# Show the first rows as a quick sanity check of the load.
df.head()

Unnamed: 0,Store,Dept,Date,Weekly_Sales,IsHoliday,Type,Size,Temperature,Fuel_Price,MarkDown1,MarkDown2,MarkDown3,MarkDown4,MarkDown5,CPI,Unemployment,IsHoliday_y
0,1,1,2010-02-05,24924.5,False,A,151315,42.31,2.572,5347.45,192.0,24.6,1481.31,3359.45,211.096358,8.106,False
1,1,1,2010-02-12,46039.49,True,A,151315,38.51,2.548,5347.45,192.0,24.6,1481.31,3359.45,211.24217,8.106,True
2,1,1,2010-02-19,41595.55,False,A,151315,39.93,2.514,5347.45,192.0,24.6,1481.31,3359.45,211.289143,8.106,False
3,1,1,2010-02-26,19403.54,False,A,151315,46.63,2.561,5347.45,192.0,24.6,1481.31,3359.45,211.319643,8.106,False
4,1,1,2010-03-05,21827.9,False,A,151315,46.5,2.625,5347.45,192.0,24.6,1481.31,3359.45,211.350143,8.106,False


In [None]:
# Calendar features derived from the Date column.
# Week is the ISO week number; DayOfWeek runs from Monday=0 to Sunday=6.
df = df.assign(
    Year=lambda frame: frame['Date'].dt.year,
    Month=lambda frame: frame['Date'].dt.month,
    Week=lambda frame: frame['Date'].dt.isocalendar().week,
    DayOfWeek=lambda frame: frame['Date'].dt.dayofweek,
)

In [4]:
# Lagged sales: the Weekly_Sales value from 1 and 2 rows earlier within the
# same (Store, Dept) series. Rows are ordered by date first so that "earlier
# row" means "earlier week"; the first row(s) of every series have no
# predecessor and are left as NaN.
# NOTE(review): a row-based shift equals "last week" only if no weeks are
# missing inside a series - confirm against the data.
df = (
    df.sort_values(['Store', 'Dept', 'Date'])
      .assign(
          Sales_Lag_1=lambda frame: frame.groupby(['Store', 'Dept'])['Weekly_Sales'].shift(1),
          Sales_Lag_2=lambda frame: frame.groupby(['Store', 'Dept'])['Weekly_Sales'].shift(2),
      )
)

In [11]:
# Rolling-average feature: mean Weekly_Sales over the 4 rows *preceding* each
# row, computed separately for every (Store, Dept) series. Smooths trends and
# avoids overreacting to one week's spike.
#
# shift(1) moves the window back by one row so the current week's Weekly_Sales
# (the value being predicted) is not part of its own feature; without it the
# average leaks the target. The first 4 rows of each series have an incomplete
# window and are left as NaN.
#
# Relies on rows being in date order within each series (the lag-feature cell
# sorts by Store, Dept, Date).
df['Sales_MA_4'] = (
    df.groupby(['Store', 'Dept'])['Weekly_Sales']
      .transform(lambda sales: sales.shift(1).rolling(window=4).mean())
)

In [None]:
# Integer holiday indicator built from the existing IsHoliday column:
# 1 for a holiday week, 0 otherwise. Kept alongside IsHoliday as a more
# readable numeric input for modelling.
df = df.assign(Holiday_Flag=lambda frame: frame['IsHoliday'].astype(int))

In [9]:
# Handle missing values from lag and rolling features.
# Only the engineered columns are filled: their NaNs come from the start of
# each (Store, Dept) series, where no earlier rows exist. NaNs in any other
# column are left untouched so that genuine gaps in the source data are not
# silently turned into zeros.
# NOTE(review): 0 is a placeholder, not an observed sales value - consider
# dropping these warm-up rows before modelling instead.
engineered_cols = ['Sales_Lag_1', 'Sales_Lag_2', 'Sales_MA_4']
df[engineered_cols] = df[engineered_cols].fillna(0)

In [13]:
# Persist the engineered table for later use. The DataFrame index is not
# written, so the file contains only the data columns.
output_path = "../data/retail_sales_engineered.csv"
df.to_csv(output_path, index=False)
print("Feature engineering complete. Engineered dataset saved.")

Feature engineering complete. Engineered dataset saved.
