## Sales_Forecasting_m5 - Forecasting Scope + Data Reshaping

In [1]:
# Import libraries
from pathlib import Path

import pandas as pd

# Read the raw inputs from disk.
# `calendar` is the lookup that carries the "d" and "date" columns;
# `sales` is the wide table that gets melted into long format further down.
calendar = pd.read_csv(filepath_or_buffer="../data/raw/calendar.csv")
sales = pd.read_csv(filepath_or_buffer="../data/raw/sales_train_validation.csv")

In [2]:
# Step 1: Forecasting Scope + Data Reshaping

# Build the "d" -> date lookup and parse the dates here, once per calendar row,
# instead of parsing them once per row of the melted sales frame after the merge.
calendar_dates = calendar[["d", "date"]].assign(
    date=lambda lookup: pd.to_datetime(lookup["date"])
)

# Melt sales data (wide → long): every column that is not listed in `id_vars`
# becomes one row, with the column name in "d" and the cell value in "sales".
# The result is then merged with the calendar lookup to get actual dates.
sales_long = sales.melt(
    id_vars=["id", "item_id", "dept_id", "cat_id", "store_id", "state_id"],
    var_name="d",
    value_name="sales",
).merge(
    calendar_dates,
    on="d",
    how="left",
    # Each "d" key may match at most one calendar row. A duplicated key would
    # silently duplicate sales rows and inflate every downstream sum, so let
    # pandas raise a MergeError instead.
    validate="many_to_one",
)

# A left merge leaves NaT for any "d" value that is absent from the calendar,
# and a later groupby on "date" would drop those rows without warning.
assert sales_long["date"].notna().all(), "some 'd' keys have no matching calendar date"

In [3]:
# Collapse the long frame to one row per (store_id, date) pair by summing the
# "sales" values of all rows that share that pair. The grouping keys are kept
# as ordinary columns rather than as an index.
store_daily_sales = sales_long.groupby(
    ["store_id", "date"], as_index=False
)["sales"].sum()

In [4]:
# Save Processed Dataset
# DataFrame.to_csv does not create missing parent directories, so make sure the
# output folder exists first; otherwise the write fails on a fresh checkout.
processed_path = Path("../data/processed/store_daily_sales.csv")
processed_path.parent.mkdir(parents=True, exist_ok=True)

# index=False keeps the positional row index out of the file.
store_daily_sales.to_csv(processed_path, index=False)

In [5]:
# Sanity checks
# A notebook cell renders only its final expression, so the intermediate checks
# are passed to display() explicitly (display is provided by the IPython
# kernel); as bare expressions they would be computed and thrown away.
display(
    store_daily_sales["store_id"].unique(),
    store_daily_sales.head(),
    store_daily_sales.tail(),
)

# Left as the last expression so the summary statistics remain the cell output.
store_daily_sales.describe()

Unnamed: 0,date,sales
count,19130,19130.0
mean,2013-09-11 00:00:00,3434.156247
min,2011-01-29 00:00:00,0.0
25%,2012-05-21 00:00:00,2506.25
50%,2013-09-11 00:00:00,3228.0
75%,2015-01-02 00:00:00,4091.0
max,2016-04-24 00:00:00,9338.0
std,,1313.723126
