In [5]:
import pandas as pd
import  yaml
import warnings
import holidays
from datetime import date

# Suppress all warnings for the rest of the session.
# NOTE(review): this is a blanket filter, so deprecation/FutureWarning notices
# from libraries are hidden as well — consider narrowing it to specific categories.
warnings.filterwarnings("ignore")

In [6]:
def load_config(config_path=r'D:\Lumini\NIB7072\Q5_sales_forecasting\Q5_sales_forecasting\config\config.yaml'):
    """Load the project's YAML configuration file.

    Args:
        config_path: Location of the YAML file. The default is an absolute
            path on the author's machine; pass an explicit path when running
            anywhere else.

    Returns:
        The object produced by ``yaml.safe_load`` for the file's contents.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
        yaml.YAMLError: If the contents are not valid YAML.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(config_path, 'r', encoding='utf-8') as config_file:
        return yaml.safe_load(config_file)
# Load the configuration from the default path.
config = load_config()
# Location of the processed training CSV, stored in the config under
# data -> processed -> processed_train_data_path.
train_data_path = config['data']['processed']['processed_train_data_path']

### Sales-Related Features


In [7]:
# Read the processed training data from the path given in the config.
# The cells below expect the columns store, item_dept, date_id and daily_sales_qty.
grouped_df = pd.read_csv(train_data_path)

In [8]:
# Every feature below is computed separately for each (store, item_dept)
# series, following the frame's current row order.
# NOTE(review): assumes rows are already in chronological order within each
# series — nothing in this cell sorts them; confirm against the input file.
sales_by_series = grouped_df.groupby(['store', 'item_dept'])['daily_sales_qty']

# Rolling average over the current row and up to six preceding rows
# (min_periods=1, so the first rows of a series average whatever is available).
# NOTE(review): the window includes the current row's daily_sales_qty; if that
# column is the forecasting target this leaks it into the feature — confirm.
grouped_df['rolling_avg_7d'] = sales_by_series.transform(
    lambda x: x.rolling(window=7, min_periods=1).mean()
)

# Cumulative sales: running total within each series (also includes the current row).
grouped_df['cumulative_sales'] = sales_by_series.cumsum()

# Lag feature: sales of the previous row in the same series.
# The first row of each series has no predecessor; fill that NaN with 0.
# Assigning the filled result back (instead of calling fillna(..., inplace=True)
# on the selected column) guarantees the frame itself is updated; the chained
# in-place form is deprecated in pandas and has no effect under Copy-on-Write.
grouped_df['lag_1d'] = sales_by_series.shift(1).fillna(0)

In [9]:
# Show the last rows to spot-check the newly added feature columns.
grouped_df.tail()

Unnamed: 0,store,item_dept,date_id,daily_sales_qty,rolling_avg_7d,cumulative_sales,lag_1d
547,ABC,Grocery,12/9/2021,2314.716,2658.681571,237354.228,2345.361
548,ABC,Household,12/9/2021,1006.0,1126.857143,95916.0,981.0
549,XYZ,Beverages,12/9/2021,1104.0,1166.285714,93289.0,1102.0
550,XYZ,Grocery,12/9/2021,3216.296,3652.661714,308569.011,2845.032
551,XYZ,Household,12/9/2021,1049.0,1169.428571,101559.0,848.0


In [10]:
# Parse date_id into datetimes so the .dt accessors can be used on it.
# NOTE(review): no explicit format is passed; values such as 12/9/2021 are
# ambiguous (month-first vs day-first) — confirm the convention and pass format=.
grouped_df['date_id'] = pd.to_datetime(grouped_df['date_id'])

In [11]:
# Shared holiday calendar, created on first use so it is not rebuilt for every
# row when is_holiday is applied across a whole column.
_uk_holiday_calendar = None


def is_holiday(date):
    """Flag whether a date is in the ``holidays.UnitedKingdom()`` calendar.

    Args:
        date: The date to look up; anything the calendar's membership test
            accepts (this notebook passes the values of a datetime column).

    Returns:
        1 if the date is in the calendar, otherwise 0.
    """
    global _uk_holiday_calendar
    if _uk_holiday_calendar is None:
        _uk_holiday_calendar = holidays.UnitedKingdom()
    return int(date in _uk_holiday_calendar)

In [12]:
# Calendar features derived from date_id.

# Weekday number as reported by pandas (Monday=0 ... Sunday=6)
grouped_df['day_of_week'] = grouped_df['date_id'].dt.weekday

# Calendar month number (1-12)
grouped_df['month'] = grouped_df['date_id'].dt.month

# Holiday flag: is_holiday is evaluated once per row and yields 1 or 0
grouped_df['is_holiday'] = grouped_df['date_id'].map(is_holiday)


### Outlet-Related Features

In [15]:
# Store-level attributes used to enrich each sales row.
# NOTE(review): absolute, machine-specific path — consider reading it from the
# config file, as is done for the training-data path.
outlet_features = pd.read_csv(r'D:\Lumini\NIB7072\Q5_sales_forecasting\Q5_sales_forecasting\data\raw\outlet_info.csv')

# Merge the outlet-related features with the sales data.
# The left join keeps every sales row. validate='many_to_one' makes pandas raise
# a MergeError if outlet_info lists a store more than once, instead of silently
# duplicating sales rows.
grouped_df = pd.merge(
    grouped_df,
    outlet_features,
    on='store',
    how='left',
    validate='many_to_one',
)
