The get_sales_related_features function generates a series of sales-related features from the target variable, which contains weekly sales data. It calculates several rolling statistics for a four-week window, including the average sales, standard deviation, minimum, and maximum sales values. Additionally, the function computes sales figures for previous weeks, such as previous week's sales, two-week, three-week, and monthly sales sums. It also calculates outlet-wise minimum and maximum sales.

The function calculates several other features, including the weekly sales change compared to the next and previous weeks, the ratio of sales to the maximum sales in the dataset, and the cumulative sales up to the current week. These features are essential for modeling sales performance and can be used to detect trends and patterns in sales data, helping to forecast future sales more accurately. The final output contains these calculated features for further analysis and model training.

In [1]:
%run ./run_script.ipynb

conf = get_conf()

trans = get_datasources(conf)["trans_info"]
item = get_datasources(conf)["item_info"]
stores = get_datasources(conf)["outlets_info"]

trans = pre_process_transaction_info(trans)
item = pre_process_item_info(item)
store = pre_process_stores_info(stores)

trans_weekly= get_weekly_sales(item, trans)
primary_keys =  create_primary_keys(trans_weekly)
weekly_sales = identify_weekly_sales(trans_weekly, primary_keys)
target_variable = create_target_variable(weekly_sales)

In [2]:
def get_sales_related_features(target_variable):
    
    """
    Generating sales related features 
    
    Args:
        target_variable: Pandas DataFrame
            Target Variable
    
    Returns:
        sales_related_features: Pandas DataFrame
            Sales related features
    """
    
    sales_related_features = target_variable.copy()
    sales_related_features = sales_related_features.sort_values(by="week").reset_index(drop=True)
    
    window = sales_related_features.groupby(["outlet_code", "item_department"]).rolling(window=4)
    
    sales_related_features["fe_avg_4_week_sales"] = window["total_sales_qty"].mean().reset_index(
        level=[0, 1,2],drop=True)
    sales_related_features["fe_4_weeks_std_dev_weekly"] = window["total_sales_qty"].std().reset_index(
        level=[0, 1,2], drop=True)
    sales_related_features["fe_4_weeks_weekly_min_sales"]=window["total_sales_qty"].min().reset_index(
        level=[0, 1,2], drop=True)
    sales_related_features["fe_4_weeks_weekly_max_sales"]=window["total_sales_qty"].max().reset_index(
        level=[0, 1,2], drop=True)
    
    sales_related_features = sales_related_features.sort_values(by=["outlet_code", "item_department",
                                                                    "week"])
    # Previous week sales
    sales_related_features['previous_week_sales']=sales_related_features.groupby(['outlet_code',
                                                                                  'item_department']
                                                                                )['total_sales_qty'].shift(1)
    # Previous 2 weeks sales
    sales_related_features['prev_2_weeks_sales']=sales_related_features.groupby(['outlet_code', 'item_department'])['total_sales_qty'].rolling(window=2).sum().reset_index(level=[0, 1], drop=True)
    
    # Previous 3 weeks sales
    sales_related_features['prev_3_weeks_sales'] = sales_related_features.groupby(['outlet_code', 'item_department'])['total_sales_qty'].rolling(window=3).sum().reset_index(level=[0, 1], drop=True)
    
    # Previous month sales
    sales_related_features['prev_month_sales'] = sales_related_features.groupby(['outlet_code', 'item_department'])['total_sales_qty'].rolling(window=4).sum().reset_index(level=[0, 1], drop=True)
    
    # Outlet-wise minimum and maximum sales
    sales_related_features['outlet_min_sales'] = sales_related_features.groupby('outlet_code')['total_sales_qty'].transform('min')
    sales_related_features['outlet_max_sales'] = sales_related_features.groupby('outlet_code')['total_sales_qty'].transform('max')

    # Calculate the weekly sales change
    sales_related_features['fe_sales_change_vs_next_week'] = sales_related_features['sales_next_week'] - sales_related_features['total_sales_qty']
    
    # Calculate the difference between current week's sales and previous week's sales
    sales_related_features['fe_sales_change_vs_previous_week'] = sales_related_features['total_sales_qty'].diff()
    #sales_related_features['fe_sales_change_vs_previous_week'].fillna(0, inplace=True)
    
    # Calculate the ratio of sales to the maximum sales in the dataset
    sales_related_features['fe_sales_to_max_sales_ratio'] = sales_related_features['total_sales_qty'] / sales_related_features['total_sales_qty'].max()
    
    # Calculate the cumulative sales up to the current week
    sales_related_features['fe_cumulative_sales'] = sales_related_features['total_sales_qty'].cumsum()
    
    sales_related_features = sales_related_features[[
    'outlet_code',
    'item_department',
    'week',
    'fe_avg_4_week_sales',
    'fe_4_weeks_std_dev_weekly',
    'fe_4_weeks_weekly_min_sales',
    'fe_4_weeks_weekly_max_sales',
    'previous_week_sales',
    'prev_2_weeks_sales',
    'prev_3_weeks_sales',
    'prev_month_sales',
    'outlet_min_sales',
    'outlet_max_sales',
    'fe_sales_change_vs_next_week',
    'fe_sales_change_vs_previous_week',
    'fe_sales_to_max_sales_ratio',
    'fe_cumulative_sales'
    ]]
    
    return sales_related_features


In [3]:
sales_related_features = get_sales_related_features(target_variable)
sales_related_features

Unnamed: 0,outlet_code,item_department,week,fe_avg_4_week_sales,fe_4_weeks_std_dev_weekly,fe_4_weeks_weekly_min_sales,fe_4_weeks_weekly_max_sales,previous_week_sales,prev_2_weeks_sales,prev_3_weeks_sales,prev_month_sales,outlet_min_sales,outlet_max_sales,fe_sales_change_vs_next_week,fe_sales_change_vs_previous_week,fe_sales_to_max_sales_ratio,fe_cumulative_sales
0,A,Beverages,2022-01-17,,,,,,,,,40.0,3516.0,744.0,,0.129944,598.0
27,A,Beverages,2022-01-24,1523.50,111.207014,1371.0,1638.0,598.0,1940.0,,,40.0,3516.0,402.0,744.0,0.291612,1940.0
41,A,Beverages,2022-01-31,,,,,1342.0,3086.0,3684.0,,40.0,3516.0,-646.0,402.0,0.378966,3684.0
59,A,Beverages,2022-02-07,273.25,70.863131,194.0,365.0,1744.0,2842.0,4184.0,4782.0,40.0,3516.0,476.0,-646.0,0.238592,4782.0
62,A,Beverages,2022-02-14,217.50,18.448125,194.0,239.0,1098.0,2672.0,4416.0,5758.0,40.0,3516.0,-270.0,476.0,0.342025,6356.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
538,E,Grocery,2022-09-19,60.75,14.407753,50.0,82.0,612.0,1216.0,1912.0,2662.0,0.0,1051.0,65.0,-8.0,0.131247,586765.0
554,E,Grocery,2022-09-26,39.25,4.645787,33.0,44.0,604.0,1273.0,1885.0,2581.0,0.0,1051.0,22.0,65.0,0.145372,587434.0
565,E,Grocery,2022-10-03,0.00,0.000000,0.0,0.0,669.0,1360.0,1964.0,2576.0,0.0,1051.0,46.0,22.0,0.150152,588125.0
582,E,Grocery,2022-10-10,840.00,80.502588,759.0,945.0,691.0,1428.0,2097.0,2701.0,0.0,1051.0,-537.0,46.0,0.160148,588862.0
