*Lag Feature for Quantity and Amount*

In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error
from math import sqrt

In [2]:
df = pd.read_csv('data\product_weekly_sales.csv', parse_dates= ['week'])

  df = pd.read_csv('data\product_weekly_sales.csv', parse_dates= ['week'])


In [3]:
df = df.sort_values(['ProductCategory', 'week']).reset_index(drop = True)

In [None]:
# create lag for xgboost
for lag in [1,2,3]:
    df[f'lag_qty_{lag}'] = df.groupby('ProductCategory')['Product_Weekly_Quantity'].shift(lag)

for lag in [1,2,3]:
    df[f'lag_amt_{lag}'] = df.groupby('ProductCategory')['ProductWeeklyAmount'].shift(lag)

In [6]:
#  Drop missing lag values
df_model = df.dropna(subset= ['lag_qty_1', 'lag_qty_2', 'lag_qty_3',
                              'lag_amt_1', 'lag_amt_2', 'lag_amt_3',])

In [7]:
le = LabelEncoder()
df_model['ProductCategory_encoded'] = le.fit_transform(df_model['ProductCategory'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_model['ProductCategory_encoded'] = le.fit_transform(df_model['ProductCategory'])


In [8]:
features = ['ProductCategory_encoded',
            'lag_qty_1', 'lag_qty_2', 'lag_qty_3',
            'lag_amt_1', 'lag_amt_2', 'lag_amt_3',]

In [9]:
x = df_model[features]

In [10]:
y_quantity = df_model['Product_Weekly_Quantity']
y_amount = df_model['ProductWeeklyAmount']

In [11]:
# Train test split
x_train, x_test, yq_train, yq_test, ya_train, ya_test = train_test_split(
    x, y_quantity, y_amount, test_size= 0.3, random_state= 42
)

In [12]:
# predict quantity
model_quantity = XGBRegressor(random_state = 42)
model_quantity.fit(x_train, yq_train)

# predict Amount
model_amount = XGBRegressor(random_state = 42)
model_amount.fit(x_train, ya_train)

In [13]:
pred_q = model_quantity.predict(x_test)
pred_a = model_amount.predict(x_test)

In [14]:
rmse_q = sqrt(mean_squared_error(yq_test, pred_q))
rmse_a = sqrt(mean_squared_error(ya_test,pred_a))

In [15]:
print(f"Quantity Prediction RMSE: {round(rmse_q, 2)}")
print(f"Amount Prediction RMSE:   {round(rmse_a, 2)}")

Quantity Prediction RMSE: 137.78
Amount Prediction RMSE:   7739.6


*Prediction*

In [38]:
category_forecast_states = []

In [40]:
for category in df_model['ProductCategory'].unique():
    cat_data = df_model[df_model['ProductCategory'] == category].sort_values('week')

    category_forecast_states.append({
        'ProductCategory': category,
        'ProductCategory_encoded': le.transform([category])[0],
        'lags_qty': [
            cat_data.iloc[-1]['Product_Weekly_Quantity'],
            cat_data.iloc[-2]['Product_Weekly_Quantity'],
            cat_data.iloc[-3]['Product_Weekly_Quantity']
        ],
        'lags_amt': [
            cat_data.iloc[-1]['ProductWeeklyAmount'],
            cat_data.iloc[-2]['ProductWeeklyAmount'],
            cat_data.iloc[-3]['ProductWeeklyAmount']
        ]
    })


In [41]:
category_forecast_states

[{'ProductCategory': 'Books',
  'ProductCategory_encoded': 0,
  'lags_qty': [2452, 2512, 2391],
  'lags_amt': [118114.797901213, 124540.82401201, 118048.24350042]},
 {'ProductCategory': 'Clothing',
  'ProductCategory_encoded': 1,
  'lags_qty': [2401, 2417, 2199],
  'lags_amt': [114742.889113731, 121196.07440369, 111322.29801797]},
 {'ProductCategory': 'Electronics',
  'ProductCategory_encoded': 2,
  'lags_qty': [2097, 2260, 2382],
  'lags_amt': [101153.889714404, 111871.207064478, 118554.417347712]},
 {'ProductCategory': 'Home Decor',
  'ProductCategory_encoded': 3,
  'lags_qty': [2437, 2374, 2383],
  'lags_amt': [119379.469890641, 117977.039406069, 117706.070200763]}]

In [37]:
forecast_input_df = pd.DataFrame(latest_records)

In [None]:
forecast_results = []

for week_ahead in range(1, 13): 
    for state in category_forecast_states:
        features = [
            state['ProductCategory_encoded'],
            state['lags_qty'][0], state['lags_qty'][1], state['lags_qty'][2],
            state['lags_amt'][0], state['lags_amt'][1], state['lags_amt'][2]
        ]

        pred_qty = model_quantity.predict([features])[0]
        pred_amt = model_amount.predict([features])[0]

        forecast_results.append({
            'ProductCategory': state['ProductCategory'],
            'week_ahead': week_ahead,
            'Predicted_Quantity': max(0, pred_qty),
            'Predicted_Amount': max(0, pred_amt)
        })

        state['lags_qty'] = [pred_qty] + state['lags_qty'][:2]
        state['lags_amt'] = [pred_amt] + state['lags_amt'][:2]


In [47]:
forecast_df = pd.DataFrame(forecast_results)
forecast_df = forecast_df.sort_values(['ProductCategory', 'week_ahead']).reset_index(drop=True)

In [48]:
forecast_df.to_csv('data/Product_12_week_forecast.csv' ,index = False)

In [53]:
import joblib

joblib.dump(model_quantity, 'model/xgb_model_quantity.pkl')
joblib.dump(model_amount, 'model/xgb_model_amount.pkl')
joblib.dump(le, 'model/product_category_encoder.pkl')


['model/product_category_encoder.pkl']