In [15]:
import pandas as pd
import numpy as np
from lightgbm import LGBMRegressor
from lightgbm.callback import early_stopping, log_evaluation
from datetime import timedelta

import os
import joblib

In [None]:
# Load preprocessed data
sales_model = pd.read_pickle("../Data/Processed/LightGBM_data.pkl")


In [10]:
# We are only interested intraining FOODS_3_819_WI_3_evaluation
sku_id = 'FOODS_3_819_WI_3_evaluation'
sku_df = sales_model[sales_model['id'] == sku_id].copy()
sku_df = sku_df.sort_values('date').reset_index(drop=True)

In [None]:
# Define features and target
features = ['lag_7', 'lag_28', 'rmean_7', 'rmean_28', 'sell_price',
            'day', 'weekday', 'month', 'year']
X = sales_model[features]
y = sales_model['sales']

X.head()

Unnamed: 0,lag_7,lag_28,rmean_7,rmean_28,sell_price,day,weekday,month,year
855332,1.0,3.0,1.857143,1.392857,2.0,26,5,2,2011
885822,2.0,0.0,2.0,1.357143,2.0,27,6,2,2011
916312,0.0,0.0,2.0,1.428571,2.0,28,0,2,2011
946802,2.0,1.0,2.0,1.428571,2.0,1,1,3,2011
977292,2.0,4.0,2.0,1.464286,2.0,2,2,3,2011


In [12]:
# Train LGBM on full historical data
model = LGBMRegressor(
    objective='regression',
    n_estimators=100,
    learning_rate=0.1,
    max_depth=6,
    random_state=42
)


model.fit(X, y, eval_metric='rmse')

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.578702 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1015
[LightGBM] [Info] Number of data points in the train set: 46544102, number of used features: 9
[LightGBM] [Info] Start training from score 1.422146


In [13]:
# Initialize with last 28 days for lags and rolling values
history = sku_df.iloc[-28:].copy()  
forecast_records = []
last_date = history['date'].iloc[-1]

for i in range(1, 29):
    forecast_date = last_date + timedelta(days=i)

    row = {}
    row['lag_7'] = history['sales'].iloc[-7]
    row['lag_28'] = history['sales'].iloc[0]
    row['rmean_7'] = history['sales'].iloc[-7:].mean()
    row['rmean_28'] = history['sales'].mean()
    row['sell_price'] = history['sell_price'].iloc[-1]

    row['date'] = forecast_date
    row['day'] = forecast_date.day
    row['weekday'] = forecast_date.weekday()
    row['month'] = forecast_date.month
    row['year'] = forecast_date.year

    X_next = pd.DataFrame([row])[features]
    y_next = model.predict(X_next)[0]
    row['sales'] = y_next

    # Store forecast
    forecast_records.append({
        'item_id': sku_id.split('_WI')[0],     # e.g., FOODS_3_819
        'state_id': 'WI',
        'date': forecast_date,
        'forecast': y_next
        # 'quantile': 0.5
    })

    # Update history for next prediction
    history = pd.concat([history, pd.DataFrame([row])], ignore_index=True)

In [16]:
forecast_df = pd.DataFrame(forecast_records)

# Preview
print("Final 28-day point forecast (LGBM):")
print(forecast_df[['date', 'forecast']].to_string(index=False))

# Save forecast
point_forecast_path_folder = "Point Forecasts"
os.makedirs(point_forecast_path_folder, exist_ok=True)

model_path = os.path.join(point_forecast_path_folder, "lgbm_point_forecast.pkl")
joblib.dump(model, model_path)
print(f"✅ Saved point forecast model to {model_path}")

# Save forecast DataFrame
forecast_csv_path = os.path.join(point_forecast_path_folder, "FOODS_3_819_WI_3_forecast.csv")
forecast_df.to_csv(forecast_csv_path, index=False)
print(f"✅ Saved 28-day forecast to {forecast_csv_path}")

Final 28-day point forecast (LGBM):
      date  forecast
2016-05-23  2.091624
2016-05-24  1.843422
2016-05-25  1.829836
2016-05-26  1.708561
2016-05-27  1.799401
2016-05-28  2.071868
2016-05-29  2.091565
2016-05-30  1.775117
2016-05-31  1.701756
2016-06-01  1.865495
2016-06-02  1.865495
2016-06-03  2.059802
2016-06-04  2.361896
2016-06-05  2.388813
2016-06-06  1.938525
2016-06-07  1.800957
2016-06-08  1.800957
2016-06-09  1.800957
2016-06-10  1.956396
2016-06-11  2.383265
2016-06-12  2.383265
2016-06-13  1.932977
2016-06-14  1.800957
2016-06-15  1.800957
2016-06-16  1.722673
2016-06-17  1.839148
2016-06-18  2.251970
2016-06-19  2.246627
✅ Saved point forecast model to Point Forecasts/lgbm_point_forecast.pkl
✅ Saved 28-day forecast to Point Forecasts/FOODS_3_819_WI_3_forecast.csv
