In [1]:
# Switch to the parent directory so the relative paths used in later cells resolve.
# NOTE(review): this cell is not idempotent — each re-run moves one more level up;
# restart the kernel before running it again.
%cd ../

/home/hoanghu/projects/Food-Waste-Optimization


In [2]:
from pathlib import Path
import json

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from darts.models import LightGBMModel
from darts import TimeSeries
from darts import metrics

Support for Torch based models not available. To enable them, install "darts", "u8darts[torch]" or "u8darts[all]" (with pip); or "u8darts-torch" or "u8darts-all" (with conda).


In [3]:
# Plot appearance: seaborn-flavoured matplotlib style sheet with a small base font.
plt.style.use('seaborn-v0_8')
plt.rcParams['font.size'] = 8

In [4]:
# Root folder for trained model artefacts.
path_root_trained_model = Path("trained_models")

# Folder holding the processed CSV files read by this notebook.
path_dir_processed = Path("experiments_hoangle/processed")
path_fact = path_dir_processed.joinpath("fact.csv")
path_metrics = path_dir_processed.joinpath("metrics.csv")

# Read data

In [5]:
# Load the fact table; the first CSV column is parsed as datetimes.
raw = pd.read_csv(
    path_fact,
    header=0,
    parse_dates=[0],
)

# Preview the first five rows.
raw.head()

Unnamed: 0,date,restaurant,num_fish,num_chicken,num_vegetarian,num_meat,num_NotMapped,num_vegan,num_customer_in,num_customer_out,num_rcpts,amnt_waste_customer,amnt_waste_coffee,amnt_waste_kitchen,amnt_waste_hall
0,2023-01-02,Chemicum,85.0,0.0,0.0,171.0,1.0,91.0,,,272.0,4.7,1.2,12.0,0.0
1,2023-01-03,Chemicum,163.0,0.0,32.0,78.0,1.0,120.0,,,327.0,5.0,1.4,14.8,0.0
2,2023-01-04,Chemicum,70.0,0.0,0.0,218.0,3.0,137.0,,,351.0,4.15,4.0,7.1,0.0
3,2023-01-05,Chemicum,232.0,85.0,0.0,2.0,4.0,178.0,,,437.0,10.0,3.3,8.5,0.0
4,2023-01-06,Chemicum,,,,,,,,,,,,,


# Start building model

In [6]:
# Distinct restaurant names found in the fact table.
RESTAURANTS = raw["restaurant"].unique()

# Common date window applied to every restaurant, so that the series (and
# therefore the forecasts) of all restaurants cover exactly the same dates.
DATE_START = "2023-01-09"
DATE_END = "2024-05-08"

# Small offset added to the series so that no value is exactly 0.
EPS = 1e-4

# Steps ahead to forecast; shared by training (output_chunk_length) and by
# backtesting/predicting (forecast_horizon).
FORECAST_HORIZON = 1

In [7]:
# Name of the column in `raw` that is forecast.
col_tgt = 'num_rcpts'

# Business-day calendar shared by all restaurants.
freq = pd.offsets.BusinessDay()
date_range = pd.date_range(start=DATE_START, end=DATE_END, freq=freq)

# Wide frame: one row per business day, one '<restaurant>_rcpts' column per restaurant.
df_tgt = pd.DataFrame({'date': date_range})

for restaurant in RESTAURANTS:
    # Observed target values of this restaurant; rows with any missing value are dropped.
    tgt_raw = raw.loc[raw['restaurant'] == restaurant, ['date', col_tgt]].dropna()

    tgt = (
        pd.DataFrame({'date': date_range})
        # Align the observations on the full business-day calendar.
        .merge(tgt_raw, on='date', how='left')
        # Days without data are treated as holidays, i.e. 0 receipts.
        .fillna(0)
        # Rename via `col_tgt` (not a hard-coded name) so the column is always
        # renamed; otherwise later merges would collide on the raw column name.
        .rename(columns={col_tgt: f"{restaurant}_rcpts"})
    )

    # Append this restaurant's column to the main frame.
    df_tgt = df_tgt.merge(tgt, on='date', how='left')

In [8]:
# Preview the first rows of the merged per-restaurant target frame.
df_tgt.head()

Unnamed: 0,date,Chemicum_rcpts,Physicum_rcpts,Exactum_rcpts
0,2023-01-09,529.0,125.0,100.0
1,2023-01-10,456.0,173.0,213.0
2,2023-01-11,561.0,201.0,217.0
3,2023-01-12,541.0,197.0,162.0
4,2023-01-13,540.0,185.0,138.0


In [9]:
# Columns of df_tgt that become the components of the multivariate series.
cols_tgt = ['Chemicum_rcpts', 'Physicum_rcpts', 'Exactum_rcpts']

# Build the business-day series and shift every value by EPS to avoid exact 0s.
series_tgt = TimeSeries.from_dataframe(
    df_tgt,
    time_col='date',
    value_cols=cols_tgt,
    fill_missing_dates=False,
    freq=freq,
) + EPS

## 2. Build model

In [10]:
# Calendar attributes encoded from the time index and fed to the model as
# future covariates, both cyclically and as plain datetime attributes.
encoder_attrs = ['dayofweek', 'day', 'month']
add_encoders = {
    'cyclic': {'future': list(encoder_attrs)},
    'datetime_attribute': {'future': list(encoder_attrs)},
}

# LightGBM forecaster using 7 target lags plus the encoded covariates at lag 0.
model = LightGBMModel(
    lags=7,
    lags_future_covariates=[0],
    output_chunk_length=FORECAST_HORIZON,
    add_encoders=add_encoders,
    verbose=-1,
)

## 3. Backtest

In [11]:
# Backtest on the full series, scoring RMSE and MSE without reducing over components.
# NOTE(review): the saved run of this cell was interrupted at 0/339 iterations, so
# the two-way unpacking below was never confirmed — verify that the leading axis
# of the returned array has one entry per metric.
scores_bt = model.backtest(
    series_tgt,
    forecast_horizon=FORECAST_HORIZON,
    metric=[metrics.rmse, metrics.mse],
    metric_kwargs={'component_reduction': None},
    verbose=True,
)
rmse_bt, mse_bt = scores_bt.tolist()

print(rmse_bt)
print(mse_bt)



  0%|          | 0/339 [00:00<?, ?it/s]

KeyboardInterrupt: 

## 4. Train and save

In [12]:
# Fit on the complete target series; this fitted model is the one saved below.
model.fit(series_tgt)

LightGBMModel(lags=7, lags_past_covariates=None, lags_future_covariates=[0], output_chunk_length=1, output_chunk_shift=0, add_encoders={'cyclic': {'future': ['dayofweek', 'day', 'month']}, 'datetime_attribute': {'future': ['dayofweek', 'day', 'month']}}, likelihood=None, quantiles=None, random_state=None, multi_models=True, use_static_covariates=True, categorical_past_covariates=None, categorical_future_covariates=None, categorical_static_covariates=None, verbose=-1)

In [None]:
# Persist the fitted model under the shared trained-model root defined with the other paths.
# NOTE(review): "LightBGM" in the file name looks like a typo for "LightGBM"; it is
# kept unchanged so anything loading this file by name keeps working — confirm before renaming.
path_model = path_root_trained_model / "receipt" / "Jul_23_LightBGM.pt"

# Create the target folder first so saving also works on a fresh checkout.
path_model.parent.mkdir(parents=True, exist_ok=True)
model.save(path_model)

In [44]:
# Last day for which a forecast is wanted.
date_pred = pd.to_datetime("2024-05-10")

# First forecastable day: the business day right after the training window.
# Derived from DATE_END so it cannot drift from the data the model was fitted on.
date_first_pred = pd.Timestamp(DATE_END) + pd.offsets.BusinessDay()

# Number of business days from the first forecastable day up to date_pred (inclusive),
# i.e. the number of steps to request from the model.
n_bdays = len(pd.bdate_range(start=date_first_pred, end=date_pred))
assert n_bdays >= 1, f"{date_pred.date()} is not after the training window ending {DATE_END}"

n_bdays

2

In [47]:
# Forecast n_bdays steps ahead with the fitted model.
# NOTE(review): the saved output of this cell is a Timestamp rather than a
# forecast, so it looks stale — re-run to confirm.
model.predict(n_bdays)

Timestamp('2024-05-10 00:00:00')