In [1]:
# Switch to the parent directory so the relative paths used below resolve from there.
# NOTE(review): not idempotent — re-running this cell moves up one more level.
%cd ../

/home/hoanghu/projects/Food-Waste-Optimization


In [16]:
from pathlib import Path
import json

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
from darts.models import ARIMA, LinearRegressionModel
from darts import TimeSeries
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.graphics.tsaplots import plot_acf

In [5]:
# Plot appearance: seaborn-flavoured matplotlib theme with a small base font
plt.style.use('seaborn-v0_8')
plt.rcParams['font.size'] = 8

In [6]:
# Directory under which the fitted models are stored
path_root_trained_model = Path("trained_models")

# Processed fact table that feeds the models
path_processed = Path("experiments_hoangle") / "processed" / "fact.csv"

# Read dataset

In [7]:
# Load the processed fact table, parsing the first column as datetimes
df = pd.read_csv(
    path_processed,
    parse_dates=[0],
    header=0,
)

# Preview the first five rows
df.head()

Unnamed: 0,date,restaurant,num_fish,num_chicken,num_vegetable,num_meat,num_NotMapped,num_vegan,num_customer_in,num_customer_out,num_rcpts,amnt_waste_customer,amnt_waste_coffee,amnt_waste_kitchen,amnt_waste_hall
0,2023-01-02,Chemicum,85.0,0.0,0.0,171.0,1.0,91.0,,,272.0,4.7,1.2,12.0,0.0
1,2023-01-03,Chemicum,163.0,0.0,32.0,78.0,1.0,120.0,,,327.0,5.0,1.4,14.8,0.0
2,2023-01-04,Chemicum,70.0,0.0,0.0,218.0,3.0,137.0,,,351.0,4.15,4.0,7.1,0.0
3,2023-01-05,Chemicum,232.0,85.0,0.0,2.0,4.0,178.0,,,437.0,10.0,3.3,8.5,0.0
4,2023-01-06,Chemicum,,,,,,,,,,,,,


# Train & save models

In [8]:
# Distinct restaurant names found in the dataset
RESTAURANTS = df['restaurant'].unique()

# Meal-count columns that are forecast for every restaurant
MEAL_TYPES = [
    'num_fish',
    'num_chicken',
    'num_vegetable',
    'num_meat',
    'num_NotMapped',
    'num_vegan',
]

# One cutoff date is shared by the forecasting models of all 3 restaurants so that
# their predicted series cover the same dates
CUTOFF_DATE = '2024-05-08'

In [22]:
# Business-day calendar used for both the date grid and the Darts series
freq = pd.offsets.BusinessDay()

# Day-of-week features (cyclic encoding plus the raw attribute) generated by Darts as
# past covariates. The configuration is the same for every restaurant, so it is
# defined once instead of being rebuilt on each iteration.
add_encoders = {
    'cyclic': {
        'past': ['dayofweek']
    },
    'datetime_attribute': {'past': ['dayofweek']},
}

for restaurant in RESTAURANTS:
    # Keep this restaurant's rows in which the date and every meal count are present
    is_restaurant = df['restaurant'] == restaurant
    df_restaurant_raw = df.loc[is_restaurant, ['date', *MEAL_TYPES]].dropna(how='any')

    # Build a gap-free business-day grid from the first observation up to CUTOFF_DATE
    # and attach the observed counts; days without an observation stay NaN for now
    date_start = df_restaurant_raw['date'].min()
    date_range = pd.date_range(start=date_start, end=CUTOFF_DATE, freq=freq)

    df_restaurant = pd.DataFrame({'date': date_range})
    df_restaurant = df_restaurant.merge(df_restaurant_raw, on='date', how='left')

    # Impute the missing days with each meal column's mean. The mean is taken over
    # the meal columns only (not the datetime 'date' column) and applied in one
    # vectorised call rather than one in-place fillna per column.
    fillna_vals = df_restaurant[MEAL_TYPES].mean(axis=0)
    df_restaurant[MEAL_TYPES] = df_restaurant[MEAL_TYPES].fillna(fillna_vals)

    # Create the multivariate series (one component per meal type)
    series = TimeSeries.from_dataframe(
        df=df_restaurant,
        time_col='date',
        freq=freq,
        fill_missing_dates=True,
        value_cols=MEAL_TYPES
    )

    # Define model
    model = LinearRegressionModel(
        lags=4,
        lags_past_covariates=5,
        add_encoders=add_encoders,
        output_chunk_length=5
    )

    # Train model
    model.fit(series)

    # Save model as <root>/meal/<restaurant>.pt, creating the folder on first use
    path_model = path_root_trained_model / "meal" / f"{restaurant}.pt"
    path_model.parent.mkdir(exist_ok=True, parents=True)

    model.save(path_model)

# Load models

In [26]:
# Encoder configuration the saved models were trained with. Loading does not need it
# (the saved file already contains the configured model); it is kept here only so the
# name stays defined for anything that refers to it.
add_encoders = {
    'cyclic': {
        'past': ['dayofweek']
    },
    'datetime_attribute': {'past': ['dayofweek']},
}

# Fitted models, keyed by target group and then by restaurant name
models = {'meal': {}}

for restaurant in RESTAURANTS:
    # Same location the training cell saves to
    path_model = path_root_trained_model / "meal" / f"{restaurant}.pt"

    # `load` is a static method that returns the saved model, so no throwaway
    # instance has to be constructed just to call it
    models['meal'][restaurant] = LinearRegressionModel.load(path_model)

In [30]:
def _post_process(prediction):
    if prediction <= 0:
        prediction = 0.

    prediction = round(prediction, 2)

    return prediction

In [31]:
# Number of future steps to forecast for every restaurant
num_of_days = 3

# Forecasts keyed by date string (YYYY-MM-DD); each entry holds the date itself plus
# one dict of post-processed meal counts per restaurant
predictions = {}

# NOTE(review): the stored output of this cell shows identical numbers for every
# restaurant — confirm that the per-restaurant models and input data really differ.
for restaurant in RESTAURANTS:
    pred = models['meal'][restaurant].predict(num_of_days)

    df_pred = pred.pd_dataframe().reset_index()
    df_pred['date'] = df_pred['date'].dt.strftime(r"%Y-%m-%d")

    for row in df_pred.itertuples():
        # Create the per-date entry the first time this date is seen
        day_entry = predictions.setdefault(row.date, {'date': row.date})

        # Driven by MEAL_TYPES so the reported keys always match the columns the
        # models were trained on, instead of a second hard-coded list
        day_entry[restaurant] = {
            meal_type: _post_process(getattr(row, meal_type))
            for meal_type in MEAL_TYPES
        }

print(json.dumps(predictions, indent=2))

{
  "2024-05-09": {
    "date": "2024-05-09",
    "Chemicum": {
      "num_fish": 181.17,
      "num_chicken": 199.95,
      "num_vegetable": 0.0,
      "num_meat": 25.63,
      "num_NotMapped": 131.17,
      "num_vegan": 219.86
    },
    "Physicum": {
      "num_fish": 181.17,
      "num_chicken": 199.95,
      "num_vegetable": 0.0,
      "num_meat": 25.63,
      "num_NotMapped": 131.17,
      "num_vegan": 219.86
    },
    "Exactum": {
      "num_fish": 181.17,
      "num_chicken": 199.95,
      "num_vegetable": 0.0,
      "num_meat": 25.63,
      "num_NotMapped": 131.17,
      "num_vegan": 219.86
    }
  },
  "2024-05-10": {
    "date": "2024-05-10",
    "Chemicum": {
      "num_fish": 120.99,
      "num_chicken": 103.42,
      "num_vegetable": 0.0,
      "num_meat": 158.36,
      "num_NotMapped": 137.5,
      "num_vegan": 193.75
    },
    "Physicum": {
      "num_fish": 120.99,
      "num_chicken": 103.42,
      "num_vegetable": 0.0,
      "num_meat": 158.36,
      "num_NotMapped

In [14]:
# Preview the imputed frame. `df_restaurant` is assigned inside the training loop, so
# this shows whichever restaurant that loop processed last.
df_restaurant.head()

Unnamed: 0,date,num_fish,num_chicken,num_vegetable,num_meat,num_NotMapped,num_vegan
0,2023-01-02,85.0,0.0,0.0,171.0,1.0,91.0
1,2023-01-03,163.0,0.0,32.0,78.0,1.0,120.0
2,2023-01-04,70.0,0.0,0.0,218.0,3.0,137.0
3,2023-01-05,232.0,85.0,0.0,2.0,4.0,178.0
4,2023-01-06,211.667647,135.894118,4.455882,119.238235,37.594118,287.326471
