In [1]:
# Move the working directory one level up so that the relative paths used later
# in the notebook (e.g. "src/data/...", "trained_models") resolve from there.
# NOTE: the target is relative, so re-running this cell climbs one more level each time.
%cd ../

/home/hoanghu/projects/Food-Waste-Optimization


In [2]:
from pathlib import Path
import json

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
from darts.models import ARIMA, LinearRegressionModel
from darts import TimeSeries
from statsmodels.tsa.stattools import adfuller, kpss

Support for Torch based models not available. To enable them, install "darts", "u8darts[torch]" or "u8darts[all]" (with pip); or "u8darts-torch" or "u8darts-all" (with conda).
  from tqdm.autonotebook import tqdm


In [3]:
# Apply the seaborn-flavoured matplotlib style sheet first, then set the default
# font size; the order is kept because a style sheet may carry its own font size.
plt.style.use('seaborn-v0_8')
plt.rcParams['font.size'] = 8

In [4]:
# Excel workbook holding the occupancy records (read in the next section)
path = "src/data/basic_mvp_data/supersight.xlsx"

# Root folder under which the fitted models are saved and later reloaded
path_root_trained_model = Path("trained_models")

# Read dataset

In [5]:
# Load the workbook: the first row is the header and no column is used as the index
occupancy = pd.read_excel(
    path,
    header=0,
    index_col=None,
)

# Preview the first rows
occupancy.head()

Unnamed: 0,dateCreated,countIn,countOut,phoneName
0,2024-03-15T04:26:15.000Z,0,1,S63
1,2024-03-15T04:26:24.000Z,1,2,S63
2,2024-03-15T04:26:27.000Z,1,0,S63
3,2024-03-15T05:19:19.000Z,1,0,S63
4,2024-03-15T05:37:09.000Z,1,0,S63


# Processing

In [6]:
# Hour-of-day window used when building the business-hour frequency and when
# filtering the hourly buckets
hour_start = 8
hour_end = 16

# Value of the `phoneName` column -> restaurant name; rows with any other
# `phoneName` are discarded during processing
phoneName2restaurant = dict(
    S163='Exactum',
    S216='Chemicum',
    S217='Physicum',
)

In [7]:
# Keep only rows coming from the mapped counters and label them with the restaurant.
# `.copy()` yields an independent frame, so the column assignments below do not
# write into a slice of the raw data (avoids pandas' SettingWithCopyWarning).
occupancy = occupancy[occupancy['phoneName'].isin(phoneName2restaurant.keys())].copy()
# Vectorised dictionary lookup; every remaining `phoneName` is a key of the mapping
occupancy['restaurant'] = occupancy['phoneName'].map(phoneName2restaurant)

# Convert index: parse the creation timestamp and drop its timezone information.
# NOTE(review): the `dateCreated` strings end in "Z", so the naive hours are
# presumably UTC wall-clock hours rather than local time - confirm this is intended.
occupancy['datetime'] = pd.to_datetime(occupancy['dateCreated']).dt.tz_localize(None)
occupancy = (
    occupancy
    .set_index('datetime')
    .rename(columns={
        'countIn': 'num_customer_in',
        'countOut': 'num_customer_out',
    })
)

# Accumulate num_customer_in, num_customer_out by custom hour
cols = ['num_customer_in', 'num_customer_out', 'restaurant']

# Business-hour frequency; also reused later when building the training series
freq = pd.offsets.CustomBusinessHour(start=f"{hour_start}:00", end=f"{hour_end}:00")

# Sum the counts per restaurant and per clock hour
occupancy = (
    occupancy[cols]
    .groupby(by='restaurant')
    .resample('h')
    .sum()
    [['num_customer_in', 'num_customer_out']]
    .reset_index()
)

# Keep only the hourly buckets whose hour lies in [hour_start, hour_end] (both inclusive)
occupancy = occupancy[
    (occupancy['datetime'].dt.hour >= hour_start)
    & (occupancy['datetime'].dt.hour <= hour_end)
]

occupancy.head()

Unnamed: 0,restaurant,datetime,num_customer_in,num_customer_out
0,Chemicum,2024-05-27 13:00:00,5,4
1,Chemicum,2024-05-27 14:00:00,4,1
2,Chemicum,2024-05-27 15:00:00,1,1
3,Chemicum,2024-05-27 16:00:00,1,0
19,Chemicum,2024-05-28 08:00:00,227,129


# Start training

In [8]:
RESTAURANTS = occupancy['restaurant'].unique()

# Common end of the training window for every restaurant, so that the forecasts
# of all restaurants cover the same datetimes
CUTOFF_DATE = '2024-07-08 00:00:00'

for restaurant in RESTAURANTS:
    # Hourly inbound counts of this restaurant, without rows that contain missing values
    df_restaurant_raw = occupancy.loc[
        occupancy['restaurant'] == restaurant,
        ['datetime', 'num_customer_in'],
    ].dropna(how='any')

    # Business-hour grid from the first observation up to the cutoff
    dt_start = df_restaurant_raw['datetime'].min()
    dt_end = CUTOFF_DATE
    date_range = pd.date_range(start=dt_start, end=dt_end, freq=freq)

    # Align the observations on the grid; grid timestamps without a matching row get NaN
    df_restaurant = pd.DataFrame({'datetime': date_range}).merge(
        df_restaurant_raw,
        on='datetime',
        how='left',
    )

    # Training series on the business-hour frequency
    series = TimeSeries.from_dataframe(
        df=df_restaurant,
        time_col='datetime',
        value_cols='num_customer_in',
        freq=freq,
        fill_missing_dates=False,
    )

    # ARIMA with hour / day-of-week encoders passed as future covariates
    add_encoders = {
        'cyclic': {'future': ['hour', 'dayofweek']},
        'datetime_attribute': {'future': ['hour', 'dayofweek']},
    }
    model = ARIMA(add_encoders=add_encoders)
    model.fit(series)

    # Persist one model file per restaurant under <root>/occupancy/
    path_model = path_root_trained_model / "occupancy" / f"{restaurant}.pt"
    path_model.parent.mkdir(exist_ok=True, parents=True)
    model.save(path_model)



In [9]:
# Inspect the training series as a DataFrame. `series` is reassigned on every
# iteration of the training loop, so this shows the series of the last restaurant
# in RESTAURANTS only.
series.pd_dataframe()

component,num_customer_in
datetime,Unnamed: 1_level_1
2024-05-27 13:00:00,9.0
2024-05-27 14:00:00,1.0
2024-05-27 15:00:00,0.0
2024-05-28 08:00:00,33.0
2024-05-28 09:00:00,41.0
...,...
2024-07-05 11:00:00,0.0
2024-07-05 12:00:00,0.0
2024-07-05 13:00:00,0.0
2024-07-05 14:00:00,0.0


# Load models and forecast

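Predictions are produced for business days only, one value per hour inside the configured opening window (`hour_start` to `hour_end`, i.e. the timestamps 08:00 through 15:00).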

In [10]:
num_of_days = 3
# One forecast step per timestamp of the business-hour frequency `freq`, which
# yields hour_start, ..., hour_end - 1 for each business day (08:00 ... 15:00,
# i.e. 8 steps). A hard-coded 9 would push a 3-day horizon 3 hours into a 4th day.
NUM_TIMESTAMP_PER_DAY = hour_end - hour_start
num_timesteps = NUM_TIMESTAMP_PER_DAY * num_of_days

models = {'occupancy': {}}
predictions = {}   # datetime string -> {'datetime': ..., <restaurant>: forecast, ...}
for restaurant in RESTAURANTS:
    # Load model
    path_model = path_root_trained_model / "occupancy" / f"{restaurant}.pt"

    # `load` returns the saved model object as it was stored, so there is no need
    # to construct a throwaway ARIMA instance (or to re-declare its encoders) first
    models['occupancy'][restaurant] = ARIMA.load(path_model)

    # Forecast
    pred = models['occupancy'][restaurant].predict(num_timesteps)

    # Post-process forecasted data: timestamps become plain strings (JSON friendly)
    df_pred = pred.pd_dataframe().reset_index()
    df_pred['datetime'] = df_pred['datetime'].dt.strftime(r"%Y-%m-%d %H:%M:%S")

    # Merge the per-restaurant forecasts into one record per timestamp.
    # NOTE(review): raw forecasts can be negative (see the Physicum values in the
    # output); they are passed through unchanged here - decide whether to clip at 0.
    for row in df_pred.itertuples():
        record = predictions.setdefault(row.datetime, {'datetime': row.datetime})
        record[restaurant] = row.num_customer_in

print(json.dumps(list(predictions.values()), indent=2))

[
  {
    "datetime": "2024-07-08 08:00:00",
    "Chemicum": 180.20006671741567,
    "Exactum": 16.32915154888667,
    "Physicum": -2.8331123105694473
  },
  {
    "datetime": "2024-07-08 09:00:00",
    "Chemicum": 146.31696087545288,
    "Exactum": 10.260344838152946,
    "Physicum": -1.856151252357506
  },
  {
    "datetime": "2024-07-08 10:00:00",
    "Chemicum": 102.58315589111123,
    "Exactum": 9.131025927857763,
    "Physicum": -2.299926495298223
  },
  {
    "datetime": "2024-07-08 11:00:00",
    "Chemicum": 46.58971826550237,
    "Exactum": 6.961287959860897,
    "Physicum": -2.3216388637595173
  },
  {
    "datetime": "2024-07-08 12:00:00",
    "Chemicum": 19.507027851479847,
    "Exactum": 4.2330703731400945,
    "Physicum": -2.465218601876245
  },
  {
    "datetime": "2024-07-08 13:00:00",
    "Chemicum": 7.582015778982736,
    "Exactum": 1.6617759003404444,
    "Physicum": -2.3660259423463117
  },
  {
    "datetime": "2024-07-08 14:00:00",
    "Chemicum": 2.896596465111088