In [1]:
# Move the kernel's working directory one level up; presumably this is what
# lets the project-local `utils` module be imported below (verify).
# NOTE(review): the path is relative, so re-running this cell without a kernel
# restart moves up one more directory each time.
%cd ../

/home/hoanghu/projects/Food-Waste-Optimization/experiments_hoangle


In [2]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, root_mean_squared_error

from utils import Paths

# Load the related dimension tables

In [3]:
# Biowaste table: per-date, per-restaurant waste amounts, split by source
# (customer, coffee, kitchen, hall).
biowaste_path = Paths.dim_biowaste()
dim_biowaste = pd.read_excel(biowaste_path, index_col=None)
dim_biowaste.head()

Unnamed: 0,date,restaurant,amnt_waste_customer,amnt_waste_coffee,amnt_waste_kitchen,amnt_waste_hall
0,2023-01-02,Chemicum,4.7,1.2,12.0,0.0
1,2023-01-03,Chemicum,5.0,1.4,14.8,0.0
2,2023-01-04,Chemicum,4.15,4.0,7.1,0.0
3,2023-01-05,Chemicum,10.0,3.3,8.5,0.0
4,2023-01-09,Chemicum,7.65,2.1,4.9,0.0


In [4]:
# Lunch table: pieces (`pcs`) recorded per date, restaurant, category and dish.
# NOTE(review): `dim_lucnhes` is spelled this way at the call site; presumably
# it matches the accessor name defined in `utils.Paths` -- verify before renaming.
lunches_path = Paths.dim_lucnhes()
dim_lunches = pd.read_excel(lunches_path, index_col=None)
dim_lunches.head()

Unnamed: 0,date,restaurant,category,dish,pcs
0,2023-01-02,Chemicum,fish,Kalapuikot tillikermaviilikast,78
1,2023-01-02,Chemicum,meat,Uunimakkaraa,165
2,2023-01-02,Chemicum,vegan,Marokkolainen linssipata,84
3,2023-01-03,Chemicum,fish,Herkkulohipihvit,105
4,2023-01-03,Chemicum,fish,Kalapuikot tillikermaviilikast,52


In [5]:
# Dish table: maps each (restaurant, category, dish) combination to a meal_id.
dishes_path = Paths.dim_dishes()
dim_dishes = pd.read_excel(dishes_path, index_col=None)

dim_dishes.head()

Unnamed: 0,meal_id,restaurant,category,dish
0,1,Chemicum,chicken,BBQ-Broilerikastiketta
1,2,Chemicum,chicken,Broileria appelsiini-currykastikkeessa
2,3,Chemicum,chicken,Broileria pekonikastikkeessa
3,4,Chemicum,chicken,Broileria pestokastikkeessa
4,5,Chemicum,chicken,Broilerinkoipea


# Create data

## One-hot encode dish names

In [6]:
# Give every row of dim_dishes its own one-hot vector: row i of the n x n
# identity matrix becomes the embedding of the i-th dish.
n = len(dim_dishes)
one_hot_rows = list(np.eye(n))
dim_dishes['embd'] = one_hot_rows

dim_dishes.head()

Unnamed: 0,meal_id,restaurant,category,dish,embd
0,1,Chemicum,chicken,BBQ-Broilerikastiketta,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
1,2,Chemicum,chicken,Broileria appelsiini-currykastikkeessa,"[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
2,3,Chemicum,chicken,Broileria pekonikastikkeessa,"[0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
3,4,Chemicum,chicken,Broileria pestokastikkeessa,"[0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
4,5,Chemicum,chicken,Broilerinkoipea,"[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ..."


In [7]:
# Attach each lunch row's dish metadata (meal_id + one-hot embedding).
#
# Columns left over from a previous run of this cell (`meal_id`, `embd`) are
# dropped first and only the needed dish columns are joined, so re-running the
# cell does not create `_x` / `_y` suffixed columns followed by a KeyError on
# `embd`.
dish_keys = ['restaurant', 'category', 'dish']

dim_lunches = (
    dim_lunches
    .drop(columns=['meal_id', 'embd'], errors='ignore')
    .merge(
        dim_dishes[['meal_id', *dish_keys, 'embd']],
        on=dish_keys,
        how='left',
        # Each lunch row must match at most one dish; duplicate dish keys
        # would silently duplicate lunch rows and inflate the counts.
        validate='many_to_one',
    )
)

# A left join leaves `embd` missing for dishes that are absent from
# dim_dishes. Fail loudly here rather than letting missing values leak into
# the per-day aggregation and the regression input.
unmatched = dim_lunches['embd'].isna()
if unmatched.any():
    missing_dishes = dim_lunches.loc[unmatched, 'dish'].unique()[:10].tolist()
    raise ValueError(
        f"{int(unmatched.sum())} lunch rows have no matching entry in dim_dishes, "
        f"e.g. {missing_dishes}"
    )

# Scale each one-hot vector by the row's `pcs`, so the dish's slot holds the
# number of pieces instead of 1.
dim_lunches['embd'] = dim_lunches['embd'] * dim_lunches['pcs']
dim_lunches.head()

Unnamed: 0,date,restaurant,category,dish,pcs,meal_id,embd
0,2023-01-02,Chemicum,fish,Kalapuikot tillikermaviilikast,78,33,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
1,2023-01-02,Chemicum,meat,Uunimakkaraa,165,101,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
2,2023-01-02,Chemicum,vegan,Marokkolainen linssipata,84,136,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
3,2023-01-03,Chemicum,fish,Herkkulohipihvit,105,27,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
4,2023-01-03,Chemicum,fish,Kalapuikot tillikermaviilikast,52,33,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


## Get embedding per day and restaurant 

In [8]:
# Add up the pcs-weighted dish vectors of all lunch rows that share a date and
# restaurant, giving one vector per (date, restaurant).
grouped_embds = dim_lunches.groupby(['date', 'restaurant'])['embd']
embds_by_day_res = grouped_embds.sum().reset_index()

embds_by_day_res.head()

Unnamed: 0,date,restaurant,embd
0,2023-01-02,Chemicum,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
1,2023-01-03,Chemicum,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
2,2023-01-04,Chemicum,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
3,2023-01-05,Chemicum,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
4,2023-01-09,Chemicum,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


## Get total waste per day and restaurant

In [9]:
# The four per-source waste columns that make up the daily total.
waste_parts = [
    'amnt_waste_customer',
    'amnt_waste_coffee',
    'amnt_waste_kitchen',
    'amnt_waste_hall',
]

# Accumulate left to right with `+` so a missing value in any source column
# still propagates to the total.
total_waste = dim_biowaste[waste_parts[0]]
for part in waste_parts[1:]:
    total_waste = total_waste + dim_biowaste[part]
dim_biowaste['waste'] = total_waste

# Only the total is needed downstream; remove the per-source columns.
dim_biowaste.drop(columns=waste_parts, inplace=True)

dim_biowaste.head()

Unnamed: 0,date,restaurant,waste
0,2023-01-02,Chemicum,17.9
1,2023-01-03,Chemicum,21.2
2,2023-01-04,Chemicum,15.25
3,2023-01-05,Chemicum,21.8
4,2023-01-09,Chemicum,14.65


## Create single table containing embeddings and biowaste per day and restaurant

In [10]:
# Inner join: keep only the (date, restaurant) pairs that have both an
# aggregated dish vector and a waste total.
fact = pd.merge(
    embds_by_day_res,
    dim_biowaste,
    on=['date', 'restaurant'],
    how='inner',
)

fact.head()

Unnamed: 0,date,restaurant,embd,waste
0,2023-01-02,Chemicum,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",17.9
1,2023-01-03,Chemicum,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",21.2
2,2023-01-04,Chemicum,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",15.25
3,2023-01-05,Chemicum,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",21.8
4,2023-01-09,Chemicum,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",14.65


# Estimate the amount of waste produced by each dish

In [11]:
# Design matrix: one row per (date, restaurant) in `fact`, one column per dish;
# the target is that day's total waste.
X = np.stack(fact['embd'].to_numpy())
y = fact['waste']

# No intercept and non-negative coefficients: the waste of a day is modelled
# purely as a non-negative per-piece contribution of each dish served.
model = LinearRegression(fit_intercept=False, positive=True)
model.fit(X, y)

model.coef_

array([3.30498567e-02, 5.57677981e-02, 0.00000000e+00, 0.00000000e+00,
       2.25866202e-01, 1.99135315e-01, 4.69086241e-02, 1.50769908e-01,
       4.52380602e-02, 9.03610049e-02, 4.81421633e-02, 5.01292242e-02,
       8.29600505e-02, 3.53688568e-02, 7.12260909e-02, 5.43041162e-02,
       4.21300479e-02, 7.23562396e-02, 9.83370009e-02, 2.88935239e-02,
       5.46858756e-02, 3.51284474e-02, 3.52924549e-02, 4.46239075e-02,
       1.95139803e-01, 8.03674897e-03, 3.70910609e-02, 2.10597671e-03,
       4.23717054e-02, 0.00000000e+00, 0.00000000e+00, 7.31765330e-02,
       2.06578935e-02, 0.00000000e+00, 5.18250835e-02, 1.06236237e+00,
       6.81757236e-05, 5.03297481e-02, 5.49146782e-02, 9.38073688e-02,
       1.04650450e-01, 0.00000000e+00, 4.15015629e-02, 1.63614981e-01,
       1.02597927e-01, 0.00000000e+00, 7.65628665e-02, 0.00000000e+00,
       8.55829072e+00, 0.00000000e+00, 0.00000000e+00, 8.84767947e-02,
       1.17690833e-01, 4.58406530e-02, 5.59387884e-02, 4.27050793e-02,
      

In [12]:
# Goodness of fit on the same rows the model was fitted on (in-sample, no
# hold-out split). Predict once and reuse the result for both metrics.
y_pred = model.predict(X)

r2 = r2_score(y, y_pred)
rmse = root_mean_squared_error(y, y_pred)

print(f"r2   = {r2:.4f}")
print(f"rmse = {rmse:.4f}")

r2   = 0.7105
rmse = 7.8209


In [13]:
# Constant offset added to every dish's coefficient. It encodes the assumption
# that every dish produces some waste, so dishes whose fitted coefficient is
# exactly 0 still end up with a small positive value.
THETA = 1e-2

# Coefficient i belongs to the i-th row of dim_dishes (same order as the
# one-hot columns).
waste_per_dish = model.coef_ + THETA
dim_dishes['waste'] = waste_per_dish

dim_dishes.head()

Unnamed: 0,meal_id,restaurant,category,dish,embd,waste
0,1,Chemicum,chicken,BBQ-Broilerikastiketta,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.04305
1,2,Chemicum,chicken,Broileria appelsiini-currykastikkeessa,"[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.065768
2,3,Chemicum,chicken,Broileria pekonikastikkeessa,"[0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.01
3,4,Chemicum,chicken,Broileria pestokastikkeessa,"[0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.01
4,5,Chemicum,chicken,Broilerinkoipea,"[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...",0.235866


In [14]:
# Export only the dish key and its estimated waste.
# The selection is bound to a separate name so `dim_dishes` keeps all of its
# columns; overwriting it would leave earlier cells (e.g. the merge on
# restaurant/category/dish) unable to run again without reloading the table.
pred_biowaste = dim_dishes[['meal_id', 'waste']]
pred_biowaste.to_excel(Paths.pred_biowaste(), index=False)