In [1]:
# Change the kernel's working directory two levels up so that the relative
# paths used further down (e.g. "src/data/...") resolve.
# NOTE(review): the path is relative, so re-running this cell without
# restarting the kernel moves up two more levels each time.
%cd ../../

# Reload imported modules automatically before each cell execution, so edits
# to local .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

/home/hoanghu/projects/Food-Waste-Optimization


In [2]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
from sklearn.linear_model import Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.preprocessing import TargetEncoder, MinMaxScaler
from sklearn.metrics import root_mean_squared_error
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from sklearn.decomposition import PCA

# Load data

In [3]:
# Raw receipt export: semicolon-delimited; 'utf-8-sig' tolerates a leading
# byte-order mark in the file.
path = "src/data/basic_mvp_data/Sold lunches.csv"

# The 'Date' column is deliberately left as text here. Its values are
# day-first (e.g. "2.1.2023" is 2 January), and letting read_csv guess the
# format either leaves them as strings or risks swapping day and month.
# The column is parsed with an explicit '%d.%m.%Y' format during cleaning.
raw_lunch = pd.read_csv(path, delimiter=';', encoding='utf-8-sig', low_memory=False)
raw_lunch.head()

Unnamed: 0,Date,Receipt time,Restaurant,Food Category,Dish,pcs,Hiilijalanjälki
0,2.1.2023,10:31,600 Chemicum,Liha,"Uunimakkaraa,sinappikastiketta",1,9
1,2.1.2023,10:32,600 Chemicum,Kala,Kalapuikot tillikermaviilikast,1,104
2,2.1.2023,10:32,600 Chemicum,Liha,"Uunimakkaraa,sinappikastiketta",1,9
3,2.1.2023,10:35,600 Chemicum,Kala,Kalapuikot tillikermaviilikast,1,104
4,2.1.2023,10:36,600 Chemicum,Liha,"Uunimakkaraa,sinappikastiketta",2,18


# Extract menu per day for each restaurant

In [4]:
# Build one row per (date, restaurant, category, dish) with the number of
# portions sold, using English names for restaurants and food categories.
lunches = (
    raw_lunch
    .rename(columns={
        'Date': 'date',
        'Restaurant': 'restaurant',
        'Food Category': 'category',
        'Dish': 'dish'
    })
    # Scope each mapping to its own column: a frame-wide replace would also
    # rewrite any other cell (e.g. a dish called exactly 'Kala' or 'Kana').
    .replace(
        {
            'restaurant': {
                '600 Chemicum': 'Chemicum',
                '610 Physicum': 'Physicum',
                '620 Exactum': 'Exactum',
            },
            'category': {
                'Kala': 'fish',
                'Liha': 'meat',
                'Vegaani': 'vegan',
                'Kasvis': 'vegetarian',
                'Kana': 'chicken',
            },
        }
    )
    .assign(
        # Unparseable values become NaN / NaT instead of raising.
        pcs=lambda df: pd.to_numeric(df['pcs'], errors='coerce'),
        # Dates are day-first, so the format is given explicitly.
        date=lambda df: pd.to_datetime(df['date'], format='%d.%m.%Y', errors='coerce'),
    )
    # Individual receipts are summed into portions sold per dish and day.
    .groupby(['date', 'restaurant', 'category', 'dish'])['pcs']
    .sum()
    .reset_index()
)

# Remove 'Not Mapped'
lunches = lunches[lunches['category'] != 'Not Mapped']

# Remove 'takeaway'
def is_takeaway(s: str) -> bool:
    """Tell whether a dish name marks a take-away sale.

    The check is case-insensitive and looks for the phrase 'take away'
    (with a space) anywhere in the name.
    """
    return 'take away' in s.lower()

# Drop take-away rows with a boolean mask rather than adding and then
# dropping a helper column: `lunches` is a filtered slice at this point, and
# assigning a new column to it raises pandas' SettingWithCopyWarning.
takeaway_mask = lunches['dish'].map(is_takeaway)
lunches = lunches[~takeaway_mask]

lunches.head(10)

Unnamed: 0,date,restaurant,category,dish,pcs
2,2023-01-02,Chemicum,fish,Kalapuikot tillikermaviilikast,78.0
4,2023-01-02,Chemicum,meat,"Uunimakkaraa,sinappikastiketta",165.0
5,2023-01-02,Chemicum,vegan,Marokkolainen linssipata,84.0
8,2023-01-03,Chemicum,fish,"Herkkulohipihvit, punajuurimaj",105.0
10,2023-01-03,Chemicum,fish,Kalapuikot tillikermaviilikast,52.0
12,2023-01-03,Chemicum,meat,Pasta Carbonara,17.0
13,2023-01-03,Chemicum,meat,"Uunimakkaraa,sinappikastiketta",56.0
14,2023-01-03,Chemicum,vegan,Marokkolainen linssipata,62.0
16,2023-01-03,Chemicum,vegan,Vegaaninen buttertofu,51.0
17,2023-01-03,Chemicum,vegetarian,Feta-pinaattilasagnette,29.0


In [5]:
# Number of distinct dishes offered per food category, one row per
# (date, restaurant) and one `num_cat_<category>` column per category.
dish_count = (
    lunches
    # Each row of `lunches` is one dish, so the group size is the dish count.
    .groupby(['date', 'restaurant', 'category'])
    .size()
    .reset_index(name='count')
    .pivot(index=['date', 'restaurant'], columns='category', values='count')
    # A category that was not on the menu that day counts as zero dishes.
    .fillna(0.0)
    .rename(columns={
        name: f'num_cat_{name}'
        for name in ('chicken', 'fish', 'meat', 'vegan', 'vegetarian')
    })
    .reset_index()
)
dish_count.head()

category,date,restaurant,num_cat_chicken,num_cat_fish,num_cat_meat,num_cat_vegan,num_cat_vegetarian
0,2023-01-02,Chemicum,0.0,1.0,1.0,1.0,0.0
1,2023-01-03,Chemicum,0.0,2.0,2.0,2.0,1.0
2,2023-01-04,Chemicum,0.0,2.0,1.0,1.0,0.0
3,2023-01-05,Chemicum,1.0,2.0,0.0,1.0,0.0
4,2023-01-09,Chemicum,0.0,2.0,1.0,2.0,0.0


In [6]:
# Total portions sold per restaurant and day, summed over all dishes.
num_pcs = (
    lunches
    .groupby(['date', 'restaurant'], as_index=False)['pcs']
    .sum()
)
num_pcs.head()

Unnamed: 0,date,restaurant,pcs
0,2023-01-02,Chemicum,327.0
1,2023-01-03,Chemicum,372.0
2,2023-01-04,Chemicum,387.0
3,2023-01-05,Chemicum,471.0
4,2023-01-09,Chemicum,568.0


In [7]:
# Join the per-category dish counts with the portions sold on the same
# (date, restaurant); the date is only the join key and is dropped afterwards.
menu_keys = ['date', 'restaurant']
dishes = dish_count.merge(num_pcs, how='inner', on=menu_keys).drop(columns='date')

dishes.head()

Unnamed: 0,restaurant,num_cat_chicken,num_cat_fish,num_cat_meat,num_cat_vegan,num_cat_vegetarian,pcs
0,Chemicum,0.0,1.0,1.0,1.0,0.0,327.0
1,Chemicum,0.0,2.0,2.0,2.0,1.0,372.0
2,Chemicum,0.0,2.0,1.0,1.0,0.0,387.0
3,Chemicum,1.0,2.0,0.0,1.0,0.0,471.0
4,Chemicum,0.0,2.0,1.0,2.0,0.0,568.0


In [9]:
# Compare the distribution of portions sold on days with at least two vegan
# dishes on the menu against days with fewer, for one restaurant.
restaurant = "Exactum"

# NOTE: the variable keeps its historical name `dishes_chemicum`, but it holds
# the rows of whichever restaurant is selected above.
# `.assign` returns a new frame, so the flag is not written onto a filtered
# slice of `dishes` (which would raise SettingWithCopyWarning).
dishes_chemicum = (
    dishes[dishes['restaurant'] == restaurant]
    .assign(alot_vegan=lambda df: df['num_cat_vegan'] >= 2)
)

pcs_alot_vegan = dishes_chemicum.loc[dishes_chemicum['alot_vegan'], 'pcs']
pcs_not_alot_vegan = dishes_chemicum.loc[~dishes_chemicum['alot_vegan'], 'pcs']

# Plot
fig = make_subplots(
    rows=1, cols=2,
    specs=[
        [{'type': 'scatter'}, {'type': 'box'}]
    ],
    subplot_titles=("Distribution", "Boxplot")
)

# Both groups are drawn by the same code, so each trace always uses its own
# data. The "Less vegan" box previously plotted the "More vegan" series.
groups = [
    (pcs_alot_vegan, 'More vegan', 'blue'),
    (pcs_not_alot_vegan, 'Less vegan', 'orange'),
]

for group_pcs, label, color in groups:
    # create_distplot is used only to compute the KDE curve; with its default
    # settings the second trace (index 1) is that curve.
    fx1 = ff.create_distplot([group_pcs], ['distplot'], curve_type='kde')
    kde_curve = fx1.data[1]
    fig.add_trace(
        go.Scatter(x=kde_curve['x'], y=kde_curve['y'], name=label, marker=dict(color=color), legendgroup=color, fill='tozeroy'),
        row=1, col=1
    )
    # The box trace shares the legend group, so toggling the legend entry
    # hides or shows both traces of the group.
    fig.add_trace(
        go.Box(x=group_pcs, name=label, marker=dict(color=color), legendgroup=color, showlegend=False),
        row=1, col=2
    )

fig.update_layout(
    height=600,
    width=1200,
    title_text=f"<b>Distribution analysis ({restaurant}) of no. pieces when the menu has more vegan dishes (>= 2) and less</b>",
    title_font_size=20,
    xaxis_tickangle=-90,
    title_x=0.5,
)
fig.show()