In [1]:
%cd ../

/home/hoanghu/projects/Food-Waste-Optimization


In [2]:
import os
import re

import numpy as np
import pandas as pd
import psycopg as pg
from loguru import logger
from psycopg import sql

# Process meals

In [3]:
# Read the "meals" sheet of the 2024 menus workbook.
meals_workbook = "data/processed/menus_2024.xlsx"
meals_raw = pd.read_excel(meals_workbook, sheet_name="meals")

meals_raw.head()

Unnamed: 0,meal_code,meal_name,category,CO2
0,34.0,Sitruunaiset kalapaloja ja kukkakaalitsatsikia,kala,0.81
1,710.0,Broileri-Caesarsalaatti,kana,0.67
2,724.0,Broilerilasagnette,kana,0.82
3,725.0,"Broilerinuggetit, currykastiketta",kana,1.06
4,726.0,"Broileripyörykät, currykastike",kana,0.86


In [4]:
# Discard rows with any missing field, then keep one row per meal_code
# (the first one encountered for each code).
meals_complete = meals_raw.dropna(axis=0, how='any')
meals = meals_complete.groupby('meal_code', as_index=False).first()

meals.head()

Unnamed: 0,meal_code,meal_name,category,CO2
0,34.0,Sitruunaiset kalapaloja ja kukkakaalitsatsikia,kala,0.81
1,37.0,Lihapullia ja pippurikastiketta,liha,0.61
2,710.0,Broileri-Caesarsalaatti,kana,0.67
3,713.0,Broileri-pekonihöystöä,kana,0.56
4,724.0,Broilerilasagnette,kana,0.82


# Process menus

In [5]:
path = "data/processed/menus_2024.xlsx"

# Sheet names "week1" .. "week6"
weeks = [f"week{i}" for i in range(1, 7)]

# Read one sheet per week and tag every row with the sheet it came from.
list_menus = [
    pd.read_excel(path, sheet_name=week).assign(week=week)
    for week in weeks
]

menus_raw = pd.concat(list_menus)
menus_raw.head()

Unnamed: 0,meal_type,misc,monday,tuesday,wednesday,thursday,friday,saturday,week
0,today’s special,meal_id,7609,7607,9039,6121,6818,2284,week1
1,today’s special,meal_name,"Lohta pesto & mustajuurta (L, G)","Broileria pekonikastikkeessa (L, G, KELA)",BBQ-savutofuburger & raikasta nektariiniketsup...,"Karamellisoitua possua (M,G,KELA)",Filippiiniläiset kanavartaat & Hedelmäsalsaa (...,"Limemarinoidut kanavartaat, avokadokastiketta ...",week1
2,vegan-kpl,meal_id,9044,9064,9053,9097,9052,9075,week1
3,vegan-kpl,meal_name,Kasvisjalapenonugetteja ja Chipotle-majoneesia...,"Kasvispyöryköitä ja Currykastiketta (VE, G, KE...","Falafelpyöryköitä & Chimicurrikastiketta (VE, ...",Kasvisjahispyöryköitä pesto-tomaattikastikees...,Punajuuripyöryköitä ja vaaleaa balsamicokastik...,"Pinaattilettuja & puolukkasurvosta (VE, G, IV)",week1
4,vegan-miscellaneous,meal_id,9018,7562,7573,6852,7564,9073,week1


In [6]:
# meal_id values treated as new dishes; used further down in this cell to fill
# the `is_new` column via `isin`.
# NOTE(review): the six blank-line-separated groups presumably correspond to the
# six week sheets — confirm.
meal_ids_new = [
    9039,
    9044, 9064, 9053, 9097, 9052, 9075,
    9018, 9073,
    9074,
    9078,
    9105, 9106,
    9107, 8994,

    9026, 9022,
    9049, 9059, 6044, 8985, 9056, 9066,
    9042,
    9079,
    2218, 2206, 20018,
    8986,

    9094,
    9045, 9063, 9047, 9069, 9054, 9067,
    8993, 9017, 9036,
    9076,
    8989,
    200011, 20003,

    9029,
    9060, 9051, 8983, 9046, 7029, 9098,
    8999,
    8988,
    20017, 200011, 950005,  # 200011 is also listed in the previous group (harmless for membership tests)

    9050, 9068, 9062, 9048, 9065,
    9003, 9111,
    9077, 9037,
    8987, 8992,
    950017, 950019, 950000,

    9021, 9040,
    9096, 9061, 9055, 9043, 9058,
    9099,
    8991,
    950001, 950011,
]

cols = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday']

# Long format: one row per (meal_type, misc, week, weekday).
menus_long = menus_raw.melt(
    id_vars=['meal_type', 'misc', 'week'],
    value_vars=cols,
    var_name='weekday',
    value_name='meal',
)

# Spread the `misc` labels (the sheet stores meal_id and meal_name on separate
# rows) into columns of their own.
menus_wide = menus_long.pivot(
    index=['meal_type', 'week', 'weekday'],
    columns='misc',
    values='meal',
)

# Keep only slots where every pivoted column has a value.
menus = menus_wide.reset_index().dropna(axis=0, how='any')

# Decompose meal name and tag
pat1 = r"(\s\([A-Z](\w|\,|\s)*\))"
pat2 = r"(\(|\)|\s)"

def _f_extract_tag(s: str):
    meal_name, tag = "", ""

    s = s.strip()
    out = re.findall(pat1, s)
    if len(out) < 1:
        meal_name = s
    else:
        meal_name = re.sub(pat1, "", s)
        tag = out[0][0]
        tag = re.sub(r"(\(|\)|\s)", '', tag)

    return pd.Series({'meal': meal_name, 'tag': tag})
    
# Replace the raw `meal_name` column with the cleaned name (`meal`) and `tag`.
extracted = menus['meal_name'].apply(_f_extract_tag)
menus_without_name = menus.drop(columns='meal_name')
menus = pd.concat([menus_without_name, extracted], axis=1)

# `is_kela`: the tag string mentions KELA.
# `is_new`: the raw meal_id is one of the ids listed in `meal_ids_new`.
# NOTE(review): `meal_id` is still the raw cell value here; the meal_id parser
# later in this cell also handles string ids of the form "a / b", and such
# strings never match the integer list — confirm this is intended.
menus = menus.assign(
    is_kela=menus['tag'].str.contains('KELA'),
    is_new=menus['meal_id'].isin(meal_ids_new),
)

# Process meal_id
pat_meal_code = r"(\d*)\s*\/\s*(\d*)"

def _f_extract_meal_id(s):
    meal_id_1, meal_id_2 = None, None
    match s:
        case int():
            meal_id_1 = s
        case str():
            out = re.findall(pat_meal_code, s)[0]
            meal_id_1, meal_id_2 = int(out[0]), int(out[1])

    return pd.Series({'meal_id_1': meal_id_1, 'meal_id_2': meal_id_2})

# Add the parsed id columns (meal_id_1 / meal_id_2) next to the raw meal_id.
extracted = menus['meal_id'].apply(_f_extract_meal_id)
menus = pd.concat([menus, extracted], axis=1)

# Remove duplicate meals: one row per raw meal_id, taking the first non-null
# value of every other column.
menus = menus.groupby('meal_id', as_index=False).first()

# Assign restaurant
meal_type_che_exac = ['fish', 'meat', 'today’s special', 'vegan-kpl', 'vegan-miscellaneous']
meal_type_phy = 'salad'

served_che_exa = menus['meal_type'].isin(meal_type_che_exac)
served_phy = menus['meal_type'] == meal_type_phy

# Start from the 'na' placeholder, then overwrite the rows of each group.
menus['restaurant'] = 'na'
menus.loc[served_che_exa, 'restaurant'] = 'che-exa'
menus.loc[served_phy, 'restaurant'] = 'phy'

# Expand the short keys into lists of restaurant names.
restaurant_lists = {
    'che-exa': ['chemicum', 'exactum'],
    'phy': ['physicum'],
    'na': [],
}
menus['restaurant'] = menus['restaurant'].map(restaurant_lists)

# Convert meal_type
def _f_conv_mealtype(s: str, another: str|None = None):
    out = [s]
    if another is not None:
        out.append(another)

    return out
# Each meal_type becomes a one-element list (no `another` value is passed here).
menus['meal_type'] = menus['meal_type'].apply(_f_conv_mealtype)

menus.head()

Unnamed: 0,meal_id,meal_type,week,weekday,meal,tag,is_kela,is_new,meal_id_1,meal_id_2,restaurant
0,710,[salad],week3,tuesday,Broileri-Caesarsalaatti,"VL,KELA",True,False,710.0,,[physicum]
1,785,[meat],week1,wednesday,Meksikolainen uunimakkara,"L,G,KELA",True,False,785.0,,"[chemicum, exactum]"
2,790,[meat],week4,thursday,Uunimakkara ja sinappikastike,"L,G,KELA",True,False,790.0,,"[chemicum, exactum]"
3,791,[meat],week1,thursday,Carbonara-kastike & pastaa,"G,L",False,False,791.0,,"[chemicum, exactum]"
4,839,[salad],week3,thursday,Chili-katkarapusalaatti,"M,G,KELA",True,False,839.0,,[physicum]


# Check old data

In [7]:
# Semicolon-separated export of sold lunches.
# NOTE(review): the saved output of this cell contains a warning fragment that
# echoes the read_csv line — presumably a pandas DtypeWarning about mixed-type
# columns; confirm and consider passing explicit dtypes.
path = "data/raw/Sold lunches.csv"
pos_raw = pd.read_csv(path, sep=';')

pos_raw.head()

  pos_raw = pd.read_csv(path, delimiter=';')


Unnamed: 0,Date,Receipt time,Restaurant,Food Category,Dish,pcs,Hiilijalanjälki
0,2.1.2023,10:31,600 Chemicum,Liha,"Uunimakkaraa,sinappikastiketta",1,9
1,2.1.2023,10:32,600 Chemicum,Kala,Kalapuikot tillikermaviilikast,1,104
2,2.1.2023,10:32,600 Chemicum,Liha,"Uunimakkaraa,sinappikastiketta",1,9
3,2.1.2023,10:35,600 Chemicum,Kala,Kalapuikot tillikermaviilikast,1,104
4,2.1.2023,10:36,600 Chemicum,Liha,"Uunimakkaraa,sinappikastiketta",2,18


In [8]:

# Columns kept from the raw export, and the names used for them from here on
# (`pcs` keeps its name).
pos_source_columns = ['Restaurant', 'Food Category', 'Dish', 'pcs', 'Hiilijalanjälki']
pos_column_names = {
    'Restaurant': 'restaurant',
    'Food Category': 'meal_type',
    'Dish': 'meal',
    'Hiilijalanjälki': 'co2',
}

pos = pos_raw[pos_source_columns].copy()
pos = pos.rename(columns=pos_column_names)

def _f_conv(s):
    match s:
        case int() | float():
            return float(s)
        case str():
            if re.search(r'\s', s) is None:
                return float(s)
            return np.nan

# Parse `pcs` and keep only rows where it is usable.
pos['pcs'] = pos['pcs'].apply(_f_conv)
# .copy() makes the filtered frame independent, so the column assignments below
# do not write into a slice of the previous frame (chained-assignment hazard).
pos = pos[pos['pcs'].notna()].copy()

pos['meal'] = pos['meal'].str.strip()

pos['restaurant'] = pos['restaurant'].map({
    '600 Chemicum': 'chemicum',
    '610 Physicum': 'physicum',
    '620 Exactum': 'exactum'
})

pos['meal_type'] = pos['meal_type'].map({
    'Kasvis': 'vegetarian',
    'Kana': 'chicken',
    'Vegaani': 'vegan-miscellaneous',
    'Kala': 'fish',
    'Not Mapped': np.nan,
    'Liha': 'meat'
})

# co2 uses a decimal comma. Cast to str first: the `.str` accessor returns NaN
# for non-string cells, which would silently discard values that were already
# parsed as numbers. The comma is replaced literally (no regex needed).
pos['co2'] = pd.to_numeric(
    pos['co2'].astype(str).str.replace(',', '.', regex=False),
    errors='coerce',
)
# Per-piece value: the raw figure is divided by the number of pieces on the row.
pos['co2'] = pos['co2'] / pos['pcs']
# NOTE(review): the result is bound to `post`, not `pos`, so the aggregation
# below still sees rows with missing values. Rebinding it to `pos` would drop
# meals that have no CO2 value at all (the notebook later shows one such
# panini) — confirm which behaviour is wanted before changing this.
post = pos.dropna(axis=0, how='any')


# One row per (meal, meal_type) with the first non-null co2 of the group.
# groupby drops rows whose key is NaN (e.g. 'Not Mapped' meal types).
pos = (
    pos
    .groupby(['meal', 'meal_type'])['co2']
    .first()
    .reset_index()
)

pos.head()

# pos.to_excel("meals.xlsx", index=False)

Unnamed: 0,meal,meal_type,co2
0,Aurajuusto-pinaattilasagnettea,vegetarian,0.74
1,BBQ-Broilerikastiketta,chicken,0.57
2,Bangladeshilainen linssipata,vegan-miscellaneous,0.41
3,Bataatti-maapähkinäkeitto,vegan-miscellaneous,0.4
4,Bataattipihvit BBQ-tomaattik.,vegan-miscellaneous,0.4


### Compose list of paninis

In [9]:
# pat_panini = r"(panini|Panini)"
# panini_names = pos[
#     (pos['restaurant'] == 'physicum')
#     & (pos['meal'].str.contains(pat_panini, regex=True))
# ]['meal'].unique()

# Fixed list of panini dish names looked up in the aggregated sales data.
panini_names = [
    'Gluteeniton kinkku panini',
    'Panini Bombay',
    'Panini poro',
    'Panini, BBQ kana-pekoni',
    'Panini, Kana',
    'Panini, Kana-sinihomejuusto',
    'Panini, Kinkku Original',
    'Panini, Kinkku-salami',
    'Panini, Mozzarella',
    'Panini, Savulohi',
    'Panini, Vege-BBQ',
    'Panini, americano',
    'Panini, hot chili kana',
]

panini = pos.loc[pos['meal'].isin(panini_names)].copy()

# Assign meal_id: consecutive ids starting right after 950019, the highest id
# listed in `meal_ids_new`.
# NOTE(review): presumably chosen to avoid clashing with menu ids — confirm.
panini_start_id = 950019 + 1
panini_ids = range(panini_start_id, panini_start_id + len(panini))

# The KELA flag is unknown for paninis (None); none of them is marked as new.
panini = panini.assign(meal_id=panini_ids, is_kela=None, is_new=False)

def _f_conv_mealtype(s: str, another: str|None = None):
    out = [s]
    if another is not None:
        out.append(another)

    return out

# Every panini gets its original meal type plus the extra 'paninies' type.
panini['meal_type'] = panini['meal_type'].apply(
    lambda meal_type: _f_conv_mealtype(meal_type, another='paninies')
)

# All paninis belong to physicum; built through the same key -> list mapping
# style used for the menus.
restaurant_key = pd.Series('phy', index=panini.index)
panini['restaurant'] = restaurant_key.map({'phy': ['physicum']})

panini.head()

Unnamed: 0,meal,meal_type,co2,meal_id,is_kela,is_new,restaurant
36,Gluteeniton kinkku panini,"[meat, paninies]",,950020,,False,[physicum]
177,Panini Bombay,"[vegan-miscellaneous, paninies]",0.43,950021,,False,[physicum]
178,Panini poro,"[meat, paninies]",0.91,950022,,False,[physicum]
179,"Panini, BBQ kana-pekoni","[chicken, paninies]",0.91,950023,,False,[physicum]
180,"Panini, Kana","[chicken, paninies]",0.91,950024,,False,[physicum]


# Extract final list of meals

In [10]:
# Look up CO2 for the first meal id ...
with_first = menus.merge(meals, left_on='meal_id_1', right_on='meal_code', how='left')
with_first = (
    with_first
    .drop(columns=['meal_code', 'meal_name', 'category'])
    .rename(columns={'CO2': 'co2_1'})
)
# ... and for the second meal id (present only for "a / b" entries).
with_both = with_first.merge(meals, left_on='meal_id_2', right_on='meal_code', how='left')

# Prefer the CO2 of the first id; fall back to the second id's value.
with_both['co2'] = with_both['co2_1'].fillna(with_both['CO2'])

final_columns = ['meal_id_1', 'meal', 'meal_type', 'restaurant', 'is_kela', 'is_new', 'co2']
meals_final = with_both[final_columns].rename(columns={'meal_id_1': 'meal_id'})

# Add paninies
meals_final = pd.concat([meals_final, panini], ignore_index=True)

# Reformat meal_id (the merge keys were floats)
meals_final['meal_id'] = meals_final['meal_id'].astype(int)

meals_final.head()

Unnamed: 0,meal_id,meal,meal_type,restaurant,is_kela,is_new,co2
0,710,Broileri-Caesarsalaatti,[salad],[physicum],True,False,0.67
1,785,Meksikolainen uunimakkara,[meat],"[chemicum, exactum]",True,False,1.14
2,790,Uunimakkara ja sinappikastike,[meat],"[chemicum, exactum]",True,False,0.9
3,791,Carbonara-kastike & pastaa,[meat],"[chemicum, exactum]",False,False,0.86
4,839,Chili-katkarapusalaatti,[salad],[physicum],True,False,1.23


In [11]:
# Meals that ended up without a CO2 value.
missing_co2 = meals_final['co2'].isna()
meals_final.loc[missing_co2]

# meals_final.to_excel("meals_final_2425.xlsx", index=False)

Unnamed: 0,meal_id,meal,meal_type,restaurant,is_kela,is_new,co2
245,950020,Gluteeniton kinkku panini,"[meat, paninies]",[physicum],,False,


# Test: Insert to local DB

In [12]:
# Connection settings for the local test database.
# Values come from the environment so that no credential is stored in the
# notebook; the non-secret settings fall back to the previous local defaults.
# DB_PWD deliberately has no default: export it before running the insert cells
# (it is None when unset).
DB_PORT = os.environ.get('DB_PORT', '1001')
DB_PWD = os.environ.get('DB_PWD')
DB_USER = os.environ.get('DB_USER', 'helsinki')
DB_NAME = os.environ.get('DB_NAME', 'foodwaste')
DB_HOST = os.environ.get('DB_HOST', 'localhost')

In [14]:
table_name = "meals"
# Single source of truth for the inserted columns: the row tuples, the column
# list in the statement and the number of placeholders are all derived from it,
# so they cannot drift apart, and an empty frame no longer breaks `values[0]`.
columns = ["meal_id", "meal", "meal_type", "restaurant", "is_kela", "is_new"]
values = list(meals_final[columns].itertuples(index=False, name=None))

try:
    with pg.connect(
        user=DB_USER,
        password=DB_PWD,
        host=DB_HOST,
        port=DB_PORT,
        dbname=DB_NAME,
    ) as conn:
        with conn.cursor() as cur:
            stmt = (
                sql.SQL("insert into {table} ({fields}) values ({placeholders});")
                .format(
                    table=sql.Identifier(table_name),
                    fields=sql.SQL(", ").join(sql.Identifier(c) for c in columns),
                    placeholders=sql.SQL(", ").join(sql.Placeholder() * len(columns)),
                )
            )
            # One parameterised insert per row.
            cur.executemany(stmt, values)

# Only connection-level failures are logged; other database errors propagate.
except pg.OperationalError as e:
    logger.error(f"Connect to DB got error: {e}")

In [15]:
table_name = "co2"
# Single source of truth for the inserted columns: the row tuples, the column
# list in the statement and the number of placeholders are all derived from it,
# so they cannot drift apart, and an empty frame no longer breaks `values[0]`.
columns = ["meal_id", "co2"]
# NOTE(review): meals without a CO2 value (the notebook shows one such panini)
# are sent with a NaN co2 rather than NULL — confirm that this is intended.
values = list(meals_final[columns].itertuples(index=False, name=None))

try:
    with pg.connect(
        user=DB_USER,
        password=DB_PWD,
        host=DB_HOST,
        port=DB_PORT,
        dbname=DB_NAME,
    ) as conn:
        with conn.cursor() as cur:
            stmt = (
                sql.SQL("insert into {table} ({fields}) values ({placeholders});")
                .format(
                    table=sql.Identifier(table_name),
                    fields=sql.SQL(", ").join(sql.Identifier(c) for c in columns),
                    placeholders=sql.SQL(", ").join(sql.Placeholder() * len(columns)),
                )
            )
            # One parameterised insert per row.
            cur.executemany(stmt, values)

# Only connection-level failures are logged; other database errors propagate.
except pg.OperationalError as e:
    logger.error(f"Connect to DB got error: {e}")