In [1]:
import mlflow
import mlflow.spacy
import mlflow.sklearn
import polars as pl
import seaborn as sns
import scipy
import duckdb
import numpy as np
import json

In [2]:
import os
import sys
from dotenv import load_dotenv
sys.path.insert(0, os.path.abspath('..'))
sys.path.insert(0, os.path.abspath('../..'))
from app.model_functions import run_BOW_on_line
from app.evaluation import reciprocal_rank, rank_results, precision_at_k
from app.data_handling.BOW import transform_ds_to_BOW
from app.data_handling.NER_parquet import process_ingredient, transform_ingredients_to_tokens, transform_data_to_tokens, construct_ingredient_query

load_dotenv()

True

In [3]:
mlflow.set_tracking_uri(os.getenv("MLFLOW_TRACKING_URL"))

In [4]:
with open("../../data/eval_data/processed_data.json", "r") as f:
    eval_ds = pl.read_json(f)

In [5]:
model_uri = 'runs:/083d261eafb942a2878f1dab36056631/model'
BOW_model = mlflow.sklearn.load_model(model_uri)

  from .autonotebook import tqdm as notebook_tqdm
Downloading artifacts: 100%|██████████████████████████████████████████████████████████| 5/5 [00:02<00:00,  1.70it/s]


# BOW Evaluation 

In [6]:
eval_ds = transform_ds_to_BOW(eval_ds, BOW_model)

In [7]:
eval_ds.head()

index,title,ingredients,google_search_query,main_food_items_and_preparations,important_ingredients_for_search,google_search_query_processed,tfidf
i64,str,list[str],str,list[str],list[str],struct[2],object
1461,"""Apricot Cake""","[""2 c. sugar"", ""4 eggs"", … ""1/2 c. chopped nuts (English or pecans)""]","""apricot cake recipe with baby …","[""sugar"", ""eggs"", … ""chopped nuts (English or pecans)""]","[""apricot baby food"", ""flour"", … ""Wesson oil""]","{""Apricot Cake"",[""baby food"", ""oil"", … ""cinnamon""]}","<Compressed Sparse Row sparse matrix of dtype 'float64' 	with 21 stored elements and shape (1, 130236)>  Coords	Values  (0, 9283)	0.21712884633218843  (0, 11227)	0.1983406079654085  (0, 11950)	0.1435427933001902  (0, 27801)	0.10397334298783731  (0, 28655)	0.1545387933857181  (0, 40188)	0.13477667718333167  (0, 41158)	0.22558277653043618  (0, 45373)	0.12492984261939608  (0, 45749)	0.20424254996450877  (0, 59855)	0.49958837040533793  (0, 61279)	0.3252229750070988  (0, 82354)	0.175862530653854  (0, 82985)	0.12296912118731682  (0, 88411)	0.17669976855051128  (0, 101700)	0.10744209625000255  (0, 108170)	0.14579595442297072  (0, 108877)	0.1595550757864492  (0, 113207)	0.10989552183156677  (0, 115704)	0.2507885160635505  (0, 120835)	0.3147401308886549  (0, 126098)	0.2586934565664201"
1490,"""Angel Delights""","[""1 c. miniature marshmallows"", ""1 c. broken pretzels"", … ""1 lb. almond bark""]","""angel delights recipe with mar…","[""miniature marshmallows"", ""broken pretzels"", … ""almond bark""]","[""marshmallows"", ""pretzels"", … ""almond bark""]","{""Angel Delights"",[""marshmallows"", ""pretzels"", … ""almond bark""]}","<Compressed Sparse Row sparse matrix of dtype 'float64' 	with 11 stored elements and shape (1, 130236)>  Coords	Values  (0, 7337)	0.27457616363739945  (0, 12685)	0.3871133931186115  (0, 19199)	0.3181489993693008  (0, 29676)	0.3188529733949754  (0, 31613)	0.2273677064414553  (0, 44849)	0.2629846945636357  (0, 67466)	0.18886814325325993  (0, 73219)	0.2995899324685194  (0, 76492)	0.3185707815222047  (0, 88291)	0.3064827378587215  (0, 93854)	0.3607822177757804"
3374,"""Savory Sausage Casserole""","[""1 lb. bulk pork sausage"", ""1 c. uncooked rice"", … ""1/2 c. toasted halved or slivered blanched almonds""]","""sausage and rice casserole wit…","[""bulk pork sausage"", ""uncooked rice"", … ""toasted halved or slivered blanched almonds""]","[""pork sausage"", ""rice"", … ""almonds""]","{""Savory Sausage Casserole"",[""rice"", ""chicken noodle soup mix"", ""almonds""]}","<Compressed Sparse Row sparse matrix of dtype 'float64' 	with 26 stored elements and shape (1, 130236)>  Coords	Values  (0, 7358)	0.21355553258531798  (0, 15826)	0.2656119366939254  (0, 20044)	0.2800944185217424  (0, 24794)	0.17787352198803952  (0, 26596)	0.14236548100216345  (0, 27801)	0.11540937169961717  (0, 35906)	0.3261291738007668  (0, 44415)	0.1630538812069721  (0, 53524)	0.20752976289414013  (0, 67466)	0.14751402422039178  (0, 81718)	0.26515036232498423  (0, 83546)	0.14095178706771552  (0, 84923)	0.1381936223803339  (0, 91367)	0.1625829450155788  (0, 92645)	0.18955392095331905  (0, 98577)	0.17385264943030337  (0, 102599)	0.14795515722248265  (0, 102752)	0.2024964773815439  (0, 107807)	0.14619186341709522  (0, 107960)	0.2483404065659469  (0, 109616)	0.16831256159406507  (0, 109799)	0.195927873320918  (0, 116265)	0.13064018822766785  (0, 118687)	0.20597190005502025  (0, 122036)	0.21671441949791226  (0, 125599)	0.14634658876689555"
7006,"""Zemakove Knedliky A Kysele Zel…","[""2 c. mashed potatoes"", ""1/2 tsp. salt"", … ""1 1/2 c. flour""]","""potato dumplings with sauerkra…","[""mashed potatoes"", ""salt"", … ""flour""]","[""mashed potatoes"", ""sauerkraut"", … ""egg""]","{""Zemakove Knedliky A Kysele Zeli(Potato Dumplings With Sauerkraut) "",[""sauerkraut"", ""mashed potatoes""]}","<Compressed Sparse Row sparse matrix of dtype 'float64' 	with 10 stored elements and shape (1, 130236)>  Coords	Values  (0, 11950)	0.2658405181035908  (0, 40101)	0.268414630494851  (0, 45373)	0.2313694287604218  (0, 73488)	0.3664366954222877  (0, 92993)	0.2999027792622051  (0, 93268)	0.2540424580328967  (0, 101700)	0.19898221203975064  (0, 102689)	0.4467713827574058  (0, 107306)	0.35325407455267455  (0, 120835)	0.3885980737393621"
9957,"""Scalloped Sweet Potatoes""","[""1/4 c. sugar"", ""1/2 tsp. cinnamon"", … ""3 Tbsp. margarine""]","""scalloped sweet potatoes and a…","[""sugar"", ""cinnamon"", … ""margarine""]","[""sweet potatoes"", ""apples"", ""cinnamon""]","{""Scalloped Sweet Potatoes"",[""sweet potatoes"", ""apples""]}","<Compressed Sparse Row sparse matrix of dtype 'float64' 	with 13 stored elements and shape (1, 130236)>  Coords	Values  (0, 9131)	0.29602645547341194  (0, 28655)	0.2382677014934043  (0, 57771)	0.23875589953065665  (0, 72698)	0.24389063403695624  (0, 74731)	0.4537104932959401  (0, 88532)	0.23643065970012364  (0, 92993)	0.24967115325124473  (0, 96812)	0.31683790501263137  (0, 107807)	0.4061269287450603  (0, 113207)	0.16943676611911168  (0, 114215)	0.24878528714618794  (0, 116265)	0.18146187200653421  (0, 120835)	0.16175530193551932"


### Identity query
Here we use the original ingredients as the input to the model. Since this is also what the model is trained on, it should give close to perfect results

In [8]:
tot_rr = 0
tot_prec_at_3 = 0
tot_evals = 0
for index, ingredient_list in eval_ds.select('index', 'ingredients').iter_rows():
    emb_query = run_BOW_on_line("\n".join(ingredient_list), model_uri)
    ranked_ds = rank_results(eval_ds, emb_query)
    tot_rr += reciprocal_rank(ranked_ds, index)
    tot_prec_at_3 += precision_at_k(ranked_ds, index)
    tot_evals += 1

Downloading artifacts: 100%|██████████████████████████████████████████████████████████| 5/5 [00:03<00:00,  1.36it/s]
Downloading artifacts: 100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  9.14it/s]


In [9]:
identity_mrr = tot_rr/tot_evals
print(f"Identity Query, MRR: {identity_mrr}")
indentity_mean_prec_at_3 = tot_prec_at_3 / tot_evals
print(f"Identity Query, P@3: {indentity_mean_prec_at_3}")

Identity Query, MRR: 0.9968928571428571
Identity Query, P@3: 0.998


### LLM task 1 - Main food items and preparations
Here the BOW should perform worse

In [10]:
tot_rr = 0
tot_prec_at_3 = 0
tot_evals = 0
for index, ingredient_list in eval_ds.select('index', 'main_food_items_and_preparations').iter_rows():
    emb_query = run_BOW_on_line("\n".join(ingredient_list), model_uri)
    ranked_ds = rank_results(eval_ds, emb_query)
    tot_rr += reciprocal_rank(ranked_ds, index)
    tot_prec_at_3 += precision_at_k(ranked_ds, index)
    tot_evals += 1

In [12]:
llm1_mrr = tot_rr/tot_evals
print(f"LLM Task 1, MRR: {llm1_mrr}")
llm1_mean_prec_at_3 = tot_prec_at_3 / tot_evals
print(f"LLM Task 2, P@3: {llm1_mean_prec_at_3}")

LLM Task 1, MRR: 0.9710163489736073
LLM Task 2, P@3: 0.987


### LLM task 2 - Important ingredients for search
Here the BOW should perform worse

In [13]:
tot_rr = 0
tot_prec_at_3 = 0
tot_evals = 0
for index, ingredient_list in eval_ds.select('index', 'important_ingredients_for_search').iter_rows():
    emb_query = run_BOW_on_line("\n".join(ingredient_list), model_uri)
    ranked_ds = rank_results(eval_ds, emb_query)
    tot_rr += reciprocal_rank(ranked_ds, index)
    tot_prec_at_3 += precision_at_k(ranked_ds, index)
    tot_evals += 1

In [14]:
llm2_mrr = tot_rr/tot_evals
print(f"LLM Task 2, MRR: {llm2_mrr}")
llm2_mean_prec_at_3 = tot_prec_at_3 / tot_evals
print(f"LLM Task 2, P23: {llm2_mean_prec_at_3}")

LLM Task 2, MRR: 0.7990394606038969
LLM Task 2, P23: 0.865


### LLM task 3 - Google query, processed
Here the BOW should perform worse

In [15]:
tot_rr = 0
tot_prec_at_3 = 0
tot_evals = 0
# TODO: Add title into the query
for index, ingredient_list in eval_ds.select('index', 'google_search_query_processed').iter_rows():
    emb_query = run_BOW_on_line("\n".join(ingredient_list['ingredients']), model_uri)
    ranked_ds = rank_results(eval_ds, emb_query)
    tot_rr += reciprocal_rank(ranked_ds, index)
    tot_prec_at_3 += precision_at_k(ranked_ds, index)
    tot_evals += 1

In [16]:
llm3_mrr = tot_rr/tot_evals
print(f"LLM Task 3, MRR: {llm3_mrr}")
llm3_mean_prec_at_3 = tot_prec_at_3 / tot_evals
print(f"LLM Task 3, P@3: {llm3_mean_prec_at_3}")

LLM Task 3, MRR: 0.5290517475412047
LLM Task 3, P@3: 0.598


# NER DB 
Currently this should perform pretty bad, since there is no ranking function, the metrics would be pretty random.

In [17]:
model_uri = "models:/recipe_NER@prod"
ner = mlflow.spacy.load_model(model_uri=model_uri)

Downloading artifacts: 100%|████████████████████████████████████████████████████████| 17/17 [00:09<00:00,  1.84it/s]


### Identity query

In [18]:
tokens, preps, optionals = transform_data_to_tokens(eval_ds['ingredients'], ner, create_vocab=False)
ner_ds = eval_ds.select(pl.exclude('tfidf')).with_columns(
    pl.Series(name='tokens', values=tokens),
    pl.Series(name='preps', values=preps),
    pl.Series(name='optionals', values=optionals)
)
ner_ds.write_parquet('eval_test.parquet')

In [19]:
tot_rr = 0
tot_prec_at_3 = 0
tot_evals = 0
for index, tokens, preps in ner_ds.select('index', 'tokens', 'preps').iter_rows():
    preps_obj = json.loads(preps)
    sql = construct_ingredient_query('eval_test.parquet', tokens, preps_obj)
    try:
        results = duckdb.sql(sql).pl()
        tot_rr += reciprocal_rank(results, index)
        tot_prec_at_3 += precision_at_k(results, index)
        tot_evals += 1
    except:
        continue

In [20]:
identity_mrr = tot_rr/tot_evals
print(f"Identity Query, MRR: {identity_mrr}")
indentity_mean_prec_at_3 = tot_prec_at_3 / tot_evals
print(f"Identity Query, P@3: {indentity_mean_prec_at_3}")

Identity Query, MRR: 0.966053986261216
Identity Query, P@3: 0.9759036144578314


### LLM task 1

In [21]:
tokens, preps, optionals = transform_data_to_tokens(eval_ds['main_food_items_and_preparations'], ner, create_vocab=False)
ner_ds = eval_ds.select(pl.exclude('tfidf')).with_columns(
    pl.Series(name='tokens', values=tokens),
    pl.Series(name='preps', values=preps),
    pl.Series(name='optionals', values=optionals)
)
ner_ds.write_parquet('eval_test.parquet')

In [22]:
tot_rr = 0
tot_prec_at_3 = 0
tot_evals = 0
for index, tokens, preps in ner_ds.select('index', 'tokens', 'preps').iter_rows():
    preps_obj = json.loads(preps)
    sql = construct_ingredient_query('eval_test.parquet', tokens, preps_obj)
    try:
        results = duckdb.sql(sql).pl()
        tot_rr += reciprocal_rank(results, index)
        tot_prec_at_3 += precision_at_k(results, index)
        tot_evals += 1
    except:
        continue

In [23]:
llm1_mrr = tot_rr/tot_evals
print(f"LLM Task 1, MRR: {llm1_mrr}")
llm1_mean_prec_at_3 = tot_prec_at_3 / tot_evals
print(f"LLM Task 1, P@3: {llm1_mean_prec_at_3}")

LLM Task 1, MRR: 0.9623864537790293
LLM Task 1, P@3: 0.9707661290322581


### LLM task 2

In [24]:
tokens, preps, optionals = transform_data_to_tokens(eval_ds['important_ingredients_for_search'], ner, create_vocab=False)
ner_ds = eval_ds.select(pl.exclude('tfidf')).with_columns(
    pl.Series(name='tokens', values=tokens),
    pl.Series(name='preps', values=preps),
    pl.Series(name='optionals', values=optionals)
)
ner_ds.write_parquet('eval_test.parquet')

In [25]:
tot_rr = 0
tot_prec_at_3 = 0
tot_evals = 0
for index, tokens, preps in ner_ds.select('index', 'tokens', 'preps').iter_rows():
    preps_obj = json.loads(preps)
    sql = construct_ingredient_query('eval_test.parquet', tokens, preps_obj)
    try:
        results = duckdb.sql(sql).pl()
        tot_rr += reciprocal_rank(results, index)
        tot_prec_at_3 += precision_at_k(results, index)
        tot_evals += 1
    except:
        continue

In [26]:
llm2_mrr = tot_rr/tot_evals
print(f"LLM Task 2, MRR: {llm2_mrr}")
llm2_mean_prec_at_3 = tot_prec_at_3 / tot_evals
print(f"LLM Task 2, P@3: {llm2_mean_prec_at_3}")

LLM Task 2, MRR: 0.9125958040123223
LLM Task 2, P@3: 0.948


### LLM Task 3

In [27]:
ingreds = [node['ingredients'] for node in eval_ds['google_search_query_processed']]
tokens, preps, optionals = transform_data_to_tokens(ingreds, ner, create_vocab=False)
ner_ds = eval_ds.select(pl.exclude('tfidf')).with_columns(
    pl.Series(name='tokens', values=tokens),
    pl.Series(name='preps', values=preps),
    pl.Series(name='optionals', values=optionals)
)
ner_ds.write_parquet('eval_test.parquet')

In [None]:
tot_rr = 0
tot_prec_at_3 = 0
tot_evals = 0
for index, tokens, preps in ner_ds.select('index', 'tokens', 'preps').iter_rows():
    preps_obj = json.loads(preps)
    sql = construct_ingredient_query('eval_test.parquet', tokens, preps_obj)
    try:
        results = duckdb.sql(sql).pl()
        tot_rr += reciprocal_rank(results, index)
        tot_prec_at_3 += precision_at_k(results, index)
        tot_evals += 1
    except:
        continue

In [None]:
llm3_mrr = tot_rr/tot_evals
print(f"LLM Task 3, MRR: {llm3_mrr}")
llm3_mean_prec_at_3 = tot_prec_at_3 / tot_evals
print(f"LLM Task 3, P@3: {llm3_mean_prec_at_3}")