# SentenceBERT Evaluation 

In [11]:
import polars as pl
import seaborn as sns
import numpy as np
import json
import timeit
import matplotlib.pyplot as plt
from sentence_transformers import SentenceTransformer, CrossEncoder

In [85]:
import os
import sys
from dotenv import load_dotenv
# Prepend the parent, grandparent and great-grandparent directories to the
# import path, in that order, before importing from `app` below
# (NOTE(review): presumably one of them is the project root that holds `app` - confirm).
for _ancestor in ('..', '../..', '../../..'):
    sys.path.insert(0, os.path.abspath(_ancestor))
from app.evaluation import reciprocal_rank, rank_BOW_results, precision_at_k
# Kept as the last expression so the cell still displays load_dotenv()'s return value.
load_dotenv()

True

# Set up definitions
Datasets:
 - 1k_processed
 - 10k
 - 100k
 - full

In [74]:
# Name of the evaluation corpus to load; the corpus-selection cell below
# branches on this value.
dataset_name = "10k"
# Prefix of the results file written at the end of the notebook.
run_name = "first_SBERT"

In [75]:
# Load the two query sets that the evaluation tasks below iterate over.
with open("../../../data/eval_data/processed_data.json", "r") as processed_file, \
        open("../../../data/eval_data/human_annotations.json", "r") as human_file:
    query_ds = pl.read_json(processed_file)
    human_query_ds = pl.read_json(human_file)

In [76]:
# Select the corpus that queries are ranked against, based on dataset_name.
if dataset_name == '1k_processed':
    # The 1k corpus is the processed query set itself.
    eval_ds = query_ds
elif dataset_name in ('10k', '100k'):
    # Both larger corpora follow the same eval_<name>.json file naming.
    with open(f"../../../data/eval_data/eval_{dataset_name}.json", "r") as f:
        eval_ds = pl.read_json(f)
else:
    # Fail loudly: without this branch an unsupported name (including 'full',
    # which has no file wired up here) would leave eval_ds undefined or, in a
    # re-run notebook, silently pointing at the previously loaded corpus.
    raise ValueError(
        f"Unsupported dataset_name {dataset_name!r}; "
        "expected one of '1k_processed', '10k', '100k'"
    )

In [91]:
# Column layout of the metrics table; evaluation_task_SBERT stacks one row
# per task onto a frame with exactly these columns.
results_schema = {
    "Model": pl.String,
    "Eval_Task": pl.String,
    "Dataset": pl.String,
    "MRR": pl.Float64,
    "TKA": pl.Float64,
    "Time_Tot": pl.Float64,
    "Time_Query": pl.Float64,
    "Error_Count": pl.Int32,
}
# Start from an empty frame so reruns do not accumulate old rows.
results_df = pl.DataFrame(schema=results_schema)
results_df

Model,Eval_Task,Dataset,MRR,TKA,Time_Tot,Time_Query,Error_Count
str,str,str,f64,f64,f64,f64,i32


In [78]:
# Sentence-embedding model used to encode both the corpus and every query below.
sbert_model = SentenceTransformer("all-MiniLM-L6-v2")

In [79]:
# One newline-joined string per corpus row, built from its `ingredients` entries.
sents = list(map("\n".join, eval_ds['ingredients']))

In [80]:
# Encode all corpus strings with a single encode() call.
embeddings = sbert_model.encode(sents)

In [82]:
# Store the corpus embeddings next to the rows they belong to; the evaluation
# function reads them back from the "sbert" column.
sbert_column = pl.Series(name="sbert", values=embeddings)
eval_ds = eval_ds.with_columns(sbert_column)

## Running code

### Timing the dataset transformation

```python
start = timeit.default_timer()
embeddings = sbert_model.encode(sents)
time = timeit.default_timer() - start
print(f"Dataset transformation took {time}s, or {time/eval_ds.shape[0]}s/row")
```

In [92]:
def evaluation_task_SBERT(eval_ds: pl.DataFrame, query_ds: pl.DataFrame, query_field: str, results_df: pl.DataFrame,
                          task_name: str, use_title: bool = False, model_name: str = "sbert") -> pl.DataFrame:
    """Run one SBERT retrieval task and return ``results_df`` with its metrics row added.

    Each query is encoded with the module-level ``sbert_model`` and compared
    against the embeddings stored in ``eval_ds['sbert']``. Rows with a
    similarity greater than 0 are sorted by descending similarity and scored
    with ``reciprocal_rank`` and ``precision_at_k``. The module-level
    ``dataset_name`` is read for logging and for the ``Dataset`` column.

    Args:
        eval_ds: Corpus to rank; must contain an ``sbert`` embedding column.
        query_ds: Query table; must contain ``index`` and ``query_field``.
        query_field: Column of ``query_ds`` holding the query. Values of
            ``'google_search_query_processed'`` are indexed with
            ``['ingredients']`` first; values of any other field are joined
            with newlines directly.
        results_df: Metrics accumulated so far; not modified in place.
        task_name: Label written to the ``Eval_Task`` column.
        use_title: Currently unused; kept so existing calls stay valid.
        model_name: Label written to the ``Model`` column.

    Returns:
        A new frame: ``results_df`` with one row for this task stacked on.

    Raises:
        ValueError: If ``query_ds`` yields no rows, since the mean metrics
            would be undefined.
    """
    print(f"--- Running {task_name} on {dataset_name} using {model_name} ---")
    # Tasks whose metrics are reported as 1 - value.
    # NOTE(review): presumably retrieving the indexed document counts as a
    # failure for these query sets - confirm.
    inverted_tasks = (
        'gold_adversarial_ingredients',
        'gold_adversarial_subset_ingredients',
        'added_prep_suffixes',
    )
    tot_rr = 0
    tot_prec_at_3 = 0
    tot_evals = 0
    # Never incremented in this function; reported only so the row has an
    # Error_Count value like the results table expects.
    errors = 0
    start = timeit.default_timer()
    # The corpus embeddings do not change between queries, so convert them to
    # a float32 matrix once (inside the timed region) instead of handing the
    # polars Series to similarity() on every iteration.
    corpus_emb = np.asarray(eval_ds['sbert'].to_list(), dtype=np.float32)
    for index, q in query_ds.select('index', query_field).iter_rows():
        parts = q['ingredients'] if query_field == 'google_search_query_processed' else q
        query = "\n".join(parts)

        query_emb = sbert_model.encode(query)
        similarities = sbert_model.similarity(query_emb, corpus_emb)
        # similarities has one row per query; only one query is encoded here.
        ranked_ds = eval_ds.with_columns(
                pl.Series(name='rank', values=similarities[0])
            ).filter(pl.col('rank') > 0).sort('rank', descending=True)
        tot_rr += reciprocal_rank(ranked_ds, index)
        tot_prec_at_3 += precision_at_k(ranked_ds, index)
        tot_evals += 1
    elapsed = timeit.default_timer() - start
    if tot_evals == 0:
        # Previously surfaced as a bare ZeroDivisionError in the averages below.
        raise ValueError(f"No queries to evaluate for task {task_name!r}: query_ds is empty")
    mrr = tot_rr / tot_evals
    mprec3 = tot_prec_at_3 / tot_evals
    if task_name in inverted_tasks:
        mrr = 1 - mrr
        mprec3 = 1 - mprec3
    # NOTE(review): keep the list/scalar mix exactly as is - the dtypes polars
    # infers for this dict must match results_df for vstack to succeed; verify
    # before wrapping the scalars in lists.
    result_data = {
        "Model": [model_name],
        'Eval_Task': [task_name],
        'Dataset': dataset_name,
        "MRR": [mrr],
        "TKA": [mprec3],
        "Time_Tot": elapsed,
        "Time_Query": elapsed / tot_evals,
        "Error_Count": errors
    }
    row_df = pl.DataFrame(result_data)
    results_df = results_df.vstack(row_df)
    print(f"--- COMPLETED {task_name} in {elapsed}s ---")
    return results_df

In [13]:
# (query column, task label) pairs evaluated against query_ds, in this order.
for _field, _task in (
    ('ingredients', 'identity'),
    ('main_food_items_and_preparations', 'llm1'),
    ('important_ingredients_for_search', 'llm2'),
    ('google_search_query_processed', 'llm3'),
):
    results_df = evaluation_task_SBERT(eval_ds, query_ds, _field, results_df, _task)

In [93]:
# (query column, task label) pairs evaluated against human_query_ds, in this order.
for _field, _task in (
    ('human_ingredients', 'gold_ingredients'),
    ('human_key_ingredients', 'gold_key_ingredients'),
):
    results_df = evaluation_task_SBERT(eval_ds, human_query_ds, _field, results_df, _task)

--- Running gold_ingredients on 10k using sbert ---
--- COMPLETED gold_ingredients in 182.10980565601494s ---
--- Running gold_key_ingredients on 10k using sbert ---
--- COMPLETED gold_key_ingredients in 196.45251754496712s ---


In [None]:
# Evaluate the `added_preps` column of query_ds under the 'added_prep_suffixes'
# task label (one of the tasks whose metrics the function reports as 1 - value).
results_df = evaluation_task_SBERT(
    eval_ds=eval_ds,
    query_ds=query_ds,
    query_field='added_preps',
    results_df=results_df,
    task_name='added_prep_suffixes',
)

In [94]:
# (query column, task label) pairs for the adversarial human query sets, in this order.
for _field, _task in (
    ('human_adv_ingredients', 'gold_adversarial_ingredients'),
    ('human_adv_subset_ingredients', 'gold_adversarial_subset_ingredients'),
):
    results_df = evaluation_task_SBERT(eval_ds, human_query_ds, _field, results_df, _task)

--- Running gold_adversarial_ingredients on 10k using sbert ---
--- COMPLETED gold_adversarial_ingredients in 202.4209078020649s ---
--- Running gold_adversarial_subset_ingredients on 10k using sbert ---
--- COMPLETED gold_adversarial_subset_ingredients in 205.59320931194816s ---


In [95]:
results_df

Model,Eval_Task,Dataset,MRR,TKA,Time_Tot,Time_Query,Error_Count
str,str,str,f64,f64,f64,f64,i32
"""sbert""","""gold_ingredients""","""10k""",0.791362,0.84,182.109806,3.642196,0
"""sbert""","""gold_key_ingredients""","""10k""",0.651571,0.68,196.452518,3.92905,0
"""sbert""","""gold_adversarial_ingredients""","""10k""",0.232163,0.12,202.420908,4.048418,0
"""sbert""","""gold_adversarial_subset_ingred…","""10k""",0.70695,0.68,205.593209,4.111864,0


In [96]:
from datetime import datetime
# Minute-resolution local timestamp (YYYY_MM_DD__HH_MM) used in the results file name.
date_time = f"{datetime.now():%Y_%m_%d__%H_%M}"

In [97]:
date_time

'2025_06_12__21_25'

In [98]:
# Persist the metrics table. The output directory is created first so the
# write does not fail when the results folder does not exist yet.
results_dir = "../../../data/results/SBERT"
os.makedirs(results_dir, exist_ok=True)
results_df.write_parquet(f"{results_dir}/{run_name}_{date_time}.parquet")

In [99]:
# Read the file that was just written and display it
# (NOTE(review): presumably a round-trip sanity check - confirm).
pl.read_parquet(f"../../../data/results/SBERT/{run_name}_{date_time}.parquet")

Model,Eval_Task,Dataset,MRR,TKA,Time_Tot,Time_Query,Error_Count
str,str,str,f64,f64,f64,f64,i32
"""sbert""","""gold_ingredients""","""10k""",0.791362,0.84,182.109806,3.642196,0
"""sbert""","""gold_key_ingredients""","""10k""",0.651571,0.68,196.452518,3.92905,0
"""sbert""","""gold_adversarial_ingredients""","""10k""",0.232163,0.12,202.420908,4.048418,0
"""sbert""","""gold_adversarial_subset_ingred…","""10k""",0.70695,0.68,205.593209,4.111864,0
