In [None]:
import pandas as pd
import numpy as np
import os
import json
import sys


# Resolve the directory two levels above the current working directory and
# expose it on the import search path so the project packages imported below
# can be found.
original_cwd = os.getcwd()
backend_path = os.path.abspath(os.path.join(original_cwd, "../.."))

# Debug output: the resolved path and the search path before it is adjusted.
print(backend_path)
print(sys.path)

# Insert at the front, and only once, so re-running the cell adds no duplicates.
if backend_path not in sys.path:
    sys.path.insert(0, backend_path)

# Project-local imports; they must stay below the sys.path adjustment.
from shared.snowflake.client import SnowflakeClient
from experiments.utils.llm import (
    build_prompt,
    get_llm_response,
    compare_ground_truth_vs_llm,
    count_number_tokens,
    split_docs_recursively,
)

# Report the working directory (this cell itself never changes it).
print(f"Returned to original working directory: {os.getcwd()}")

**load the config file**

In [None]:
# The config path below is relative to the working directory. When no
# "config" entry is visible from here, step up to the parent directory first.
if not os.path.exists("config"):
    parent_dir = os.path.abspath(os.path.join(os.getcwd(), ".."))
    os.chdir(parent_dir)

CONFIG_FILE_PATH = "config/base_config.json"

# Parse the base configuration; every later cell reads its settings from it.
with open(CONFIG_FILE_PATH, "r", encoding="utf-8") as config_file:
    config = json.load(config_file)

**set the experiment ID**

In [None]:
# The experiment id lives under the experiment-specific section of the config;
# it is used later to format the output paths.
experiment_params = config["experiments_specifique_params"]
EXPERIENCE_ID = experiment_params["experiment_id"]

print(EXPERIENCE_ID)

**load the cleaned recipes file**

In [None]:
# Path of the recipes CSV, taken from the config.
INPUT_DATA_FILE = config["input_recipies_file"]

# Load the recipes into a DataFrame; the evaluation loop looks rows up by "ID".
df_recipes_cleaned = pd.read_csv(filepath_or_buffer=INPUT_DATA_FILE)

# Preview the first rows as the cell output.
df_recipes_cleaned.head()

## Evaluating the LLM and the prompt against the ground truth ##

**load the test dataset**

In [None]:
# Location of the JSON file holding the test queries and their ground truth.
QUERY_TEST_FILE_PATH = config["query_test_file_path"]

# Each entry is later read for its "query_text" and "relevance_documents".
with open(QUERY_TEST_FILE_PATH, "r", encoding="utf-8") as query_file:
    ground_truth_results = json.load(query_file)

print(QUERY_TEST_FILE_PATH)

**load the prompt**

In [None]:
# Path of the evaluation prompt template, taken from the config.
PROMPT_EVAL_PATH = config["eval_prompt_file"]

# Read the template as UTF-8 explicitly: without `encoding`, open() falls back
# to the locale's preferred encoding, which can mis-decode non-ASCII characters
# on some platforms. The template is also written back out as UTF-8 at the end
# of the notebook, so reading and writing now agree.
with open(PROMPT_EVAL_PATH, "r", encoding="utf-8") as f:
    prompt_template = f.read()

**calculate the number of tokens in the prompt template**

In [None]:
# LLM settings read from the experiment configuration.
LLM_MODEL = config["llm_model"]
LLM_MODEL_CONTEXT_WINDOWS = config["context_window"]
LLM_TEMPERATURE = config["temperature"]
LLM_MAX_TOKENS = config["max_tokens"]

# Token count of the bare prompt template; it is passed to the document
# splitter later as `base_template_size`.
num_token_prompt_template = count_number_tokens(
    client=SnowflakeClient(),
    model=LLM_MODEL,
    text=str(prompt_template),
)
num_token_prompt_template

**define the schema of the response**

In [None]:
# Response schema passed to the LLM call as `response_format`; this experiment
# uses the config variant named "without justification".
json_schema = config["llm_json_format_without_justification"]
json_schema

In [None]:
# Run every ground-truth query through the LLM.
#
# For each query, the candidate documents listed in the ground truth are
# looked up in the recipes DataFrame, split into batches by
# `split_docs_recursively` (given the context window and the token counts),
# and each batch is sent to the LLM in its own prompt. Every parsed JSON
# response is appended to `llm_results`.
COLUMNS_TEXT = config["columns_to_clean"]

# One client reused for every call instead of building a new one per request.
snowflake_client = SnowflakeClient()

llm_results = []

for query in ground_truth_results:
    query_text = query["query_text"]
    num_token_query = count_number_tokens(
        client=snowflake_client, model=LLM_MODEL, text=str(query_text)
    )

    # Build doc entries for the prompt
    doc_entries = []
    for document in query["relevance_documents"]:
        doc_id = document["doc_id"]
        recipe_row = df_recipes_cleaned[df_recipes_cleaned["ID"] == doc_id]
        if recipe_row.empty:
            # Best effort: documents missing from the recipes file are skipped.
            continue

        # If several rows share the id, only the first match is used.
        first_match = recipe_row.iloc[0]
        recipe_info = {
            col_info["start_text"]: first_match[col_info["column_name"]]
            for col_info in COLUMNS_TEXT.values()
        }

        doc_entries.append({"doc_id": doc_id, "recipe_info": recipe_info})

    num_token_doc = count_number_tokens(
        client=snowflake_client, model=LLM_MODEL, text=str(doc_entries)
    )

    doc_entries_list = split_docs_recursively(
        client=snowflake_client,
        model=LLM_MODEL,
        max_tokens=LLM_MODEL_CONTEXT_WINDOWS,
        base_template_size=num_token_prompt_template,
        num_token_query=num_token_query,
        num_token_doc=num_token_doc,
        doc_entries=doc_entries,
    )

    for doc_batch in doc_entries_list:
        # Fix: build the prompt from the current batch. Using the full
        # `doc_entries` list here resent every document for each batch and
        # defeated the context-window split.
        prompt = build_prompt(prompt_template, query_text, doc_batch)

        # Call the LLM
        llm_response = get_llm_response(
            client=snowflake_client,
            model=LLM_MODEL,
            prompt=prompt,
            response_format=json_schema,
            temperature=LLM_TEMPERATURE,
            max_tokens=LLM_MAX_TOKENS,
        )

        # Parse the raw response text into Python objects.
        json_output = json.loads(llm_response)
        print(json_output)

        # NOTE(review): one entry is appended per batch, so a query that was
        # split yields several entries - confirm compare_ground_truth_vs_llm
        # handles that.
        llm_results.append(json_output)

    print(f"✓ Parsed JSON for query: {query_text}")

## compare LLM response to the ground truth ##

**compare the LLM response with the ground truth**

In [None]:
# Score the LLM answers against the ground truth, then save the answers
# together with the average coherence score.
coherence_avg_query, coherence_per_query = compare_ground_truth_vs_llm(ground_truth_results, llm_results)

QUERY_LLM_RESULTS_PATH = config["query_llm_file_path"].format(
    experiment_id=EXPERIENCE_ID
)

print("Final coherence score (avg query):", coherence_avg_query)
print("Per-query coherence:", coherence_per_query)

# Build the saved payload as a new list instead of appending to `llm_results`:
# mutating the input made this cell non-idempotent, because a re-run fed the
# score entry back into the comparison and stacked duplicate score entries.
llm_results_with_score = llm_results + [{"COHERENCE_SCORE_AVG_QUERY": coherence_avg_query}]

# Make sure the destination directory exists before writing.
results_dir = os.path.dirname(QUERY_LLM_RESULTS_PATH)
if results_dir:
    os.makedirs(results_dir, exist_ok=True)

# Save results - now they're proper dicts, not strings
with open(QUERY_LLM_RESULTS_PATH, "w", encoding="utf-8") as f:
    json.dump(llm_results_with_score, f, indent=2, ensure_ascii=False)

**write the config file for this experiment**

In [None]:
# Snapshot the exact configuration and prompt used for this run next to its
# results, so the experiment can be reproduced later.
OUPUT_EXPERIMENT_DIR = config["output_experiments_dir"].format(
    experiment_id=EXPERIENCE_ID
)

# Create the experiment directory if needed; without this, the writes below
# raise FileNotFoundError for a new experiment id. An empty path means the
# current directory and is skipped.
if OUPUT_EXPERIMENT_DIR:
    os.makedirs(OUPUT_EXPERIMENT_DIR, exist_ok=True)

# Write the config file
with open(os.path.join(OUPUT_EXPERIMENT_DIR, "config.json"), "w", encoding="utf-8") as f:
    json.dump(config, f, indent=4)

# Write the prompt file
with open(os.path.join(OUPUT_EXPERIMENT_DIR, "eval_prompt.txt"), "w", encoding="utf-8") as f:
    f.write(prompt_template)