In [131]:
# Import necessary libraries
import json
import pandas as pd
import openai
import sys
# Append the parent folder to the system path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), os.pardir)))
# Now you can import config.py
import config
from groq import Groq
from dotenv import load_dotenv
import os
from sklearn.feature_extraction.text import CountVectorizer
from tqdm.auto import tqdm

In [100]:
# Load environment variables
load_dotenv()

True

In [102]:
# Get the current working directory (optional step)
os.getcwd()

'd:\\Projects\\datatalks-final-project\\notebooks'

In [103]:
# Setup the OpenAI client to use either Groq, OpenAI.com, or Ollama API
load_dotenv(override=True)
API_HOST = os.getenv("API_HOST")
API_HOST

'groq'

In [104]:
if API_HOST == "groq":
    client = client = Groq(
    api_key=os.environ.get("GROQ_API_KEY"),
)
    MODEL_NAME = os.getenv("GROQ_MODEL")

elif API_HOST == "ollama":
    client = openai.OpenAI(
        base_url=os.getenv("OLLAMA_ENDPOINT"),
        api_key="nokeyneeded",
    )
    MODEL_NAME = os.getenv("OLLAMA_MODEL")

elif API_HOST == "openai":
    client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
    MODEL_NAME = os.getenv("OPENAI_MODEL")
else:
    print("No LLM Selected")

In [105]:
client

<groq.Groq at 0x22abeb35a30>

In [106]:
MODEL_NAME

'llama3-8b-8192'

In [107]:
with open(input_data_folder + 'main_faq_database.json', 'rt') as f_in:
    data = json.load(f_in)

In [108]:
documents = []
for dish in data['dishes']:
    dish_name = dish['dish name']
    for doc in dish['documents']:
        doc['dish_name'] = dish_name  # Add dish_name to each document
        documents.append(doc)

In [109]:
documents[1:3]

[{'id': '142_2',
  'question': 'How many calories does it have?',
  'section': 'calories',
  'text': 'The almond fudge banana cake has 224.8 calories.',
  'dish_name': 'almond fudge banana cake'},
 {'id': '142_3',
  'question': 'How much total fat does it contain?',
  'section': 'nutritional',
  'text': 'The almond fudge banana cake has 14 grams of total fat (PDV).',
  'dish_name': 'almond fudge banana cake'}]

Retreival evaluation

In [110]:
import minsearch

index = minsearch.Index(
    text_fields = ['id', 'question','section','text','dish_name'],
    keyword_fields=['dish_name']
)

index.fit(documents)

<minsearch.Index at 0x22ac050b080>

In [111]:
#import ground truth dataset
import pandas as pd
ground_df = pd.read_csv(input_data_folder +'gold_standard_data/ground-truth-data.csv')

In [112]:
ground_truth = ground_df.to_dict(orient='records')

In [16]:
documents[1]

{'id': '142_2',
 'question': 'How many calories does it have?',
 'section': 'calories',
 'text': 'The almond fudge banana cake has 224.8 calories.',
 'dish_name': 'almond fudge banana cake'}

In [17]:
def hit_rate(relevance_total):
    cnt = 0

    for line in relevance_total:
        if True in line:
            cnt = cnt + 1

    return cnt / len(relevance_total)


In [18]:
def mrr(relevance_total):
    total_score = 0.0

    for line in relevance_total:
        for rank in range(len(line)):
            if line[rank] == True:
                total_score = total_score + 1 / (rank + 1)

    return total_score / len(relevance_total)

In [19]:
documents[1]

{'id': '142_2',
 'question': 'How many calories does it have?',
 'section': 'calories',
 'text': 'The almond fudge banana cake has 224.8 calories.',
 'dish_name': 'almond fudge banana cake'}

In [20]:
def minsearch_search(query):
    boost = {
    }

    results = index.search(
        query=query,
        filter_dict={'question': query},
        boost_dict=boost,
        num_results=10
    )

    return results

In [21]:
def minsearch_search(query):
    boost = {
 'question': 2.5,       # High priority: users are searching questions
    'dish_name': 1.7,      # Important for matching specific dishes
    'section': 1.2,        # Moderate boost: helps in filtering by category
    'text': 1.3        # Slight boost: could help match answer content} 
    }

    results = index.search(
        query=query,
        filter_dict={'question': query},
        boost_dict=boost,
        num_results=10
    )

    return results

In [22]:
ground_df.head()

Unnamed: 0,question,id
0,Is almond fudge banana cake made with real ban...,142_1
1,Does almond fudge banana cake contain dairy?,142_1
2,Can I be sure there are no artificial flavors ...,142_1
3,Are the ingredients in almond fudge banana cak...,142_1
4,May I know the types of flour and sugar used i...,142_1


In [23]:
ground_truth = ground_df.to_dict(orient='records')


In [24]:
ground_truth[0]


{'question': 'Is almond fudge banana cake made with real bananas?',
 'id': '142_1'}

In [25]:
def evaluate(ground_truth, search_function):
    relevance_total = []

    for q in tqdm(ground_truth):
        doc_id = q['id']
        results = search_function(q)
        relevance = [d['id'] == doc_id for d in results]
        relevance_total.append(relevance)

    return {
        'hit_rate': hit_rate(relevance_total),
        'mrr': mrr(relevance_total),
    }

In [26]:
from tqdm.auto import tqdm 

In [27]:
evaluate(ground_truth, lambda q: minsearch_search(q['question']))

  0%|          | 0/16527 [00:00<?, ?it/s]

{'hit_rate': 0.5878259817268712, 'mrr': 0.34479029505350023}

In [28]:
client

<groq.Groq at 0x22abca73c20>

In [29]:
import pandas as pd

def visualize_search_results(ground_truth, search_function):
    # Create an empty list to store the results
    search_results = []

    # Iterate over each query in the ground truth dataset
    for q in tqdm(ground_truth):
        doc_id = q['id']
        ground_truth_question = q['question']
        
        # Perform the search using the provided function
        results = search_function(q['question'])
        
        # Collect the top result from the search
        if results:
            matched_question = results[0]['question']
            matched_doc_id = results[0]['id']
        else:
            matched_question = None
            matched_doc_id = None

        # Append the comparison of the ground truth with the top matched result
        search_results.append({
            'Ground Truth ID': doc_id,
            'Ground Truth Question': ground_truth_question,
            'Matched Document ID': matched_doc_id,
            'Matched Question': matched_question
        })

    # Convert the results into a Pandas DataFrame for visualization
    results_df = pd.DataFrame(search_results)
    
    # Display the DataFrame
    pd.set_option('display.max_colwidth', None)
    return results_df

# Visualize the search results
results_df = visualize_search_results(ground_truth, lambda q: minsearch_search(q))
print(results_df.head())

# Optionally, you can also save this as an Excel or CSV file for further inspection
results_df.to_csv('search_results_comparison.csv', index=False)


  0%|          | 0/16527 [00:00<?, ?it/s]

  Ground Truth ID  \
0           142_1   
1           142_1   
2           142_1   
3           142_1   
4           142_1   

                                                        Ground Truth Question  \
0                         Is almond fudge banana cake made with real bananas?   
1                                Does almond fudge banana cake contain dairy?   
2  Can I be sure there are no artificial flavors in almond fudge banana cake?   
3         Are the ingredients in almond fudge banana cake free from liqueurs?   
4   May I know the types of flour and sugar used in almond fudge banana cake?   

  Matched Document ID                          Matched Question  
0              142_11  What tags are associated with this dish?  
1               142_3       How much total fat does it contain?  
2               142_5          How much sodium is in this dish?  
3               142_1                 What are the ingredients?  
4               142_4                What is the sugar c

In [39]:
ground_df.head(20)

Unnamed: 0,question,id
0,Is almond fudge banana cake made with real bananas?,142_1
1,Does almond fudge banana cake contain dairy?,142_1
2,Can I be sure there are no artificial flavors in almond fudge banana cake?,142_1
3,Are the ingredients in almond fudge banana cake free from liqueurs?,142_1
4,May I know the types of flour and sugar used in almond fudge banana cake?,142_1
5,How many calories does the Almond Fudge Banana Cake have?,142_2
6,Can I get nutrition facts for the Almond Fudge Banana Cake?,142_2
7,What's the calorie count for the Almond Fudge Banana Cake?,142_2
8,How many calories is the Almond Fudge Banana Cake?,142_2
9,What's the calorie content of the Almond Fudge Banana Cake?,142_2


In [40]:
documents[1:20]

[{'id': '142_2',
  'question': 'How many calories does it have?',
  'section': 'calories',
  'text': 'The almond fudge banana cake has 224.8 calories.',
  'dish_name': 'almond fudge banana cake'},
 {'id': '142_3',
  'question': 'How much total fat does it contain?',
  'section': 'nutritional',
  'text': 'The almond fudge banana cake has 14 grams of total fat (PDV).',
  'dish_name': 'almond fudge banana cake'},
 {'id': '142_4',
  'question': 'What is the sugar content?',
  'section': 'nutritional',
  'text': 'The almond fudge banana cake has 87 grams of sugar (PDV).',
  'dish_name': 'almond fudge banana cake'},
 {'id': '142_5',
  'question': 'How much sodium is in this dish?',
  'section': 'nutritional',
  'text': 'The almond fudge banana cake has 10 grams of sodium (PDV).',
  'dish_name': 'almond fudge banana cake'},
 {'id': '142_6',
  'question': 'What is the protein content?',
  'section': 'nutritional',
  'text': 'The almond fudge banana cake has 7 grams of protein (PDV).',
  'dish_

## The RAG flow


In [77]:
def build_prompt(query, search_results):
    prompt_template = """
You're an AI assistant helping with menu queries. Answer the QUESTION based on the CONTEXT provided.

QUESTION: {question}

CONTEXT: 
{context}
""".strip()

    context = ""
    
    for doc in search_results:
        context += f"Section: {doc['section']}\nQuestion: {doc['question']}\nAnswer: {doc['text']}\n\n"
    
    prompt = prompt_template.format(question=query, context=context).strip()
    return prompt


In [114]:
client

<groq.Groq at 0x22abeb35a30>

In [115]:

def llm(prompt):
    response = client.chat.completions.create(
        model= MODEL_NAME,
        messages=[{"role": "user", "content": prompt}]
    )
    
    return response.choices[0].message.content

In [116]:
import minsearch

index = minsearch.Index(
    text_fields = ['id', 'question','section','text','dish_name'],
    keyword_fields=['dish_name']
    #keyword_fields=['id']
)


In [117]:
len(documents)

3584

In [118]:
index.fit(documents)

<minsearch.Index at 0x22ac0b1af60>

In [119]:
def minsearch(question):
    return index.search(question)

In [120]:
    def rag(query):
        search_results = minsearch(query)
        prompt=build_prompt(query,search_results)
        answer = llm(prompt)
        return answer


In [121]:
query = "what are the calories of white chocolate cake is it good?"
query = "what is the time taken to prepare momma s fair funnel cake?"

rag(query)

'According to the context, the question is "what is the time taken to prepare momma\'s fair funnel cake?" and the answer is provided in the "preparation" section, which states: "The momma\'s fair funnel cake takes 20 minutes to prepare."'

In [50]:
ground_truth[2]['question']

'Can I be sure there are no artificial flavors in almond fudge banana cake?'

In [51]:
rag(ground_truth[1]['question'])

'Based on the context provided, the almond fudge banana cake does NOT contain dairy. There is no mention of dairy in the nutritional details or any other section.'

## RAG Evaluation - LLM as a judge

In [52]:
record = ground_truth[0]
question =  record['question']
answer_llm = rag(question)
answer_llm

'Based on the context, I couldn\'t find any information in the provided sections (tags, rating, nutritional, price, calorie_status, sodium, sugar, carbohydrate) that suggests whether the almond fudge banana cake is made with real bananas or not. Therefore, my answer to the question would be "Unknown" or "No information available".'

In [53]:
prompt2_template = """
You are an expert evaluator for a RAG system.
Your task is to analyze the relevance of the generated answer to the given question.
Based on the relevance of the generated answer, you will classify it
as "NON_RELEVANT", "PARTLY_RELEVANT", or "RELEVANT".

Here is the data for evaluation:

Question: {question}
Generated Answer: {answer_llm}

Please analyze the content and context of the generated answer in relation to the question
and provide your evaluation in parsable JSON without using code blocks:

{{
  "Relevance": "NON_RELEVANT" | "PARTLY_RELEVANT" | "RELEVANT",
  "Explanation": "[Provide a brief explanation for your evaluation]"
}}
""".strip()

In [54]:
prompt = prompt2_template.format(question=question,answer_llm=answer_llm)
prompt

'You are an expert evaluator for a RAG system.\nYour task is to analyze the relevance of the generated answer to the given question.\nBased on the relevance of the generated answer, you will classify it\nas "NON_RELEVANT", "PARTLY_RELEVANT", or "RELEVANT".\n\nHere is the data for evaluation:\n\nQuestion: Is almond fudge banana cake made with real bananas?\nGenerated Answer: Based on the context, I couldn\'t find any information in the provided sections (tags, rating, nutritional, price, calorie_status, sodium, sugar, carbohydrate) that suggests whether the almond fudge banana cake is made with real bananas or not. Therefore, my answer to the question would be "Unknown" or "No information available".\n\nPlease analyze the content and context of the generated answer in relation to the question\nand provide your evaluation in parsable JSON without using code blocks:\n\n{\n  "Relevance": "NON_RELEVANT" | "PARTLY_RELEVANT" | "RELEVANT",\n  "Explanation": "[Provide a brief explanation for yo

In [55]:
record = ground_truth[0]
record

{'question': 'Is almond fudge banana cake made with real bananas?',
 'id': '142_1'}

## RAG EVALUATION

In [56]:
len(ground_truth)


16527

In [57]:
record = ground_truth[0]


In [58]:
prompt = prompt2_template.format(question=question, answer_llm=answer_llm)
print(prompt)

You are an expert evaluator for a RAG system.
Your task is to analyze the relevance of the generated answer to the given question.
Based on the relevance of the generated answer, you will classify it
as "NON_RELEVANT", "PARTLY_RELEVANT", or "RELEVANT".

Here is the data for evaluation:

Question: Is almond fudge banana cake made with real bananas?
Generated Answer: Based on the context, I couldn't find any information in the provided sections (tags, rating, nutritional, price, calorie_status, sodium, sugar, carbohydrate) that suggests whether the almond fudge banana cake is made with real bananas or not. Therefore, my answer to the question would be "Unknown" or "No information available".

Please analyze the content and context of the generated answer in relation to the question
and provide your evaluation in parsable JSON without using code blocks:

{
  "Relevance": "NON_RELEVANT" | "PARTLY_RELEVANT" | "RELEVANT",
  "Explanation": "[Provide a brief explanation for your evaluation]"
}

In [59]:
import json
len(ground_df)

16527

In [60]:
df_sample = ground_df.sample(n=50, random_state=1)

In [61]:
sample  = df_sample.to_dict(orient='records')

In [62]:
df_sample

Unnamed: 0,question,id
3881,Are the cod cakes browned on both sides,11252_10
15077,Can you tell me the preparation time for Coconut Shrimp Curry?,30898_9
9392,Should I double the funnel cake recipe?,19611_13
4398,Tells me the calorie total for Streusel Plum Cake,12105_2
15297,Milky Way Lover's Pound Cake prep time,31334_9
776,dirt cake ii provides how much protein,3333_6
13052,Can I get a quote for Grandma Kay's Swedish pancakes?,27764_16
5676,Does the rhubarb stir cake have a high carb count?,14457_8
15891,Can I make the easy rhubarb cake in under 60 minutes?,32454_11
8454,What's the fat content of the key lime cheesecake squares?,18007_7


In [63]:
len(sample)

50

In [64]:
evaluation_results = []
for record in tqdm(sample):
    question = record['question']
    answer_llm = rag(question) 

    prompt = prompt2_template.format(
        question=question,
        answer_llm=answer_llm
    )

    evaluation = llm(prompt)

    # Log the evaluation to inspect if it's valid JSON
    print(f"Evaluation for question {question}: {evaluation}")

    try:
        # Try to parse the evaluation response as JSON
        evaluation_json = json.loads(evaluation)
    except json.JSONDecodeError as e:
        # Handle JSON decode errors gracefully
        print(f"JSONDecodeError for question {question}: {e}")
        evaluation_json = None  # Use None or set a default value

    # Initialize the evaluation result if it's None or empty
    if evaluation_json is None:
        evaluation_json = {"Relevance": "UNKNOWN", "Explanation": "Failed to parse evaluation"}

    # Log and store evaluation
    print(f"Final evaluation for {question}: {evaluation_json}")
    
    # Now append to your results list, ensuring `evaluation` is initialized properly
    evaluation_results.append((record, answer_llm, evaluation_json))


  0%|          | 0/50 [00:00<?, ?it/s]

Evaluation for question Are the cod cakes browned on both sides: {
"Relevance": "RELEVANT",
"Explanation": "The generated answer directly addresses the question 'Are the cod cakes browned on both sides' by referencing specific steps from the provided context. It states that the cod cakes are golden brown, implying they are browned on both sides, making the answer relevant to the question."
}
Final evaluation for Are the cod cakes browned on both sides: {'Relevance': 'RELEVANT', 'Explanation': "The generated answer directly addresses the question 'Are the cod cakes browned on both sides' by referencing specific steps from the provided context. It states that the cod cakes are golden brown, implying they are browned on both sides, making the answer relevant to the question."}
Evaluation for question Can you tell me the preparation time for Coconut Shrimp Curry?: {
  "Relevance": "NON_RELEVANT",
  "Explanation": "The generated answer explicitly states that the provided text does not conta

In [65]:
df_eval = pd.DataFrame(evaluation_results, columns=['record', 'answer', 'evaluation'])

df_eval['id'] = df_eval.record.apply(lambda d: d['id'])
df_eval['question'] = df_eval.record.apply(lambda d: d['question'])

df_eval['relevance'] = df_eval.evaluation.apply(lambda d: d['Relevance'])
df_eval['explanation'] = df_eval.evaluation.apply(lambda d: d['Explanation'])

del df_eval['record']
del df_eval['evaluation']

In [66]:
df_eval.relevance.value_counts(normalize=True)


relevance
UNKNOWN            0.54
RELEVANT           0.24
PARTLY_RELEVANT    0.14
NON_RELEVANT       0.08
Name: proportion, dtype: float64

In [67]:
df_eval.to_csv(input_data_folder+'rag-eval-'+MODEL_NAME+'.csv', index=False)

In [68]:
df_eval[df_eval.relevance == 'RELEVANT']


Unnamed: 0,answer,id,question,relevance,explanation
0,"Based on the context, I see that you're asking about the Newfoundland Cod Cakes, not the New York Style Cheesecake on Shortbread Crust. According to the Section: steps, the answer is: ""in a skillet fry the patties in the oil , 4-5 minutes each side until they are golden brown"". Therefore, the cod cakes are browned on both sides.",11252_10,Are the cod cakes browned on both sides,RELEVANT,"The generated answer directly addresses the question 'Are the cod cakes browned on both sides' by referencing specific steps from the provided context. It states that the cod cakes are golden brown, implying they are browned on both sides, making the answer relevant to the question."
5,"According to the context, The dirt cake ii provides 13 grams of protein.",3333_6,dirt cake ii provides how much protein,RELEVANT,"The generated answer directly responds to the question, providing a specific amount of protein (13 grams) that the dirt cake II is said to provide, thus addressing the inquiry."
8,"Based on the CONTEXT provided, the QUESTION asks: ""Can I make the easy rhubarb cake in under 60 minutes?""\n\nAccording to the REVIEW section, the baking time for this recipe is around 70 minutes, with the reviewer noting that it may take about 45 minutes at a low altitude. However, what's important is that the reviewer mentions planning for at least 45 minutes and checking regularly. This suggests that it may be possible to make the easy rhubarb cake in a time frame of under 60 minutes, but it would require careful monitoring and possibly some adjustments to the recipe.",32454_11,Can I make the easy rhubarb cake in under 60 minutes?,RELEVANT,"The answer partially addresses the question by providing information on the baking time of the easy rhubarb cake, specifically that it may take around 70 minutes, but can be done in under 60 minutes with careful monitoring and adjustments. Although the answer does not provide a direct 'yes' or 'no' answer, it provides relevant and useful information to answer the question."
10,"According to the context, the old fashion butter pound cake has 4213.2 calories.",33150_2,What's the calorie count for the old fashion butter pound cake?,RELEVANT,"The generated answer provides an exact answer to the question, which is the calorie count for the old fashion butter pound cake."
18,"Based on the context provided, the Momma's Fair Funnel Cake has 16 grams of sodium.",916_5,Is the Momma's Fair Funnel Cake high in salt?,RELEVANT,"The generated answer mentions the 16 grams of sodium in the Momma's Fair Funnel Cake, which is a direct answer to the question about the high salt content. The answer provides specific information related to the question, making it relevant."
23,"Based on the context provided, I didn't find any mention of ""7 up pound cake"" or any information related to it. The provided text only mentions ""up pound cake"" and ""five flavor pound cake"", but not ""7 up pound cake"". Therefore, I cannot answer the question about the flavor profile of ""7 up pound cake"" as it is not mentioned in the context.",2923_14,What flavor profile does 7 up pound cake have?,RELEVANT,"The generated answer addresses the question directly, providing a clear explanation of why 7 Up pound cake is not mentioned in the context, which is the main point of the question."
24,"Based on the context provided, the question is ""What's the secret to achieving the Depression Fudge cake's rich color?""\n\nThis question falls under the section of preparation, and the answer is not explicitly mentioned. However, from the steps provided to prepare the Depression Fudge cake, we can infer that it's the addition of cocoa that contributes to the cake's rich color.",12313_13,What's the secret to achieving the Depression Fudge cake's rich color,RELEVANT,"The generated answer attempts to address the question by providing a possible solution to achieve the Depression Fudge cake's rich color. Although it doesn't directly provide the secret, it logically infers the importance of cocoa addition in the preparation process, making it a relevant response to the question."
27,"Based on the nutritional information provided, the Hand Me Down Chocolate Cake contains 14 grams of total fat, 28 grams of saturated fat, and 11 grams of carbohydrates. Since it also has a calorie status of Low Calory, I would say that eating this cake is unlikely to cause significant weight gain, as it is relatively low in calories and fat. However, it's important to keep in mind that consuming large quantities or multiple servings of this cake could potentially lead to weight gain. Enjoy in moderation!",9363_15,Would I gain weight eating Hand Me Down Chocolate Cake?,RELEVANT,"The generated answer directly addresses the question by providing information about the nutritional content of the Hand Me Down Chocolate Cake and its potential impact on weight gain. The answer does not simply repeat the question but provides a detailed explanation and cautionary advice, making it relevant to the question."
37,"I apologize, but the context does not provide any information about the amount of salt in the Chocolate Banana Cake. In fact, there's no mention of salt in any of the reviews or ratings provided. If you're looking for nutritional information, the answer is that the sodium content is not specified.",32082_5,Can you tell me the amount of salt in the Chocolate Banana Cake?,RELEVANT,"Although the answer does not provide a direct answer to the question, it does provide relevant information in relation to the question. The answer acknowledges that the context does not provide information about the amount of salt, and provides an additional piece of information about the sodium content being unspecified, which still offers some insight into the question."
39,"Based on the nutritional information provided, to reduce the sodium in the Winklers Moravian Sugar Cake, here are some suggestions:\n\n1. **Use low-sodium ingredients**: Opt for low-sodium versions of ingredients like butter, sugar, and spices to decrease the overall sodium content.\n2. **Reduce the amount of salt**: Try using half the amount of salt called for in the recipe or omitting it altogether if possible.\n3. **Choose unsalted or low-sodium sweeteners**: Instead of white granulated sugar, which contains a significant amount of sodium, consider using unsalted or low-sodium sweeteners like honey, maple syrup, or coconut sugar.\n4. **Flavor with herbs and spices**: Instead of relying on salt to add flavor, try using herbs and spices like cinnamon, nutmeg, or ginger to give the cake a boost of flavor without adding sodium.\n\nBy implementing these suggestions, you should be able to reduce the sodium content of the Winklers Moravian Sugar Cake and make it a slightly healthier option.",26575_5,how can i reduce the sodium in the winklers moravian sugar cake?,RELEVANT,"The generated answer directly addresses the question of how to reduce sodium in the Winklers Moravian Sugar Cake, providing specific suggestions and alternatives to reduce sodium content."


In [124]:
import streamlit as st
import time
from dotenv import load_dotenv
import os
import openai  # Fix the import statement
from groq import Groq
import minsearch
import json

# Load environment variables
load_dotenv()

# Setup the OpenAI client to use either Groq, OpenAI.com, or Ollama API
API_HOST = os.getenv("API_HOST")
API_HOST

'groq'

NameError: name 'question_embeddings' is not defined