In [137]:
# Import necessary libraries
import json
import pandas as pd
import openai
import elasticsearch
from groq import Groq
from dotenv import load_dotenv
import os
from sklearn.feature_extraction.text import CountVectorizer
from tqdm.auto import tqdm

In [138]:
# Load environment variables
load_dotenv()

True

In [139]:
# Setup project paths.
# The project root may be overridden with the RESTAURANT_BOT_BASE_FOLDER
# environment variable; the original absolute path stays the default so the
# notebook behaves exactly as before when the variable is unset or empty.
base_folder = os.getenv('RESTAURANT_BOT_BASE_FOLDER') or 'D:/Projects/AI-Restaurent-Chat-bot/'
# Later cells build file paths by string concatenation, so guarantee exactly
# one trailing slash however the override was written.
base_folder = base_folder.rstrip('/\\') + '/'
input_data_folder = base_folder + 'input_data/'


In [140]:
# Get the current working directory (optional step)
os.getcwd()

'd:\\Projects\\AI-Restaurent-Chat-bot\\notebooks'

In [141]:
# Setup the OpenAI client to use either Groq, OpenAI.com, or Ollama API
load_dotenv(override=True)
API_HOST = os.getenv("API_HOST")
API_HOST

'openai'

In [142]:
def _make_llm_client(api_host):
    """Return ``(client, model_name)`` for the backend selected by ``api_host``.

    "groq" builds a Groq client, "ollama" and "github" build OpenAI-compatible
    clients pointed at their own base URLs, and any other value falls back to
    the regular OpenAI client. Keys, endpoints and model names are read from
    environment variables.
    """
    if api_host == "groq":
        return Groq(api_key=os.getenv("GROQ_API_KEY")), os.getenv("GROQ_MODEL")

    if api_host == "ollama":
        ollama_client = openai.OpenAI(
            base_url=os.getenv("OLLAMA_ENDPOINT"),
            api_key="nokeyneeded",
        )
        return ollama_client, os.getenv("OLLAMA_MODEL")

    if api_host == "github":
        github_client = openai.OpenAI(
            base_url="https://models.inference.ai.azure.com",
            api_key=os.getenv("GITHUB_TOKEN"),
        )
        return github_client, os.getenv("GITHUB_MODEL")

    # Any other value (including an unset API_HOST) uses OpenAI itself.
    return openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY")), os.getenv("OPENAI_MODEL")


client, MODEL_NAME = _make_llm_client(API_HOST)

In [143]:
client

<openai.OpenAI at 0x2146fb25a30>

In [20]:
MODEL_NAME

'llama3-8b-8192'

In [21]:
# Load the dish Q&A dataset. The encoding is set explicitly because JSON is
# UTF-8 by specification, whereas open() otherwise falls back to the
# locale-dependent platform default.
with open(input_data_folder + 'food_user_qa_dataset.json', 'rt', encoding='utf-8') as f_in:
    data = json.load(f_in)

In [22]:
# Flatten the per-dish document lists into one list. Each document dict is
# tagged in place with the name of the dish it belongs to, so the dicts held by
# `data` gain the same 'dish_name' key.
documents = []
for dish_entry in data['dishes']:
    name_of_dish = dish_entry['dish name']
    dish_documents = dish_entry['documents']
    for entry in dish_documents:
        entry['dish_name'] = name_of_dish
    documents.extend(dish_documents)

In [23]:
documents[1:3]

[{'id': '142_2',
  'question': 'How many calories does it have?',
  'section': 'calories',
  'text': 'The almond fudge banana cake has 224.8 calories.',
  'dish_name': 'almond fudge banana cake'},
 {'id': '142_3',
  'question': 'How much total fat does it contain?',
  'section': 'nutritional',
  'text': 'The almond fudge banana cake has 14 grams of total fat (PDV).',
  'dish_name': 'almond fudge banana cake'}]

Retrieval evaluation

In [24]:
import minsearch

# Build the search index used for the retrieval evaluation below.
# Every document field is passed as a text field; `dish_name` is additionally
# passed as a keyword field.
# NOTE(review): 'id' is listed among the text fields — confirm that matching
# query text against document ids is intended.
index = minsearch.Index(
    text_fields = ['id', 'question','section','text','dish_name'],
    keyword_fields=['dish_name']
)

# Fit the index on the flattened document list.
index.fit(documents)

<minsearch.Index at 0x2146fd03e90>

In [25]:
#import ground truth dataset
import pandas as pd
ground_df = pd.read_csv('ground-truth-data.csv')

In [26]:
ground_truth = ground_df.to_dict(orient='records')

In [27]:
documents[1]

{'id': '142_2',
 'question': 'How many calories does it have?',
 'section': 'calories',
 'text': 'The almond fudge banana cake has 224.8 calories.',
 'dish_name': 'almond fudge banana cake'}

In [28]:
def hit_rate(relevance_total):
    """Return the fraction of queries that have at least one relevant result.

    Args:
        relevance_total: one sequence of relevance flags per query, in ranked
            order (truthy flag = the result at that rank is the expected one).

    Returns:
        A float between 0.0 and 1.0. An empty ``relevance_total`` yields 0.0
        instead of raising ZeroDivisionError.
    """
    if len(relevance_total) == 0:
        return 0.0

    queries_with_hit = sum(1 for line in relevance_total if any(line))
    return queries_with_hit / len(relevance_total)


In [29]:
def mrr(relevance_total):
    """Return the mean reciprocal rank over all queries.

    For each query only the FIRST relevant result counts, contributing
    ``1 / rank`` (rank is 1-based); a query with no relevant result
    contributes 0.

    Args:
        relevance_total: one sequence of relevance flags per query, in ranked
            order (truthy flag = the result at that rank is relevant).

    Returns:
        A float between 0.0 and 1.0. An empty ``relevance_total`` yields 0.0
        instead of raising ZeroDivisionError.
    """
    if len(relevance_total) == 0:
        return 0.0

    total_score = 0.0
    for line in relevance_total:
        for rank, is_relevant in enumerate(line, start=1):
            if is_relevant:
                total_score += 1 / rank
                # Stop at the first hit: summing later hits as well would no
                # longer be a reciprocal rank and could push a query above 1.
                break

    return total_score / len(relevance_total)

In [30]:
def minsearch_search(query, num_results=5):
    """Search the module-level ``index`` for ``query`` and return the top hits.

    The earlier version passed ``filter_dict={'question': query}``. 'question'
    is not one of the index's keyword fields, so that filter had no effect on
    the results; it is dropped here so the call says what it actually does.

    Args:
        query: free-text question to search for.
        num_results: maximum number of documents to return (default 5, the
            value that was previously hard-coded).

    Returns:
        Whatever ``index.search`` returns for the query.
    """
    return index.search(
        query=query,
        filter_dict={},
        boost_dict={},
        num_results=num_results,
    )

In [31]:
ground_df.head()

Unnamed: 0,question,id
0,Is almond fudge banana cake made with real ban...,142_1
1,Does almond fudge banana cake contain dairy?,142_1
2,Can I be sure there are no artificial flavors ...,142_1
3,Are the ingredients in almond fudge banana cak...,142_1
4,May I know the types of flour and sugar used i...,142_1


In [32]:
def evaluate(ground_truth, search_function):
    """Score ``search_function`` against the ground-truth records.

    Each record is passed whole to ``search_function``; the returned documents
    are turned into a list of booleans saying whether each document's 'id'
    equals the record's 'id'.

    Returns:
        A dict with the 'hit_rate' and 'mrr' computed over all records.
    """
    relevance_total = []

    for record in tqdm(ground_truth):
        expected_id = record['id']
        hits = search_function(record)
        relevance_total.append([hit['id'] == expected_id for hit in hits])

    metrics = {}
    metrics['hit_rate'] = hit_rate(relevance_total)
    metrics['mrr'] = mrr(relevance_total)
    return metrics

In [33]:
from tqdm.auto import tqdm 

In [34]:
evaluate(ground_truth, lambda q: minsearch_search(q['question']))

  0%|          | 0/16527 [00:00<?, ?it/s]

{'hit_rate': 0.4593695165486779, 'mrr': 0.33671466892559154}

In [35]:
import pandas as pd

def visualize_search_results(ground_truth, search_function):
    """Pair every ground-truth question with the top search result for it.

    ``search_function`` is called with the question text of each record. The
    first returned document (if any) supplies the matched id and question;
    when nothing is returned both are None.

    Side effect: sets the pandas option 'display.max_colwidth' to None.

    Returns:
        A DataFrame with one row per ground-truth record.
    """
    rows = []

    for entry in tqdm(ground_truth):
        expected_id = entry['id']
        asked = entry['question']

        hits = search_function(entry['question'])

        # Only the best-ranked document is compared with the ground truth.
        if hits:
            best = hits[0]
            top_question = best['question']
            top_id = best['id']
        else:
            top_question = top_id = None

        rows.append({
            'Ground Truth ID': expected_id,
            'Ground Truth Question': asked,
            'Matched Document ID': top_id,
            'Matched Question': top_question
        })

    comparison = pd.DataFrame(rows)

    # Show full cell contents when the frame is printed or displayed.
    pd.set_option('display.max_colwidth', None)
    return comparison

# Build the comparison table. minsearch_search already accepts the raw question
# string, so it is handed over directly as the search function.
results_df = visualize_search_results(ground_truth, minsearch_search)
print(results_df.head())

# Save the full comparison as a CSV file for inspection outside the notebook
results_df.to_csv('search_results_comparison.csv', index=False)


  0%|          | 0/16527 [00:00<?, ?it/s]

  Ground Truth ID  \
0           142_1   
1           142_1   
2           142_1   
3           142_1   
4           142_1   

                                                        Ground Truth Question  \
0                         Is almond fudge banana cake made with real bananas?   
1                                Does almond fudge banana cake contain dairy?   
2  Can I be sure there are no artificial flavors in almond fudge banana cake?   
3         Are the ingredients in almond fudge banana cake free from liqueurs?   
4   May I know the types of flour and sugar used in almond fudge banana cake?   

  Matched Document ID                          Matched Question  
0              142_11  What tags are associated with this dish?  
1               142_3       How much total fat does it contain?  
2               142_5          How much sodium is in this dish?  
3               142_1                 What are the ingredients?  
4               142_4                What is the sugar c

In [36]:
ground_df.head(20)

Unnamed: 0,question,id
0,Is almond fudge banana cake made with real bananas?,142_1
1,Does almond fudge banana cake contain dairy?,142_1
2,Can I be sure there are no artificial flavors in almond fudge banana cake?,142_1
3,Are the ingredients in almond fudge banana cake free from liqueurs?,142_1
4,May I know the types of flour and sugar used in almond fudge banana cake?,142_1
5,How many calories does the Almond Fudge Banana Cake have?,142_2
6,Can I get nutrition facts for the Almond Fudge Banana Cake?,142_2
7,What's the calorie count for the Almond Fudge Banana Cake?,142_2
8,How many calories is the Almond Fudge Banana Cake?,142_2
9,What's the calorie content of the Almond Fudge Banana Cake?,142_2


In [37]:
documents[1:20]

[{'id': '142_2',
  'question': 'How many calories does it have?',
  'section': 'calories',
  'text': 'The almond fudge banana cake has 224.8 calories.',
  'dish_name': 'almond fudge banana cake'},
 {'id': '142_3',
  'question': 'How much total fat does it contain?',
  'section': 'nutritional',
  'text': 'The almond fudge banana cake has 14 grams of total fat (PDV).',
  'dish_name': 'almond fudge banana cake'},
 {'id': '142_4',
  'question': 'What is the sugar content?',
  'section': 'nutritional',
  'text': 'The almond fudge banana cake has 87 grams of sugar (PDV).',
  'dish_name': 'almond fudge banana cake'},
 {'id': '142_5',
  'question': 'How much sodium is in this dish?',
  'section': 'nutritional',
  'text': 'The almond fudge banana cake has 10 grams of sodium (PDV).',
  'dish_name': 'almond fudge banana cake'},
 {'id': '142_6',
  'question': 'What is the protein content?',
  'section': 'nutritional',
  'text': 'The almond fudge banana cake has 7 grams of protein (PDV).',
  'dish_

## The RAG flow


In [38]:
def build_prompt(query, search_results):
    """Render the RAG prompt for ``query`` from the retrieved documents.

    Each document contributes a "Section / Question / Answer" paragraph built
    from its 'section', 'question' and 'text' keys; the paragraphs form the
    CONTEXT part of the prompt. Leading and trailing whitespace of the final
    prompt is stripped.
    """
    template = "\n".join([
        "You're an AI assistant helping with menu queries. Answer the QUESTION based on the CONTEXT provided.",
        "",
        "QUESTION: {question}",
        "",
        "CONTEXT: ",
        "{context}",
    ])

    context_block = "".join(
        f"Section: {hit['section']}\nQuestion: {hit['question']}\nAnswer: {hit['text']}\n\n"
        for hit in search_results
    )

    return template.format(question=query, context=context_block).strip()


In [44]:
client

<groq.Groq at 0x2146fa31250>

In [45]:

def llm(prompt):
    """Send ``prompt`` as a single user message and return the reply text.

    Uses the module-level ``client`` and ``MODEL_NAME``; the content of the
    first returned choice is handed back.
    """
    user_message = {"role": "user", "content": prompt}
    completion = client.chat.completions.create(
        model=MODEL_NAME,
        messages=[user_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [46]:
import minsearch

# Fresh, not-yet-fitted index for the RAG flow, configured with the same text
# and keyword fields as the index used for retrieval evaluation.
index = minsearch.Index(
    text_fields=['id', 'question', 'section', 'text', 'dish_name'],
    keyword_fields=['dish_name'],
)


In [47]:
index.fit(documents)

<minsearch.Index at 0x214712a63f0>

In [48]:
def minsearch(question):
    """Search the module-level ``index`` for ``question`` with the index's default search arguments.

    NOTE(review): this definition rebinds the name ``minsearch``, which the
    ``import minsearch`` cells bind to the module. Once this cell has run,
    re-executing ``minsearch.Index(...)`` without re-importing fails because a
    function has no ``Index`` attribute — consider giving this helper a
    distinct name and updating its callers.
    """
    return index.search(question)

In [49]:
def rag(query):
    """Answer ``query`` with retrieval-augmented generation.

    Retrieves documents for the query, renders them into a prompt and returns
    the language model's reply to that prompt.
    """
    retrieved = minsearch(query)
    return llm(build_prompt(query, retrieved))


In [53]:
# Example queries for a manual smoke test of the RAG flow. Only the last
# assignment takes effect, because each line overwrites `query`.
query = "what are the calories of white chocolate cake is it good?"
query = "what are the ingredients in white chocolate cake?"

rag(query)

"According to the provided context, the ingredients in the White Chocolate Cake are:\n\n\n['white chocolate', 'butter', 'sugar', 'egg yolks', 'vanilla', 'cake flour', 'baking powder', 'salt', 'buttermilk', 'pecans', 'flaked coconut', 'egg whites', 'flour', 'milk']"

In [68]:
ground_truth[2]['question']

'Can I be sure there are no artificial flavors in almond fudge banana cake?'

In [69]:
rag(ground_truth[1]['question'])

'Based on the CONTEXT provided, the QUESTION "Does almond fudge banana cake contain dairy?" cannot be answered with a "yes" or "no" because there is no information about the presence or absence of dairy in the provided nutritional information, category, or any other section.'

## RAG Evaluation - LLM as a judge

In [72]:

# LLM-as-a-judge prompt. It is filled with `question` and `answer_llm` via
# str.format and asks for a JSON object with "Relevance" and "Explanation" keys.
# The braces of the JSON example are doubled ({{ }}) so that str.format emits
# them literally instead of treating them as placeholders.
prompt2_template = """
You are an expert evaluator for a Retrieval-Augmented Generation (RAG) system.
Your task is to analyze the relevance of the generated answer to the given question.
Based on the relevance of the generated answer, you will classify it
as "NON_RELEVANT", "PARTLY_RELEVANT", or "RELEVANT".

Here is the data for evaluation:

Question: {question}
Generated Answer: {answer_llm}

Please analyze the content and context of the generated answer in relation to the question
and provide your evaluation in parsable JSON without using code blocks:

{{
  "Relevance": "NON_RELEVANT" | "PARTLY_RELEVANT" | "RELEVANT",
  "Explanation": "[Provide a brief explanation for your evaluation]"
}}
""".strip()

In [73]:
len(ground_df)

16527

In [78]:
record = ground_truth[0]
question =  record['question']
answer_llm = rag(question)
answer_llm

'Based on the context, I can answer your question:\n\nIs almond fudge banana cake made with real bananas?\n\nThe answer is likely "Yes", because the dish has bananas in its name and the carbohydrates content is provided, which suggest the presence of real bananas in the recipe.'

In [79]:
prompt = prompt2_template.format(question=question,answer_llm=answer_llm)
prompt

'You are an expert evaluator for a Retrieval-Augmented Generation (RAG) system.\nYour task is to analyze the relevance of the generated answer to the given question.\nBased on the relevance of the generated answer, you will classify it\nas "NON_RELEVANT", "PARTLY_RELEVANT", or "RELEVANT".\n\nHere is the data for evaluation:\n\nQuestion: Is almond fudge banana cake made with real bananas?\nGenerated Answer: Based on the context, I can answer your question:\n\nIs almond fudge banana cake made with real bananas?\n\nThe answer is likely "Yes", because the dish has bananas in its name and the carbohydrates content is provided, which suggest the presence of real bananas in the recipe.\n\nPlease analyze the content and context of the generated answer in relation to the question\nand provide your evaluation in parsable JSON without using code blocks:\n\n{\n  "Relevance": "NON_RELEVANT" | "PARTLY_RELEVANT" | "RELEVANT",\n  "Explanation": "[Provide a brief explanation for your evaluation]"\n}'

In [81]:
record = ground_truth[0]
record

{'question': 'Is almond fudge banana cake made with real bananas?',
 'id': '142_1'}

In [89]:
evaluations = {}


In [119]:
sample_ground_truth =ground_truth[1:30]
sample_ground_truth

[{'question': 'Does almond fudge banana cake contain dairy?', 'id': '142_1'},
 {'question': 'Can I be sure there are no artificial flavors in almond fudge banana cake?',
  'id': '142_1'},
 {'question': 'Are the ingredients in almond fudge banana cake free from liqueurs?',
  'id': '142_1'},
 {'question': 'May I know the types of flour and sugar used in almond fudge banana cake?',
  'id': '142_1'},
 {'question': 'How many calories does the Almond Fudge Banana Cake have?',
  'id': '142_2'},
 {'question': 'Can I get nutrition facts for the Almond Fudge Banana Cake?',
  'id': '142_2'},
 {'question': "What's the calorie count for the Almond Fudge Banana Cake?",
  'id': '142_2'},
 {'question': 'How many calories is the Almond Fudge Banana Cake?',
  'id': '142_2'},
 {'question': "What's the calorie content of the Almond Fudge Banana Cake?",
  'id': '142_2'},
 {'question': 'How much fat does the Almond Fudge Banana Cake have?',
  'id': '142_3'},
 {'question': 'What is the total fat content of t

In [120]:
# Generate a RAG answer for every sampled record whose id has not been
# evaluated yet.
# NOTE(review): the answer is only kept in `answer_llm` (overwritten on each
# iteration) and nothing is added to `evaluations` here — confirm whether a
# storing step is missing.
for record in tqdm(sample_ground_truth):
    # Named `doc_id` rather than `id` so the builtin id() is not shadowed for
    # the rest of the kernel session.
    doc_id = record['id']

    if doc_id in evaluations:
        continue
    question = record['question']
    answer_llm = rag(question)

  0%|          | 0/29 [00:00<?, ?it/s]

In [136]:
# Ask the RAG pipeline for an answer to every sampled question, have the LLM
# judge that answer, and collect (record, answer, parsed evaluation) tuples.
# NOTE(review): `sample` is only assigned in a later cell of this notebook
# (`sample = df_sample.to_dict(orient='records')`), so this cell raises
# NameError on a fresh top-to-bottom run.
# NOTE(review): `evaluations.append` needs a list, but the nearest earlier
# assignment is `evaluations = {}`; confirm which definition this cell expects.
for record in tqdm(sample):
    question = record['question']
    answer_llm = rag(question) 

    prompt = prompt2_template.format(
        question=question,
        answer_llm=answer_llm
    )

    # The judge's raw reply; expected (but not guaranteed) to be a JSON object.
    evaluation = llm(prompt)

    # Log the evaluation to inspect if it's valid JSON
    print(f"Evaluation for question {question}: {evaluation}")

    try:
        # Try to parse the evaluation response as JSON
        evaluation = json.loads(evaluation)
    except json.JSONDecodeError as e:
        # Handle JSON decode errors gracefully
        print(f"JSONDecodeError for question {question}: {e}")
        # NOTE(review): a None evaluation is stored for unparsable replies; code
        # that later indexes the evaluation (e.g. d['Relevance']) fails on it.
        evaluation = None  # You can decide what to do in case of error

    evaluations.append((record, answer_llm, evaluation))


  0%|          | 0/200 [00:00<?, ?it/s]

Evaluation for question Are the cod cakes browned on both sides: {
"Relevance": "RELEVANT",
"Explanation": "The generated answer directly addresses the question and provides a clear and specific answer, citing the steps to prepare the dish as evidence. The answer is relevant because it provides the correct information about whether the cod cakes are browned on both sides."
JSONDecodeError for question Are the cod cakes browned on both sides: Expecting ',' delimiter: line 3 column 284 (char 310)
Evaluation for question Can you tell me the preparation time for Coconut Shrimp Curry?: {
  "Relevance": "RELEVANT",
  "Explanation": "The generated answer is relevant to the question as it directly addresses the user's inquiry about the preparation time for Coconut Shrimp Curry. Although the answer does not provide the actual preparation time, it acknowledges the absence of relevant information in the given context and offers an alternative solution, making it relevant to the user's request."
J

InternalServerError: Error code: 503 - {'error': {'message': 'Service Unavailable', 'type': 'internal_server_error'}}

In [116]:
evaluations

{'142_1': {'id': '142_1',
  'question': 'Does almond fudge banana cake contain dairy?',
  'answer_llm': 'Based on the context, there is no mention of dairy in the context of the almond fudge banana cake.',
  'evaluation': {'Relevance': 'RELEVANT',
   'Explanation': "The, generated, answer, directly, addresses, the, question 'Does, almond, fudge, banana, cake, contain, dairy?', by, stating, that, there, is, no, mention, of, dairy, in, the, context, of, the, cake,, providing, a, clear, and, accurate, response, to, the, question."}}}

In [117]:
df_eval = pd.DataFrame(evaluations).T

In [118]:
df_eval

Unnamed: 0,id,question,answer_llm,evaluation
142_1,142_1,Does almond fudge banana cake contain dairy?,"Based on the context, there is no mention of dairy in the context of the almond fudge banana cake.","{'Relevance': 'RELEVANT', 'Explanation': 'The, generated, answer, directly, addresses, the, question 'Does, almond, fudge, banana, cake, contain, dairy?', by, stating, that, there, is, no, mention, of, dairy, in, the, context, of, the, cake,, providing, a, clear, and, accurate, response, to, the, question.'}"


## RAG EVALUATION

In [123]:
# LLM-as-a-judge prompt, filled with `question` and `answer_llm` via str.format.
# NOTE(review): this reassigns `prompt2_template`, which an earlier cell already
# defines; the only difference is the shortened first sentence ("RAG system").
# Doubled braces ({{ }}) are emitted literally by str.format.
prompt2_template = """
You are an expert evaluator for a RAG system.
Your task is to analyze the relevance of the generated answer to the given question.
Based on the relevance of the generated answer, you will classify it
as "NON_RELEVANT", "PARTLY_RELEVANT", or "RELEVANT".

Here is the data for evaluation:

Question: {question}
Generated Answer: {answer_llm}

Please analyze the content and context of the generated answer in relation to the question
and provide your evaluation in parsable JSON without using code blocks:

{{
  "Relevance": "NON_RELEVANT" | "PARTLY_RELEVANT" | "RELEVANT",
  "Explanation": "[Provide a brief explanation for your evaluation]"
}}
""".strip()

In [124]:
len(ground_truth)


16527

In [125]:
record = ground_truth[0]


In [126]:
print(answer_llm)


Based on the CONTEXT, the salt content in the Almond Fudge Banana Cake is:

Answer: The almond fudge banana cake has 10 grams of sodium (PDV).


In [127]:
prompt = prompt2_template.format(question=question, answer_llm=answer_llm)
print(prompt)

You are an expert evaluator for a RAG system.
Your task is to analyze the relevance of the generated answer to the given question.
Based on the relevance of the generated answer, you will classify it
as "NON_RELEVANT", "PARTLY_RELEVANT", or "RELEVANT".

Here is the data for evaluation:

Question: What's the salt content in the Almond Fudge Banana Cake?
Generated Answer: Based on the CONTEXT, the salt content in the Almond Fudge Banana Cake is:

Answer: The almond fudge banana cake has 10 grams of sodium (PDV).

Please analyze the content and context of the generated answer in relation to the question
and provide your evaluation in parsable JSON without using code blocks:

{
  "Relevance": "NON_RELEVANT" | "PARTLY_RELEVANT" | "RELEVANT",
  "Explanation": "[Provide a brief explanation for your evaluation]"
}


In [128]:
import json


In [144]:
df_sample = ground_df.sample(n=50, random_state=1)


In [145]:
sample = df_sample.to_dict(orient='records')


In [146]:
# Judge the RAG answer for every sampled question and collect
# (record, answer, parsed evaluation) tuples.
evaluations = []

for record in tqdm(sample):
    question = record['question']
    answer_llm = rag(question)

    prompt = prompt2_template.format(
        question=question,
        answer_llm=answer_llm
    )

    evaluation = llm(prompt)
    try:
        evaluation = json.loads(evaluation)
    except json.JSONDecodeError as err:
        # The judge does not always return valid JSON. Report and skip the
        # record so one malformed reply does not abort the run and throw away
        # the answers already paid for; only parsed evaluations are stored, so
        # downstream code can index them safely.
        print(f"Skipping unparsable evaluation for question {question!r}: {err}")
        continue

    evaluations.append((record, answer_llm, evaluation))

  0%|          | 0/50 [00:00<?, ?it/s]

In [147]:
# One row per judged sample. The nested record / evaluation dicts are unpacked
# into flat columns and the dict columns are then dropped.
df_eval = pd.DataFrame(evaluations, columns=['record', 'answer', 'evaluation'])

# (new column, source column holding a dict, key to pull out of that dict)
flattened_columns = [
    ('id', 'record', 'id'),
    ('question', 'record', 'question'),
    ('relevance', 'evaluation', 'Relevance'),
    ('explanation', 'evaluation', 'Explanation'),
]
for target, source, key in flattened_columns:
    # key=key binds the current key for this column's lambda.
    df_eval[target] = df_eval[source].apply(lambda payload, key=key: payload[key])

df_eval = df_eval.drop(columns=['record', 'evaluation'])

In [148]:
df_eval.relevance.value_counts(normalize=True)


relevance
RELEVANT           0.38
PARTLY_RELEVANT    0.36
NON_RELEVANT       0.26
Name: proportion, dtype: float64

In [150]:
df_eval.to_csv(input_data_folder+'rag-eval-gpt-3.5-turbo.csv', index=False)


In [152]:
df_eval[df_eval.relevance == 'RELEVANT']


Unnamed: 0,answer,id,question,relevance,explanation
0,"The cod cakes are fried in a skillet for 4-5 minutes on each side until they are golden brown, so yes, they are browned on both sides.",11252_10,Are the cod cakes browned on both sides,RELEVANT,The generated answer directly addresses the question by confirming that the cod cakes are browned on both sides after being fried in a skillet for 4-5 minutes. It provides a clear and relevant response to the question.
4,Section: prep_time\nQuestion: What is the prep time for Milky Way Lover's Pound Cake?\nAnswer: The prep time for Milky Way Lover's Pound Cake is 30 minutes.,31334_9,Milky Way Lover's Pound Cake prep time,RELEVANT,The generated answer directly addresses the question by providing the specific prep time of 30 minutes for Milky Way Lover's Pound Cake. It is fully relevant to the question asked.
7,"No, the rhubarb stir cake does not have a high carb count. It contains 17 grams of carbohydrates.",14457_8,Does the rhubarb stir cake have a high carb count?,RELEVANT,"The generated answer directly addresses the question by stating that the rhubarb stir cake does not have a high carb count. It also provides specific information about the amount of carbohydrates it contains (17 grams), which further supports its relevance to the question."
9,Section: nutritional\nQuestion: What is the fat content of the key lime cheesecake squares?\nAnswer: The key lime cheesecake squares has 6 grams of total fat (PDV).,18007_7,What's the fat content of the key lime cheesecake squares?,RELEVANT,"The generated answer directly addresses the fat content of the key lime cheesecake squares mentioned in the question, providing the specific amount of total fat (6 grams). This indicates a clear and accurate answer to the question asked."
11,"Instruction for baking the 1 Pan Fudge Cake:\n1. Preheat oven to 350 degrees\n2. In an ungreased 9 x 13 pan (yes ungreased), sift all dry ingredients\n3. Add the liquids and stir just until blended\n4. Bake for 25 minutes\n5. Frost with your favorite frosting, cake is extremely moist so care must be taken that you don't tear up the top of the cake.",29935_10,What's the instruction for baking the 1 Pan Fudge Cake?,RELEVANT,"The generated answer provides the step-by-step instructions for baking the 1 Pan Fudge Cake as requested in the question. It includes details such as preheating the oven, using an ungreased pan, sifting dry ingredients, adding liquids, baking time, and frosting the cake. The answer directly addresses the question and is relevant to the topic."
13,"Section: ingredients\nQuestion: Tell me the Million Dollar Pound Cake's composition?\nAnswer: The million dollar pound cake contains the following ingredients: butter, sugar, eggs, all-purpose flour, milk, almond extract, vanilla extract.",16489_1,Tell me the Million Dollar Pound Cake's composition?,RELEVANT,"The answer directly provides the composition of the Million Dollar Pound Cake, listing all the ingredients included."
14,"Yes, based on the nutritional information provided, carrot snack cake is likely a sweet dish as it contains 46 grams of sugar.",8847_14,Is carrot snack cake a sweet dish?,RELEVANT,The generated answer directly answers the question by confirming that carrot snack cake is indeed a sweet dish based on the high sugar content mentioned in the nutritional information provided. The answer is clear and directly relevant to the question.
18,"No, the Momma's Fair Funnel Cake is not high in salt as it only contains 16 grams of sodium.",916_5,Is the Momma's Fair Funnel Cake high in salt?,RELEVANT,The generated answer directly addresses the question by stating that Momma's Fair Funnel Cake is not high in salt and provides a specific amount of sodium it contains. This information is relevant as it directly addresses the salt content of the funnel cake.
21,The peach upside down pudding cake contains 413.0 calories.,19324_2,How many calories does the peach upside down pudding cake contain,RELEVANT,"The generated answer directly addresses the question by providing the specific number of calories in the peach upside down pudding cake, which is 413.0 calories. This makes the answer relevant to the question asked."
23,Flavor profile: The 7 up pound cake has a lemon-lime flavor profile.,2923_14,What flavor profile does 7 up pound cake have?,RELEVANT,"The generated answer directly addresses the question by stating that the 7 up pound cake has a lemon-lime flavor profile, which is relevant to the flavor profile of the cake."
