In [1]:
import json
import pandas as pd
import elasticsearch
from dotenv import load_dotenv
import os
load_dotenv()
from sklearn.feature_extraction.text import CountVectorizer


In [2]:
import os
os.getcwd()

'd:\\Projects\\AI-Restaurent-Chat-bot\\notebooks'

In [3]:
# Project root; edit this single line to point the notebook at another checkout.
# NOTE(review): this is an absolute, machine-specific path — anyone else running
# the notebook has to change it before the data-loading cell will work.
base_folder = 'D:/Projects/AI-Restaurent-Chat-bot/'
# Folder the dataset JSON is read from (note the trailing slash: later cells
# build file paths by plain string concatenation).
input_data_folder = f'{base_folder}input_data/'

In [4]:
# OpenAI API client used by the `llm` helper further down.
from openai import OpenAI
# No api_key argument is passed, so the client relies on the SDK's own default
# configuration (presumably the OPENAI_API_KEY environment variable populated
# by load_dotenv() in the first cell — confirm against the SDK docs).
client = OpenAI()
# NOTE(review): `openai_api_key` is not referenced by any other visible cell;
# it is not handed to the client above.
openai_api_key = os.getenv("OPENAI_API_KEY")

In [5]:
# Load the food Q&A dataset into `data` (later cells read its top-level 'dishes' list).
# The encoding is pinned to UTF-8: without it, open() falls back to the
# platform's locale codec (e.g. cp1252 on Windows), which garbles or rejects any
# non-ASCII characters in the JSON file.
with open(input_data_folder + 'food_user_qa_dataset.json', 'rt', encoding='utf-8') as f_in:
    data = json.load(f_in)

In [6]:
# Flatten the per-dish structure into one list of Q&A documents.
documents = []
for dish in data['dishes']:
    dish_name = dish['dish name']
    dish_docs = dish['documents']
    for doc in dish_docs:
        # Tag each document, in place, with the dish it belongs to.
        doc.update(dish_name=dish_name)
    documents.extend(dish_docs)

In [7]:
documents[1]

{'id': '142_2',
 'question': 'How many calories does it have?',
 'section': 'calories',
 'text': 'The almond fudge banana cake has 224.8 calories.',
 'dish_name': 'almond fudge banana cake'}

In [8]:
import wget
# Raw GitHub URL of the single-file `minsearch.py` module that a later cell imports.
url= "https://raw.githubusercontent.com/alexeygrigorev/minsearch/main/minsearch.py"
# One-time download, left disabled; uncomment to fetch the file
# (presumably into the current working directory — confirm wget's default).
#wget.download(url)



In [9]:
import minsearch

In [10]:
# Configure the minsearch index; it is fitted on `documents` in the next cell.
# NOTE(review): presumably `text_fields` are searched as free text and
# `keyword_fields` are exact-match filter fields — confirm against minsearch.py.
# `dish_name` appears in both lists, and the document `id` is included among
# the text fields.
index = minsearch.Index(
    text_fields = ['id', 'question','section','text','dish_name'],
    keyword_fields=['dish_name']
)

In [11]:
index.fit(documents)

<minsearch.Index at 0x14feed7aed0>

In [12]:
question ="pistachio cake  from mix"

In [13]:
def minsearch(question):
    """Look up `question` in the module-level `index` and return its search results.

    NOTE(review): this function reuses the name of the imported `minsearch`
    module. Once this cell has run, `minsearch.Index(...)` in the earlier
    index-building cell can no longer be re-executed without re-importing the
    module. Renaming this helper (and its callers) would remove the clash.
    """
    hits = index.search(question)
    return hits

In [14]:
def elastic_search(query, dish_name=None):
    """Run a boosted multi-field Elasticsearch query and return the matching documents.

    Args:
        query: Free-text query matched against the `dish_name` (boost 6),
            `question` (boost 3), `text` and `section` fields.
        dish_name: Optional value; when truthy, a `term` filter on
            `dish_name.keyword` is added to the query.

    Returns:
        A list with the `_source` of each hit (the request asks for 5 hits).

    NOTE(review): relies on module-level `es` and `index_name`, neither of
    which is defined in the visible part of this notebook.
    """
    multi_match_clause = {
        "multi_match": {
            "query": query,
            "fields": ["dish_name^6", "question^3", "text", "section"],
            "type": "best_fields",
        }
    }
    bool_clause = {"must": [multi_match_clause]}

    # Only narrow the search to one dish when the caller asked for it.
    if dish_name:
        bool_clause["filter"] = {"term": {"dish_name.keyword": dish_name}}

    request_body = {"size": 5, "query": {"bool": bool_clause}}
    response = es.search(index=index_name, body=request_body)

    # Keep just the stored documents, dropping scores and other hit metadata.
    return [hit["_source"] for hit in response["hits"]["hits"]]


In [15]:
search_results= minsearch(question)
search_results

[{'id': '15664_12',
  'question': 'What is the rating of this dish?',
  'section': 'rating',
  'text': 'The pistachio cake  from mix has a rating of 5.',
  'dish_name': 'pistachio cake  from mix'},
 {'id': '15664_6',
  'question': 'What is the protein content?',
  'section': 'nutritional',
  'text': 'The pistachio cake  from mix has 6 grams of protein (PDV).',
  'dish_name': 'pistachio cake  from mix'},
 {'id': '15664_5',
  'question': 'How much sodium is in this dish?',
  'section': 'nutritional',
  'text': 'The pistachio cake  from mix has 9 grams of sodium (PDV).',
  'dish_name': 'pistachio cake  from mix'},
 {'id': '15664_8',
  'question': 'What are the carbohydrate levels?',
  'section': 'nutritional',
  'text': 'The pistachio cake  from mix has 8 grams of carbohydrates (PDV).',
  'dish_name': 'pistachio cake  from mix'},
 {'id': '15664_7',
  'question': 'How much saturated fat is present?',
  'section': 'nutritional',
  'text': 'The pistachio cake  from mix has 9 grams of saturat

In [21]:
documents[1]

{'id': '142_2',
 'question': 'How many calories does it have?',
 'section': 'calories',
 'text': 'The almond fudge banana cake has 224.8 calories.',
 'dish_name': 'almond fudge banana cake'}

In [None]:
# Converting our database into vectors.



In [16]:
# If using OpenAI as the LLM backend.
# NOTE(review): this repeats the import and client construction from the
# earlier "openai api" cell; `client` is simply rebound to a new instance.
from openai import OpenAI
client = OpenAI()

def llm(prompt, model="gpt-3.5-turbo"):
    """Send `prompt` as a single user message to the chat-completions API and return the reply.

    Args:
        prompt: Text sent as the only message, with role "user".
        model: Chat model identifier. Defaults to "gpt-3.5-turbo", the value
            that used to be hard-coded, so existing `llm(prompt)` calls behave
            exactly as before.

    Returns:
        The `content` of the first choice in the response.

    Uses the module-level `client` created in the cell above.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content

In [17]:
# If using Groq instead of OpenAI.
# This cell is disabled: the snippet below is wrapped in a bare triple-quoted
# string, so none of it executes (the notebook only echoes the string).
# NOTE(review): as written, the snippet creates a Groq client and then
# immediately rebinds `client` to OpenAI(); remove those two OpenAI lines
# before enabling it, otherwise the Groq client is never used.
"""
from groq import Groq   

#create client calling Groq class

client = Groq(api_key=os.getenv('GROQ_API_KEY'))

from openai import OpenAI
client = OpenAI()

def llm(prompt):
    response = client.chat.completions.create(
        model="llama3-8b-8192",
        messages=[{"role": "user", "content": prompt}]
    )
    
    return response.choices[0].message.content

"""

'\nfrom groq import Groq   \n\n#create client calling Groq class\n\nclient = Groq(api_key=os.getenv(\'GROQ_API_KEY\'))\n\nfrom openai import OpenAI\nclient = OpenAI()\n\ndef llm(prompt):\n    response = client.chat.completions.create(\n        model="llama3-8b-8192",\n        messages=[{"role": "user", "content": prompt}]\n    )\n    \n    return response.choices[0].message.content\n\n'

In [18]:
def build_prompt(query, search_results):
    """Assemble the LLM prompt from the user's question and the retrieved documents.

    Args:
        query: The user's question, inserted after "QUESTION:".
        search_results: Iterable of dicts, each providing the keys 'section',
            'question' and 'text'.

    Returns:
        The filled-in prompt with leading and trailing whitespace stripped.
        Each document contributes a "Section / Question / Answer" entry, and
        entries are separated by a blank line.
    """
    template = (
        "You're an AI assistant helping with menu queries. "
        "Answer the QUESTION based on the CONTEXT provided.\n\n"
        "QUESTION: {question}\n\n"
        "CONTEXT: \n"
        "{context}"
    )

    context_entries = [
        f"Section: {hit['section']}\nQuestion: {hit['question']}\nAnswer: {hit['text']}\n\n"
        for hit in search_results
    ]

    filled = template.format(question=query, context="".join(context_entries))
    return filled.strip()


In [19]:
def rag(query):
    """Answer `query` in three steps: retrieve, build the prompt, ask the LLM.

    Calls `minsearch(query)` for the context documents, passes them with the
    query to `build_prompt`, and returns whatever `llm` replies for that prompt.
    """
    retrieved = minsearch(query)
    return llm(build_prompt(query, retrieved))

In [20]:
query = "what are the calories of white chocolate cake is it good?"
rag(query)

'Based on the context provided, the white chocolate cake has 314.9 calories.'