In [18]:
# Keep the notebook output readable by muting library chatter up front.
import logging
import os
import warnings

# TF_CPP_MIN_LOG_LEVEL is read from the environment; "3" is the value used here
# (presumably TensorFlow's most restrictive native-log setting).
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

# Ignore every Python warning, whatever its category.
warnings.filterwarnings("ignore")

# Process-wide switch: drops all log records at CRITICAL severity and below.
logging.disable(logging.CRITICAL)
# Per-logger threshold for chromadb (ERROR and above only).
logging.getLogger("chromadb").setLevel(logging.ERROR)

In [20]:
# Use the %pip magic (not !pip) so packages land in the environment of the
# running kernel rather than whatever `pip` the subshell happens to resolve.
%pip install -q -U \
    numpy==1.26.4 \
    sentence-transformers==3.0.1 \
    langchain==0.3.19 \
    langchain-groq==0.2.4 \
    langchain-community==0.3.18 \
    langchain-huggingface==0.1.2 \
    einops==0.8.1

In [25]:
import os
import getpass
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_groq import ChatGroq
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import ChatPromptTemplate
from langchain.document_loaders import WebBaseLoader

In [4]:
# Ask for the Groq key only when it is not already set, so re-running the
# notebook does not force another prompt or overwrite a configured key.
# The prompt is labeled so it cannot be mistaken for another secret prompt.
if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass.getpass("Enter your GROQ_API_KEY: ")

··········


In [5]:
# Ask for the Hugging Face token only when it is not already set, so re-running
# the notebook does not force another prompt or overwrite a configured token.
# The prompt is labeled so it cannot be mistaken for another secret prompt.
if not os.environ.get("HF_TOKEN"):
    os.environ["HF_TOKEN"] = getpass.getpass("Enter your HF_TOKEN: ")

··········


In [6]:
def load_and_process_data(url, chunk_size=500, chunk_overlap=50):
    """Load a web page and split it into overlapping chunks.

    Args:
        url: Address handed to ``WebBaseLoader``.
        chunk_size: ``chunk_size`` forwarded to
            ``RecursiveCharacterTextSplitter``. Defaults to 500, the value
            that used to be hard-coded.
        chunk_overlap: ``chunk_overlap`` forwarded to the splitter.
            Defaults to 50, the value that used to be hard-coded.

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.split_documents`` returns
        for the loaded page (the chunked documents).
    """
    documents = WebBaseLoader(url).load()
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(documents)

In [7]:
def create_vector_store(chunks):
    """Embed ``chunks`` and index them in a fresh Chroma collection.

    Args:
        chunks: Documents to embed, as produced by ``load_and_process_data``.

    Returns:
        The ``Chroma`` vector store built from ``chunks``.
    """
    import uuid  # local import: only needed to name the collection

    # NOTE(security): trust_remote_code=True lets the model repository run its
    # own Python code on this machine; only use it with repositories you trust.
    embeddings = HuggingFaceEmbeddings(
        model_name="nomic-ai/nomic-embed-text-v1.5",
        model_kwargs={"trust_remote_code": True},
    )

    # Chroma's in-memory client is shared within a process and the default
    # collection name is constant, so calling this function again (e.g. when
    # re-running the cell) would append the same chunks to the existing
    # collection and make similarity_search return duplicates. A unique name
    # gives every call its own empty collection.
    collection_name = f"hyde-{uuid.uuid4().hex}"
    return Chroma.from_documents(chunks, embeddings, collection_name=collection_name)

In [23]:
def hyde_rag(query, vectorstore, llm, k=3):
    """Answer ``query`` using a generated hypothetical passage for retrieval.

    The LLM first writes a hypothetical passage answering the question; that
    passage (not the raw question) is used as the similarity-search text. The
    retrieved chunks are then given to the LLM as context for the final answer.

    Args:
        query: The user's question.
        vectorstore: Object exposing ``similarity_search(text, k=...)`` whose
            results have a ``page_content`` attribute.
        llm: Chat model that can be piped after a ``ChatPromptTemplate`` and
            whose ``invoke`` result has a ``content`` attribute.
        k: Number of chunks to retrieve. Defaults to 3, the value that used
            to be hard-coded.

    Returns:
        dict with keys ``"hypothetical_document"``, ``"retrieved_context"``
        and ``"final_answer"``, all strings.
    """
    # Step 1: have the model draft a passage that would answer the question.
    hyde_prompt = ChatPromptTemplate.from_template("Given the following question, generate a hypothetical passage that would answer this question:\nQuestion: {query}\nHypothetical Passage:")
    hypothetical_doc = (hyde_prompt | llm).invoke({"query": query})

    # Step 2: retrieve the chunks closest to the drafted passage.
    retrieved_docs = vectorstore.similarity_search(hypothetical_doc.content, k=k)
    context = "\n".join(doc.page_content for doc in retrieved_docs)

    # Step 3: answer the original question using the retrieved chunks.
    final_prompt = ChatPromptTemplate.from_template("Based on the following context, please answer the question:\nContext: {context}\nQuestion: {query}\nAnswer:")
    final_response = (final_prompt | llm).invoke({"context": context, "query": query})

    return {
        "hypothetical_document": hypothetical_doc.content,
        "retrieved_context": context,
        "final_answer": final_response.content,
    }

In [24]:
# Chat model passed to hyde_rag (used there for both the hypothetical passage
# and the final answer).
llm = ChatGroq(
    temperature=0.5,
    model="llama-3.1-8b-instant",
)

# Page to index: load and chunk it, then embed the chunks into the vector store.
url = "https://en.wikipedia.org/wiki/Burger_King"
chunks = load_and_process_data(url)
vectorstore = create_vector_store(chunks)

In [15]:
# Run the HyDE pipeline for one question and print each stage of the result.
query = "What are the most popular items in burger king?"
result = hyde_rag(query, vectorstore, llm)

# (heading, result key) pairs, in display order; the leading "\n" on later
# headings leaves a blank line between sections.
report_sections = (
    ("Hypothetical Document:", "hypothetical_document"),
    ("\nRetrieved Context:", "retrieved_context"),
    ("\nFinal Answer:", "final_answer"),
)
for heading, result_key in report_sections:
    print(heading)
    print(result[result_key])

Hypothetical Document:
Burger King is a well-known fast-food chain that offers a wide variety of menu items to cater to diverse tastes and preferences. Among its extensive offerings, some items have gained immense popularity over the years, becoming staples in the minds of customers. Here are some of the most popular items in Burger King:

One of the most iconic items on the Burger King menu is the Whopper, a flame-grilled burger that consists of a beef patty, lettuce, tomato, mayonnaise, ketchup, pickles, and onions, all sandwiched between a soft sesame seed bun. This mouth-watering sandwich has been a customer favorite for decades and is often considered the quintessential Burger King experience.

Another extremely popular item is the Impossible Whopper, a plant-based version of the classic Whopper. This innovative offering features a beef patty replaced by an Impossible Foods patty, which is made from plant-based ingredients and has a similar taste and texture to traditional beef. T