In [1]:
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
from langchain_groq import ChatGroq
models = {"Google": ["gemma2-9b-it", "gemma-7b-it"],
          "Groq": ["llama3-groq-70b-8192-tool-use-preview", "llama3-groq-8b-8192-tool-use-preview"],
          "Meta": ["llama-3.1-70b-versatile", "llama-3.1-8b-instant", "llama-3.2-1b-preview", "llama-3.2-3b-preview", "llama-3.2-11b-vision-preview", "llama-3.2-90b-vision-preview", "llama-guard-3-8b", "llama3-70b-8192", "llama3-8b-8192"],
          "Mistral": ["mixtral-8x7b-32768"],
          "OpenAI": ["whisper-large-v3", "whisper-large-v3-turbo"]}
model = ChatGroq(temperature=0, model=models["Meta"][0])

In [3]:
from langchain.prompts import ChatPromptTemplate

template = """
You are a helpful assistant that generatess multiple search queries based on a single input query. \n 
Generate multiple search queries related to: {question} \n
Output (4 queries):
"""
prompt_rag_fusion = ChatPromptTemplate.from_template(template)

In [4]:
prompt_template = """Answer the following question based on this context:
{context}
Question: {question}
"""
prompt = ChatPromptTemplate.from_template(prompt_template)

In [5]:
from langchain.document_loaders import WebBaseLoader
import bs4
loader = WebBaseLoader(
    web_path=("http://blog.langchain.dev/deconstructing-rag/"),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("article-header section", "article-main section")
        )
    )
)
blog_docs = loader.load()

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [6]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=300,
    chunk_overlap=50
)
splits = text_splitter.split_documents(blog_docs)

In [7]:
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
vectorstore = Chroma.from_documents(documents=splits,
                                    embedding=HuggingFaceEmbeddings())

retriever = vectorstore.as_retriever()
print(f"number of document is : {vectorstore._collection.count()}")

  from tqdm.autonotebook import tqdm, trange


number of document is : 11


In [8]:
from langchain_core.output_parsers import StrOutputParser

generate_queries = (
    prompt_rag_fusion
    | model
    | StrOutputParser()
    | (lambda x: x.split("\n"))
)

In [9]:
from langchain.load import dumps, loads

def reciprocal_rank_fusion(results: list[list], k=60):
    fused_scores = {}

    for docs in results:
        for rank, doc in enumerate(docs):
            doc_str = dumps(doc)
            if doc_str not in fused_scores:
                fused_scores[doc_str] = 0

            previous_score = fused_scores[doc_str]
            fused_scores[doc_str] += 1 / (rank + k)

    reranked_results = [
        (loads(doc), score)
        for doc, score in sorted(fused_scores.items(), key=lambda x: x[1], reverse=True)
    ]
    return reranked_results

In [10]:
from operator import itemgetter
from IPython.display import Markdown
def query(query):
    # question = "how can we make retrieal robust to variability in user input?"

    retrieval_chain_rag_fusion = (
        generate_queries
        | retriever.map()
        | reciprocal_rank_fusion
    )
    # docs = retrieval_chain_rag_fusion.invoke({"question": question})
    # print(f"length of documents is {len(docs)}")
    final_rag_chain = (
        {"context": retrieval_chain_rag_fusion, "question": itemgetter("question")}
        | prompt
        | model
        | StrOutputParser()
    )
    result = final_rag_chain.invoke({"question": query})
    display(Markdown(result))
    return result

In [11]:
query("Hello. What is the meaning of retrieval?")

  (loads(doc), score)


Retrieval, in the context of Retrieval Augmented Generation (RAG), refers to the process of retrieving information from a data source (or sources) based on a user's question or query. This information is then passed to a Large Language Model (LLM) to generate a response.

"Retrieval, in the context of Retrieval Augmented Generation (RAG), refers to the process of retrieving information from a data source (or sources) based on a user's question or query. This information is then passed to a Large Language Model (LLM) to generate a response."