# Query Transformations

In [1]:
from dotenv import load_dotenv, find_dotenv

# Locate the .env file and load its variables into the process environment.
load_dotenv(dotenv_path=find_dotenv())

True

In [2]:
import os
import warnings

# LangSmith tracing configuration used by this tutorial.
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = "https://api.smith.langchain.com"
os.environ['LANGCHAIN_PROJECT'] = "langchain-tutorial"

# The API keys are expected to already be in the environment (loaded from .env).
# Writing os.getenv(...) back into os.environ is a no-op when the key exists and
# raises "TypeError: str expected, not NoneType" when it does not, so report
# missing keys by name instead.
missing_keys = [key for key in ("LANGCHAIN_API_KEY", "GROQ_API_KEY") if not os.getenv(key)]
if missing_keys:
    warnings.warn(
        "Missing environment variables: " + ", ".join(missing_keys)
        + ". Add them to your .env file before running the cells below."
    )

In [3]:
import bs4
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.huggingface import HuggingFaceBgeEmbeddings
from langchain import hub
from langchain_groq import ChatGroq
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

### INDEXING ###

# 1. Load documents: fetch the post and parse only elements with the listed CSS classes.
loader = WebBaseLoader(
    web_path=("https://lilianweng.github.io/posts/2024-07-07-hallucination/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    },
)
docs = loader.load()

# 2. Split the documents into chunks.
## Chunks hold up to 1000 characters and consecutive chunks overlap by up to 200,
## which reduces errors caused by losing context at chunk boundaries.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=200, chunk_size=1000)
splits = text_splitter.split_documents(docs)

# 3. Embed the chunks.
## TODO: evaluate whether a better embedding model is available.
model_name = "BAAI/bge-small-en"
model_kwargs = dict(device="cpu")
encode_kwargs = dict(normalize_embeddings=True)

hf_embeddings = HuggingFaceBgeEmbeddings(
    encode_kwargs=encode_kwargs,
    model_kwargs=model_kwargs,
    model_name=model_name,
)

# Build the FAISS vector store from the chunks and their embeddings.
vector_store = FAISS.from_documents(splits, hf_embeddings)

# Dense (embedding-based) retrieval over the vector store.
retriever = vector_store.as_retriever()

### Retrieval & Generation ###
# Pull the "rlm/rag-prompt" prompt from the LangChain hub.
prompt = hub.pull("rlm/rag-prompt")
# NOTE: the author reported that the hub pull did not always work and fell back
# to a locally built prompt (`lang_prompt`, which is not defined in the visible cells).

# Answering model, run with temperature 0.
llm = ChatGroq(
    model = "llama3-8b-8192", 
    temperature = 0
)

# post-processing
def format_docs(docs):
    """Concatenate the `page_content` of each document, separated by a blank line."""
    contents = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(contents)

# Chain: the input question is sent to the retriever (whose documents are joined
# into one string by format_docs) and is also passed through unchanged; both
# values fill the prompt, the prompt goes to the LLM, and the reply is parsed
# into a plain string.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt 
    | llm
    | StrOutputParser()
)

# Ask a question about the indexed post and print the answer.
print(rag_chain.invoke("What is in-context hallucination?"))
 

USER_AGENT environment variable not set, consider setting it to identify your requests.
  from tqdm.autonotebook import tqdm, trange


In-context hallucination refers to the model output being consistent with the source content in context, but not necessarily grounded in world knowledge. This type of hallucination is different from extrinsic hallucination, which is when the model output is not grounded in either the provided context or world knowledge.


## Multi Query

## Indexing

In [7]:
pip install tiktoken

Collecting tiktoken
  Using cached tiktoken-0.8.0-cp312-cp312-win_amd64.whl.metadata (6.8 kB)
Downloading tiktoken-0.8.0-cp312-cp312-win_amd64.whl (883 kB)
   ---------------------------------------- 0.0/883.8 kB ? eta -:--:--
   ---------------------------------------- 883.8/883.8 kB 8.0 MB/s eta 0:00:00
Installing collected packages: tiktoken
Successfully installed tiktoken-0.8.0
Note: you may need to restart the kernel to use updated packages.


In [13]:
#### INDEXING ####
import bs4
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.huggingface import HuggingFaceBgeEmbeddings
from langchain import hub
from langchain_groq import ChatGroq
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# Fetch the post and parse only elements with the listed CSS classes.
loader = WebBaseLoader(
    web_path=("https://lilianweng.github.io/posts/2024-07-07-hallucination/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    },
)

docs = loader.load()

In [14]:
## Splitting: build the splitter through its tiktoken-based constructor
## (chunk size 300, overlap 50) and chunk the loaded documents.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_overlap=50,
    chunk_size=300,
)

splits = text_splitter.split_documents(documents=docs)

In [16]:
# Embedding model configuration.
model_name = "BAAI/bge-small-en"
model_kwargs = dict(device="cpu")
encode_kwargs = dict(normalize_embeddings=True)
hf_embeddings = HuggingFaceBgeEmbeddings(
    encode_kwargs=encode_kwargs,
    model_kwargs=model_kwargs,
    model_name=model_name,
)

# Build the FAISS vector store from the chunks and expose it as a retriever.
vector_store = FAISS.from_documents(splits, hf_embeddings)

retriever = vector_store.as_retriever()

In [22]:
from langchain.prompts import ChatPromptTemplate

# Multi Query: Different Perspectives.

template = """You are an AI Language model assistant. Your task is to generate 5 different versions of the given user question to retrieve relevant documents from a vector database. By generating multiple perspectives on the user question, your goal is to help the user overcome some of the limitations of the distance-based similarity search. Provide these alternative questions separated by newlines. Original question: {question}
"""

prompt_perspectives = ChatPromptTemplate.from_template(template)

# Prompt -> LLM -> plain string -> list of queries (one per line).
# Blank lines in the reply are dropped and surrounding whitespace is stripped,
# so no empty query is passed on to the retriever.
generate_queries = (
    prompt_perspectives
    | ChatGroq(temperature=0.9)
    | StrOutputParser()
    | (lambda x: [line.strip() for line in x.split("\n") if line.strip()])
)

In [23]:
# Show the alternative phrasings generated for a sample question.
generate_queries.invoke("What is task decomposition")

['1. "Can you explain the concept of breaking down a task into smaller components, also known as task decomposition?"',
 '2. "How does task decomposition work in the context of complex problem-solving?"',
 '3. "What are the methods or techniques used for task decomposition?"',
 '4. "Can you provide examples of task decomposition in real-world scenarios or in project management?"',
 '5. "What is the significance of task decomposition in improving efficiency and managing complex tasks?"']

In [24]:
from langchain.load import dumps, loads

def get_unique_union(documents: list[list]):
    """Return the unique documents found across several retrieval result lists.

    Args:
        documents: One list of retrieved documents per query.

    Returns:
        The de-duplicated documents, in order of first appearance.
    """
    # Serialize every document so that identical documents compare equal.
    flattened_docs = [dumps(doc) for sublist in documents for doc in sublist]

    # dict.fromkeys keeps first-seen order; a plain set() would return the
    # documents in an arbitrary order that can differ between runs.
    unique_docs = list(dict.fromkeys(flattened_docs))

    return [loads(doc) for doc in unique_docs]

# Retrieve: generate the query variants, run the retriever once per variant
# (retriever.map()), then merge the per-query results with get_unique_union.
question = "What is in-context hallucination?"
retrieval_chain = generate_queries | retriever.map() | get_unique_union
# NOTE: this rebinds `docs`, which previously held the documents returned by loader.load().
docs = retrieval_chain.invoke({"question": question})

# Number of unique documents retrieved across all query variants.
print(len(docs))

7


In [26]:
from operator import itemgetter
from langchain_core.runnables import RunnablePassthrough


template = """Answer the following question based on this context: {context}
Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

# Answering model, run with temperature 0.
llm = ChatGroq(temperature=0)

# The retrieved documents are joined into plain text with format_docs (as in the
# first RAG chain) so the prompt receives the page contents rather than the repr
# of a list of Document objects.
final_rag_chain = (
    {"context": retrieval_chain | format_docs,
    "question": itemgetter("question")
    }
    | prompt
    | llm
    | StrOutputParser()
)

final_rag_chain.invoke({"question": question})

'In-context hallucination refers to the situation where the model output should be consistent with the source content provided in the context, but the model generates unfaithful, fabricated, inconsistent, or nonsensical content that is not grounded in the given context.'

## RAG Fusion

In [27]:
from langchain.prompts import ChatPromptTemplate

# RAG-Fusion: prompt asking the LLM for four search queries related to the input question.
template = """You are a helpful assistant that generates multiple search queries based on a single input query\n Generate multiple search queries related to: {question} \n Output (4 queries):"""

prompt_rag_fusion = ChatPromptTemplate.from_template(template)

In [28]:
from langchain.load import dumps, loads

def reciprocal_rank_fusion(results: list[list], k=60):
    """Fuse several ranked document lists with Reciprocal Rank Fusion (RRF).

    Each document receives 1 / (rank + k) for every list it appears in, where
    rank is its zero-based position in that list; the contributions are summed.

    Args:
        results: Ranked lists of documents, one list per query.
        k: Constant added to every rank in the score formula.

    Returns:
        A list of (document, fused_score) tuples sorted by descending score.
    """
    fused_score = {}

    for docs in results:
        for rank, doc in enumerate(docs):
            # Key by the serialized document so the same document retrieved
            # for different queries accumulates a single score.
            doc_str = dumps(doc)
            fused_score[doc_str] = fused_score.get(doc_str, 0) + 1 / (rank + k)

    ranked = sorted(fused_score.items(), key=lambda item: item[1], reverse=True)
    return [(loads(doc_str), score) for doc_str, score in ranked]

# Generate the search queries with the RAG-Fusion prompt (prompt_rag_fusion)
# rather than the multi-query chain. The reply is split into one query per
# line, dropping blank lines.
generate_queries_rag_fusion = (
    prompt_rag_fusion
    | ChatGroq(temperature=0)
    | StrOutputParser()
    | (lambda x: [line.strip() for line in x.split("\n") if line.strip()])
)

# Retrieve once per generated query, then fuse the ranked lists into
# (document, score) tuples.
retrieval_chain_rag_fusion = generate_queries_rag_fusion | retriever.map() | reciprocal_rank_fusion

docs = retrieval_chain_rag_fusion.invoke({"question": question})
len(docs)

7

In [31]:
from langchain_core.runnables import RunnablePassthrough

template = """Answer the following question based on this context: 
{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

# retrieval_chain_rag_fusion yields (document, score) tuples. Only the document
# text is kept, in ranked order, so the prompt receives readable context rather
# than the repr of the tuples.
final_rag_chain = (
    {"context": retrieval_chain_rag_fusion
        | (lambda ranked: format_docs(doc for doc, _score in ranked)),
    "question": itemgetter("question")
    }
    | prompt
    | llm
    | StrOutputParser()
)

final_rag_chain.invoke({"question": question})

'In-context hallucination refers to the situation where the model output should be consistent with the source content in the given context, but the model generates content that is not faithful to the source content. This is one of the two types of hallucination in large language models, with the other type being extrinsic hallucination.'

## Decomposition


In [32]:
from langchain.prompts import ChatPromptTemplate

# Decomposition prompt: asks the LLM for three sub-questions that can each be
# answered in isolation.
template = """You are a helpful assistant that generates multiple sub-question related to an input question\nThe goal is to break down the input into a set of sub-problems/sub-questions that can be answered in isolation\nGenerate multiple search queries related to {question}\nOutput (3 queries)"""

decomposition_prompt = ChatPromptTemplate.from_template(template)

In [33]:
from langchain_groq import ChatGroq
from langchain_core.output_parsers import StrOutputParser

# Query-generation model, run with temperature 0.9.
llm = ChatGroq(temperature=0.9)

# Chain: prompt -> LLM -> plain string -> list of sub-questions (one per line).
# Blank lines are dropped so no empty sub-question is answered later.
generate_decomposition_queries = (
    decomposition_prompt
    | llm
    | StrOutputParser()
    | (lambda x: [line.strip() for line in x.split("\n") if line.strip()])
)

# Run
# NOTE(review): the retriever in this notebook indexes a post about hallucination;
# confirm that this agent-related question is the intended one for that corpus.
question = "What are the main components of an LLM-powered autonomous agent system?"

questions = generate_decomposition_queries.invoke({"question": question})

In [34]:
# Display the generated sub-questions.
questions

['1. "What are the key modules in a language model-based autonomous agent system?"',
 '2. "What are the different components that make up an LLM-powered autonomous agent?"',
 '3. "Can you list the main building blocks of a autonomous agent system using large language models?"']

In [35]:
# Prompt used to answer each sub-question: it receives the question, the
# question/answer pairs accumulated so far, and the retrieved context.
template = """Here is the question you need to answer:
\n --- \n {question} \n --- \n

Here is any available background question + answer pairs

\n --- \n {q_a_pairs} \n --- \n

Here is additional context relevant to the question

\n --- \n {context} \n --- \n

use the above context and any background question + answer pairs to answer the given question: {question}
"""

# NOTE: this rebinds `decomposition_prompt`, which earlier held the prompt that
# generates the sub-questions. Re-running the query-generation cell after this
# one would pick up this answering prompt instead.
decomposition_prompt = ChatPromptTemplate.from_template(template)

In [36]:
from operator import itemgetter
from langchain_core.output_parsers import StrOutputParser

def format_qa_pair(question, answer):
    """Render one question/answer pair as text, with surrounding whitespace stripped."""
    return f"Question: {question}\n Answer: {answer}\n\n".strip()

In [37]:
# Answering model, run with temperature 0.
llm = ChatGroq(temperature=0)

# The chain does not depend on the loop variable, so it is built once.
rag_chain = (
    {
        # Join the retrieved documents into plain text with format_docs,
        # as the first RAG chain in this notebook does.
        "context": itemgetter("question") | retriever | format_docs,
        "question": itemgetter("question"),
        "q_a_pairs": itemgetter("q_a_pairs"),
    }
    | decomposition_prompt
    | llm
    | StrOutputParser()
)

# Answer the sub-questions in order. Each answer is appended to q_a_pairs so
# that later sub-questions receive the earlier question/answer pairs.
q_a_pairs = ""

for q in questions:
    answer = rag_chain.invoke({"question": q, "q_a_pairs": q_a_pairs})
    q_a_pair = format_qa_pair(q, answer)
    q_a_pairs = q_a_pairs + "\n --- \n" + q_a_pair

In [38]:
# Display the answer to the last sub-question, which was produced with all
# earlier question/answer pairs available as background.
answer

"Based on the provided context and background question-answer pairs, the main building blocks of an autonomous agent system using large language models (LLMs) can include:\n\n1. Language Model (LLM): This is the core module responsible for understanding and generating human-like text. It is crucial for processing and responding to user queries.\n\n2. Uncertainty Learning: This module helps the language model learn uncertainty in words, improving the factuality of the generated text. It can help the model better understand and express its level of certainty in the generated responses.\n\n3. Factuality Enhancement: Techniques like fine-grained hallucination detection and editing, factuality-aware alignment, and fine-tuning language models for factuality can be employed in this module to enhance the factuality of the generated text and address issues like hallucinations or factual inconsistencies.\n\n4. Self-Reflection and Critique: This module enables the model to reflect on its generate