<a href="https://colab.research.google.com/github/ArshHp/LLM/blob/main/Multi_Turn_2_0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# List of capabilities in this notebook

1. Basic RAG pipeline with public test dataset
2. CRAG using LLM chain filter & Rerankers
3. Rephrase the RAG query based on previpus coversation for correct RAG retrievals
4. Add Multi-tun Logic with Vector Based Embedding approach

In [8]:
!pip install -q langchain
!pip install -q torch
!pip install -q transformers
!pip install -q sentence-transformers
!pip install -q datasets
!pip install -q faiss-cpu
!pip install -q langchain_community

In [9]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain import HuggingFacePipeline

In [10]:
# Define the path to the pre-trained model you want to use
modelPath = "sentence-transformers/all-MiniLM-l6-v2"

# Create a dictionary with model configuration options, specifying to use the CPU for computations
model_kwargs = {'device':'cpu'}

# Create a dictionary with encoding options, specifically setting 'normalize_embeddings' to False
encode_kwargs = {'normalize_embeddings': False}

# Initialize an instance of HuggingFaceEmbeddings with the specified parameters
embeddings = HuggingFaceEmbeddings(
    model_name=modelPath,     # Provide the pre-trained model's path
    model_kwargs=model_kwargs, # Pass the model configuration options
    encode_kwargs=encode_kwargs # Pass the encoding options
)


  embeddings = HuggingFaceEmbeddings(


In [11]:
import os
os.environ["GROQ_API_KEY"]="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"

In [12]:
!pip install -q langchain-groq

In [13]:
from langchain_groq import ChatGroq

llm = ChatGroq(model="llama-3.1-8b-instant") #Replace with Finetune LLM model, its mainly for quick POC work


Get the external documents for Vector Database

In [14]:
!pip install -q langchain-chroma

In [15]:
from langchain_chroma import Chroma

In [16]:
!gdown 1oWBnoxBZ1Mpeond8XDUSO6J9oAjcRDyW

Downloading...
From (original): https://drive.google.com/uc?id=1oWBnoxBZ1Mpeond8XDUSO6J9oAjcRDyW
From (redirected): https://drive.google.com/uc?id=1oWBnoxBZ1Mpeond8XDUSO6J9oAjcRDyW&confirm=t&uuid=f134eee4-c5ac-48a5-8e35-fbb1b8e7fa90
To: /content/simplewiki-2020-11-01.jsonl.gz
100% 50.2M/50.2M [00:00<00:00, 68.5MB/s]


In [17]:
import gzip
import json

wikipedia_filepath = 'simplewiki-2020-11-01.jsonl.gz'

docs = []
with gzip.open(wikipedia_filepath, 'rt', encoding='utf8') as fIn:
    for line in fIn:
        data = json.loads(line.strip())

        #Add all paragraphs
        #passages.extend(data['paragraphs'])

        #Only add the first paragraph
        docs.append({
                        'metadata': {
                                        'title': data.get('title'),
                                        'article_id': data.get('id')
                        },
                        'data': ' '.join(data.get('paragraphs')[0:3]) # restrict data to first 3 paragraphs to run later modules faster
        })

print("Passages:", len(docs))

Passages: 169597


In [18]:
# We subset our data so we only use a subset of wikipedia documents to run things faster
docs = [doc for doc in docs for x in ['linguistics', 'india', 'cheetah']
              if x in doc['data'].lower().split()]

In [19]:
len(docs)

1364

In [20]:
from langchain.docstore.document import Document

docs = [Document(page_content=doc['data'],
                 metadata=doc['metadata']) for doc in docs]

In [21]:
len(docs)

1364

In [22]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=300)
chunked_docs = splitter.split_documents(docs)

In [23]:
len(chunked_docs)

1388

In [24]:
# create vector DB of docs and embeddings - takes < 30s on Colab

chroma_db = Chroma.from_documents(documents=chunked_docs,
                                 collection_name='rag_db',
                                  embedding=embeddings,
                                  # need to set the distance function to cosine else it uses euclidean by default
                                  # check https://docs.trychroma.com/guides#changing-the-distance-function
                                  collection_metadata={"hnsw:space": "cosine"},
                                  persist_directory="./chroma_rag_db")

In [25]:
# load from disk
chroma_db = Chroma(persist_directory="./chroma_rag_db",
                   collection_name='rag_db',
                   embedding_function=embeddings)

# Retriever alomg with LLM Chain FIlter & ReRankers

In [26]:
from langchain_community.cross_encoders import HuggingFaceCrossEncoder
from langchain.retrievers.document_compressors import CrossEncoderReranker
from langchain.retrievers.document_compressors import LLMChainFilter
from langchain.retrievers import ContextualCompressionRetriever

# Retriever 1 - simple cosine distance based retriever
similarity_retriever = chroma_db.as_retriever(search_type="similarity",
                                              search_kwargs={"k": 5})

#  decides which of the initially retrieved documents to filter out and which ones to return
_filter = LLMChainFilter.from_llm(llm=llm)
# Retriever 2 - retrieves the documents similar to query and then applies the filter
compressor_retriever = ContextualCompressionRetriever(
    base_compressor=_filter, base_retriever=similarity_retriever
)

# download an open-source reranker model - BAAI/bge-reranker-v2-m3
reranker = HuggingFaceCrossEncoder(model_name="BAAI/bge-reranker-large")
reranker_compressor = CrossEncoderReranker(model=reranker, top_n=3)
# Retriever 3 - Uses a Reranker model to rerank retrieval results from the previous retriever
final_retriever = ContextualCompressionRetriever(
    base_compressor=reranker_compressor, base_retriever=compressor_retriever
)

Lets test the flow

In [108]:
query = "what is the old capital of India?"
docs = final_retriever.invoke(query)
docs

[Document(id='f1e05767-f10b-4b99-9caa-ab6e49b3bcc4', metadata={'article_id': '5117', 'title': 'New Delhi'}, page_content='New Delhi () is the capital of India and a union territory of the megacity of Delhi. It has a very old history and is home to several monuments where the city is expensive to live in. In traditional Indian geography it falls under the North Indian zone. The city has an area of about 42.7\xa0km. New Delhi has a population of about 9.4 Million people.'),
 Document(id='0fca1e4f-89ba-4e19-9109-ec90e2dd6128', metadata={'article_id': '22106', 'title': 'Delhi'}, page_content='Delhi (; "Dillī"; "Dillī"; "Dēhlī"), officially the National Capital Territory of Delhi (NCT), is a territory in India. It includes the country\'s capital New Delhi. It covers an area of . It is bigger than the Faroe Islands but smaller than Guadeloupe. Delhi is a part of the National Capital Region, which has 12.5 million residents. The governance of Delhi is like that of a state in India. It has its

# Build the LangChain along with Retrivers

In [28]:
from langchain_core.prompts import ChatPromptTemplate

prompt = """You are an assistant for question-answering tasks.
            Use the following pieces of retrieved context to answer the question.
            If no context is present or if you don't know the answer, just say that you don't know.
            Do not make up the answer unless it is there in the provided context.
            Give a detailed answer with regard to the question.

            Question:
            {question}

            Context:
            {context}

            Answer:
         """

prompt_template = ChatPromptTemplate.from_template(prompt)

In [54]:
SYS_PROMPT = """ You are an AI assistant designed for high-quality question-answering. Your goal is to provide precise, well-structured, and contextually relevant responses based on the conversation history, context, and the user's query.

## **Instructions for Response Generation:**
1. **Understand Context:**
   - Consider both the provided historical conversation messages and explicit context.
   - Identify relevant information from the history that aligns with the user's latest question.
   - Avoid redundant information if it has already been addressed.

2. **Answer Clearly & Concisely:**
   - Provide direct, accurate, and structured responses.
   - When needed, break down complex answers into steps or bullet points.
   - Use formal yet conversational language to ensure clarity.

3. **Handle Ambiguity:**
   - If the user's question is unclear, request clarification instead of making assumptions.
   - If multiple interpretations exist, briefly list possible meanings and ask for confirmation.

4. **Leverage Contextual Memory:**
   - Use relevant prior exchanges to maintain continuity in the conversation.
   - If a response depends on prior answers, ensure consistency while avoiding contradictions.

5. **Adapt to User Intent & Preferences:**
   - If the user has specific preferences (e.g., detailed explanations vs. concise summaries), align your response accordingly.
   - Detect tone and formality based on prior interactions and adjust your response style.

6. **Accuracy & Verification:**
   - Base your responses on verifiable facts.
   - If uncertain or if external verification is required, acknowledge the need for further confirmation.

7. **Avoid Unnecessary Repetition:**
   - Do not restate previous answers unless specifically requested or required for clarity.
   - Summarize previous points efficiently when relevant.

8. **Multi-Turn Conversations:**
   - Keep track of unresolved queries and follow up if necessary.
   - If a conversation thread requires closure, suggest the next steps.
   """


In [52]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.memory import VectorStoreRetrieverMemory
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda

In [78]:
from langchain.prompts import ChatPromptTemplate

# Define an improved ChatPromptTemplate with conditional handling
prompt_template = ChatPromptTemplate.from_template(
    """
    You are an AI assistant that answers user questions based on provided context and conversation history.

    Guidelines:
    1. Use conversation history if relevant to maintain continuity.
    2. If history is empty, rely only on the provided context.
    3. If both history and context are empty, let the user know that you need more information.
    4. Be precise, structured, and avoid repetition.

    {% if history.strip() %}
    Previous Conversation History:
    {{ history }}
    {% endif %}

    {% if context.strip() %}
    Retrieved Context:
    {{ context }}
    {% else %}
    [Note: No additional context was provided.]
    {% endif %}

    User Question:
    {{ question }}

    Provide a clear and informative response.
    """
)


In [140]:
SYS_PROMPT = """Act as a helpful assistant and give brief answers"""
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", SYS_PROMPT),
        MessagesPlaceholder(variable_name="history"),
        ("human", "{query}"),
    ]
)

# load 2 most similar conversation messages from vector db history for each new message \ prompt
# this uses cosine embedding similarity to load the top 2 similar messgages to the input prompt \ query
retriever = chroma_db.as_retriever(search_type="similarity",
                                   search_kwargs={"k": 5})
memory = VectorStoreRetrieverMemory(retriever=retriever, return_messages=True)

# creating our conversation chain now
def get_memory_messages(query):
    return [memory.load_memory_variables(query)['history']]

conversation_chain = (
    RunnablePassthrough.assign(
        history=RunnableLambda(get_memory_messages)
    ) # sends current query (input by user at runtime) and history messages to next step
      |
    prompt # creates prompt using the previous two variables
      |
    llm # generates response using the prompt from previous step
)


In [141]:
query = {'query': 'Tell me about AI'}
response = conversation_chain.invoke(query)
memory.save_context(query, {"output": response.content}) # remember to save your current conversation in memory
print(response.content)

Artificial Intelligence (AI) is a field of computer science that focuses on creating intelligent machines that can perform tasks that typically require human intelligence, such as learning, reasoning, problem-solving, and perception.

**Types of AI:**

1. **Narrow or Weak AI**: Designed to perform a specific task, such as facial recognition or language translation.
2. **General or Strong AI**: A hypothetical AI system that can perform any intellectual task that a human can.
3. **Superintelligence**: An AI system that is significantly more intelligent than the best human minds.

**Applications of AI:**

1. **Virtual assistants**: Siri, Alexa, and Google Assistant.
2. **Image recognition**: Facebook's facial recognition feature.
3. **Self-driving cars**: Companies like Tesla and Waymo are developing autonomous vehicles.
4. **Healthcare**: AI is used to analyze medical images and diagnose diseases.
5. **Customer service**: Chatbots are used to provide customer support.

**Benefits of AI:*

In [105]:
query = {'query': 'What about deep learning'}
response = conversation_chain.invoke(query)
memory.save_context(query, {"output": response.content}) # remember to save your current conversation in memory
print(response.content)

**Deep Learning** is a subfield of Machine Learning that focuses on the use of artificial neural networks with multiple layers to analyze and interpret data. These networks are designed to mimic the structure and function of the human brain, with each layer processing the input data in a more abstract and meaningful way.

**Key Features of Deep Learning:**

1. **Multiple Layers**: Deep learning models consist of multiple layers of artificial neurons, each of which processes the input data in a different way.
2. **Hierarchical Representations**: Each layer in a deep learning model learns to represent the input data in a more abstract and meaningful way, allowing the model to capture complex patterns and relationships.
3. **Non-Linear Transformations**: Deep learning models use non-linear transformations to map the input data to the output, allowing the model to learn complex and non-linear relationships.
4. **Backpropagation**: Deep learning models use an algorithm called backpropagatio

In [106]:
print(memory.load_memory_variables({'query': 'What about machine learning?'})['history'])

Computational Linguistics is a field of linguistics that deals with making computers understand human language. Some of the biggest sub-fields of computational linguistics are: Speech Recognition, which is a computer program that listens to people talk and writes down what they said Speech Synthesis, which is a computer program that takes writing and reads it out loud
Computational Linguistics is a field of linguistics that deals with making computers understand human language. Some of the biggest sub-fields of computational linguistics are: Speech Recognition, which is a computer program that listens to people talk and writes down what they said Speech Synthesis, which is a computer program that takes writing and reads it out loud
query: Tell me about AI
output: Artificial Intelligence (AI) is a field of computer science that focuses on creating intelligent machines that can perform tasks that typically require human intelligence, such as:

1. **Learning**: AI systems can learn from d

In [109]:
from langchain_core.prompts import ChatPromptTemplate

prompt = """You are an assistant for question-answering tasks.
            Use the following pieces of retrieved context to answer the question.
            If no context is present or if you don't know the answer, just say that you don't know.
            Do not make up the answer unless it is there in the provided context.
            Give a detailed answer with regard to the question.


            Question:
            {question}

            Context:
            {context}

            Answer:
         """

prompt_template = ChatPromptTemplate.from_template(prompt)

In [118]:
from langchain_core.runnables import RunnablePassthrough

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

qa_rag_chain = (
    {
        "context": (final_retriever
                      |
                    format_docs),
        "question": RunnablePassthrough()
    }
      |
    prompt_template
      |
    llm
)

from IPython.display import Markdown, display

query = "What is the financial capital of India?"
result = qa_rag_chain.invoke(query)
display(Markdown(result.content))

Based on the provided context, I don't know what the financial capital of India is. The context only mentions New Delhi as the capital of India and a union territory of the megacity of Delhi, but it doesn't provide any information related to the financial capital of India.

# We need to merge Question, Answers , Message history into the same chain


In [187]:
def get_memory_messages(query):
    # Pass query as a dictionary with the expected key ('query')
    return [memory.load_memory_variables({"query": query})['history']]

In [174]:
from langchain_core.prompts import ChatPromptTemplate

prompt = """You are an assistant for question-answering tasks.
            Use the following pieces of retrieved context to answer the question.
            If no context is present or if you don't know the answer, just say that you don't know.
            Do not make up the answer unless it is there in the provided context.
            Give a detailed answer with regard to the question.
            Guidelines:
                1. Use conversation history if relevant to maintain continuity.
                2. If history is empty, rely only on the provided context.
                3. If both history and context are empty, let the user know that you need more information.
                4. Be precise, structured, and avoid repetition.

            Question:
            {question}

            Context:
            {context}

            History:
            {history}

            Answer:
         """

merge_prompt_template = ChatPromptTemplate.from_template(prompt)

In [197]:
merged_chain = (
    {
        "history": RunnableLambda(get_memory_messages),
        "context": (final_retriever | format_docs),
        "question": RunnablePassthrough(), # Extract the question from the input directly

    }
    |
    merge_prompt_template
    |
    llm
)


In [198]:
query = "What is the financial capital of India?"
result = merged_chain.invoke(query) # Invoke with the query
memory.save_context({"query": query}, {"output": result.content}) # remember to save your current conversation in memory
display(Markdown(result.content))

Based on the provided context, I do not have enough information to determine the financial capital of India. However, I can infer that the context does not mention the financial capital of India explicitly.

However, since the question is about the financial capital of India, it might be related to the stock exchange in India. The context does mention the National Stock Exchange of India Limited (NSE), which is based in Mumbai. Mumbai is often considered the financial capital of India due to the presence of the Bombay Stock Exchange (BSE) and the National Stock Exchange of India (NSE), which are two of the largest stock exchanges in India.

To answer your question more accurately, I would say that Mumbai is often considered the financial capital of India. However, please note that this is an inference based on the context provided, and I do not have explicit information about the financial capital of India.

In [202]:
query = "What is the capitol of India?"
result = merged_chain.invoke(query) # Invoke with the query
memory.save_context({"query": query}, {"output": result.content}) # remember to save your current conversation in memory
display(Markdown(result.content))

The capital of India is New Delhi. This information is present in the context provided, specifically in the paragraphs that describe New Delhi as the capital of India and a union territory of the megacity of Delhi.

# Query Rephrase Logic, if ambigous

In [208]:
from langchain.schema import SystemMessage, HumanMessage



def rewrite_query(current_query, previous_query=None, previous_answer=None, domain="General"):
    """Uses LLM model to rewrite a user's query, considering past queries and answers for context resolution."""

    system_prompt = """You are an intelligent AI assistant that refines user queries for a Retrieval-Augmented Generation (RAG) system.
    - Improve clarity, completeness, and domain specificity.
    - If the previous query and its answer provide context, use them to resolve ambiguity.
    - Preserve the original intent while making the query more effective for retrieval.

    Return only the rewritten query, without extra text.
    """

    human_prompt = f"User's new query: {current_query}\n"

    if previous_query:
        human_prompt += f"Previous query: {previous_query}\n"

    if previous_answer:
        human_prompt += f"Previous answer: {previous_answer}\n"

    human_prompt += f"Domain: {domain}\n\nRewrite the query."

    # Call LLM to refine the query
    response = llm([SystemMessage(content=system_prompt), HumanMessage(content=human_prompt)])
    print ("LLM Response after query rewrite:", response.content.strip())
    return response.content.strip()


In [32]:
import re

def is_ambiguous_query_rule_based(query):
    """Returns True if the query is ambiguous based on simple rules."""

    # Short queries with no context
    if len(query.split()) <= 3:
        return True

    # Follow-up words indicating implicit reference  #Use the local language words if there is no translation service
    follow_up_words = ["this", "that", "it", "one", "those", "these", "what about", "can I use it"]
    if any(word in query.lower() for word in follow_up_words):
        return True

    # Generic queries that need expansion #Use the local language words if there is no translation service
    vague_patterns = [
        r"\bhow\b", r"\bwhy\b", r"\bwhat about\b", r"\bdoes it\b", r"\bcan it\b",
        r"\bis it\b", r"\bscaling\b", r"\bperformance\b"
    ]
    if any(re.search(pattern, query.lower()) for pattern in vague_patterns):
        return True

    return False

def is_ambiguous_query_llm(query):
    """Uses LLM to detect if a query needs rewriting."""
    system_prompt = """You are an AI assistant that detects ambiguous queries.
    If a query is vague, implicit, or requires prior context, return 'Yes'.
    Otherwise, return 'No'.
    """

    response = llm([
        SystemMessage(content=system_prompt),
        HumanMessage(content=f"Query: {query}\n\nIs this query ambiguous? (Yes/No)")
    ])

    return response.content.strip().lower() == "yes"


In [209]:
def rewrite_query_if_needed(current_query, previous_query=None, previous_answer=None, domain="General", use_llm_check=False):
    """Decides whether to rewrite the query based on ambiguity detection."""

    # First, use rule-based detection
    if not is_ambiguous_query_rule_based(current_query):
        print ("Rule based says, it's not ambiguous")
        return current_query  # Return original if clear

    # Optional: Use LLM for ambiguity check
    if use_llm_check and not is_ambiguous_query_llm(current_query):
        print ("LLM based says, it's not ambiguous")
        return current_query  # LLM says it's not ambiguous

    # If ambiguous, rewrite the query
    #print ("Calling Rewriting Module...")
    return rewrite_query(current_query, previous_query, previous_answer, domain)

In [34]:
def RAG_pipeline(user_query, conversation_history, use_llm_check=False):
    """Handles full RAG flow: query rewriting → retrieval → response generation."""

    # Retrieve previous query & answer from history
    previous_query = conversation_history[-1]['query'] if conversation_history else None
    previous_answer = conversation_history[-1]['answer'] if conversation_history else None

    # Rewrite the query only if necessary
    rewritten_query = rewrite_query_if_needed(user_query, previous_query, previous_answer, use_llm_check=use_llm_check)

    # Retrieve documents
    result = qa_rag_chain.invoke(rewritten_query)

    # Store query & answer in history
    conversation_history.append({"query": rewritten_query, "answer": result.content})
    return result.content


Lets test the new rephrase logic

In [35]:
conversation_history = []

# Example: Well-formed query (no rewriting needed)
query1 = "What is locator?"
response1 = RAG_pipeline(query1, conversation_history)
print("\n🤖 AI Response 1:\n\n", display(Markdown(response1)))


Calling Rewriting Module...


  response = llm([SystemMessage(content=system_prompt), HumanMessage(content=human_prompt)])


LLM Response after query rewrite: What is the definition of a locator in general terms.


Based on general knowledge, I found that a locator is a term used in various fields, including geography, computer science, and navigation.

In general terms, a locator is an item or device that helps to find or identify a specific location, object, or position. It provides information or a means to determine the exact position or location of something.

In geography, a locator can refer to a geographical feature such as a mountain, river, or landmark that helps to identify a location.

In computer science, a locator can be a software component or algorithm that helps to locate an object, user, or device within a network or system. Examples of locators in computer science include IP addresses, port numbers, and DNS (Domain Name System) servers.

In navigation, a locator can be a device or instrument that helps to determine one's position or location, such as a compass, GPS (Global Positioning System) device, or a map.

In general, a locator serves as a reference point or a means to identify a location or position, making it easier to navigate, communicate, or interact with the world around us.


🤖 AI Response 1:

 None


In [36]:
query1 = "How to automate them?"
response1 = RAG_pipeline(query1, conversation_history)
print("\n🤖 AI Response 1:\n", display(Markdown(response1)))

Calling Rewriting Module...
LLM Response after query rewrite: How can I automate the process of finding or identifying a specific location, object, or position using locators in computer science or navigation?


Based on the provided context, I don't see any information that directly answers your question. However, I can provide a general explanation on how to automate the process of finding or identifying a specific location, object, or position using locators in computer science or navigation.

In computer science, locators are used to identify or find specific elements or objects within a system. This can be achieved through various techniques and tools, including:

1. **Object Detection**: This involves using computer vision and machine learning algorithms to detect and identify specific objects within an image or video. This can be used in applications such as self-driving cars, surveillance systems, and robotics.
2. **Geolocation**: This involves using GPS, Wi-Fi, and other location-based services to determine the location of a device or object. This can be used in applications such as ride-hailing services, navigation systems, and location-based advertising.
3. **HTML/CSS Selectors**: These are used to locate specific elements within an HTML document, such as buttons, forms, and links. This is commonly used in web automation and web scraping applications.
4. **Mouse and Keyboard Events**: These can be used to simulate user interactions with a system, such as clicking on specific buttons or typing in specific fields.

In navigation, locators can be used to identify specific locations, such as:

1. **GPS Coordinates**: These can be used to identify the location of a device or object on a map.
2. **Address Matching**: This involves using algorithms to match a given address with a specific location on a map.
3. **Geocoding**: This involves converting a human-readable address into a set of GPS coordinates.

To automate the process of finding or identifying a specific location, object, or position using locators, you can use various tools and techniques, including:

1. **Programming languages**: Such as Python, Java, and C++, which can be used to write scripts and programs that interact with systems and perform tasks such as object detection and geolocation.
2. **APIs and libraries**: Such as OpenCV, TensorFlow, and Google Maps API, which can be used to access and manipulate data related to object detection, geolocation, and navigation.
3. **Automation frameworks**: Such as Selenium, Appium, and Robot Framework, which can be used to automate interactions with systems and perform tasks such as web scraping and navigation.

I hope this provides a general explanation of how to automate the process of finding or identifying a specific location, object, or position using locators in computer science or navigation.


🤖 AI Response 1:
 None


In [38]:
#New Example
conversation_history = []

# Example: Well-formed query (no rewriting needed)
query1 = "What is the finanacial capital of India?"
response1 = RAG_pipeline(query1, conversation_history)
print("\n🤖 AI Response 1:\n", display(Markdown(response1)))

query2 = "How to go there from Bangalore?"
response2 = RAG_pipeline(query2, conversation_history,use_llm_check=True)
print("\n🤖 AI Response 2:\n", display(Markdown(response2)))

query3 = "distance between both"
response3 = RAG_pipeline(query3, conversation_history,use_llm_check=True)
print("\n🤖 AI Response 3:\n", display(Markdown(response3)))

Calling Rewriting Module...
LLM Response after query rewrite: What is the financial capital of India?


I don't know the financial capital of India based on the provided context. The context does mention the capital of India as New Delhi and provides general information about Delhi, but it does not mention the financial capital.


🤖 AI Response 1:
 None
Calling Rewriting Module...
LLM Response after query rewrite: What is the financial capital of India and how to reach it from Bangalore?


The financial capital of India is Mumbai. 

To reach Mumbai from Bangalore, there are several options:

1. **By Air**: The fastest way to reach Mumbai from Bangalore is by taking a flight. The Bengaluru International Airport (BLR) is the nearest airport to Bangalore, and the Chhatrapati Shivaji Maharaj International Airport (BOM) is the nearest airport to Mumbai. You can take a flight from BLR to BOM, which takes approximately 1.5 hours. From the airport, you can take a taxi or a metro to reach your destination.

2. **By Train**: Another option to reach Mumbai from Bangalore is by taking a train. The Yesvantpur Junction (YPR) is the nearest railway station to Bangalore, and the Mumbai Central (BCT) is the nearest railway station to Mumbai. You can take a train from YPR to BCT, which takes approximately 14-16 hours. You can book your ticket in advance on the Indian Railways website or at the railway station.

3. **By Bus**: You can also reach Mumbai from Bangalore by taking a bus. The BMTC (Bangalore Metropolitan Transport Corporation) operates bus services from Bangalore to Mumbai. The journey takes approximately 24-30 hours, depending on the type of bus and the route taken.

4. **By Car**: If you prefer to drive, you can take a car from Bangalore to Mumbai. The journey takes approximately 14-16 hours, depending on the traffic and the route taken. You can take the National Highway 48 (NH 48) from Bangalore to Mumbai.

It's worth noting that the best option for you will depend on your budget, time constraints, and personal preferences.


🤖 AI Response 2:
 None
Calling Rewriting Module...
LLM Response after query rewrite: What are the distances between Bangalore and Mumbai by air, train, bus, and car?


Unfortunately, the provided context is empty. However, I can provide the typical distances between Bangalore and Mumbai by different modes of transport.

The distance between Bangalore and Mumbai is approximately 765 kilometers (475 miles). 

1. **Air:** The distance by air is not a physical distance but rather the flight duration, which is approximately 1 hour and 20 minutes to 1 hour and 30 minutes.
 
2. **Train:** The distance by train varies depending on the route taken. However, the approximate distance by train is also around 765 kilometers. 

3. **Bus:** The distance by bus is also around 765 kilometers. The exact distance may vary depending on the bus route taken.

4. **Car:** The distance by car is approximately 765 kilometers. The driving duration is around 12-14 hours, depending on traffic conditions and the route taken.

Please note that these distances are approximate and may vary depending on the specific route taken and mode of transport.


🤖 AI Response 3:
 None


In [39]:
conversation_history

[{'query': 'What is the financial capital of India?',
  'answer': "I don't know the financial capital of India based on the provided context. The context does mention the capital of India as New Delhi and provides general information about Delhi, but it does not mention the financial capital."},
 {'query': 'What is the financial capital of India and how to reach it from Bangalore?',
  'answer': "The financial capital of India is Mumbai. \n\nTo reach Mumbai from Bangalore, there are several options:\n\n1. **By Air**: The fastest way to reach Mumbai from Bangalore is by taking a flight. The Bengaluru International Airport (BLR) is the nearest airport to Bangalore, and the Chhatrapati Shivaji Maharaj International Airport (BOM) is the nearest airport to Mumbai. You can take a flight from BLR to BOM, which takes approximately 1.5 hours. From the airport, you can take a taxi or a metro to reach your destination.\n\n2. **By Train**: Another option to reach Mumbai from Bangalore is by taking 

In [40]:
user_query='distance between both'
previous_query='What is the best route or mode of transportation from Bangalore to Mumbai?'
previous_answer="""transportation options from Bangalore to Mumbai.\n\nTypically, the most convenient and efficient way to travel from Bangalore to Mumbai is by flight, which takes approximately 1.5 hours. You can fly from Kempegowda International Airport (BLR) in Bangalore to Chhatrapati Shivaji Maharaj International Airport (BOM) in Mumbai."""
rewritten_query = rewrite_query_if_needed(user_query, previous_query, previous_answer, use_llm_check=True)

Calling Rewriting Module...
LLM Response after query rewrite: What are the distances between Kempegowda International Airport in Bangalore and Chhatrapati Shivaji Maharaj International Airport in Mumbai?


Quick testing only snippets

In [41]:
ret = is_ambiguous_query_rule_based("Hello, I want to know about something")
print (ret)

False


In [42]:
ret2 = is_ambiguous_query_llm("Hello, I want to know about something")
print (ret2)

True


In [50]:
user_query='How to automate them ?'
previous_query='What are locators?'
previous_answer="""A general answer to know about locators."""
rewritten_query = rewrite_query_if_needed(user_query, previous_query, previous_answer, use_llm_check=True)

Calling Rewriting Module...
LLM Response after query rewrite: Automating what specific process or tasks related to locators?


In [51]:
print (rewritten_query)

Automating what specific process or tasks related to locators?


# Add Multi-turn Logic alog with repharse ambigous user queries

In [203]:
def Rephrase_Multiturn_RAG_pipeline(user_query, conversation_history, use_llm_check=False):
    """Handles full RAG flow: query rewriting → retrieval → response generation."""

    # Retrieve previous query & answer from history
    previous_query = conversation_history[-1]['query'] if conversation_history else None
    previous_answer = conversation_history[-1]['answer'] if conversation_history else None

    # Rewrite the query only if necessary
    rewritten_query = rewrite_query_if_needed(user_query, previous_query, previous_answer, use_llm_check=use_llm_check)

    # Retrieve documents
    result = merged_chain.invoke(rewritten_query)
    memory.save_context({"query": query}, {"output": result.content}) # remember to save your current conversation in memory

    # Store query & answer in history
    conversation_history.append({"query": rewritten_query, "answer": result.content})
    return result.content


In [211]:
#New Example
conversation_history = []

# Example: Well-formed query (no rewriting needed)
query1 = "What is the finanacial capital of India?"
response1 = Rephrase_Multiturn_RAG_pipeline(query1, conversation_history)
#print("\n🤖 AI Response 1:\n", display(Markdown(response1)))

query2 = "How to go there from Bangalore?"
response2 = Rephrase_Multiturn_RAG_pipeline(query2, conversation_history,use_llm_check=True)
#print("\n🤖 AI Response 2:\n", display(Markdown(response2)))

query3 = "distance between both"
response3 = Rephrase_Multiturn_RAG_pipeline(query3, conversation_history,use_llm_check=True)
#print("\n🤖 AI Response 3:\n", display(Markdown(response3)))

LLM Response after query rewrite: What is the financial capital of India?
LLM Response after query rewrite: What is the best route to travel from Bangalore to Mumbai?
LLM Response after query rewrite: What is the approximate road distance between Bangalore and Mumbai?


In [212]:
# Example: Well-formed query (no rewriting needed)
#New Example
conversation_history = []

query1 = "What is the finanacial capital of India?"
response1 = Rephrase_Multiturn_RAG_pipeline(query1, conversation_history)
#print("\n🤖 AI Response 1:\n", display(Markdown(response1)))

query2 = "How to go there from Bangalore?"
response2 = Rephrase_Multiturn_RAG_pipeline(query2, conversation_history,use_llm_check=False)
#print("\n🤖 AI Response 2:\n", display(Markdown(response2)))

query3 = "distance between both"
response3 = Rephrase_Multiturn_RAG_pipeline(query3, conversation_history,use_llm_check=False)
#print("\n🤖 AI Response 3:\n", display(Markdown(response3)))

LLM Response after query rewrite: What is the financial capital of India with a population of over 1.3 billion people?
LLM Response after query rewrite: What are the directions from Bangalore to Mumbai?
LLM Response after query rewrite: What is the approximate road distance and flight duration between Bangalore and Mumbai?


In [215]:
#New Example
conversation_history = []

# Example: Well-formed query (no rewriting needed)
query1 = "What is locators"
response1 = Rephrase_Multiturn_RAG_pipeline(query1, conversation_history,use_llm_check=False)
#print("\n🤖 AI Response 1:\n", display(Markdown(response1)))

query2 = "How to automate them"
response2 = Rephrase_Multiturn_RAG_pipeline(query2, conversation_history,use_llm_check=False)
#print("\n🤖 AI Response 2:\n", display(Markdown(response2)))



LLM Response after query rewrite: What do you mean by "locators", specifically in what context (e.g. computing, geography, etc.)?
LLM Response after query rewrite: Given the context from the previous conversation, I'm assuming you're looking to automate UI interactions. Can you provide more information on what you're trying to automate, specifically in terms of software testing or UI automation?


RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.1-8b-instant` in organization `org_01j52rz6n7emgtktsg2hy04h2x` service tier `on_demand` on tokens per minute (TPM): Limit 6000, Used 5814, Requested 675. Please try again in 4.888s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}