In [1]:
%%capture --no-stderr
%pip install --upgrade --quiet langchain langchain-community langchainhub langchain-chroma beautifulsoup4
!pip install --q langchain_google_genai

In [2]:
import os
os.environ["LANGSMITH_TRACING_V2"] = "true"
os.environ["LANGSMITH_ENDPOINT"] = "https://api.smith.langchain.com"
os.environ["LANGSMITH_API_KEY"] = "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"
os.environ["LANGSMITH_PROJECT"] = "rag-based-chatbot"
os.environ["GOOGLE_API_KEY"] = "AIzaSyARcgWkkSy9bgggJhYWcjWK85ZQP9BYAMQ"

In [3]:
import warnings
warnings.filterwarnings('ignore')

In [4]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
gemini_embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

In [5]:
from langchain_google_genai import ChatGoogleGenerativeAI
model = ChatGoogleGenerativeAI(model="gemini-1.5-flash-001",convert_system_message_to_human=True)

In [6]:
print(model.invoke("Hi").content)

Hi! What can I do for you today? 


In [7]:
import bs4
from langchain import hub

In [8]:
from langchain.chains import create_retrieval_chain

In [9]:
from langchain.chains.combine_documents import create_stuff_documents_chain

In [10]:
from langchain_chroma import Chroma

In [11]:
from langchain_community.document_loaders import WebBaseLoader



In [12]:
from langchain_core.prompts import ChatPromptTemplate

In [13]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [14]:
from langchain_core.prompts import MessagesPlaceholder

In [15]:
loader = WebBaseLoader(web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),bs_kwargs=dict(parse_only=bs4.SoupStrainer(class_=("post-content", "post-title", "post-header"))),)

In [16]:
doc = loader.load()

In [17]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(doc)

In [18]:
vectorstore = Chroma.from_documents(documents=splits, embedding=gemini_embeddings)
retriever = vectorstore.as_retriever()
retriever

VectorStoreRetriever(tags=['Chroma', 'GoogleGenerativeAIEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x7ef9d4112e50>, search_kwargs={})

In [19]:
system_prompt = (
    "You are an assistant designed to answer questions based on the provided context. "
    "Rely only on the given information to respond. "
    "If the answer isn't present in the context, simply say you don't know. "
    "Keep your response brief and to the point, using no more than three sentences."
    "\n\n"
    "{context}"
)

In [20]:
chat_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

In [21]:
question_answering_chain=create_stuff_documents_chain(model, chat_prompt)

In [22]:
rag_chain = create_retrieval_chain(retriever, question_answering_chain)

In [23]:
response = rag_chain.invoke({"input":"what is FAISS?"})

In [24]:
response["answer"]

'FAISS is a library for efficient similarity search and clustering of high-dimensional data. It applies vector quantization to partition the vector space into clusters, then refines quantization within each cluster. FAISS performs search by first looking for cluster candidates with coarse quantization and then further refining the search within each cluster. '

In [25]:
import time
import statistics

def measure_baseline(query, runs=10):
    times = []
    for i in range(runs):
        start_time = time.time()
        # Invoke the chain with an empty chat_history list as required by the template.
        _ = rag_chain.invoke({"input": query, "chat_history": []})
        end_time = time.time()
        elapsed_time = end_time - start_time
        print(f"Run {i+1}: {elapsed_time:.3f} seconds")
        times.append(elapsed_time)
    # Calculate and return the average time over all runs
    return statistics.mean(times)

# Specify the query for baseline measurement.
baseline_query = "what is FAISS?"
baseline_time = measure_baseline(baseline_query, runs=10)
print("\nBaseline time for query '{}': {:.3f} seconds".format(baseline_query, baseline_time))

Run 1: 1.033 seconds
Run 2: 1.008 seconds
Run 3: 1.107 seconds
Run 4: 1.079 seconds
Run 5: 0.830 seconds
Run 6: 1.181 seconds
Run 7: 1.024 seconds
Run 8: 1.141 seconds
Run 9: 1.575 seconds
Run 10: 0.993 seconds

Baseline time for query 'what is FAISS?': 1.097 seconds


In [26]:
from langchain.chains import create_history_aware_retriever

In [27]:
retriever_prompt = (
    "Given a chat history and the user's latest question, which may refer to previous messages, "
    "rephrase the question into a standalone version that makes sense without the prior context. "
    "Do not answer the question—only reformulate it if necessary, or return it unchanged if it's already clear."
)

In [28]:
contextualize_q_prompt  = ChatPromptTemplate.from_messages(
    [
        ("system", retriever_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),


     ]
)

In [29]:
history_aware_retriever = create_history_aware_retriever(model,retriever,contextualize_q_prompt)

In [30]:
from langchain.chains import create_retrieval_chain

In [31]:
from langchain.chains.combine_documents import create_stuff_documents_chain

In [32]:
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

In [33]:
question_answer_chain = create_stuff_documents_chain(model, qa_prompt)

In [34]:
rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

In [35]:
from langchain_core.messages import HumanMessage, AIMessage

In [36]:
chat_history = []

In [37]:
question1 = "what is Task Decomposition?"

In [38]:
message1= rag_chain.invoke({"input": question1, "chat_history": chat_history})

In [39]:
message1["answer"]

"Task decomposition is a technique for breaking down complex tasks into smaller, simpler steps. It can be done by an LLM using simple prompting, task-specific instructions, or human input. The goal is to make tasks more manageable and provide insights into the model's thinking process. "

In [40]:
chat_history.extend(
    [
        HumanMessage(content=question1),
        AIMessage(content=message1["answer"]),
    ]
)

In [41]:
chat_history

[HumanMessage(content='what is Task Decomposition?', additional_kwargs={}, response_metadata={}),
 AIMessage(content="Task decomposition is a technique for breaking down complex tasks into smaller, simpler steps. It can be done by an LLM using simple prompting, task-specific instructions, or human input. The goal is to make tasks more manageable and provide insights into the model's thinking process. ", additional_kwargs={}, response_metadata={})]

In [42]:
second_question = "What are common ways of doing it?"
message2 = rag_chain.invoke({"input": second_question, "chat_history": chat_history})

print(message2["answer"])

The provided text describes three common ways of doing task decomposition:

1. **LLM with simple prompting:**  Using prompts like "Steps for XYZ.\n1." or "What are the subgoals for achieving XYZ?"
2. **Task-specific instructions:** Providing instructions tailored to the task, such as "Write a story outline." for writing a novel.
3. **Human inputs:** Directly providing the task decomposition steps to the system. 


In [43]:
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

In [44]:
store = {}

In [45]:
def get_session_history(session_id: str) -> BaseChatMessageHistory:
    if session_id not in store:
        store[session_id] = ChatMessageHistory()
    return store[session_id]

In [46]:
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)

In [48]:
conversational_rag_chain.invoke(
    {"input": "What is Task Decomposition?"},
    config={
        "configurable": {"session_id": "abc123"}
    },  # constructs a key "abc123" in `store`.
)["answer"]

'Task decomposition is the process of breaking down a complex task into smaller, more manageable steps. This can be done using chain of thought (CoT) prompting, which instructs the model to "think step by step" to utilize more test-time computation.  Tree of Thoughts (ToT) extends CoT by exploring multiple reasoning possibilities at each step, creating a tree structure. '

In [49]:
store

{'abc123': InMemoryChatMessageHistory(messages=[HumanMessage(content='What is Task Decomposition?', additional_kwargs={}, response_metadata={}), AIMessage(content='Task decomposition is the process of breaking down a complex task into smaller, more manageable steps. This can be done using chain of thought (CoT) prompting, which instructs the model to "think step by step" to utilize more test-time computation.  Tree of Thoughts (ToT) extends CoT by exploring multiple reasoning possibilities at each step, creating a tree structure. ', additional_kwargs={}, response_metadata={}), HumanMessage(content='What is Task Decomposition?', additional_kwargs={}, response_metadata={}), AIMessage(content='Task decomposition is the process of breaking down a complex task into smaller, more manageable steps. This can be done using chain of thought (CoT) prompting, which instructs the model to "think step by step" to utilize more test-time computation.  Tree of Thoughts (ToT) extends CoT by exploring mu

In [50]:
conversational_rag_chain.invoke(
    {"input": "What are common ways of doing it?"},
    config={"configurable": {"session_id": "abc123"}},
)["answer"]

'The provided text mentions three common ways of doing task decomposition:\n\n1. **LLM with simple prompting:**  Using prompts like "Steps for XYZ.\\n1." or "What are the subgoals for achieving XYZ?" to guide the LLM.\n2. **Task-specific instructions:**  Providing instructions tailored to the task, such as "Write a story outline" for writing a novel.\n3. **Human inputs:**  Involving human input to decompose the task. '

In [51]:
for message in store["abc123"].messages:
    if isinstance(message, AIMessage):
        prefix = "AI"
    else:
        prefix = "User"

    print(f"{prefix}: {message.content}\n")

User: What is Task Decomposition?

AI: Task decomposition is the process of breaking down a complex task into smaller, more manageable steps. This can be done using chain of thought (CoT) prompting, which instructs the model to "think step by step" to utilize more test-time computation.  Tree of Thoughts (ToT) extends CoT by exploring multiple reasoning possibilities at each step, creating a tree structure. 

User: What is Task Decomposition?

AI: Task decomposition is the process of breaking down a complex task into smaller, more manageable steps. This can be done using chain of thought (CoT) prompting, which instructs the model to "think step by step" to utilize more test-time computation.  Tree of Thoughts (ToT) extends CoT by exploring multiple reasoning possibilities at each step, creating a tree structure. 

User: What are common ways of doing it?

AI: The provided text mentions three common ways of doing task decomposition:

1. **LLM with simple prompting:**  Using prompts like 

In [52]:
conversational_rag_chain.invoke(
    {"input": "What is a prompt technique like step xyz?"},
    config={"configurable": {"session_id": "abc123"}},
)["answer"]

'The prompt "Steps for XYZ.\\n1." is an example of a simple prompting technique used for task decomposition. It instructs the LLM to generate a list of steps for completing the task represented by "XYZ."  The "1." at the end indicates that the LLM should start listing the steps from number one. '

In [53]:
store

{'abc123': InMemoryChatMessageHistory(messages=[HumanMessage(content='What is Task Decomposition?', additional_kwargs={}, response_metadata={}), AIMessage(content='Task decomposition is the process of breaking down a complex task into smaller, more manageable steps. This can be done using chain of thought (CoT) prompting, which instructs the model to "think step by step" to utilize more test-time computation.  Tree of Thoughts (ToT) extends CoT by exploring multiple reasoning possibilities at each step, creating a tree structure. ', additional_kwargs={}, response_metadata={}), HumanMessage(content='What is Task Decomposition?', additional_kwargs={}, response_metadata={}), AIMessage(content='Task decomposition is the process of breaking down a complex task into smaller, more manageable steps. This can be done using chain of thought (CoT) prompting, which instructs the model to "think step by step" to utilize more test-time computation.  Tree of Thoughts (ToT) extends CoT by exploring mu

In [54]:
for message in store["abc123"].messages:
    if isinstance(message, AIMessage):
        prefix = "AI"
    else:
        prefix = "User"

    print(f"{prefix}: {message.content}\n")

User: What is Task Decomposition?

AI: Task decomposition is the process of breaking down a complex task into smaller, more manageable steps. This can be done using chain of thought (CoT) prompting, which instructs the model to "think step by step" to utilize more test-time computation.  Tree of Thoughts (ToT) extends CoT by exploring multiple reasoning possibilities at each step, creating a tree structure. 

User: What is Task Decomposition?

AI: Task decomposition is the process of breaking down a complex task into smaller, more manageable steps. This can be done using chain of thought (CoT) prompting, which instructs the model to "think step by step" to utilize more test-time computation.  Tree of Thoughts (ToT) extends CoT by exploring multiple reasoning possibilities at each step, creating a tree structure. 

User: What are common ways of doing it?

AI: The provided text mentions three common ways of doing task decomposition:

1. **LLM with simple prompting:**  Using prompts like 

Response Accuracy Improvement

In [56]:
import time

# --- Current version measurement ---
start_time = time.time()
# Supply an empty chat_history as a list rather than a string.
response = rag_chain.invoke({"input": "what is FAISS?", "chat_history": []})
end_time = time.time()
processing_time = end_time - start_time

print("Current processing time for 'what is FAISS?': {:.3f} seconds".format(processing_time))

# Assuming a recorded baseline from an earlier version
# baseline_time = 2.0  # seconds, replace with your actual baseline value
time_reduction = ((baseline_time - processing_time) / baseline_time) * 100
print("Processing time reduction: {:.2f}%".format(time_reduction))

Current processing time for 'what is FAISS?': 0.853 seconds
Processing time reduction: 22.26%


In [57]:
import time
from statistics import mean
from langchain_core.messages import HumanMessage, AIMessage

# Define a set of test queries.
test_queries = [
    "what is FAISS?",
    "what is Task Decomposition?",
    "What are common ways of doing task decomposition?",
    "What is a prompt technique like step xyz?",
]

# Updated evaluation function:
def evaluate_chain(chain, queries, chat_history=None, session_id=None):
    response_times = []  # in seconds
    answer_lengths = []  # number of words in the answer
    successes = 0
    total = len(queries)

    # If no chat_history is provided, use an empty list.
    if chat_history is None:
        chat_history = []

    for query in queries:
        start_time = time.time()
        # Prepare the payload.
        # Some chain prompt templates expect "chat_history" even for basic chains.
        payload = {"input": query, "chat_history": chat_history}

        # Use session-based configuration if a session_id is provided.
        if session_id:
            response = chain.invoke(payload, config={"configurable": {"session_id": session_id}})
        else:
            response = chain.invoke(payload)

        elapsed = time.time() - start_time
        response_times.append(elapsed)

        # Extract answer text and compute its word count.
        answer_text = response.get("answer", "").strip()
        answer_word_count = len(answer_text.split())
        answer_lengths.append(answer_word_count)

        # Consider an answer successful if it contains text and isn't a default "don't know" answer.
        if answer_text and "don't know" not in answer_text.lower():
            successes += 1

        # For conversational chains, update the history.
        if session_id:
            chat_history.extend([HumanMessage(content=query), AIMessage(content=answer_text)])

        # Log details for each query.
        print(f"Query: '{query}'\n-> Response: '{answer_text}'\n-> Time: {elapsed:.2f} sec, Words: {answer_word_count}\n")

    # Aggregate statistics.
    stats = {
        "total_queries": total,
        "avg_response_time_sec": mean(response_times) if response_times else 0,
        "min_response_time_sec": min(response_times) if response_times else 0,
        "max_response_time_sec": max(response_times) if response_times else 0,
        "avg_word_count": mean(answer_lengths) if answer_lengths else 0,
        "success_rate_percent": (successes / total) * 100,
    }
    return stats

# --- Example Usage ---

print("Evaluating Basic RAG Chain:")
basic_stats = evaluate_chain(rag_chain, test_queries)
print("Basic Chain Metrics:", basic_stats)

print("\nEvaluating Conversational RAG Chain with session_id 'abc123':")
# Initialize a fresh chat history for a conversational evaluation:
chat_history = []
conversational_stats = evaluate_chain(conversational_rag_chain, test_queries, chat_history, session_id="abc123")
print("Conversational Chain Metrics:", conversational_stats)


Evaluating Basic RAG Chain:
Query: 'what is FAISS?'
-> Response: 'FAISS is a library for efficient similarity search and clustering of high-dimensional data. It uses vector quantization to partition the vector space into clusters and then refines the quantization within clusters.  FAISS first looks for cluster candidates with coarse quantization and then further looks into each cluster with finer quantization.'
-> Time: 0.87 sec, Words: 50

Query: 'what is Task Decomposition?'
-> Response: 'Task decomposition is the process of breaking down a complex task into smaller, more manageable steps. It can be done by an LLM using prompting techniques like "Steps for XYZ.\n1.", by using task-specific instructions, or with human input.'
-> Time: 0.83 sec, Words: 38

Query: 'What are common ways of doing task decomposition?'
-> Response: 'Task decomposition can be done in three ways: (1) by using simple prompts, (2) by using task-specific instructions, or (3) with human inputs.'
-> Time: 0.80 sec