In [41]:
# --- LangChain and LLM Imports ---
from langchain_openai import ChatOpenAI 
from langchain_groq import ChatGroq

# --- Document Loading and Vector Store ---
from langchain.document_loaders import PyPDFLoader
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings 

# --- Prompting and Document Utilities ---
from langchain.prompts import PromptTemplate
from langchain.docstore.document import Document
from langchain.chains.summarize import load_summarize_chain

# --- Core and Output Parsers ---
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.runnables.graph import MermaidDrawMethod

# --- LangGraph for Workflow Graphs ---
from langgraph.graph import END, StateGraph

# --- Standard Library Imports ---
from time import monotonic
from dotenv import load_dotenv
from pprint import pprint
import os

# --- Datasets and Typing ---
from datasets import Dataset
from typing_extensions import TypedDict
from IPython.display import display, Image
from typing import List, TypedDict
from ragas import evaluate
from ragas.metrics import (
    answer_correctness,
    faithfulness,
    answer_relevancy,
    context_recall,
    answer_similarity
)
from pathlib import Path

import langgraph



# --- Load environment variables (e.g., API keys) ---
# The .env file is looked up in the resolved current working directory.
_dotenv_file = Path().resolve() / ".env"
load_dotenv(dotenv_path=_dotenv_file, override=True)


True

In [42]:
# Make the Google API key available through the process environment (GOOGLE_API_KEY).
# os.environ only accepts string values, so assigning os.getenv(...) directly raises a
# TypeError when the variable is missing; export it only when it is actually set.
google_api_key = os.getenv("GOOGLE_API_KEY")
if google_api_key:
    os.environ["GOOGLE_API_KEY"] = google_api_key
else:
    print("Warning: GOOGLE_API_KEY is not set; cells that call Gemini models will fail to authenticate.")

# Retrieve the Groq API key from environment variable (for use by Groq LLMs)
groq_api_key = os.getenv('GROQ_API_KEY')

In [43]:
# Path to the Harry Potter PDF file.
# This variable is used throughout the notebook for loading and processing the book.
# Set the HP_PDF_PATH environment variable (e.g. in .env) to point at your own copy;
# the original hard-coded location is kept as the fallback so existing setups still work.
hp_pdf_path = os.getenv(
    "HP_PDF_PATH",
    r"C:\Users\NAFEES J\Downloads\Harry Potter - Book 1 - The Sorcerers Stone.pdf",
)

In [8]:
from helper_functions import split_into_chapters, replace_t_with_space
# --- Split the PDF into chapters and preprocess the text ---

# Build the chapter list in one pass:
#   - split_into_chapters() receives the PDF path and produces the per-chapter documents
#   - replace_t_with_space() then cleans unwanted characters (e.g., '\t') in each of them
chapters = replace_t_with_space(split_into_chapters(hp_pdf_path))

# Sanity check: print how many chapters were extracted.
print(len(chapters))

a
17


In [18]:
import importlib
import helper_functions
# Re-import helper_functions so edits made to the module are picked up without a kernel restart.
importlib.reload(helper_functions)

# --- Load and Preprocess the PDF, then Extract Quotes ---

# Step 1: read the PDF from disk.
loader = PyPDFLoader(hp_pdf_path)
document = loader.load()

# Step 2: clean the loaded documents (remove tabs) and report how many there are.
document_cleaned = helper_functions.replace_t_with_space(document)
print(f"📄 document_cleaned length: {len(document_cleaned)}")

# Step 3: extract the quotes from the cleaned documents as Document objects.
book_quotes_list = helper_functions.extract_book_quotes_as_documents(document_cleaned)

print(f"✅ Quotes Extracted: {len(book_quotes_list)}")

a
📄 document_cleaned length: 221
abc
Found 2 quotes on one doc
Found 10 quotes on one doc
Found 9 quotes on one doc
Found 13 quotes on one doc
Found 10 quotes on one doc
Found 9 quotes on one doc
Found 7 quotes on one doc
Found 3 quotes on one doc
Found 2 quotes on one doc
Found 8 quotes on one doc
Found 8 quotes on one doc
Found 3 quotes on one doc
Found 1 quotes on one doc
Found 6 quotes on one doc
Found 3 quotes on one doc
Found 5 quotes on one doc
Found 5 quotes on one doc
Found 10 quotes on one doc
Found 2 quotes on one doc
Found 4 quotes on one doc
Found 4 quotes on one doc
Found 4 quotes on one doc
Found 3 quotes on one doc
Found 5 quotes on one doc
Found 5 quotes on one doc
Found 11 quotes on one doc
Found 5 quotes on one doc
Found 7 quotes on one doc
Found 9 quotes on one doc
Found 6 quotes on one doc
Found 4 quotes on one doc
Found 10 quotes on one doc
Found 2 quotes on one doc
Found 5 quotes on one doc
Found 9 quotes on one doc
Found 10 quotes on one doc
Found 3 quotes on on

In [19]:
# --- Summarization Prompt Template for LLM-based Summarization ---

# Prompt text for summarization: instructs the language model to write an
# extensive summary of whatever is substituted for the {text} placeholder.
summarization_prompt_template = """Write an extensive summary of the following:

{text}

SUMMARY:"""

# Wrap the text in a PromptTemplate whose single input variable is "text".
summarization_prompt = PromptTemplate(
    input_variables=["text"],
    template=summarization_prompt_template,
)

In [50]:
from langchain_google_genai import ChatGoogleGenerativeAI
importlib.reload(helper_functions)


# Gemini doesn't have an official tokenizer, so we use a rough heuristic
def num_tokens_from_string(string: str, model_name: str) -> int:
    """
    Estimate the number of tokens in a string at roughly 4 characters per token.

    Args:
        string: The text to measure.
        model_name: Accepted for call-site compatibility; the estimate does not use it.

    Returns:
        The length of `string` integer-divided by 4.
    """
    estimated_tokens, _leftover_chars = divmod(len(string), 4)
    return estimated_tokens

def create_chapter_summary(chapter):
    """
    Creates a summary of a chapter using a large language model (LLM).

    Chapters whose estimated token count is below the per-call budget are
    summarized in a single "stuff" call. Longer chapters are split into chunks
    and summarized with a "map_reduce" chain, so each map call only receives
    part of the chapter (passing the whole chapter as one document would make
    the map step process the full text in a single call).

    Args:
        chapter: A Document object representing the chapter to summarize.

    Returns:
        A Document object containing the summary of the chapter, carrying a
        copy of the chapter's metadata.
    """

    # Extract the text content from the chapter
    chapter_txt = chapter.page_content

    # Specify the LLM model and configuration
    model_name = "gemini-2.5-pro"
    llm = ChatGoogleGenerativeAI(
        model=model_name,
        temperature=0,
        convert_system_message_to_human=True
    )
    max_tokens_per_call = 16000  # Estimated-token budget for a single summarization call
    chars_per_token = 4  # Same rough ratio used by num_tokens_from_string
    verbose = False  # Set to True for more detailed output

    # Estimate the number of tokens in the chapter text
    num_tokens = num_tokens_from_string(chapter_txt, model_name)

    # Choose the summarization chain type (and its input documents) based on token count
    if num_tokens < max_tokens_per_call:
        # For shorter chapters, use the "stuff" chain type on the whole chapter
        chain = load_summarize_chain(
            llm,
            chain_type="stuff",
            prompt=summarization_prompt,
            verbose=verbose
        )
        chapter_docs = [Document(page_content=chapter_txt)]
    else:
        # For longer chapters, use the "map_reduce" chain type
        chain = load_summarize_chain(
            llm,
            chain_type="map_reduce",
            map_prompt=summarization_prompt,
            combine_prompt=summarization_prompt,
            verbose=verbose
        )
        # Split the chapter so every map call stays well inside the budget;
        # half the budget (in characters) leaves room for the prompt text itself.
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=(max_tokens_per_call * chars_per_token) // 2,
            chunk_overlap=200,
            length_function=len
        )
        chapter_docs = splitter.create_documents([chapter_txt])

    # Start timer to measure summarization time
    start_time = monotonic()

    # Generate the summary using the selected chain
    summary_result = chain.invoke({"input_documents": chapter_docs})

    # Print chain type and execution time for reference
    print(f"Chain type: {chain.__class__.__name__}")
    print(f"Run time: {monotonic() - start_time}")

    # Clean up the summary text (remove double newlines, etc.)
    summary_text = helper_functions.replace_double_lines_with_one_line(summary_result["output_text"])

    # Create a Document object for the summary. The metadata dict is copied so that
    # later changes to the summary's metadata do not alter the source chapter's.
    doc_summary = Document(page_content=summary_text, metadata=dict(chapter.metadata))

    return doc_summary


In [41]:
import pickle
from pathlib import Path

# Where to save the checkpoint
checkpoint_path = Path("chapter_summaries_checkpoint.pkl")

# Resume from a previous run if a checkpoint exists.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only load
# checkpoint files that this notebook wrote itself.
if checkpoint_path.exists():
    with open(checkpoint_path, "rb") as f:
        chapter_summaries = pickle.load(f)
else:
    chapter_summaries = []

# Summaries are appended in chapter order, so the next chapter to process is
# the one whose index equals the number of summaries already stored.
start_index = len(chapter_summaries)

# Set how many chapters to process per run
max_chapters_per_run = 10
# Derive the total from the data instead of hard-coding it, so indexing below
# can never run past the end of `chapters` and no chapter is silently skipped.
total_chapters = len(chapters)
end_index = min(start_index + max_chapters_per_run, total_chapters)

# Process chapters from start_index to end_index - 1
for i in range(start_index, end_index):
    chapter = chapters[i]
    print(f"Processing chapter {i+1} of {total_chapters}")

    try:
        summary = create_chapter_summary(chapter)
        chapter_summaries.append(summary)
    except Exception as e:
        # Stop at the first failure so the checkpoint stays contiguous; re-running
        # the cell resumes from this chapter.
        print(f"Error processing chapter {i+1}: {e}")
        break

    # Save checkpoint after each chapter for safety
    with open(checkpoint_path, "wb") as f:
        pickle.dump(chapter_summaries, f)
    print(f"Checkpoint saved at chapter {i+1}")


Processing chapter 14 of 17




Chain type: StuffDocumentsChain
Run time: 38.26162130001467
Checkpoint saved at chapter 14
Processing chapter 15 of 17




Chain type: StuffDocumentsChain
Run time: 42.91590920003364
Checkpoint saved at chapter 15
Processing chapter 16 of 17




Chain type: StuffDocumentsChain
Run time: 46.75584999995772
Checkpoint saved at chapter 16
Processing chapter 17 of 17




Chain type: StuffDocumentsChain
Run time: 45.50914509996073
Checkpoint saved at chapter 17


In [42]:
from langchain_community.embeddings import HuggingFaceEmbeddings

def encode_book(path, chunk_size=1000, chunk_overlap=200):
    """
    Build a FAISS vector store from a PDF book using HuggingFace embeddings.

    The PDF is loaded, split into overlapping chunks, cleaned with
    replace_t_with_space, and then embedded.

    Args:
        path (str): The path to the PDF file.
        chunk_size (int): The desired size of each text chunk (measured with len).
        chunk_overlap (int): The amount of overlap between consecutive chunks.

    Returns:
        FAISS: A FAISS vector store containing the encoded book content.
    """
    # Load the PDF.
    pages = PyPDFLoader(path).load()

    # Split into chunks, then clean up the chunk text (replace unwanted characters).
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    cleaned_chunks = replace_t_with_space(splitter.split_documents(pages))

    # Embed the cleaned chunks and index them in FAISS.
    embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return FAISS.from_documents(cleaned_chunks, embedder)


In [43]:
def encode_chapter_summaries(chapter_summaries):
    """
    Build a FAISS vector store from chapter summaries using HuggingFace embeddings.

    Args:
        chapter_summaries (list): A list of Document objects representing the chapter summaries.

    Returns:
        FAISS: A FAISS vector store containing the encoded chapter summaries.
    """
    embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return FAISS.from_documents(chapter_summaries, embedder)

In [44]:
def encode_quotes(book_quotes_list):
    """
    Build a FAISS vector store from book quotes using HuggingFace embeddings.

    Args:
        book_quotes_list (list): A list of Document objects, each representing a quote from the book.

    Returns:
        FAISS: A FAISS vector store containing the encoded book quotes.
    """
    embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return FAISS.from_documents(book_quotes_list, embedder)

In [3]:
from langchain_community.embeddings import HuggingFaceEmbeddings
# --- Create or Load Vector Stores for Book Chunks, Chapter Summaries, and Book Quotes ---

# Load the three vector stores from disk only when ALL of their directories exist;
# otherwise rebuild all three from scratch and save them.
if all(
    os.path.exists(store_dir)
    for store_dir in (
        "chunks_vector_store",
        "chapter_summaries_vector_store",
        "book_quotes_vectorstore",
    )
):
    # The stores were built with this embedding model, so the same model is used to load them.
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    chunks_vector_store = FAISS.load_local(
        "chunks_vector_store",
        embeddings,
        allow_dangerous_deserialization=True,
    )
    chapter_summaries_vector_store = FAISS.load_local(
        "chapter_summaries_vector_store",
        embeddings,
        allow_dangerous_deserialization=True,
    )
    book_quotes_vectorstore = FAISS.load_local(
        "book_quotes_vectorstore",
        embeddings,
        allow_dangerous_deserialization=True,
    )
else:
    print("not")

    # Encode everything first: book chunks, chapter summaries, then book quotes.
    chunks_vector_store = encode_book(hp_pdf_path, chunk_size=1000, chunk_overlap=200)
    chapter_summaries_vector_store = encode_chapter_summaries(chapter_summaries)
    book_quotes_vectorstore = encode_quotes(book_quotes_list)

    # Then persist each store to disk for future runs.
    chunks_vector_store.save_local("chunks_vector_store")
    chapter_summaries_vector_store.save_local("chapter_summaries_vector_store")
    book_quotes_vectorstore.save_local("book_quotes_vectorstore")


  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


In [4]:
# --- Create Query Retrievers from Vector Stores ---

# Each retriever fetches the documents most relevant to a query from its vector
# store; search_kwargs["k"] sets how many results are returned.

# Book chunks: top 1 result
chunks_query_retriever = chunks_vector_store.as_retriever(
    search_kwargs=dict(k=1),
)

# Chapter summaries: top 1 result
chapter_summaries_query_retriever = chapter_summaries_vector_store.as_retriever(
    search_kwargs=dict(k=1),
)

# Book quotes: top 10 results
book_quotes_query_retriever = book_quotes_vectorstore.as_retriever(
    search_kwargs=dict(k=10),
)

In [44]:
import importlib
import helper_functions
importlib.reload(helper_functions)
def retrieve_context_per_question(state):
    """
    Retrieves relevant context for a given question by aggregating content from:
    - Book chunks
    - Chapter summaries
    - Book quotes

    The three sources are joined with a single space so that the last word of
    one source does not fuse with the first word of the next.

    Args:
        state (dict): A dictionary containing the question to answer, with key "question".

    Returns:
        dict: A dictionary with keys:
            - "context": Aggregated context string from all sources, passed
              through helper_functions.escape_quotes.
            - "question": The original question.
    """
    question = state["question"]

    # Retrieve relevant book chunks
    print("Retrieving relevant chunks...")
    # Retrievers are invoked through the Runnable interface; get_relevant_documents is deprecated.
    docs = chunks_query_retriever.invoke(question)
    context = " ".join(doc.page_content for doc in docs)

    # Retrieve relevant chapter summaries
    print("Retrieving relevant chapter summaries...")
    docs_summaries = chapter_summaries_query_retriever.invoke(question)
    # .get() keeps retrieval working if a summary document carries no "chapter" metadata.
    context_summaries = " ".join(
        f"{doc.page_content} (Chapter {doc.metadata.get('chapter', 'unknown')})" for doc in docs_summaries
    )

    # Retrieve relevant book quotes
    print("Retrieving relevant book quotes...")
    docs_book_quotes = book_quotes_query_retriever.invoke(question)
    book_quotes = " ".join(doc.page_content for doc in docs_book_quotes)

    # Aggregate the non-empty contexts (space-separated) and escape problematic characters
    all_contexts = " ".join(
        part for part in (context, context_summaries, book_quotes) if part
    )
    all_contexts = helper_functions.escape_quotes(all_contexts)

    return {"context": all_contexts, "question": question}

In [45]:
from langchain_google_genai import ChatGoogleGenerativeAI
importlib.reload(helper_functions)
# --- Output schema ---
# Structured result requested from the LLM: one string holding the filtered content.
class KeepRelevantContent(BaseModel):
    relevant_content: str = Field(
        description="The relevant content from the retrieved documents that is relevant to the query."
    )

# --- LLM ---
keep_only_relevant_content_llm = ChatGoogleGenerativeAI(
    temperature=0,
    model="gemini-2.5-pro",
    convert_system_message_to_human=True,
)

# --- Prompt template ---
# Placeholders: {query} (used twice) and {retrieved_documents}.
keep_only_relevant_content_prompt_template = """
You receive a query: {query} and retrieved documents: {retrieved_documents} from a vector store.
You need to filter out all the non relevant information that doesn't supply important information regarding the {query}.
Your goal is just to filter out the non relevant information.
You can remove parts of sentences that are not relevant to the query or remove whole sentences that are not relevant to the query.
DO NOT ADD ANY NEW INFORMATION THAT IS NOT IN THE RETRIEVED DOCUMENTS.
Output the filtered relevant content.
"""

keep_only_relevant_content_prompt = PromptTemplate(
    input_variables=["query", "retrieved_documents"],
    template=keep_only_relevant_content_prompt_template,
)

# --- Chain: prompt -> LLM constrained to the KeepRelevantContent schema ---
_keep_relevant_structured_llm = keep_only_relevant_content_llm.with_structured_output(KeepRelevantContent)
keep_only_relevant_content_chain = keep_only_relevant_content_prompt | _keep_relevant_structured_llm

# --- Filtering function ---
def keep_only_relevant_content(state):
    """
    Asks the LLM to keep only the parts of the retrieved context that are relevant to the question.

    Args:
        state (dict): A dictionary containing "question" and "context".

    Returns:
        dict: A dictionary with keys:
            - "relevant_context": The filtered content, passed through helper_functions.escape_quotes.
            - "context": The unmodified input context.
            - "question": The original question.
    """
    question, context = state["question"], state["context"]

    print("Keeping only the relevant content...")
    pprint("--------------------")
    result = keep_only_relevant_content_chain.invoke(
        {"query": question, "retrieved_documents": context}
    )

    # "".join leaves a plain string unchanged; the result is then escaped.
    filtered = helper_functions.escape_quotes("".join(result.relevant_content))

    return {"relevant_context": filtered, "context": context, "question": question}

In [46]:
# --- LLM-based Function to Rewrite a Question for Better Vectorstore Retrieval ---

# Prompt text for question rewriting; {format_instructions} is filled in from the JSON parser below.
rewrite_prompt_template = """
You are a question re-writer that converts an input question to a better version optimized for vectorstore retrieval.
Analyze the input question {question}  like it maybe a trick way to answer a simple question and try to reason about the underlying semantic intent / meaning.
{format_instructions}
"""

class RewriteQuestion(BaseModel):
    """
    Output schema for the rewritten question.
    """
    rewritten_question: str = Field(description="The improved question optimized for vectorstore retrieval.")
    explanation: str = Field(description="The explanation of the rewritten question.")

# JSON parser built from the output schema
rewrite_question_string_parser = JsonOutputParser(pydantic_object=RewriteQuestion)

# Prompt object: "question" is supplied per call, the format instructions are fixed up front
rewrite_prompt = PromptTemplate(
    input_variables=["question"],
    partial_variables={
        "format_instructions": rewrite_question_string_parser.get_format_instructions(),
    },
    template=rewrite_prompt_template,
)

# LLM used for rewriting questions
rewrite_llm = ChatGroq(
    model_name="llama3-70b-8192",
    groq_api_key=groq_api_key,
    temperature=0,
    max_tokens=4000,
)

# Chain: prompt -> LLM -> JSON parser
question_rewriter = rewrite_prompt | rewrite_llm | rewrite_question_string_parser

def rewrite_question(state):
    """
    Uses the question_rewriter chain to rephrase the question for vectorstore retrieval.

    Args:
        state (dict): A dictionary containing the question to rewrite, with key "question".

    Returns:
        dict: A dictionary with the rewritten question under the key "question".
    """
    original_question = state["question"]
    print("Rewriting the question...")
    rewritten = question_rewriter.invoke({"question": original_question})
    return {"question": rewritten["rewritten_question"]}

In [55]:

# --- LLM-based Function to Answer a Question from Context Using Chain-of-Thought Reasoning ---

# Prompt text: three worked chain-of-thought examples followed by the {context}
# and {question} placeholders for the actual query.
question_answer_cot_prompt_template = """ 
Examples of Chain-of-Thought Reasoning

Example 1

Context: Mary is taller than Jane. Jane is shorter than Tom. Tom is the same height as David.
Question: Who is the tallest person?
Reasoning Chain:
The context tells us Mary is taller than Jane
It also says Jane is shorter than Tom
And Tom is the same height as David
So the order from tallest to shortest is: Mary, Tom/David, Jane
Therefore, Mary must be the tallest person

Example 2
Context: Harry was reading a book about magic spells. One spell allowed the caster to turn a person into an animal for a short time. Another spell could levitate objects.
 A third spell created a bright light at the end of the caster's wand.
Question: Based on the context, if Harry cast these spells, what could he do?
Reasoning Chain:
The context describes three different magic spells
The first spell allows turning a person into an animal temporarily
The second spell can levitate or float objects
The third spell creates a bright light
If Harry cast these spells, he could turn someone into an animal for a while, make objects float, and create a bright light source
So based on the context, if Harry cast these spells he could transform people, levitate things, and illuminate an area
Instructions.

Example 3 
Context: Harry Potter woke up on his birthday to find a present at the end of his bed. He excitedly opened it to reveal a Nimbus 2000 broomstick.
Question: Why did Harry receive a broomstick for his birthday?
Reasoning Chain:
The context states that Harry Potter woke up on his birthday and received a present - a Nimbus 2000 broomstick.
However, the context does not provide any information about why he received that specific present or who gave it to him.
There are no details about Harry's interests, hobbies, or the person who gifted him the broomstick.
Without any additional context about Harry's background or the gift-giver's motivations, there is no way to determine the reason he received a broomstick as a birthday present.

For the question below, provide your answer by first showing your step-by-step reasoning process, breaking down the problem into a chain of thought before arriving at the final answer,
 just like in the previous examples.
Context
{context}
Question
{question}
"""

# Prompt object built from the template above
question_answer_from_context_cot_prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=question_answer_cot_prompt_template,
)

# Output schema for the answer
class QuestionAnswerFromContext(BaseModel):
    answer_based_on_content: str = Field(
        description="Generates an answer to a query based on a given context."
    )


# LLM used for answering questions
question_answer_from_context_llm = ChatGoogleGenerativeAI(
    temperature=0,
    model="gemini-2.5-pro",
    convert_system_message_to_human=True,
)

# Chain: prompt -> LLM constrained to the QuestionAnswerFromContext schema
_qa_structured_llm = question_answer_from_context_llm.with_structured_output(QuestionAnswerFromContext)
question_answer_from_context_cot_chain = question_answer_from_context_cot_prompt | _qa_structured_llm

def answer_question_from_context(state):
    """
    Produces an answer to the question from the available context via the chain-of-thought chain.

    Args:
        state (dict): A dictionary containing:
            - "question": The query question.
            - "aggregated_context" (preferred when present) or "context": The text to answer from.

    Returns:
        dict: A dictionary containing:
            - "answer": The answer extracted from the chain's structured output.
            - "context": The context that was used.
            - "question": The original question.
    """
    question = state["question"]

    # Prefer 'aggregated_context' when the state carries it; otherwise use 'context'.
    if "aggregated_context" in state:
        context = state["aggregated_context"]
    else:
        context = state["context"]

    print("Answering the question from the retrieved context...")

    # Run the LLM chain and pull the answer out of the structured result.
    result = question_answer_from_context_cot_chain.invoke(
        {"question": question, "context": context}
    )
    answer = result.answer_based_on_content
    print(f'answer before checking hallucination: {answer}')

    return {"answer": answer, "context": context, "question": question}

LLM-based function to determine whether the retrieved content is relevant to the question

In [48]:
# --- LLM-based Function to Determine Relevance of Retrieved Content ---

# Output schema for the relevance check
class Relevance(BaseModel):
    is_relevant: bool = Field(description="Whether the document is relevant to the query.")
    explanation: str = Field(description="An explanation of why the document is relevant or not.")

# JSON parser built from the output schema
is_relevant_json_parser = JsonOutputParser(pydantic_object=Relevance)

# Prompt text asking whether the retrieved context is relevant to the query;
# {format_instructions} is filled in from the JSON parser.
is_relevant_content_prompt_template = """
You receive a query: {query} and a context: {context} retrieved from a vector store. 
You need to determine if the document is relevant to the query. 
{format_instructions}
"""

# Prompt object: "query" and "context" are supplied per call
is_relevant_content_prompt = PromptTemplate(
    input_variables=["query", "context"],
    partial_variables={
        "format_instructions": is_relevant_json_parser.get_format_instructions(),
    },
    template=is_relevant_content_prompt_template,
)

# LLM used for relevance checking
is_relevant_llm = ChatGroq(
    model_name="llama3-70b-8192",
    groq_api_key=groq_api_key,
    temperature=0,
    max_tokens=4000,
)

# Chain: prompt -> LLM -> JSON parser
is_relevant_content_chain = is_relevant_content_prompt | is_relevant_llm | is_relevant_json_parser
def is_relevant_content(state):
    """
    Asks the relevance chain whether the retrieved context is relevant to the question.

    Args:
        state (dict): A dictionary containing:
            - "question": The query question.
            - "context": The retrieved context to check for relevance.

    Returns:
        str: "relevant" if the context is relevant, "not relevant" otherwise.
    """
    verdict = is_relevant_content_chain.invoke(
        {"query": state["question"], "context": state["context"]}
    )
    print("Determining if the document is relevant...")

    if not verdict["is_relevant"]:
        print("The document is not relevant.")
        return "not relevant"

    print("The document is relevant.")
    return "relevant"

Chain to check whether the answer is grounded in facts or is a hallucination

In [63]:
# --- LLM Chain to Check if an Answer is Grounded in the Provided Context ---

# Prompt text for fact-checking; placeholders are {answer} and {context}
is_grounded_on_facts_prompt_template = """
You are a fact-checker that determines if the given answer {answer} is grounded in the given context {context}
You don't mind if it doesn't make sense, as long as it is grounded in the context.
Output a JSON with the field 'grounded_on_facts' set to True or False, and apart from the JSON format don't output any additional text.
"""

# Prompt object built from the template above
is_grounded_on_facts_prompt = PromptTemplate(
    input_variables=["context", "answer"],
    template=is_grounded_on_facts_prompt_template,
)

# Output schema for the grounding check
class IsGroundedOnFacts(BaseModel):
    """
    Output schema for checking if the answer is grounded in the provided context.
    """
    grounded_on_facts: bool = Field(description="Answer is grounded in the facts, 'yes' or 'no'")

# LLM used for fact-checking (Groq-hosted Llama 3 70B)
is_grounded_on_facts_llm = ChatGroq(
    model_name="llama3-70b-8192",
    groq_api_key=groq_api_key,
    temperature=0,
    max_tokens=4000,
)

# Chain: prompt -> LLM constrained to the IsGroundedOnFacts schema
_grounded_structured_llm = is_grounded_on_facts_llm.with_structured_output(IsGroundedOnFacts)
is_grounded_on_facts_chain = is_grounded_on_facts_prompt | _grounded_structured_llm

Chain to determine whether the question can be fully answered from the context

In [50]:
# --- LLM Chain to Determine if a Question Can Be Fully Answered from Context ---

# Output schema for the LLM's response
class QuestionAnswer(BaseModel):
    can_be_answered: bool = Field(description="binary result of whether the question can be fully answered or not")
    explanation: str = Field(description="An explanation of why the question can be fully answered or not.")

# JSON parser built from the output schema
can_be_answered_json_parser = JsonOutputParser(pydantic_object=QuestionAnswer)

# Prompt text; {format_instructions} is filled in from the JSON parser
can_be_answered_prompt_template = """
You receive a query: {question} and a context: {context}. 
You need to determine if the question can be fully answered based on the context.
{format_instructions}
"""

# Prompt object: "question" and "context" are supplied per call
answer_question_prompt = PromptTemplate(
    input_variables=["question", "context"],
    partial_variables={
        "format_instructions": can_be_answered_json_parser.get_format_instructions(),
    },
    template=can_be_answered_prompt_template,
)

# LLM (Groq Llama3) for this task
can_be_answered_llm = ChatGroq(
    model_name="llama3-70b-8192",
    groq_api_key=groq_api_key,
    temperature=0,
    max_tokens=4000,
)

# Chain: prompt -> LLM -> output parser
can_be_answered_chain = answer_question_prompt | can_be_answered_llm | can_be_answered_json_parser

Function that runs both checks (grounding and answerability) to grade the generated answer

In [64]:
def grade_generation_v_documents_and_question(state):
    """
    Grades a generated answer in two stages:
    1. Fact-check: is the answer grounded in the provided context?
    2. Only if grounded: can the question be fully answered from the context?

    Args:
        state (dict): A dictionary containing:
            - "context": The context used to answer the question
            - "question": The original question
            - "answer": The generated answer

    Returns:
        str: "hallucination" when the answer is not grounded, otherwise "useful"
        when the question can be fully answered and "not_useful" when it cannot.
    """
    context, answer, question = state["context"], state["answer"], state["question"]

    # Stage 1: grounding check
    print("Checking if the answer is grounded in the facts...")
    grounding = is_grounded_on_facts_chain.invoke({"context": context, "answer": answer})

    if not grounding.grounded_on_facts:
        print("The answer is hallucination.")
        return "hallucination"

    print("The answer is grounded in the facts.")

    # Stage 2: answerability check
    print("Determining if the question is fully answered...")
    answerability = can_be_answered_chain.invoke({"question": question, "context": context})

    if not answerability["can_be_answered"]:
        print("The question cannot be fully answered.")
        return "not_useful"

    print("The question can be fully answered.")
    return "useful"

Graph so far

In [65]:

# LangGraph Workflow Construction
class QualitativeRetievalAnswerGraphState(TypedDict):
    """State schema passed to StateGraph for the qualitative retrieval/answer workflow."""
    # The question being answered.
    question: str
    # The context used to answer the question.
    context: str
    # The generated answer.
    answer: str

# Create the workflow graph using QualitativeRetievalAnswerGraphState as its state schema.
qualitative_retrieval_answer_workflow = StateGraph(QualitativeRetievalAnswerGraphState)

# Register the four steps as nodes; each node name matches the function it wraps
# (the functions themselves are defined elsewhere in the notebook).
qualitative_retrieval_answer_workflow.add_node("retrieve_context_per_question", retrieve_context_per_question)
qualitative_retrieval_answer_workflow.add_node("keep_only_relevant_content", keep_only_relevant_content)
qualitative_retrieval_answer_workflow.add_node("rewrite_question", rewrite_question)
qualitative_retrieval_answer_workflow.add_node("answer_question_from_context", answer_question_from_context)

# Every run starts with retrieval, which always flows into content filtering.
qualitative_retrieval_answer_workflow.set_entry_point("retrieve_context_per_question")
qualitative_retrieval_answer_workflow.add_edge("retrieve_context_per_question", "keep_only_relevant_content")
# After filtering, is_relevant_content picks the branch:
#   "relevant"     -> answer the question from the context
#   "not relevant" -> rewrite the question
qualitative_retrieval_answer_workflow.add_conditional_edges(
    "keep_only_relevant_content",
    is_relevant_content,
    {
        "relevant": "answer_question_from_context",
        "not relevant": "rewrite_question"
    },
)
# A rewritten question goes back through retrieval.
qualitative_retrieval_answer_workflow.add_edge("rewrite_question", "retrieve_context_per_question")
# After answering, grade_generation_v_documents_and_question picks the branch:
#   "hallucination" -> generate the answer again
#   "not_useful"    -> rewrite the question and retrieve again
#   "useful"        -> finish (END)
# NOTE(review): neither loop has a retry cap in this cell. If the grader never returns
# "useful", the run presumably only stops at LangGraph's recursion limit; confirm this
# and consider a bounded retry counter in the state.
qualitative_retrieval_answer_workflow.add_conditional_edges(
    "answer_question_from_context",
    grade_generation_v_documents_and_question,
    {
        "hallucination": "answer_question_from_context",
        "not_useful": "rewrite_question",
        "useful": END
    },
)

# Compile the graph definition into the runnable app that generate_answer invokes.
qualitative_retrieval_answer_retrival_app = qualitative_retrieval_answer_workflow.compile()

In [66]:
def generate_answer(state):
    """
    Run the compiled retrieval/answer graph for one question.

    Args:
        state (dict): Must contain "question"; any other keys are ignored.

    Returns:
        Whatever the compiled graph's invoke() returns for that question.
    """
    # Only the question is forwarded; the graph starts from a state with just this key.
    graph_input = {"question": state["question"]}
    final_state = qualitative_retrieval_answer_retrival_app.invoke(graph_input)
    return final_state


In [67]:
import gradio as gr

# Gradio callback: answers one question through the RAG graph and reports answer quality.
def rag_interface(question):
    """
    Answer a question through the RAG workflow and append a quality status line.

    Args:
        question (str): Text submitted from the Gradio textbox.

    Returns:
        str: The generated answer, a "---" separator and a status line describing
        the grade. For an empty or whitespace-only question, a short message
        asking for input is returned and the pipeline is not run.
    """
    # Guard: a blank submission would otherwise run the full retrieval and grading pipeline.
    if not question or not question.strip():
        return "Please enter a question about the book."

    state = {"question": question}

    # Step 1: Generate answer (runs the compiled graph).
    output = generate_answer(state)

    # Step 2: Grade the answer (grounded, useful, hallucination, etc.).
    # The graph already applies this grader on its conditional edge, so this repeats
    # the check; it is kept so the status line reflects an explicit verdict.
    grade = grade_generation_v_documents_and_question(output)

    # Step 3: Format final response.
    answer = output["answer"]

    hallucination_status = "🚫 Warning: The answer may not be grounded in the provided context."
    status_by_grade = {
        "useful": "✅ Answer is grounded and useful.",
        "not_useful": "⚠️ Answer is grounded but doesn't fully answer the question.",
    }
    # Any grade other than "useful"/"not_useful" is reported as a possible hallucination.
    status = status_by_grade.get(grade, hallucination_status)

    return f"{answer}\n\n---\n{status}"

# Build the Gradio interface: a two-line text box for the question, plain-text output,
# with rag_interface as the handler function.
demo = gr.Interface(
    fn=rag_interface,
    inputs=gr.Textbox(lines=2, placeholder="Ask a question about the book..."),
    outputs="text",
    title="📘 Book Q&A with Gemini",
    description="Ask any question about the book. Answers are generated using Gemini and retrieved from book chunks, chapter summaries, and quotes."
)

# Start the app; the recorded output below shows it serving on a local URL.
# Pass share=True to launch() for a public link.
demo.launch()


* Running on local URL:  http://127.0.0.1:7874
* To create a public link, set `share=True` in `launch()`.




Retrieving relevant chunks...
Retrieving relevant chapter summaries...
Retrieving relevant book quotes...
Keeping only the relevant content...
'--------------------'




Determining if the document is relevant...
The document is relevant.
Answering the question from the retrieved context...




answer before checking hallucination: Reasoning Chain:
The user is asking a question about a specific event in the provided text.
The text includes a direct quote from Dumbledore to Professor McGonagall: “Would you care for a lemon drop?”.
Professor McGonagall declines the offer.
However, the provided context does not mention the location 'Privet Drive'.
Therefore, based on the text, we can confirm the offer of the lemon drop but cannot confirm the location.

Answer: Yes, based on the text, Professor Dumbledore offers Professor McGonagall a lemon drop. He says, “Would you care for a lemon drop?” However, the provided context does not specify that this exchange takes place on Privet Drive.
Checking if the answer is grounded in the facts...
The answer is grounded in the facts.
Determining if the question is fully answered...
The question can be fully answered.
Checking if the answer is grounded in the facts...
The answer is grounded in the facts.
Determining if the question is fully answ



Determining if the document is relevant...
The document is relevant.
Answering the question from the retrieved context...




answer before checking hallucination: Reasoning Chain:
The user is asking about the smell of Mrs. Figg's house.
The context explains that on Dudley's birthday, Harry was usually left with Mrs. Figg.
The text describes Harry's feelings about being there and mentions a specific smell.
It explicitly states, "The whole house smelled of cabbage."

Final Answer:
Based on the text provided, Mrs. Figg's house smelled of cabbage.
Checking if the answer is grounded in the facts...
The answer is grounded in the facts.
Determining if the question is fully answered...
The question can be fully answered.
Checking if the answer is grounded in the facts...
The answer is grounded in the facts.
Determining if the question is fully answered...
The question can be fully answered.
