In [1]:
!pip install torch
!pip install transformers
!pip install accelerate
!pip install langchain
!pip install -U langchain langchain-community
!pip install langgraph
!pip install chromadb
!pip install faiss-cpu
!pip install pdfplumber
!pip install PyPDF2
!pip install sentence-transformers
!pip install huggingface-hub
!pip install -U langchain-huggingface
!pip install pypdf

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [2]:
import os
from getpass import getpass

from huggingface_hub import login

# Never hard-code an access token in a notebook: it ends up in version control
# and in every shared copy. Read it from the HF_TOKEN environment variable and
# fall back to an interactive, non-echoing prompt.
# NOTE(review): any token that was previously committed in this cell should be
# treated as leaked and revoked in the Hugging Face account settings.
hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face access token: ")
login(token=hf_token)

In [8]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain.llms import HuggingFacePipeline

# Load Harry Potter book and split into overlapping chunks for retrieval
pdf_path = "/content/Harrypotter.pdf"
loader = PyPDFLoader(pdf_path)
documents = loader.load()
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = splitter.split_documents(documents)

# Embed the chunks and index them in FAISS; `retriever` is used by the functions below
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MPNet-base-v2")
vectorstore = FAISS.from_documents(chunks, embedding_model)
retriever = vectorstore.as_retriever()

# Load language model and tokenizer
model_name = "meta-llama/Llama-3.2-3B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    low_cpu_mem_usage=True
)
gen_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    do_sample=True,
    temperature=0.75,
    # Return only the newly generated text. Without this the pipeline returns
    # prompt + completion, so the "response" starts with the whole prompt
    # (instructions and retrieved excerpts), which also corrupts the
    # substring-based evaluation downstream.
    return_full_text=False,
)
llm = HuggingFacePipeline(pipeline=gen_pipeline)

def generate_character_response(question: str, character: str) -> str:
    """Answer ``question`` in the voice of ``character`` using retrieved book excerpts.

    Relies on the module-level ``retriever`` and ``llm`` objects.

    Args:
        question: The question to put to the character.
        character: Character name; names present in the traits table below get
            their trait description, any other name gets "No specific traits".

    Returns:
        The text produced by ``llm`` for the assembled prompt, or a fixed
        apology string when retrieval yields no non-blank text.
    """
    # Retrieve relevant context from the book based on character and question.
    # `invoke` is the supported retriever entry point; `get_relevant_documents`
    # is deprecated in langchain-core.
    query = f"{character}: {question}"
    passages = retriever.invoke(query)
    context = "\n".join(doc.page_content for doc in passages)
    if not context.strip():
        return "Sorry, I couldn't find enough information in the book to answer that."

    # Manually written traits for each character
    character_traits = {
        "Hermione Granger": "Logical, book-smart, rule-follower",
        "Harry Potter": "Brave, protective, seeks justice",
        "Ron Weasley": "Loyal, nervous, humorous",
        "Albus Dumbledore": "Wise, protective, mysterious",
        "Draco Malfoy": "Smug, competitive, arrogant"
    }
    traits = character_traits.get(character, "No specific traits")

    # Prompt for generating a character response
    prompt = (
        f"You are {character} from Harry Potter.\n"
        f"Traits: {traits}\n"
        "Stay in character. Do not mention assistants, AI, or anything artificial.\n"
        f"Use these book excerpts to answer:\n{context}\n"
        f"Question: {question}\n"
        "Give a response as the character would."
    )
    return llm.invoke(prompt)

def context_coverage_check(response: str, context: str) -> bool:
    """Return True when every sentence of ``response`` occurs verbatim in ``context``.

    Matching is case-insensitive and whitespace-insensitive: runs of spaces and
    newlines are collapsed on both sides, so a sentence quoted from the book
    still matches when the PDF text wraps it across lines. Sentences are
    delimited by ``.``, ``?`` and ``!``.

    Args:
        response: Generated text to verify.
        context: Retrieved book excerpts the response should be grounded in.

    Returns:
        False as soon as one non-empty sentence is not a substring of the
        context; True otherwise (including for an empty response).
    """
    def _normalize(text: str) -> str:
        # Lower-case and collapse any whitespace run (incl. newlines) to one space.
        return " ".join(text.lower().split())

    ctx_normalized = _normalize(context)
    # Treat '?' and '!' as sentence terminators too, not only '.'.
    unified = response.replace("?", ".").replace("!", ".")
    for raw_sentence in unified.split("."):
        sentence = _normalize(raw_sentence)
        if sentence and sentence not in ctx_normalized:
            return False
    return True

def evaluate_response(response: str, traits: str, question: str, context: str) -> dict:
    """Score a response with three literal, case-insensitive substring checks.

    - "Relevance": the whole question text appears inside the response.
    - "Character Authenticity": every ", "-separated trait word appears inside
      the response.
    - "Context Accuracy": ``context_coverage_check(response, context)`` is truthy.

    Returns:
        A dict mapping each criterion name to "✅" or "❌".
    """
    def mark(passed) -> str:
        return "✅" if passed else "❌"

    answer_lower = response.lower()

    question_echoed = question.lower() in answer_lower

    every_trait_named = True
    for trait in traits.split(", "):
        if trait.lower() not in answer_lower:
            every_trait_named = False
            break

    grounded = context_coverage_check(response, context)

    scores = {}
    scores["Relevance"] = mark(question_echoed)
    scores["Character Authenticity"] = mark(every_trait_named)
    scores["Context Accuracy"] = mark(grounded)
    return scores

def print_output(question, character, response, evaluation):
    """Print the question, character, response and evaluation marks between two 30-character rules."""
    rule = "=" * 30
    report = [
        rule,
        f"Question: {question}",
        f"Character: {character}",
        f"Response:\n{response}",
        "Evaluation:",
    ]
    # One indented "name: mark" line per evaluation entry, in dict order
    report.extend(f"  {criterion}: {mark}" for criterion, mark in evaluation.items())
    report.append(rule)
    print("\n".join(report))

def main():
    """Run one demo question through generation and evaluation, then print the report."""
    question = "What’s your plan to win a Quidditch match?"
    character = "Draco Malfoy"
    response = generate_character_response(question, character)
    # Must mirror the "Draco Malfoy" entry of the traits table inside
    # generate_character_response; update both if the character changes.
    traits = "Smug, competitive, arrogant"
    # generate_character_response returns only the text, so retrieval is run
    # again (same query) to obtain the context for evaluation. `invoke` replaces
    # the deprecated `get_relevant_documents`.
    passages = retriever.invoke(f"{character}: {question}")
    context = "\n".join(doc.page_content for doc in passages)
    evaluation = evaluate_response(response, traits, question, context)
    print_output(question, character, response, evaluation)

if __name__ == "__main__":
    main()



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cpu
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.



    ⚡ **Character Response Report** ⚡

    🔍 **Input Details:**
       - **Question**: What would you say to someone bullying a friend?
       - **Character**: Harry Potter

    🛠️ **Process**:
       1. Extracted and indexed the PDF text.
       2. Retrieved passages mentioning Harry Potter and related context.
       3. Generated the response based on the retrieved context.

    💬 **Generated Response**:
       "Human: 
    You are going to roleplay as Harry Potter from Harry Potter.
    NEVER break character or mention that you are an AI.
    Traits of Harry Potter: Brave, protective, seeks justice

    Use the following excerpts from Harry Potter to inform your response:
    thickset and looked extremely mean. Standing on either side of the pale
boy, they looked like bodyguards.
"Oh, this is Crabbe and this is Goyle," said the pale boy carelessly,
noticing where Harry was looking. "And my name's Malfoy, Draco Malfoy."
Ron gave a slight cough, which might have been hiding a snigget

In [36]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain_core.messages import HumanMessage, AIMessage
from langchain.prompts import ChatPromptTemplate
from typing import List

# Step 1: Load and Split the Document
pdf_path = "/content/Harrypotter.pdf"
loader = PyPDFLoader(pdf_path)
documents = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
document_chunks = text_splitter.split_documents(documents)

# Step 2: Set up the Vector Store for Retrieval
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MPNet-base-v2")
retriever = FAISS.from_documents(document_chunks, embeddings).as_retriever()

# Step 3: Configure the Language Model
model_name = "meta-llama/Llama-3.2-3B-Instruct"  # Replace with a smaller model if necessary

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load model; device placement is chosen automatically (device_map="auto")
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    low_cpu_mem_usage=True
)

# Define the pipeline
llm_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    do_sample=True,
    temperature=0.75,
    # Return only the newly generated text. Without this the pipeline returns
    # prompt + completion, so the "response" begins with the whole prompt and
    # the retrieved excerpts, which also skews the substring-based checks.
    return_full_text=False,
)
llm = HuggingFacePipeline(pipeline=llm_pipeline)

# Step 4: Define the Retrieval-Augmented Generation (RAG) Pipeline with Dynamic Evaluation
def generate_character_response_with_guardrail(question: str, character: str) -> str:
    """
    Generate an in-character answer to ``question`` and check it against the retrieved context.

    Relies on the module-level ``retriever`` and ``llm`` objects and on
    ``validate_response_against_context``.

    Args:
        question: The question to put to the character.
        character: Character name; names present in the traits table below get
            their trait description, any other name gets "Unknown traits".

    Returns:
        - a fixed apology string when retrieval yields no non-blank text;
        - the raw model response when validation passes;
        - otherwise a "Note: ..." message that embeds both the retrieved
          context and the generated response.
    """
    # Retrieve relevant context. `invoke` is the supported retriever entry
    # point; `get_relevant_documents` is deprecated in langchain-core.
    search_query = f"{character}: {question}"
    results = retriever.invoke(search_query)
    context = "\n".join(doc.page_content for doc in results)

    # If no context is retrieved, return a fallback message
    if not context.strip():
        return "I'm sorry, I couldn't find any relevant information to answer your question."

    # Define traits for each character
    character_traits = {
        "Hermione Granger": "Logical, book-smart, rule-follower",
        "Harry Potter": "Brave, protective, seeks justice",
        "Ron Weasley": "Loyal, nervous, humorous",
        "Albus Dumbledore": "Wise, protective, mysterious",
        "Draco Malfoy": "Smug, competitive, arrogant"
    }

    # Get traits for the given character
    traits = character_traits.get(character, "Unknown traits")

    # Prompt template
    # NOTE(review): formatting a ChatPromptTemplate to a plain string appears to
    # add a "Human: " role prefix (visible in the recorded output); a plain
    # PromptTemplate may be the better fit for a text-completion LLM — confirm.
    prompt_template = ChatPromptTemplate.from_template("""
    You are going to roleplay as {character} from Harry Potter.
    NEVER break character or mention that you are an AI.
    Traits of {character}: {traits}

    Use the following excerpts from Harry Potter to inform your response:
    {context}

    Question: {question}

    Answer the user's question exactly as {character} would, using their distinctive voice,
    vocabulary, and perspective based on the book.
    """)

    # Generate response
    response = llm.invoke(prompt_template.format(
        character=character,
        traits=traits,
        context=context,
        question=question
    ))

    # Hallucination Guardrail: Validate response against retrieved context
    if not validate_response_against_context(response, context):
        return (
            f"Note: The response could not be fully validated against the context.\n"
            f"Retrieved Context:\n{context}\n\nGenerated Response:\n{response}"
        )

    # Return the validated response
    return response

def validate_response_against_context(response: str, context: str) -> bool:
    """
    Validates the generated response against the retrieved context to prevent hallucinations.

    Every sentence of the response must occur verbatim in the context. Matching
    is case-insensitive and whitespace-insensitive: runs of spaces and newlines
    are collapsed on both sides, so a sentence quoted from the book still
    matches when the PDF text wraps it across lines. Sentences are delimited by
    ``.``, ``?`` and ``!``.

    Args:
        response: Generated text to verify.
        context: Retrieved excerpts the response should be grounded in.

    Returns:
        True if every non-empty sentence is found in the context (also for an
        empty response), False otherwise.
    """
    def _normalize(text: str) -> str:
        # Lower-case and collapse any whitespace run (incl. newlines) to one space.
        return " ".join(text.lower().split())

    context_normalized = _normalize(context)
    # Treat '?' and '!' as sentence terminators too, not only '.'.
    unified = response.replace("?", ".").replace("!", ".")

    for raw_sentence in unified.split("."):
        sentence = _normalize(raw_sentence)
        if sentence and sentence not in context_normalized:
            return False  # Found a sentence that is not grounded in the context
    return True

# Step 5: Evaluate Response Dynamically
def evaluate_response(response: str, traits: str, question: str, context: str) -> dict:
    """
    Score a response with three literal, case-insensitive substring checks.

    - "Relevance": the whole question text appears inside the response.
    - "Character Authenticity": every ", "-separated trait word appears inside
      the response.
    - "Context Accuracy": ``validate_response_against_context(response, context)``
      is truthy.

    Returns a dict mapping each criterion name to "✅" or "❌".
    """
    def mark(passed) -> str:
        return "✅" if passed else "❌"

    response_lower = response.lower()

    question_echoed = question.lower() in response_lower

    every_trait_named = True
    for trait in traits.split(", "):
        if trait.lower() not in response_lower:
            every_trait_named = False
            break

    grounded = validate_response_against_context(response, context)

    return {
        "Relevance": mark(question_echoed),
        "Character Authenticity": mark(every_trait_named),
        "Context Accuracy": mark(grounded),
    }

# Step 6: Format Output for Better Readability
def format_output(question, character, response, evaluation):
    """
    Render the question, character, response and the three evaluation marks
    as one multi-line report string.

    ``evaluation`` must contain the keys 'Relevance', 'Character Authenticity'
    and 'Context Accuracy'.
    """
    relevance_mark = evaluation['Relevance']
    authenticity_mark = evaluation['Character Authenticity']
    accuracy_mark = evaluation['Context Accuracy']

    return f"""
    =============================
    ⚡ **RAG OUTPUT** ⚡
    =============================

    🔍 **Input Details:**
       - **Question**: {question}
       - **Character**: {character}

    🛠️ **Process**:
       1. Extracted and indexed the PDF text.
       2. Retrieved passages mentioning {character} and related context.
       3. Generated the response based on the retrieved context.

    💬 **Generated Response**:
       "{response}"

    ✅ **Evaluation**:
       - **Relevance**: {relevance_mark}
       - **Character Authenticity**: {authenticity_mark}
       - **Context Accuracy**: {accuracy_mark}

    =============================
    """

# Step 7: Example Workflow
def example_workflow():
    """
    Demonstrates the RAG pipeline and its evaluation with an example question and character.

    Generates a guarded response, re-retrieves the context for the same query,
    evaluates the response and prints the formatted report.
    """
    question = "What’s your plan to win a Quidditch match?"
    character = "Draco Malfoy"

    # Generate response with guardrail
    response = generate_character_response_with_guardrail(question, character)

    # Traits used for evaluation; must mirror the "Draco Malfoy" entry of the
    # traits table inside generate_character_response_with_guardrail.
    character_traits = "Smug, competitive, arrogant"

    # Retrieve context again (the generator returns only text). `invoke`
    # replaces the deprecated `get_relevant_documents`.
    search_query = f"{character}: {question}"
    results = retriever.invoke(search_query)
    context = "\n".join(doc.page_content for doc in results)

    # Dynamically evaluate the response
    evaluation = evaluate_response(response, character_traits, question, context)

    # Format and print the output
    formatted_output = format_output(question, character, response, evaluation)
    print(formatted_output)

# Run the example workflow
example_workflow()



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cpu
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.



    ⚡ **RAG OUTPUT** ⚡

    🔍 **Input Details:**
       - **Question**: What’s your plan to win a Quidditch match?
       - **Character**: Draco Malfoy

    🛠️ **Process**:
       1. Extracted and indexed the PDF text.
       2. Retrieved passages mentioning Draco Malfoy and related context.
       3. Generated the response based on the retrieved context.

    💬 **Generated Response**:
       "Note: The response could not be fully validated against the context.
Retrieved Context:
"A stone that makes gold and stops you from ever dying!" said Harry. "No
wonder Snape's after it! Anyone would want it."
"And no wonder we couldn't find Flamel in that Study of Recent
Developments in Wizardry," said Ron. "He's not exactly recent if he's
six hundred and sixty-five, is he?"
The next morning in Defense Against the Dark Arts, while copying down
different ways of treating werewolf bites, Harry and Ron were still
discussing what they'd do with a Sorcerer's Stone if they had one. It
wasn't until Ron

**AGENTIC RAG**

In [35]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain.llms import HuggingFacePipeline

# Load and split the Harry Potter PDF into overlapping chunks
pdf_path = "/content/Harrypotter.pdf"
loader = PyPDFLoader(pdf_path)
documents = loader.load()
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = splitter.split_documents(documents)

# Set up vector store for retrieval; `retriever` is used by the functions below
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MPNet-base-v2")
vectorstore = FAISS.from_documents(chunks, embedding_model)
retriever = vectorstore.as_retriever()

# Load language model and tokenizer
model_name = "meta-llama/Llama-3.2-3B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    low_cpu_mem_usage=True
)
gen_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    do_sample=True,
    temperature=0.75,
    # Return only the newly generated text. Without this the pipeline returns
    # prompt + completion, so the "response" starts with the whole prompt
    # (instructions and retrieved excerpts).
    return_full_text=False,
)
llm = HuggingFacePipeline(pipeline=gen_pipeline)

def generate_character_response(question: str, character: str) -> str:
    """Answer ``question`` in the voice of ``character`` using retrieved book excerpts.

    Relies on the module-level ``retriever`` and ``llm`` objects.

    Args:
        question: The question to put to the character.
        character: Character name; names present in the traits table below get
            their trait description, any other name gets "No specific traits".

    Returns:
        The text produced by ``llm`` for the assembled prompt, or a fixed
        apology string when retrieval yields no non-blank text.
    """
    # Retrieve relevant context from the book for the character and question.
    # `invoke` is the supported retriever entry point; `get_relevant_documents`
    # is deprecated in langchain-core.
    query = f"{character}: {question}"
    passages = retriever.invoke(query)
    context = "\n".join(doc.page_content for doc in passages)
    if not context.strip():
        return "Sorry, I couldn't find enough information in the book to answer that."

    # Human-written traits for each character
    character_traits = {
        "Hermione Granger": "Logical, book-smart, rule-follower",
        "Harry Potter": "Brave, protective, seeks justice",
        "Ron Weasley": "Loyal, nervous, humorous",
        "Albus Dumbledore": "Wise, protective, mysterious",
        "Draco Malfoy": "Smug, competitive, arrogant"
    }
    traits = character_traits.get(character, "No specific traits")

    # Prompt for generating a character-based reply
    prompt = (
        f"You are {character} from Harry Potter.\n"
        f"Traits: {traits}\n"
        "Stay in character. Do not mention assistants, AI, or anything artificial.\n"
        f"Use these book excerpts to answer:\n{context}\n"
        f"Question: {question}\n"
        "Give a response as the character would."
    )
    return llm.invoke(prompt)

def check_context_coverage(response: str, context: str) -> bool:
    """Return True when every sentence of ``response`` occurs verbatim in ``context``.

    Matching is case-insensitive and whitespace-insensitive: runs of spaces and
    newlines are collapsed on both sides, so a sentence quoted from the book
    still matches when the PDF text wraps it across lines. Sentences are
    delimited by ``.``, ``?`` and ``!``.

    Args:
        response: Generated text to verify.
        context: Retrieved book excerpts the response should be grounded in.

    Returns:
        False as soon as one non-empty sentence is not a substring of the
        context; True otherwise (including for an empty response).
    """
    def _normalize(text: str) -> str:
        # Lower-case and collapse any whitespace run (incl. newlines) to one space.
        return " ".join(text.lower().split())

    ctx_normalized = _normalize(context)
    # Treat '?' and '!' as sentence terminators too, not only '.'.
    unified = response.replace("?", ".").replace("!", ".")
    for raw_sentence in unified.split("."):
        sentence = _normalize(raw_sentence)
        if sentence and sentence not in ctx_normalized:
            return False
    return True

def evaluate_response(response: str, traits: str, question: str, context: str) -> dict:
    """Score a response with three literal, case-insensitive substring checks.

    - "Relevance": the whole question text appears inside the response.
    - "Character Authenticity": every ", "-separated trait word appears inside
      the response.
    - "Context Accuracy": ``check_context_coverage(response, context)`` is truthy.

    Returns:
        A dict mapping each criterion name to "✅" or "❌".
    """
    passed_mark, failed_mark = "✅", "❌"
    reply = response.lower()

    if question.lower() in reply:
        relevance = passed_mark
    else:
        relevance = failed_mark

    missing_traits = any(trait.lower() not in reply for trait in traits.split(", "))
    authenticity = failed_mark if missing_traits else passed_mark

    if check_context_coverage(response, context):
        accuracy = passed_mark
    else:
        accuracy = failed_mark

    return dict([
        ("Relevance", relevance),
        ("Character Authenticity", authenticity),
        ("Context Accuracy", accuracy),
    ])

def print_output(question, character, response, evaluation):
    """Print the question, character, response and evaluation marks between two 30-character rules."""
    separator = "=" * 30
    header = (
        separator,
        f"Question: {question}",
        f"Character: {character}",
        f"Response:\n{response}",
        "Evaluation:",
    )
    # One indented "name: mark" line per evaluation entry, in dict order
    marks = [f"  {criterion}: {result}" for criterion, result in evaluation.items()]
    print(*header, *marks, separator, sep="\n")

def main():
    """Run one demo question through generation and evaluation, then print the report."""
    question = "What’s your plan to win a Quidditch match?"
    character = "Draco Malfoy"
    response = generate_character_response(question, character)
    # Must mirror the "Draco Malfoy" entry of the traits table inside
    # generate_character_response; update both if the character changes.
    traits = "Smug, competitive, arrogant"
    # generate_character_response returns only the text, so retrieval is run
    # again (same query) to obtain the context for evaluation. `invoke` replaces
    # the deprecated `get_relevant_documents`.
    passages = retriever.invoke(f"{character}: {question}")
    context = "\n".join(doc.page_content for doc in passages)
    evaluation = evaluate_response(response, traits, question, context)
    print_output(question, character, response, evaluation)

if __name__ == "__main__":
    main()



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cpu


--- Agentic RAG (OpenChat 3.5) ---


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAssistant is a large language model trained by OpenAI.

Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.

Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide r