### **RAG PIPELINE USING LANGCHAIN**

In [None]:
import os
import glob
import time
from dotenv import load_dotenv
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_classic.chains import RetrievalQA
from langchain_core.prompts import PromptTemplate
from langchain_community.llms import HuggingFaceEndpoint
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_ollama import ChatOllama 

### USNG HUGGINGFACE API KEY INORDER TO ACCESS THE EMBEDDING MODEL

In [None]:
os.environ["HUGGINGFACEHUB_API_TOKEN"] = ""#paste key here (i removed mine) 
print(" API Key set manually")

 API Key set manually


### LOADING THE PDF FILE 

In [26]:
file_path = "../data/Bazaar-Return-Refund.pdf"


if os.path.exists(file_path):
    print(f" File found at: {file_path}")
    loader = PyPDFLoader(file_path)
    docs = loader.load()
    
    print(f" Successfully loaded {len(docs)} pages.")
else:
    print(f" File NOT found at: {file_path}")
    print("Current working directory:", os.getcwd())
    print("Files in 'data' folder:", os.listdir("data") if os.path.exists("data") else "Data folder missing")


 File found at: ../data/Bazaar-Return-Refund.pdf
 Successfully loaded 3 pages.


**TEXT IN THE PDF DOCUMENTS ARE DIVIDED INTO CHUNKS (IF I DO TOKENISATION HERE,THE EMBEDDING MODEL BECOMES COMPUTATIONALLY EXPENSIVE AND DIFFICULT FOR THE LLM TO PROCESS THE TEXT AS THE TOKEN LENGTH IS LARGE)**

In [None]:
CHUNK_SIZE = 800
CHUNK_OVERLAP = 150

In [27]:

print("STEP 2: TEXT CHUNKING")
print(f"Chunk Size: {CHUNK_SIZE}, Overlap: {CHUNK_OVERLAP}")

# Create text splitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
    length_function=len,
    separators=["\n\n", "\n", ". ", " ", ""]
)

# Split documents into chunks
if 'docs' in globals() and docs:
    all_chunks = text_splitter.split_documents(docs)
    
    print(f" Created {len(all_chunks)} text chunks")
    if all_chunks:
        avg_len = sum(len(chunk.page_content) for chunk in all_chunks) // len(all_chunks)
        print(f"   Average chunk length: {avg_len} characters")
else:
    print(" Error: 'docs' variable is empty or not defined. Run the PDF loading cell first.")


STEP 2: TEXT CHUNKING
Chunk Size: 800, Overlap: 150
 Created 12 text chunks
   Average chunk length: 648 characters


### LOADING THE EMBEDDING MODEL USING HUGGINGFACE API

In [None]:
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

### CONFIGURE VECTOR DATABASE SETTINGS

In [None]:
#Here we define where the vector data will be stored locally:
PERSIST_DIRECTORY = "./chroma_db"
COLLECTION_NAME = "company_policies"

### EMBEDDING AND VECTOR STORAGE

In [30]:
# Initialize embedding model 
print("Initializing embedding model...")
embedding_model = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL_NAME,
    model_kwargs={'device': 'cpu'}, 
    encode_kwargs={'normalize_embeddings': True}
)
print(f"successfully Loaded: {EMBEDDING_MODEL_NAME}")

# 2. Create Chroma vector store
print("Creating Chroma vector store...")

vector_store = Chroma.from_documents(
    documents=all_chunks,
    embedding=embedding_model,
    persist_directory=PERSIST_DIRECTORY,
    collection_name=COLLECTION_NAME
)

print(f"Vector store created and saved to: {PERSIST_DIRECTORY}")
print(f"Total embeddings: {len(all_chunks)}")


Initializing embedding model...
successfully Loaded: sentence-transformers/all-MiniLM-L6-v2
Creating Chroma vector store...
Vector store created and saved to: ./chroma_db
Total embeddings: 12


### QUESTION EMBEDDING AND SIMILARITY SEARCH

In [32]:
TOP_K = 3 # which means we will get top 3 chunk embedding that are closest (cosine similarity) to the input embedding in the vector database

# Create retriever for similarity search
retriever = vector_store.as_retriever(
    search_kwargs={"k": TOP_K}
)

# Test retrieval
print("Testing retrieval with sample query...")
test_query = "What is the cancellation charge if I cancel a service 9 days after placing the order?"

test_results = retriever.invoke(test_query)


print(f"   Retrieved {len(test_results)} relevant chunks for: '{test_query}'")


if test_results:
    print(f"\nTop Result Preview:\n{test_results[0].page_content[:200]}...")

Testing retrieval with sample query...
   Retrieved 3 relevant chunks for: 'What is the cancellation charge if I cancel a service 9 days after placing the order?'

Top Result Preview:
amount refunded to you will be less 9% than the total amount order that you paid on 
the date of placing the order. 
‚Ä¢ If you cancel your order on or after 16 days from the date of placing the order, ...


### PROMPT ENGINEERING INORDER TO GET A REFINED OUTPUT AND AVOID HALLUCINATION

In [33]:
structured_prompt_template = """You are a specific Policy Assistant for Rainbow Bazaar.
Your goal is to answer the user question based ONLY on the provided context chunks.

CONTEXT:
{context}

USER QUESTION:
{question}

---
STRICT RULES:
1. **Focus:** Answer the question directly. Do not comment on the quality or repetition of the context text.
2. **Grounding:** If the answer is found in *any* part of the context, use it. Ignore duplicates.
3. **No Filler:** Do not start with "I apologize" or "The context mentions." Start directly with the answer.
4. **Citation:** Support your answer with source IDs (e.g., ).
5. **Missing Info:** If the answer is strictly NOT in the context, say: "I cannot find this information in the policy."

FORMAT:
**Answer:** [Direct Answer]
**Details:** [Bullet points with citations]
"""

PROMPT = PromptTemplate(
    template=structured_prompt_template,
    input_variables=["context", "question"]
)

print("Prompt updated to reduce LLM 'chatter'.")

Prompt updated to reduce LLM 'chatter'.


### LOADING THE LLM (llama3.2) LOCALLY USING OLLAMA

In [34]:
# i installed ollama and then downloaded the LLM via command prompt to access it
LLM_MODEL_NAME = "llama3.2" 
print(f"Connecting to local Ollama instance...")

try:
    llm = ChatOllama(
        model=LLM_MODEL_NAME,
        temperature=0.1,
        # Llama 3.2 is fast, so we can ask for a good amount of detail
        num_predict=512 
    )
    
    print(f"‚úÖ Local LLM initialized successfully: {LLM_MODEL_NAME}")

except Exception as e:
    print(f"‚ùå Error initializing local LLM: {e}")

# Test immediately
print("\n Testing local connection...")
try:
    test_msg = "Are you ready to answer questions? Reply with 'Yes, ready'."
    response = llm.invoke(test_msg)
    print(f"‚úÖ Response: {response.content}")
except Exception as e:
    print(f"‚ùå Connection Failed: {e}")
    print("Hint: Ensure the 'Ollama' app is running in the background!")

Connecting to local Ollama instance...
‚úÖ Local LLM initialized successfully: llama3.2

 Testing local connection...
‚úÖ Response: Yes, ready.


### CREATE RETRIEVAL QA CHAIN (THIS CHAIN IS USED TO CONNECT RETRIEVAL AND GENERATION)

In [35]:
# Create the complete QA chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs={
        "prompt": PROMPT,
        "verbose": False
    },
    return_source_documents=True,
    input_key="query"
)

print("QA Chain created successfully!")

QA Chain created successfully!


### CREATING QUESTIONS MANUALLY FROM THE PDF AND TESTING THE OUTPUT GENERATED BY THE LLM

In [36]:
def ask_question(question):
    print(f"\n{'='*60}")
    print(f"‚ùì QUESTION: {question}")
    print(f"{'='*60}")
    
    try:
        #Similarity Search
        print("üîÑ Step 1: Searching relevant policy sections...")
        similar_chunks = retriever.invoke(question)
        print(f"   ‚úì Found {len(similar_chunks)} relevant chunks")
        
        #Generation
        print("\nüîÑ Step 2: Llama 3.2 is thinking...")
        result = qa_chain.invoke({"query": question})
        
        # Display Result
        answer = result['result'].strip()
        
        print("\n" + "üìù " + "ANSWER:" + "="*50)
        # Check if answer is empty
        if not answer:
            print("‚ö†Ô∏è [The LLM returned an empty answer. Try increasing 'num_predict' in Step 8]")
        else:
            print(answer)
        
        print("\n" + "üìÑ " + "SOURCES:" + "="*45)
        sources_used = set()
        for doc in result['source_documents']:
            src = doc.metadata.get('source', 'Unknown')
            page = doc.metadata.get('page', '0') 
            sources_used.add(f"{src} (Page {page})")
        
        for source in sources_used:
            print(f"   ‚Ä¢ {source}")
            
        print("="*60)
        
    except Exception as e:
        print(f"‚ùå Error: {e}")

# Sample questions based on company policies
sample_questions = [
    "what happens if i cancel my order on or after 16 days ?",
    "What happens if I cancel my order before it ships",
    "How long do I have to submit a return request for a product?",
    "How long does it take for a refund to appear in my account?",
    "What details must I include in a return request?",
    "What is the cancellation fee for services if I cancel between 7 and 15 days after ordering",
    "Can a person under the age of 18 purchase items on the marketplace?",
    "What email address should I use for general support or inquiries?",
    "Are taxes included in the prices listed on the website?",
    "Which court has jurisdiction over legal disputes related to these policies?"
]

# Test first 3 questions
for i, question in enumerate(sample_questions[:3], 2):
    print(f"\nüìä Test {i}/3")
    ask_question(question)    


üìä Test 2/3

‚ùì QUESTION: what happens if i cancel my order on or after 16 days ?
üîÑ Step 1: Searching relevant policy sections...
   ‚úì Found 3 relevant chunks

üîÑ Step 2: Llama 3.2 is thinking...

**Answer:** A cancellation fee will be charged that is 100% of the total Order value.

**Details:**
‚Ä¢ If you cancel your order on or after 16 days from the date of placing the order, a 
cancellation fee will be charged that is 100% of the total Order value.

   ‚Ä¢ ../data/Bazaar-Return-Refund.pdf (Page 1)

üìä Test 3/3

‚ùì QUESTION: What happens if I cancel my order before it ships
üîÑ Step 1: Searching relevant policy sections...
   ‚úì Found 3 relevant chunks

üîÑ Step 2: Llama 3.2 is thinking...

**Answer:** You will not be charged any cancellation fee.

**Details:** 
‚Ä¢ If you cancel your order BEFORE it has been shipped, you will not be charged any cancellation fee.

   ‚Ä¢ ../data/Bazaar-Return-Refund.pdf (Page 0)

üìä Test 4/3

‚ùì QUESTION: How long do I have to su

### CREATING GOLDEN DATASET FOR EVALUATION (MANUALLY)

In [37]:
eval_dataset = [
    # TYPE 1: FULLY ANSWERABLE (EASY)
    {
        "type": "Answerable",
        "question": "What is the time limit for submitting a return request after delivery?",
        "expected": "10 days"
    },
    {
        "type": "Answerable",
        "question": "Which court has exclusive jurisdiction over disputes?",
        "expected": "Courts at Delhi"
    },
    {
        "type": "Answerable",
        "question": "Are taxes like GST included in the listed prices?",
        "expected": "Yes, prices are inclusive of VAT/CST, service tax, GST, duties, and cesses"
    },

    # TYPE 2:CONDITIONAL / TRICKY (Medium) 
    {
        "type": "Conditional",
        "question": "I want to cancel a service order 9 days after placing it. What is the fee?",
        "expected": "9% of the total amount (Fee percentage equals number of days for cancellations between 7-15 days)"
    },
    {
        "type": "Conditional",
        "question": "What happens if I cancel a product order after it has already been shipped?",
        "expected": "It is treated as a Return with all applicable fees"
    },
    {
        "type": "Conditional",
        "question": "I refused delivery of an order. How much is the cancellation fee?",
        "expected": "100% of the total Order value (unless evidenced that product was tampered/faulty)"
    },

    # TYPE 3: UNANSWERABLE / OUT OF SCOPE (Hard) 
    {
        "type": "Unanswerable",
        "question": "What payment methods do you accept (e.g., Credit Card, PayPal)?",
        "expected": "I cannot find this information (Policy mentions taxes/fees but not specific payment methods)"
    },
    {
        "type": "Unanswerable",
        "question": "What is the customer support phone number?",
        "expected": "I cannot find this information (Only the email rb@thepridecircle.com is provided)"
    },
    {
        "type": "Unanswerable",
        "question": "Do you ship internationally?",
        "expected": "I cannot find this information (Policy mentions Indian laws/taxes but does not specify shipping destinations)"
    }
]

print(f"Defined {len(eval_dataset)} evaluation questions.")

Defined 9 evaluation questions.


### EVALUATION USING LLM-as-a-Judge


In [38]:
# 1. Define the Grading Prompt
grading_prompt = """You are a strict teacher grading an exam. 
Compare the ACTUAL ANSWER with the EXPECTED ANSWER.

Question: {question}
Expected Answer: {expected}
Actual Answer: {actual}

Rules:
- Grade 1: Completely wrong or hallucinated.
- Grade 3: Partially correct but missing key details.
- Grade 5: Perfect match (ignoring phrasing differences).

Reply ONLY with the number (1, 2, 3, 4, or 5). Do not write words."""

def grade_answer(question, actual, expected):
    """Uses the LLM to score the answer quality"""
    # Create the prompt text
    final_prompt = grading_prompt.format(
        question=question, 
        expected=expected, 
        actual=actual
    )
    
    # Ask the LLM to grade
    try:
        score_response = llm.invoke(final_prompt)
        score_text = score_response.content.strip()
        import re
        match = re.search(r'\d', score_text)
        return int(match.group()) if match else 1
    except:
        return 1

print("LLM Judge function initialized.")

LLM Judge function initialized.


### COMPREHENSIVE EVALUATION & REPORT

In [42]:
print("üìä RUNNING FINAL EVALUATION...")

results_data = []


for item in eval_dataset:
    # Get Model Response
    try:
        start_time = time.time()
        response = qa_chain.invoke({"query": item['question']})
        latency = round(time.time() - start_time, 2)
        actual_answer = response['result'].strip()
    except:
        actual_answer = "ERROR"
        latency = 0

    # Determine "Pass/Fail" 
    # If Unanswerable: Pass if it says "cannot find/no info"
    # If Answerable: Pass if it contains key words from expected answer
    is_correct = False
    
    if item['type'] == "Unanswerable":
        valid_refusals = ["cannot find", "not mention", "no information", "does not specify", "context does not"]
        if any(phrase in actual_answer.lower() for phrase in valid_refusals):
            is_correct = True 
    else:
        keywords = item['expected'].split()[:3]
        if any(k.lower() in actual_answer.lower() for k in keywords):
            is_correct = True

    # Get Clarity Score (Using LLM Judge function)
    #(If grade_answer fails or isn't defined, default to 3)
    try:
        clarity_score = grade_answer(item['question'], actual_answer, item['expected'])
    except:
        clarity_score = 3 

    results_data.append({
        "Type": item['type'],
        "Question": item['question'],
        "Model Answer": actual_answer,
        "Expected": item['expected'],
        "Pass/Fail": "‚úÖ PASS" if is_correct else "‚ùå FAIL",
        "Clarity (1-5)": clarity_score
    })

# CREATE DATAFRAME & SHOW DETAILED REPORT
df_final = pd.DataFrame(results_data)

print("\n" + "="*80)
print("DETAILED REPORT")
print("="*80)
display(df_final)

# 3. CALCULATE METRICS
# Accuracy
total = len(df_final)
passed = len(df_final[df_final["Pass/Fail"] == "‚úÖ PASS"])
accuracy = (passed / total) * 100

#Hallucination Avoidance
# (% of Unanswerable questions that were correctly refused)
unanswerable_df = df_final[df_final["Type"] == "Unanswerable"]
if len(unanswerable_df) > 0:
    correct_refusals = len(unanswerable_df[unanswerable_df["Pass/Fail"] == "‚úÖ PASS"])
    hallucination_score = (correct_refusals / len(unanswerable_df)) * 100
else:
    hallucination_score = 100.0

# Answer Clarity
# (Average of the 1-5 scores converted to %)
avg_clarity_score = df_final["Clarity (1-5)"].mean()
clarity_pct = (avg_clarity_score / 5) * 100

# 4. PRINT FINAL METRICS CARD
print("\n" + "="*50)
print(" FINAL PROJECT REPORT CARD")
print("="*50)
print(f" OVERALL ACCURACY:        {accuracy:.1f}%")
print(f" HALLUCINATION AVOIDANCE: {hallucination_score:.1f}%")
print(f" ANSWER CLARITY:          {clarity_pct:.1f}% ({avg_clarity_score:.1f}/5)")
print("="*50)



üìä RUNNING FINAL EVALUATION...

DETAILED REPORT


Unnamed: 0,Type,Question,Model Answer,Expected,Pass/Fail,Clarity (1-5)
0,Answerable,What is the time limit for submitting a return...,**Answer:** 10 days from date of delivery of p...,10 days,‚úÖ PASS,5
1,Answerable,Which court has exclusive jurisdiction over di...,**Answer:** Courts at Delhi.\n\n**Details:** \...,Courts at Delhi,‚úÖ PASS,5
2,Answerable,Are taxes like GST included in the listed prices?,**Answer:** Yes\n**Details:** \n* All prices a...,"Yes, prices are inclusive of VAT/CST, service ...",‚úÖ PASS,5
3,Conditional,I want to cancel a service order 9 days after ...,**Answer:** 100% of the total Order value.\n\n...,9% of the total amount (Fee percentage equals ...,‚úÖ PASS,3
4,Conditional,What happens if I cancel a product order after...,**Answer:** You will be charged a cancellation...,It is treated as a Return with all applicable ...,‚úÖ PASS,3
5,Conditional,I refused delivery of an order. How much is th...,**Answer:** 100% of the total Order value.\n\n...,100% of the total Order value (unless evidence...,‚úÖ PASS,3
6,Unanswerable,"What payment methods do you accept (e.g., Cred...",**Answer:** Credit Card and PayPal.\n\n**Detai...,I cannot find this information (Policy mention...,‚úÖ PASS,3
7,Unanswerable,What is the customer support phone number?,**Answer:** I cannot find this information in ...,I cannot find this information (Only the email...,‚úÖ PASS,4
8,Unanswerable,Do you ship internationally?,**Answer:** No\n**Details:** \n* The applicati...,I cannot find this information (Policy mention...,‚ùå FAIL,3



 FINAL PROJECT REPORT CARD
 OVERALL ACCURACY:        88.9%
 HALLUCINATION AVOIDANCE: 66.7%
 ANSWER CLARITY:          75.6% (3.8/5)


### EDGE CASE HANDLING

In [None]:
print(" EDGE CASE TEST: Hallucination Avoidance")


# This question is definitely NOT in the document
trick_question = "Who is the CEO of Rainbow Bazaar?"

print(f"Question: {trick_question}")
print("Thinking...")

response = qa_chain.invoke({"query": trick_question})
final_answer = response['result'].strip()

print(f"\nModel Response:\n'{final_answer}'")

if "cannot find" in final_answer.lower() or "not provided" in final_answer.lower():
    print("\n‚úÖ SUCCESS: The model admitted it doesn't know.")
else:
    print("\n‚ùå FAILURE: The model hallucinated an answer!")


 EDGE CASE TEST: Hallucination Avoidance
Question: Who is the CEO of Rainbow Bazaar?
Thinking...

Model Response:
'I cannot find this information in the policy.'

‚úÖ SUCCESS: The model admitted it doesn't know.
