In [None]:
# Pinned LangChain / LangGraph / Chroma stack used by the rest of the notebook.
!pip install langchain==0.2.0
#!pip install langchain-openai==0.1.7
!pip install langchain-community==0.2.0
!pip install langgraph==0.1.1
!pip install langchain-chroma==0.1.1
!pip install huggingface_hub
# NOTE(review): the chromadb / tokenizers / transformers pins on the next line differ
# from the ones force-reinstalled by the following cell (0.4.24 / 0.13.3 / 4.28.1),
# so whichever install runs last decides the versions actually in use. Confirm which
# set is intended and keep only one of the two lines.
!pip install --force-reinstall chromadb==0.5.23 tokenizers==0.20.3 transformers==4.29.2

In [None]:
 !pip install -q --force-reinstall chromadb==0.4.24 transformers==4.28.1 tokenizers==0.13.3


In [None]:
from getpass import getpass
from huggingface_hub import InferenceClient
huggingface_key = getpass('Enter your hugging face API Key:')

In [None]:
TAVILY_API_KEY = getpass('Enter your Tavily API Key:')

In [None]:
# from getpass import getpass
# openai_key = getpass('Enter your OpenAI API Key:')

In [None]:
import os
os.environ['HUGGINGFACEHUB_API_TOKEN'] = huggingface_key
#os.environ['OPENAI_API_KEY'] = openai_key
os.environ['TAVILY_API_KEY'] = TAVILY_API_KEY

Build search index for wikipedia data

In [None]:
# from langchain_openai import OpenAIEmbeddings
# openai_embedding = OpenAIEmbeddings(model = 'text-embedding-3-small')

In [None]:
!pip install sentence-transformers

In [None]:
!pip install scipy==1.10.1 --force-reinstall

In [None]:
!pip install sentence-transformers

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings
embedding_model = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en")

In [None]:
import json
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter

wikipedia_filepath = '/content/sample_data/simplewiki-2020-11-01.jsonl'
docs = []

# Stream the dump (one JSON article per line) and keep only the India-related
# subset while reading, so the whole corpus is never held in memory at once.
with open(wikipedia_filepath, 'r', encoding='utf-8') as fIn:
    for line in fIn:
        line = line.strip()
        if not line:
            # tolerate blank lines instead of crashing inside json.loads
            continue
        data = json.loads(line)
        # only first 3 paras; joined with a blank line so the last word of one
        # paragraph is not glued to the first word of the next one
        text = '\n\n'.join(data.get('paragraphs', [])[:3])
        # taking subset to keep small in size
        if 'india' not in text.lower():
            continue
        docs.append({
            'metadata': {
                "title": data.get('title'),
                "article_id": data.get('id')
            },
            'data': text
        })

# create docs
langchain_docs = [
    Document(page_content=doc['data'], metadata=doc['metadata']) for doc in docs
]

# chunk docs
splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=300)
chunked_docs = splitter.split_documents(langchain_docs)

# quick sanity check: preview the first three chunks
for i, chunk in enumerate(chunked_docs[:3]):
    print(f"Chunk {i+1}:\n", chunk.page_content[:500], "\n")


In [None]:
len(chunked_docs)

In [None]:
chunked_docs[:3]

In [None]:
!pip install numpy==1.26.4 --force-reinstall

In [None]:
import shutil

shutil.rmtree("content_db", ignore_errors=True)
print("Removed existing content_db before rebuild")

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from tqdm import tqdm
# vector db
# remove duplicates: chunks with identical text collapse to one entry
# (the dict keeps the last Document seen for each page_content)
unique_docs = list({doc.page_content: doc for doc in chunked_docs}.values())

vectorstore = Chroma(
    collection_name='wikipedia_data_indexed',
    embedding_function=embedding_model,
    collection_metadata={"hnsw:space": "cosine"},
    persist_directory="content_db"
)

# Embed and insert in batches so the progress bar tracks the real embedding work.
# (Wrapping the list in tqdm before the insert only timed a list copy.)
EMBED_BATCH_SIZE = 64
for start in tqdm(range(0, len(unique_docs), EMBED_BATCH_SIZE), desc="Embedding Chunks"):
    vectorstore.add_documents(unique_docs[start:start + EMBED_BATCH_SIZE])


In [None]:
import os
print("Directory exists:", os.path.exists("content_db"))
print("Contents:", os.listdir("content_db"))

In [None]:
import os

abs_path = os.path.abspath("content_db")
print("Chroma DB stored at:", abs_path)

In [None]:
# vectorstore.get() returns a dict of parallel lists ("ids", "documents", ...), so
# len() of that dict only counts its keys; count the ids to get the number of chunks.
print(" Total docs in vectorstore:", len(vectorstore.get()["ids"]))

Retrieval with similarity threshold



In [None]:
# To reuse an index persisted by an earlier run instead of rebuilding it, reopen it:
# vectorstore = Chroma(
#     collection_name='wikipedia_data_indexed',
#     embedding_function=embedding_model,
#     persist_directory="wikipedia_db"
# )

# Retriever returning at most 5 chunks whose relevance score is at least 0.3.
similarity_threshold = vectorstore.as_retriever(
    search_type='similarity_score_threshold',
    search_kwargs={
        "k": 5,
        "score_threshold": 0.3,
    },
)

In [None]:
query = "what is the capital of India?"
top5_docs = similarity_threshold.invoke(query)
top5_docs

In [None]:
for i, doc in enumerate(top5_docs):
    print(f"Doc {i+1}:\n{doc.page_content[:200]}")
    print("Metadata:", doc.metadata)
    print("-" * 40)

In [None]:
query = "what is Langraph"
top5_docs = similarity_threshold.invoke(query)
top5_docs

create query retrieval grader

In [None]:
!pip install ctransformers

In [None]:
from ctransformers import AutoModelForCausalLM

llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
    model_file="mistral-7b-instruct-v0.1.Q4_K_M.gguf",
    model_type="mistral",
    max_new_tokens=128,
    context_length=4096,
    temperature=0
)

In [None]:
from typing import Any, List
from langchain_core.language_models import LLM

class CTransformersLLM(LLM):
    """LangChain ``LLM`` adapter around a callable ctransformers model.

    ``model`` is called as ``model(prompt)`` and its return value is used as the
    completion text.
    """

    model: Any

    def _call(self, prompt: str, stop: List[str] = None, run_manager: Any = None, **kwargs: Any) -> str:
        """Generate a completion for ``prompt``.

        Args:
            prompt: Fully formatted prompt text.
            stop: Optional stop sequences; the output is cut at the first
                occurrence of each one.
            run_manager: Callback manager supplied by LangChain; accepted so the
                signature matches the base-class contract, not used here.
            **kwargs: Extra invocation arguments; accepted and ignored so that
                passing them does not raise ``TypeError``.

        Returns:
            The generated text, truncated at any stop sequence.
        """
        output = self.model(prompt)
        # Emulate stop sequences by truncation. Empty strings are skipped because
        # str.split('') raises ValueError.
        for s in stop or []:
            if s:
                output = output.split(s)[0]
        return output

    @property
    def _identifying_params(self):
        """Parameters identifying this LLM (a constant label)."""
        return {"model": "ctransformers"}

    @property
    def _llm_type(self):
        """Short type tag reported to LangChain."""
        return "ctransformers"


In [None]:
from pydantic import BaseModel, Field
from langchain.prompts import ChatPromptTemplate
from langchain.output_parsers import PydanticOutputParser
from langchain.schema.runnable import Runnable

class GradeDocuments(BaseModel):
    # Single-field schema that the grader's JSON reply is parsed into.
    binary_score: str = Field(description="Documents are relevant to the question, 'yes' or 'no'")

SYS_PROMPT = """You are an expert grader assessing relevance of a retrieved document to a user question.
Respond in **valid JSON** ONLY and on a single line.
Your answer must be exactly one of these:
{{ "binary_score": "yes" }}
or
{{ "binary_score": "no" }}
Do not write anything else. No 'Grader:', no explanation."""

grade_prompt = ChatPromptTemplate.from_messages([
    ("system", SYS_PROMPT),
    ("human", "Retrieved document: {document}\nUser question: {question}")
])

# Wrap the raw ctransformers model only once. Without this guard, re-running the
# cell would wrap the already-wrapped LLM in a second adapter.
if not isinstance(llm, LLM):
    llm = CTransformersLLM(model=llm)
parser = PydanticOutputParser(pydantic_object=GradeDocuments)

# prompt -> local LLM -> GradeDocuments instance
doc_grader: Runnable = grade_prompt | llm | parser

In [None]:
import json
import re

def extract_json(text):
    """Return the first JSON object embedded in ``text``, or ``None``.

    Scans from each ``{`` and lets the JSON decoder find the matching end, so
    nested objects, multi-line objects and trailing chatter after the object
    are all handled. Brace groups that are not valid JSON are skipped.

    Args:
        text: Raw model output (may be empty or ``None``).

    Returns:
        The decoded ``dict``, or ``None`` when no valid JSON object is present.
    """
    if not text:
        return None

    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            # raw_decode parses one value starting at `start` and ignores what follows
            parsed, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            start = text.find("{", start + 1)
            continue
        return parsed
    return None

In [None]:
def safe_invoke_with_json_extraction(chain, inputs):
    """Invoke a grading chain and always return a dict with a ``"binary_score"`` key.

    Args:
        chain: Runnable to invoke (e.g. the document grader chain).
        inputs: Input mapping passed to ``chain.invoke``.

    Returns:
        ``result.dict()`` when the chain result exposes ``dict``, otherwise the
        result itself. If the output parser fails, the raw LLM text is scanned
        for a JSON object containing ``"binary_score"``; when none is found the
        fallback ``{"binary_score": "unclear"}`` is returned.
    """
    from langchain_core.exceptions import OutputParserException

    try:
        result = chain.invoke(inputs)
        return result.dict() if hasattr(result, "dict") else result
    except OutputParserException as e:
        # llm_output can be None, so guard before stripping
        raw = (e.llm_output or "").strip()
        print("Parsing failed. Raw output:\n", raw)
        data = extract_json(raw)
        # Callers index result["binary_score"], so only accept recovered JSON that has it.
        if isinstance(data, dict) and "binary_score" in data:
            return data
        return {"binary_score": "unclear"}

In [None]:
response = safe_invoke_with_json_extraction(doc_grader, {
    "document": "India is a country in South Asia with a long cultural history.",
    "question": "Where is India located?"
})

print(" Binary Score:", response["binary_score"])


In [None]:
query = "what is the capital of India?"
top5_docs = similarity_threshold.invoke(query)

for doc in top5_docs:
    print("Document:\n", doc.page_content)

    result = safe_invoke_with_json_extraction(doc_grader, {
        "question": query,
        "document": doc.page_content
    })

    print("GRADE:", result["binary_score"])
    print()


In [None]:
query = "what is Langraph?"
top5_docs = similarity_threshold.invoke(query)

for doc in top5_docs:
    print("Document:\n", doc.page_content)

    result = safe_invoke_with_json_extraction(doc_grader, {
        "question": query,
        "document": doc.page_content
    })

    print("GRADE:", result["binary_score"])
    print()

In [None]:
query = "who won the champions league in 2024?"
top5_docs = similarity_threshold.invoke(query)

for doc in top5_docs:
    print("Document:\n", doc.page_content)

    result = safe_invoke_with_json_extraction(doc_grader, {
        "question": query,
        "document": doc.page_content
    })

    print("GRADE:", result["binary_score"])
    print()

build qa rag

In [None]:
from ctransformers import AutoModelForCausalLM

# Load the quantized Mistral-7B-Instruct GGUF model used for answer generation
# and query rewriting.
# NOTE(review): this is the same repo and model_file already loaded for the grader
# earlier in the notebook; only max_new_tokens differs (512 here vs 128 there).
# Presumably that keeps two copies of the weights in memory. Confirm whether a
# single instance could serve both.
llm_raw = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
    model_file="mistral-7b-instruct-v0.1.Q4_K_M.gguf",
    model_type="mistral",
    max_new_tokens=512,
    temperature=0.0,
    context_length=4096
)

In [None]:
from langchain_core.language_models import LLM
from typing import Any, List

class CTransformersLLM(LLM):
    """LangChain ``LLM`` adapter around a callable ctransformers model.

    ``model`` is called as ``model(prompt)`` and its return value is used as the
    completion text. (This repeats the adapter defined earlier in the notebook.)
    """

    model: Any

    def _call(self, prompt: str, stop: List[str] = None, run_manager: Any = None, **kwargs: Any) -> str:
        """Generate a completion for ``prompt``.

        Args:
            prompt: Fully formatted prompt text.
            stop: Optional stop sequences; the output is cut at the first
                occurrence of each one.
            run_manager: Callback manager supplied by LangChain; accepted so the
                signature matches the base-class contract, not used here.
            **kwargs: Extra invocation arguments; accepted and ignored so that
                passing them does not raise ``TypeError``.

        Returns:
            The generated text, truncated at any stop sequence.
        """
        output = self.model(prompt)
        # Emulate stop sequences by truncation. Empty strings are skipped because
        # str.split('') raises ValueError.
        for s in stop or []:
            if s:
                output = output.split(s)[0]
        return output

    @property
    def _identifying_params(self):
        """Parameters identifying this LLM (a constant label)."""
        return {"model": "ctransformers"}

    @property
    def _llm_type(self):
        """Short type tag reported to LangChain."""
        return "ctransformers"

llm = CTransformersLLM(model=llm_raw)

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableLambda
from operator import itemgetter

QA_PROMPT = """You are an assistant for question answering tasks. Use the following pieces of retrieved context to answer the question.
If no context is present or you don't know the answer, just say "I don't know".
Do not make up the answer.
Question: {question}
Context: {context}
Answer:"""

prompt_template = ChatPromptTemplate.from_template(QA_PROMPT)

def format_docs(docs):
    """Concatenate the ``page_content`` of every document, separated by a blank line."""
    contents = [document.page_content for document in docs]
    return "\n\n".join(contents)

qa_rag_chain = (
    {
        "context": itemgetter("context") | RunnableLambda(format_docs),
        "question": itemgetter("question")
    }
    | prompt_template
    | llm
    | StrOutputParser()
)

In [None]:
from langchain_core.documents import Document

query = "What is the capital of India?"
retrieved_docs = [
    Document(page_content="New Delhi is the capital of India."),
    Document(page_content="India is a country in South Asia."),
]

result = qa_rag_chain.invoke({
    "question": query,
    "context": retrieved_docs
})

print("Answer:", result)

In [None]:
query = "What is the capital of India?"

top5_docs = similarity_threshold.invoke(query)

result = qa_rag_chain.invoke({
    'question': query,
    'context': top5_docs
})

print("Answer:", result)

create query rewriter

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

SYS_PROMPT = """You are a question rewriter. Your task is to:
1. Convert the following input question into a better version optimized for web search.
2. When rewriting, understand the semantic intent of the original question.
3. Return only the rewritten question and nothing else.
If the question is already optimal, return it unchanged."""

rewrite_prompt = ChatPromptTemplate.from_messages([
    ("system", SYS_PROMPT),
    ("human", "Here is the initial question: {question}. Rewrite it.")
])

question_rewriter = rewrite_prompt | llm | StrOutputParser()


In [None]:
query = "Who won champions league in 2024?"

rewritten = question_rewriter.invoke({"question": query})
print("Rewritten Question:", rewritten)


load web search tool

In [None]:
from langchain_community.tools.tavily_search import TavilySearchResults

tavily_tool = TavilySearchResults(
    max_results=3,
    search_depth="advanced" )

results = tavily_tool.invoke("What is the capital of India?")
#print(results)


build agentic rag component

graph state

In [None]:
from typing import List
from typing_extensions import TypedDict

class GraphState(TypedDict):
    """State dictionary passed between the nodes of the agentic RAG graph."""
    # Current question; rewrite_query replaces it with the rewritten version.
    question: str
    # Answer text produced by generate_answer.
    generation: str
    # "Yes" / "No" flag set by grade_documents and read by decide_to_generate.
    web_search_needed: str
    # NOTE(review): annotated as List[str], but the nodes in this notebook store
    # Document objects here (they read .page_content / .metadata). Confirm and
    # tighten the annotation.
    documents: List[str]


retrieve function for retrieval from vector db


In [None]:
import hashlib

def hash_text(text):
    """Return the hexadecimal MD5 digest of ``text`` encoded as UTF-8."""
    digest = hashlib.md5()
    digest.update(text.encode("utf-8"))
    return digest.hexdigest()

def retrieve(state):
    """Graph node: fetch documents for the current question from the vector store.

    Args:
        state: Graph state; reads ``state["question"]``.

    Returns:
        dict with ``"documents"`` (the retriever results) and the unchanged
        ``"question"``.
    """
    print("\n Entered: retrieve")
    question = state["question"]
    print("Question:", question)
    # vectorstore.get() returns a dict of parallel lists; len() of that dict is the
    # number of keys, so count the ids to report the number of stored chunks.
    print(" Total docs in vectorstore:", len(vectorstore.get()["ids"]))

    documents = similarity_threshold.invoke(question)
    print(f" Retrieved {len(documents)} documents")

    # Debug preview: a short content fingerprint plus the first 300 characters.
    for i, doc in enumerate(documents):
        content_hash = hash_text(doc.page_content)
        print(f"\n Doc #{i+1} (hash: {content_hash})")
        print(doc.page_content[:300].strip())
        print(" Metadata:", doc.metadata)

    return {"documents": documents, "question": question}


grade docs


In [None]:
import hashlib

def hash_text(text):
    """Return the hexadecimal MD5 digest of ``text`` encoded as UTF-8."""
    encoded = text.encode('utf-8')
    hasher = hashlib.md5(encoded)
    return hasher.hexdigest()

def grade_documents(state):
    """Graph node: keep only the retrieved documents the grader marks as relevant.

    Args:
        state: Graph state; reads ``state["question"]`` and ``state["documents"]``.

    Returns:
        dict with the filtered ``"documents"``, the unchanged ``"question"`` and
        ``"web_search_needed"`` set to ``"Yes"`` when no document survived
        grading (or none was retrieved), otherwise ``"No"``.
    """
    print("\nEntered: grade_documents")
    question = state["question"]
    documents = state["documents"]
    filtered_docs = []

    if not documents:
        print("No documents retrieved")
        return {"documents": [], "question": question, "web_search_needed": "Yes"}

    print(f"Total documents received: {len(documents)}")
    for i, doc in enumerate(documents):
        content_hash = hash_text(doc.page_content)
        print(f"\n Document #{i+1} (hash: {content_hash})")
        print(doc.page_content[:300].strip())
        print("Metadata:", doc.metadata)

        prompt_input = {
            "question": question,
            "document": doc.page_content
        }
        print(" Prompt Input to Grader:\n", prompt_input)

        grade = safe_invoke_with_json_extraction(doc_grader, prompt_input)
        print("Grader Output:", grade)

        # The verdict is free-form LLM text: normalise case and whitespace so that
        # "Yes" or " yes " are not silently treated as irrelevant.
        score = str(grade.get("binary_score", "")).strip().lower()
        if score == "yes":
            print("Marked relevant")
            filtered_docs.append(doc)
        else:
            print(" Marked irrelevant")

    web_search_needed = "Yes" if len(filtered_docs) == 0 else "No"

    print(f"\n Final relevant docs: {len(filtered_docs)} / {len(documents)}")
    print(f" Web search needed? {web_search_needed}")

    return {
        "documents": filtered_docs,
        "question": question,
        "web_search_needed": web_search_needed
    }


rewrite query

In [None]:
def rewrite_query(state):
    """Graph node: rewrite the question into a form better suited to web search.

    Args:
        state: Graph state; reads ``state["question"]`` and ``state["documents"]``.

    Returns:
        dict with the unchanged ``"documents"`` and ``"question"`` set to the
        rewritten question. The original question is kept when the rewriter
        returns nothing usable.
    """
    print("rewrite query")
    question = state["question"]
    documents = state["documents"]
    # rewrite
    rewritten = question_rewriter.invoke({"question": question})
    # Raw completions often carry leading/trailing whitespace; an empty rewrite
    # would otherwise be passed on as an empty search query.
    better_question = rewritten.strip() if isinstance(rewritten, str) else ""
    if not better_question:
        better_question = question
    return {"documents": documents, "question": better_question}

web search

In [None]:
from langchain.schema import Document

def web_search(state):
    """Graph node: run a Tavily web search and append the hits to the documents.

    Args:
        state: Graph state; reads ``state["question"]`` and ``state["documents"]``.

    Returns:
        dict with ``"documents"`` (the incoming documents followed by one
        ``Document`` per usable search hit) and the unchanged ``"question"``.
        When the tool output cannot be used, the incoming documents are
        returned unchanged.
    """
    import json

    print("Entered: web_search")
    question = state["question"]
    # Work on a copy so the list held in the incoming state is not mutated in place.
    documents = list(state["documents"])

    # web search
    docs = tavily_tool.invoke(question)

    # The tool may hand back a string instead of a list of hits; try to decode it.
    if isinstance(docs, str):
        try:
            docs = json.loads(docs)
        except ValueError:
            print("Failed to parse Tavily output")
            return {"documents": documents, "question": question}

    if not isinstance(docs, list):
        print("Unexpected Tavily response type:", type(docs))
        return {"documents": documents, "question": question}

    # Keep only hits that are dicts and carry a "content" field.
    web_docs = [
        Document(page_content=d["content"], metadata={"source": d.get("url")})
        for d in docs if isinstance(d, dict) and "content" in d
    ]

    if web_docs:
        print(f"Added {len(web_docs)} Tavily web docs")
        documents = documents + web_docs
    else:
        print(" No usable content from Tavily")

    return {
        "documents": documents,
        "question": question
    }


generate answer

In [None]:
def generate_answer(state):
    """Graph node: answer the question from the documents currently in the state.

    Returns a dict with the unchanged ``"documents"`` and ``"question"`` plus the
    answer text under ``"generation"``.
    """
    print("generate answer")
    question, documents = state["question"], state["documents"]
    # generate answer
    answer = qa_rag_chain.invoke({"question": question, "context": documents})
    return {
        "documents": documents,
        "generation": answer,
        "question": question,
    }

In [None]:
def decide_to_generate(state):
    """Routing function: choose the node that follows document grading.

    Returns ``"rewrite_query"`` when ``state["web_search_needed"]`` is exactly
    ``"Yes"``, otherwise ``"generate_answer"``.
    """
    print("decide to generate")
    # Guard clause: anything other than the exact flag "Yes" goes straight to generation.
    if state["web_search_needed"] != "Yes":
        print("relevant docs avilable")
        return "generate_answer"
    print("relevant docs not available")
    return "rewrite_query"


build agent graph


In [None]:
from langgraph.graph import END, StateGraph

graph_builder = StateGraph(GraphState)

# Register each node under the name the edges below refer to.
for node_name, node_fn in (
    ("retrieve", retrieve),
    ("grade_documents", grade_documents),
    ("rewrite_query", rewrite_query),
    ("web_search", web_search),
    ("generate_answer", generate_answer),
):
    graph_builder.add_node(node_name, node_fn)

# retrieve -> grade_documents -> (generate_answer | rewrite_query -> web_search -> generate_answer) -> END
graph_builder.set_entry_point("retrieve")
graph_builder.add_edge("retrieve", "grade_documents")
graph_builder.add_conditional_edges(
    "grade_documents",
    decide_to_generate,
    {
        "rewrite_query": "rewrite_query",
        "generate_answer": "generate_answer",
    },
)
graph_builder.add_edge("rewrite_query", "web_search")
graph_builder.add_edge("web_search", "generate_answer")
graph_builder.add_edge("generate_answer", END)

agentic_rag = graph_builder.compile()

In [None]:
from IPython.display import display, Image

display(Image(agentic_rag.get_graph().draw_mermaid_png()))

test

In [None]:
query = "what is the capital of India?"

RESPONSE = agentic_rag.invoke({"question": query})

In [None]:
from IPython.display import display, Markdown
display(Markdown(RESPONSE["generation"]))

In [None]:
query = "Who won the champions league in 2024?"

RESPONSE = agentic_rag.invoke({"question": query})


In [None]:
from IPython.display import display, Markdown
display(Markdown(RESPONSE["generation"]))

In [None]:
query = "Tell me about India"

RESPONSE = agentic_rag.invoke({"question": query})


In [None]:
display(Markdown(RESPONSE["generation"]))