In [None]:
# Install important Python libraries

  !pip install langchain langchain-openai langchain-community langchain-chroma tavily-python langgraph



**Setting API Keys**

In [None]:
import os
from google.colab import userdata

import getpass

# Secrets are taken from the environment and only prompted for when missing, so
# no key is stored in the notebook and an already-configured key is never
# overwritten with an empty string.
# On Colab the OpenAI key can instead be read from the secrets panel:
#   os.environ["OPENAI_API_KEY"] = userdata.get("OPENAI_API_KEY")
for _key_name in (
    "OPENAI_API_KEY",  # OpenAI: used for the LLMs and the embeddings
    "TAVILY_API_KEY",  # Tavily: used for the web search
):
    if not os.environ.get(_key_name):
        os.environ[_key_name] = getpass.getpass(f"Enter {_key_name}: ")

In [None]:
from typing import List
from typing_extensions import TypedDict

class GraphState(TypedDict):
    """State dictionary passed between the nodes of the graph."""
    question: str  # the user's question; rewrite_question replaces it with the rewritten one
    documents: List[str]  # context for generation; NOTE(review): the nodes store Document objects here, not plain strings
    generation: str   # answer text produced by the generate_* nodes

**Route---router_chain and related function**

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

# System prompt: the model is asked to reply with the routing label only.
system = """You are an expert at routing a user question to a vectorstore or web search.
   if the question is related to Viswanathan or chess, answer 'vectorstore'. Otherwise, answer 'web'."""
route_prompt = ChatPromptTemplate.from_messages(
    [("system", system),
     ("human", "{question}")])
# Combines the system instruction and user's question dynamically to create a full prompt for the LLM.
# temperature=0: the reply is string-matched by route_question, so the router
# should be as repeatable as possible instead of sampling at the default temperature.
llm = ChatOpenAI(temperature=0)
router_chain = route_prompt | llm

#------------------------------------------------------------------------------

def route_question(state):
    """Choose the entry branch of the graph for the user's question.

    Args:
        state: Graph state; only ``state["question"]`` is read.

    Returns:
        ``"vectorstore"`` when the router model's reply mentions the
        vectorstore, otherwise ``"web_search"``.
    """
    question = state["question"]  # the user's question stored in the current state
    source = router_chain.invoke({"question": question})

    # The model may answer "Vectorstore", "'web'" or "vectorstore." instead of
    # the bare label, so match on normalised text rather than exact equality.
    decision = str(source.content).strip().lower()
    if "vectorstore" in decision:
        return "vectorstore"
    # The prompt defines web search as the "otherwise" case, so it is also the
    # fallback for unexpected replies (previously these returned None, which
    # has no matching edge in the graph).
    return "web_search"


**RAG_chain: used for generation in both web and vectorstore**

In [None]:
from langchain import hub
from langchain_core.output_parsers import StrOutputParser

# Fetch the prompt published as "rlm/rag-prompt" from the LangChain hub.
prompt = hub.pull("rlm/rag-prompt")
llm = ChatOpenAI()

# Generation chain shared by the web and vectorstore branches: prompt -> model -> string parser.
# The generate_* functions invoke it with the keys "context" and "question".
rag_chain = prompt | llm | StrOutputParser()



**The branch of web search**

In [None]:
from langchain_community.tools.tavily_search import TavilySearchResults


# max_results is the field that limits how many hits the Tavily tool returns;
# the previous k=3 keyword is not a field of this tool.
web_search_tool = TavilySearchResults(max_results=3)

def web_search(state):
    """Run a web search for the question and store the hits as context.

    Args:
        state: Graph state; only ``state["question"]`` is read.

    Returns:
        A partial state update: ``"documents"`` is a one-element list holding
        a Document whose text is the newline-joined search snippets, and
        ``"question"`` is passed through unchanged.

    Raises:
        RuntimeError: If the search tool returns something other than a list
            of hits (for example an error message).
    """
    # Imported locally so this function does not depend on an import that
    # only happens in a later cell of the notebook.
    from langchain_core.documents import Document

    print("-----This is for web search------")
    question = state["question"]

    hits = web_search_tool.invoke({"query": question})
    if not isinstance(hits, list):
        # Fail with a readable message instead of an opaque TypeError when
        # indexing into a non-list reply.
        raise RuntimeError(f"Web search did not return a list of results: {hits!r}")

    snippets = [hit["content"] for hit in hits if isinstance(hit, dict) and "content" in hit]
    combined = Document(page_content="\n".join(snippets))

    # "documents" is a list everywhere else in the graph, so wrap the single
    # combined document to keep the state shape consistent.
    return {"documents": [combined], "question": question}

#the generation part for web branch

def generate_web(state):
    """Generate the answer for the web-search branch.

    Reads ``state["question"]`` and ``state["documents"]``, passes them to
    ``rag_chain`` as ``question`` and ``context``, and returns both inputs
    together with the chain's output under ``"generation"``.
    """
    print("-------generation of web------")
    query, context_docs = state["question"], state["documents"]

    answer = rag_chain.invoke({"context": context_docs, "question": query})

    result = {"documents": context_docs, "question": query}
    result["generation"] = answer
    return result

**The branch of vectorstore**

In [None]:
# creating internal database
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.schema import Document

# Load the source text from a fixed absolute path.
# NOTE(review): presumably the file is uploaded to the Colab runtime beforehand — confirm.
loader = TextLoader('/content/Vishva.txt')

data=loader.load()

# Token-based splitter with chunk_size=300; every other splitter setting is left at its default.
text_splitter = CharacterTextSplitter.from_tiktoken_encoder(chunk_size=300)
docs = text_splitter.split_documents(data) # split document into smaller pieces (manageable chunks)

# Create embeddings using OpenAI, store them in a Chroma vectorstore, and make a retriever for searching later.
# NOTE(review): Chroma is imported from langchain_community here although the install cell also installs langchain-chroma — confirm which one is intended.
embedding = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(docs,embedding=embedding)

# Retriever with default settings; it is used by retrieve() below.
retriever = vectorstore.as_retriever()

def retrieve(state):
    """Look up documents for the question in the vectorstore retriever.

    Returns a partial state update with the retriever's result under
    ``"documents"`` and the unchanged ``"question"``.
    """
    print("------This is for vectorstore---")
    query = state["question"]
    return {"documents": retriever.invoke(query), "question": query}

**Vectorstore branch -- Grade part(used for checking relevance)**

In [None]:
# Chain that checks whether a retrieved document is relevant to the question.
system = """You are a grader assessing relevance of a retrieved document to a user question. \n
    If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n
    It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
    If the document is relevent, answer 'yes'; if the document is not relevent, answer 'no'."""
grade_prompt = ChatPromptTemplate.from_messages(
    [("system", system),
     ("human", "Retrieved document: \n\n {document} \n\n User question: {question}"),]
)

# temperature=0: the yes/no verdict is string-matched by grade_documents, so
# the grader should be as repeatable as possible.
llm = ChatOpenAI(temperature=0)
grade_chain = grade_prompt | llm

#-------------------------------------------------------------------------------
#function
def grade_documents(state):
    """Keep only the retrieved documents that the grader marks as relevant.

    Args:
        state: Graph state; ``state["question"]`` and ``state["documents"]``
            are read. Each document must expose ``page_content``.

    Returns:
        A partial state update whose ``"documents"`` list contains the
        documents graded "yes" (possibly none), plus the unchanged
        ``"question"``.
    """
    question = state["question"]
    documents = state["documents"]

    # Score each doc
    related_docs = []
    for d in documents:
        result = grade_chain.invoke({"question": question, "document": d.page_content})
        # The prompt asks for 'yes'/'no', but the model may reply "Yes",
        # "yes." or "'yes'"; normalise before comparing so that relevant
        # documents are not dropped because of casing or punctuation.
        verdict = str(result.content).strip().strip("'\"").lower()
        if verdict.startswith("yes"):
            related_docs.append(d)
    return {"documents": related_docs, "question": question}
#------------------------------------------------------------------------
#if there is relevant docs after checking, we can generate
def decide_to_generate(state):
    """Pick the next step after grading the retrieved documents.

    Args:
        state: Graph state; only ``state["documents"]`` is read.

    Returns:
        ``"rewrite_question"`` when no document survived grading, otherwise
        ``"generate"``.
    """
    related_documents = state["documents"]

    if not related_documents:
        return "rewrite_question"
    return "generate"

**Vectorstore branch -- rewriting the query**

In [None]:
# Chain that rewrites the question before another retrieval attempt.
# NOTE: the system prompt hard-codes the year 2007 for questions about Viswanathan's achievements.
system = """You a question re-writer that converts an input question to a better version that is optimized \n
     for vectorstore retrieval. If the question is about Viswanathan's achievement and the year is not 2007,  rewrite the year to 2007."""
rewrite_prompt = ChatPromptTemplate.from_messages(
    [("system", system),
     ("human",
      "Here is the initial question: \n\n {question} \n Formulate an improved question.",),
    ]
)

llm = ChatOpenAI()
# prompt -> model -> string parser; used by rewrite_question() below.
rewrite_chain = rewrite_prompt | llm | StrOutputParser()
#-----------------------------------------------------------------------------------------------------
#function
def rewrite_question(state):
    """Replace the question in the state with a rewritten version.

    The current question is sent through ``rewrite_chain``; the result is
    printed and returned as the new ``"question"``. ``"documents"`` is passed
    through unchanged.
    """
    print("---------rewrite the question---------")
    original_question, kept_docs = state["question"], state["documents"]

    improved = rewrite_chain.invoke({"question": original_question})
    for line in (
        "I can't find the relevent record of this year, so I will rewrite to another year",
        improved,
    ):
        print(line)
    return {"documents": kept_docs, "question": improved}

**Vectorstore branch -- Generation part**

In [None]:
def generate_vectorstore(state):
    """Generate the answer for the vectorstore branch.

    Feeds the question and the documents from the state into ``rag_chain``
    and returns them together with the chain's output as ``"generation"``.
    """
    query = state["question"]
    graded_docs = state["documents"]
    return {
        "documents": graded_docs,
        "question": query,
        "generation": rag_chain.invoke({"context": graded_docs, "question": query}),
    }

**Vectorstore branch -- Hallucination part(checking hallucination)**


In [None]:
# Chain that checks whether a generated answer is supported by the retrieved facts.
system = """You are a grader assessing whether an LLM generation is grounded in / supported by a set of retrieved facts. \n
       answer 'yes' or 'no'. 'yes' means that the answer is grounded in / supported by the set of facts."""
hallucination_prompt = ChatPromptTemplate.from_messages(
    [("system", system),
     ("human", "Set of facts: \n\n {documents} \n\n LLM generation: {generation}"),
    ]
)

# temperature=0: the yes/no verdict is string-matched downstream, so the
# grader should be as repeatable as possible.
llm = ChatOpenAI(temperature=0)
hallucination_chain = hallucination_prompt | llm


**Vectorstore branch -- final decision part (decides between "answer" and "rewrite")**

In [None]:
# Chain that checks whether a generated answer resolves the user's question.
system = """You are a grader assessing whether an answer addresses / resolves a question \n
     Give an answer 'yes' or 'no'. 'yes' means that the answer resolves the question."""
final_decision_prompt = ChatPromptTemplate.from_messages(
    [("system", system),
     ("human", "User question: \n\n {question} \n\n LLM generation: {generation}"),
    ])

# temperature=0: the yes/no verdict is string-matched downstream, so the
# grader should be as repeatable as possible.
llm = ChatOpenAI(temperature=0)
final_decision_chain = final_decision_prompt | llm

In [None]:
def check_hallucination_and_grade_answer(state):
    """Decide what happens to a freshly generated answer.

    Args:
        state: Graph state; ``"question"``, ``"documents"`` and
            ``"generation"`` are read.

    Returns:
        ``"not supported"`` when the hallucination grader does not answer yes,
        ``"solved"`` when both graders answer yes, and ``"not solved"`` when
        the answer is grounded but does not resolve the question.
    """
    question = state["question"]
    documents = state["documents"]
    generation = state["generation"]

    def _is_yes(reply):
        # Both prompts ask for 'yes'/'no', but the model may reply "Yes",
        # "yes." or "'yes'". The two checks previously compared against
        # different casings ("Yes" vs "yes"); normalise once for both.
        return str(reply.content).strip().strip("'\"").lower().startswith("yes")

    # Is the answer supported by the facts in the documents?
    grounded = hallucination_chain.invoke({"documents": documents, "generation": generation})
    if not _is_yes(grounded):
        return "not supported"

    # Does the generated answer resolve the user's question?
    resolved = final_decision_chain.invoke({"question": question, "generation": generation})
    return "solved" if _is_yes(resolved) else "not solved"

**Build graph**

In [None]:
from langgraph.graph import END, StateGraph, START

# Build the graph over the shared GraphState dictionary.
workflow = StateGraph(GraphState)

# Define the nodes (each node is one of the functions defined in the cells above)
workflow.add_node("web_search", web_search)
workflow.add_node("retrieve", retrieve)
workflow.add_node("grade_documents", grade_documents)
workflow.add_node("generate_vectorstore", generate_vectorstore)
workflow.add_node("generate_web", generate_web)
workflow.add_node("rewrite_question", rewrite_question)

# Add edges
# Entry point: route_question's return value selects the first node via this mapping.
workflow.add_conditional_edges(
    START,
    route_question,
    {
        "web_search": "web_search",
        "vectorstore": "retrieve",
    },
)
# Web branch: search -> generate -> end (no grading on this branch).
workflow.add_edge("web_search", "generate_web")
workflow.add_edge("generate_web", END)
# Vectorstore branch: retrieve -> grade, then either rewrite the question or generate.
workflow.add_edge("retrieve", "grade_documents")
workflow.add_conditional_edges(
    "grade_documents",
    decide_to_generate,
    {
        "rewrite_question": "rewrite_question",
        "generate": "generate_vectorstore",
    },
)
# A rewritten question goes back through retrieval and grading.
workflow.add_edge("rewrite_question", "retrieve")
# After generation: regenerate if unsupported, rewrite if unresolved, otherwise finish.
# NOTE(review): nothing in this graph bounds the regenerate / rewrite loops;
# presumably only LangGraph's recursion limit stops them — confirm.
workflow.add_conditional_edges(
    "generate_vectorstore",
    check_hallucination_and_grade_answer,
    {
        "not supported": "generate_vectorstore",
        "solved": END,
        "not solved": "rewrite_question",
    },
)

# Compile
app = workflow.compile()

**Test case 1 -- irrelevant documents, so the question is rewritten**

In [None]:
from pprint import pprint

input1 = {
    "question": "tell me about Viswanathan's achievement in 2003?"
}

# Stream the graph and print each node as it finishes. The output of the last
# node is kept explicitly instead of relying on the loop variable leaking out
# of the loop (which raises NameError when nothing is streamed).
final_state = None
for output in app.stream(input1):
    for key, value in output.items():
        pprint(f"Node '{key}':")
        final_state = value
    pprint("\n---\n")

if final_state is None:
    raise RuntimeError("The graph produced no output for input1")

# Print generation
pprint(final_state["generation"])

------This is for vectorstore---
"Node 'retrieve':"
'\n---\n'
"Node 'grade_documents':"
'\n---\n'
"Node 'generate_vectorstore':"
'\n---\n'
---------rewrite the question---------
I can't find the relevent record of this year, so I will rewrite to another year
Tell me about Viswanathan's achievement in 2007.
"Node 'rewrite_question':"
'\n---\n'
------This is for vectorstore---
"Node 'retrieve':"
'\n---\n'
"Node 'grade_documents':"
'\n---\n'
"Node 'generate_vectorstore':"
'\n---\n'
('In 2007, Viswanathan Anand achieved the title of World Chess Champion. '
 "Anand's success marked a significant milestone in his illustrious career as "
 'a chess Grandmaster. His victory in 2007 further solidified his reputation '
 'as one of the greatest chess players of all time.')


**Test case 2 -- relevant documents**

In [None]:
from pprint import pprint

input2 = {
    "question": "tell me about Viswanathan's achievements?"
}

# Stream the graph and print each node as it finishes. The output of the last
# node is kept explicitly instead of relying on the loop variable leaking out
# of the loop (which raises NameError when nothing is streamed).
final_state = None
for output in app.stream(input2):
    for key, value in output.items():
        pprint(f"Node '{key}':")
        final_state = value
    pprint("\n---\n")

if final_state is None:
    raise RuntimeError("The graph produced no output for input2")

# Print generation
pprint(final_state["generation"])

------This is for vectorstore---
"Node 'retrieve':"
'\n---\n'
"Node 'grade_documents':"
'\n---\n'
"Node 'generate_vectorstore':"
'\n---\n'
"Node 'generate_vectorstore':"
'\n---\n'
"Node 'generate_vectorstore':"
'\n---\n'
('Viswanathan Anand is a celebrated Indian chess Grandmaster and former World '
 "Chess Champion. He became India's first chess Grandmaster at the age of 18, "
 'bringing global attention to Indian chess. Anand quickly made his mark on '
 'the chess scene with his speed and accuracy, earning the nickname "Lightning '
 'Kid."')


**Test case3--web**

In [None]:
from pprint import pprint

input3 = {
    "question": "tell me about recent baseball's events?"
}

# Stream the graph and print each node as it finishes. The output of the last
# node is kept explicitly instead of relying on the loop variable leaking out
# of the loop (which raises NameError when nothing is streamed).
final_state = None
for output in app.stream(input3):
    for key, value in output.items():
        pprint(f"Node '{key}':")
        final_state = value
    pprint("\n---\n")

if final_state is None:
    raise RuntimeError("The graph produced no output for input3")

# Print generation
pprint(final_state["generation"])

-----This is for web search------
"Node 'web_search':"
'\n---\n'
-------generation of web------
"Node 'generate_web':"
'\n---\n'
('Recent baseball events include J.D. Martinez signing a one-year deal with '
 'the Mets and reigning NL Cy Young Blake Snell signing with the Giants. '
 'Shohei Ohtani declined to comment on an interpreter scandal, and the '
 "Dodgers' Mookie Betts hit his first home run of the 2024 season. Gunnar "
 'Henderson is back and ready to ramp up for the season in Sarasota.')
