In [1]:
# Load variables from a .env file into the process environment.
# NOTE(review): the API clients created in later cells presumably read their
# keys from the environment — confirm which variables are required.
from dotenv import load_dotenv
load_dotenv()

True

In [3]:
import sys
from pathlib import Path

# Make the project-local packages imported below (scripts, utils, reranker,
# config) importable. The project root is taken to be the parent of the current
# working directory, so the notebook must be run from a direct subfolder.
project_root = Path().resolve().parent
_project_root_entry = str(project_root)

# Insert only when the root is not already first on the path, so re-running
# this cell does not stack duplicate entries in sys.path.
if sys.path[:1] != [_project_root_entry]:
    sys.path.insert(0, _project_root_entry)

In [4]:
from langgraph.graph import add_messages
from langchain.tools import tool
from langchain.messages import ToolMessage, HumanMessage, AnyMessage
from langchain_upstage import UpstageEmbeddings, ChatUpstage
from langgraph.graph import StateGraph, START, END
from langgraph.prebuilt import tools_condition
from scripts.retrieve import load_retriever
from utils.utils import format_context
from reranker.rrf import ReciprocalRankFusion
from langchain_upstage import UpstageEmbeddings, ChatUpstage
from langchain_core.documents import Document
from config import output_path_prefix
import pickle

# Read the pickled document chunks that the retriever tool searches over.
# NOTE: pickle.load can execute arbitrary code embedded in the file, so only
# load pickles that were produced by this project.
with open(f"{output_path_prefix}_split_documents.pkl", mode="rb") as f:
    split_documents = pickle.load(f)

from functools import lru_cache


@lru_cache(maxsize=1)
def _load_hybrid_retrievers():
    """Build the BM25 and FAISS retrievers once and reuse them across queries.

    Previously the embeddings client and both retrievers were rebuilt on every
    tool call. The result is cached against the module-level
    ``split_documents``; call ``_load_hybrid_retrievers.cache_clear()`` after
    replacing that variable.

    Returns:
        The ``(bm25_retriever, faiss_retriever)`` pair from ``load_retriever``.
    """
    embeddings = UpstageEmbeddings(model="embedding-passage")
    bm25_retriever, faiss_retriever = load_retriever(
        split_documents, embeddings, kiwi=False, search_k=10
    )
    return bm25_retriever, faiss_retriever


@tool
def retriever(query: str) -> dict:
    """Retrieve documents from the vector database.

    Args:
        query: The query to retrieve documents from the vector database.
    """
    # The docstring above is kept verbatim: the @tool decorator uses it as the
    # tool description, so it is effectively runtime text.
    # Returns a dict with "documents" (the fused documents) and "context"
    # (their formatted form); the old ``list[Document]`` annotation was wrong.
    bm25_retriever, faiss_retriever = _load_hybrid_retrievers()

    # Query both retrievers first, then score each result list separately,
    # in the same order as before.
    faiss_hits = faiss_retriever.invoke(query)
    bm25_hits = bm25_retriever.invoke(query)
    faiss_hits = ReciprocalRankFusion.calculate_rank_score(faiss_hits)
    bm25_hits = ReciprocalRankFusion.calculate_rank_score(bm25_hits)

    # Fuse the concatenated lists; cutoff=4 is presumably the number of fused
    # documents kept — confirm against ReciprocalRankFusion.get_rrf_docs.
    rrf_docs = ReciprocalRankFusion.get_rrf_docs(faiss_hits + bm25_hits, cutoff=4)
    context = format_context(rrf_docs)

    return {"documents": rrf_docs, "context": context}


# Tools available to the graph, plus a name -> tool lookup used by tool_node
# to dispatch each requested tool call.
tools = [retriever]
tools_by_name = {registered.name: registered for registered in tools}

def tool_node(state: dict):
    """Run every tool call requested by the latest message in the state.

    Args:
        state: Graph state. ``state["messages"][-1]`` must expose ``tool_calls``,
            a list of dicts with ``"name"``, ``"args"`` and ``"id"`` keys.

    Returns:
        A state update. ``"messages"`` holds one ``ToolMessage`` per tool call.
        When at least one tool ran, ``"documents"`` holds the documents of all
        calls in call order and ``"context"`` holds the single context (one
        call) or all contexts joined by a blank line (several calls). With no
        tool calls only an empty ``"messages"`` list is returned, so existing
        ``documents``/``context`` state is not overwritten.

    Raises:
        KeyError: If a tool call names a tool missing from ``tools_by_name`` or
            a tool result lacks the ``"documents"``/``"context"`` keys.
    """
    tool_messages = []
    documents = []
    contexts = []

    for tool_call in state["messages"][-1].tool_calls:
        # Named selected_tool so the imported `tool` decorator is not shadowed.
        selected_tool = tools_by_name[tool_call["name"]]
        observation = selected_tool.invoke(tool_call["args"])

        # Send only the formatted context to the model; the raw observation
        # dict (including the document objects) is not valid message content.
        tool_messages.append(
            ToolMessage(content=observation["context"], tool_call_id=tool_call["id"])
        )
        documents.extend(observation["documents"])
        contexts.append(observation["context"])

    # Previously `observation` was read after the loop: that raised
    # UnboundLocalError with no tool calls and dropped all but the last result.
    if not tool_messages:
        return {"messages": tool_messages}

    if len(contexts) == 1:
        context = contexts[0]
    else:
        context = "\n\n".join(str(part) for part in contexts)

    return {"messages": tool_messages, "documents": documents, "context": context}

In [7]:
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field
from typing import Literal

In [6]:
# Chat model shared by the router, the graders and the generation/rewrite
# calls in the cells below.
llm = ChatOpenAI(
    model="gpt-5-mini",
    temperature=0,
)

In [8]:
class Route(BaseModel):
    # Routing decision produced by the LLM as structured output.
    # `step` is required: the previous default of None was not a valid value
    # for the Literal type and allowed the decision to be omitted.
    step: Literal["vector", "web_search"] = Field(
        description="Given a user question choose to route it to web search or a vectorstore."
    )


# Wrap the chat model so that its output is returned as a Route instance.
router = llm.with_structured_output(Route)

In [9]:
# Prompt template for the router; `{question}` is filled in with str.format.
# NOTE(review): the topics listed here (agents, prompt engineering, adversarial
# attacks) may not describe the indexed corpus — later cells query the retriever
# about the "AI Index 2025" report. Confirm the description matches the data,
# otherwise such questions could be routed to web search.
ROUTE_PROMPT = """
You are an expert at routing a user question to a vectorstore or web search.
The vectorstore contains documents related to agents, prompt engineering, and adversarial attacks.
Use the vectorstore for questions on these topics. Otherwise, use web-search.
question: {question}
"""

In [11]:
# Sample question used to try out the router in the next cell.
question = "What are the types of agent memory?"


In [12]:
# Fill the routing template with the sample question and display the router's
# structured decision as the cell output.
prompt = ROUTE_PROMPT.format(question=question)
router.invoke([dict(role="user", content=prompt)])

Route(step='vector')

In [13]:
class GradeDocuments(BaseModel):
    """Grade documents using a binary score for relevance check."""

    # Restricted to the two allowed answers so values such as "Yes" or
    # "relevant" fail validation instead of reaching downstream comparisons.
    binary_score: Literal["yes", "no"] = Field(
        description="Relevance score: 'yes' if relevant, or 'no' if not relevant"
    )


# Prompt template for the relevance grader; `{context}` and `{question}` are
# filled in with str.format.
GRADE_PROMPT = (
    "You are a grader assessing relevance of a retrieved document to a user question. \n "
    "Here is the retrieved document: \n\n {context} \n\n"
    "Here is the user question: {question} \n"
    "If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n"
    "Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question."
)

# Wrap the chat model so that its output is returned as a GradeDocuments instance.
grader = llm.with_structured_output(GradeDocuments)

In [17]:
# Retrieve context for a sample question and build the relevance-grading
# prompt from it.
question = "AI Index 2025 연례보고서의 발행 기관과 발행 시기는 언제인가?"
context = retriever.invoke(question)["context"]
prompt = GRADE_PROMPT.format(question=question, context=context)

/home/jake/RAG-end2end/faiss_index


In [19]:
# Display the retrieved context for manual inspection.
context



In [18]:
# Ask the relevance grader to score the prompt built above; the structured
# result is shown as the cell output.
grader.invoke([dict(role="user", content=prompt)])

GradeDocuments(binary_score='yes')

In [20]:
# Prompt template for answer generation; `{question}` and `{context}` are
# filled in with str.format. The text is sent to the model as-is.
GENERATE_PROMPT = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, just say that you don't know. "
    "Use three sentences maximum and keep the answer concise.\n"
    "Question: {question} \n"
    "Context: {context}"
)

In [21]:
# Re-declare the sample question and fetch its context so this cell does not
# depend on the grading cells above having been run.
question = "AI Index 2025 연례보고서의 발행 기관과 발행 시기는 언제인가?"
context = retriever.invoke(input=question)["context"]

/home/jake/RAG-end2end/faiss_index


In [22]:
# Build the generation prompt from the question and retrieved context, call
# the chat model, and display the returned message.
prompt = GENERATE_PROMPT.format(context=context, question=question)
answer = llm.invoke([dict(role="user", content=prompt)])
answer

AIMessage(content='발행 기관: AI Index 운영위원회(AI Index Steering Committee) 및 스탠포드대학교 인간중심 AI 연구소(Institute for Human‑Centered AI).  \n발행 시기: 2025년 4월.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 251, 'prompt_tokens': 1254, 'total_tokens': 1505, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 192, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-5-mini-2025-08-07', 'system_fingerprint': None, 'id': 'chatcmpl-Cw3Chx9XbTGpkcQ4uV6F7FtuEbOoZ', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='lc_run--6774133c-07e8-4fde-aef5-37e209512304-0', usage_metadata={'input_tokens': 1254, 'output_tokens': 251, 'total_tokens': 1505, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 192}})

In [23]:
class GradeHallucinations(BaseModel):
    """Binary score for hallucination present in generation answer."""

    # Restricted to the two allowed answers so any other string fails
    # validation instead of reaching downstream comparisons.
    binary_score: Literal["yes", "no"] = Field(
        description="Answer is grounded in the facts, 'yes' or 'no'"
    )


# Wrap the chat model so that its output is returned as a GradeHallucinations instance.
hallucination_grader = llm.with_structured_output(GradeHallucinations)

In [24]:
# Prompt template for the groundedness grader; `{answer}` and `{context}` are
# filled in with str.format. Stray double quotes that used to end the two
# "Here is ..." lines have been removed.
HALLUCINATION_PROMPT = """
You are a grader assessing whether an LLM generation is grounded in / supported by a set of retrieved facts. \n 
     Give a binary score 'yes' or 'no'. 'Yes' means that the answer is grounded in / supported by the set of facts.
     Here is the LLM generation: \n\n {answer} \n\n
     Here is the set of facts: \n\n {context} \n\n
"""

# Pass only the answer text: formatting the message object itself would embed
# its full repr (token usage, ids, metadata) in the prompt.
prompt = HALLUCINATION_PROMPT.format(answer=answer.content, context=context)
res = hallucination_grader.invoke([{"role": "user", "content": prompt}])
res

GradeHallucinations(binary_score='yes')

In [25]:
class GradeAnswer(BaseModel):
    """Binary score to assess answer addresses question."""

    # Restricted to the two allowed answers so any other string fails
    # validation instead of reaching downstream comparisons.
    binary_score: Literal["yes", "no"] = Field(
        description="Answer addresses the question, 'yes' or 'no'"
    )


# Wrap the chat model so that its output is returned as a GradeAnswer instance.
answer_grader = llm.with_structured_output(GradeAnswer)


In [26]:
# Prompt template for the answer-usefulness grader; `{question}` and
# `{answer}` are filled in with str.format. The unbalanced quote in
# "Yes'" has been corrected to "'Yes'".
ANSWER_PROMPT = """
You are a grader assessing whether an answer addresses / resolves a question \n 
Give a binary score 'yes' or 'no'. 'Yes' means that the answer resolves the question.
Here is the question: {question} \n
Here is the answer: {answer} \n
"""

# Grade the generated answer text against the question and display the result.
prompt = ANSWER_PROMPT.format(question=question, answer=answer.content)
res = answer_grader.invoke([{"role": "user", "content": prompt}])
res



GradeAnswer(binary_score='yes')

In [27]:
# Prompt template for query rewriting; `{question}` is filled in with
# str.format. The opening sentence was missing its verb ("You a question
# re-writer") and now reads "You are a question re-writer".
REWRITE_PROMPT = """
You are a question re-writer that converts an input question to a better version that is optimized \n 
for vectorstore retrieval. Look at the input and try to reason about the underlying semantic intent / meaning.
Here is the initial question: \n\n {question} \n Formulate an improved question.
"""
# Re-declare the sample question so this cell can run on its own, then display
# the model's rewritten question.
question = "AI Index 2025 연례보고서의 발행 기관과 발행 시기는 언제인가?"
prompt = REWRITE_PROMPT.format(question=question)
llm.invoke([{"role": "user", "content": prompt}])


AIMessage(content='AI Index 2025 연례보고서(AI Index Report 2025 Annual Report)의 발행 주체(발행 기관)는 어디이며, 공식 발행일(출간일·배포일)은 언제인가?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 440, 'prompt_tokens': 82, 'total_tokens': 522, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 384, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-5-mini-2025-08-07', 'system_fingerprint': None, 'id': 'chatcmpl-Cw3KMKvA7QZamrpkduQjuQyuLN9cV', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='lc_run--2e95395a-c88d-41d8-afd5-c540e0d6c16c-0', usage_metadata={'input_tokens': 82, 'output_tokens': 440, 'total_tokens': 522, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 384}})

In [30]:
from langchain_tavily import TavilySearch

# Web-search tool for questions routed away from the vector store.
# TavilySearch limits results through `max_results`; the previous `k=3` is not
# a TavilySearch parameter, so the intended limit of 3 likely never took effect.
web_search_tool = TavilySearch(max_results=3)

  class TavilyResearch(BaseTool):  # type: ignore[override, override]
  class TavilyResearch(BaseTool):  # type: ignore[override, override]
