# Corrective RAG
- 논문: https://arxiv.org/pdf/2401.15884

**배경**

- RAG의 답변 결과는 검색된 문서의 관련성에 크게 의존적
- 따라서, 검색이 잘못된 경우 답변의 품질이 저하될 우려가 있음

**제안**

- 사용자 입력 쿼리(query)에 대하여 검색된 문서의 품질을 평가
- 한마디로, 검색된 결과가 사용자 입력 쿼리와 높은 관련성을 갖도록 쿼리를 수정(Corrective)

In [None]:
# Load environment variables from a local .env file.
# NOTE(review): presumably this supplies the API keys used by the LLM, Upstage and
# LangSmith clients below — confirm against the project's .env.
from dotenv import load_dotenv
load_dotenv()

In [1]:
# Start LangSmith tracing for this notebook; the argument is the project name
# that the cell output reports back.
from langchain_teddynote import logging
logging.langsmith("CH17-LANGGRAPH")

LangSmith 추적을 시작합니다.
[프로젝트명]
CH17-LANGGRAPH


In [8]:
# Build the PDF question-answering pipeline from a single source PDF.
from rag.pdf import PDFRetrievalChain
pdf = PDFRetrievalChain(["./data/SPRI_AI_Brief_2023년12월호_F.pdf"]).create_chain()
# Module-level aliases for the two pieces the node functions use.
pdf_retriever = pdf.retriever  # document retriever
pdf_chain = pdf.chain  # chain invoked with {"question", "context"} to produce an answer

### 노드 함수들 정의

In [4]:
from langchain_upstage import UpstageGroundednessCheck
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser

# Shared groundedness checker instance; relevance_check() calls its run() method.
upstage_ground_checker = UpstageGroundednessCheck()

In [6]:
from typing import TypedDict


class GraphState(TypedDict):
    """State dictionary read and returned by the node functions in this notebook."""

    # The user's question (replaced by the improved question after a rewrite).
    question: str
    # Text of the documents returned by the search.
    context: str
    # The generated answer.
    answer: str
    # Relevance of the answer with respect to the documents.
    relevance: str

In [7]:
# Document retrieval node
def retrieve_document(state: GraphState) -> GraphState:
    """Retrieve documents for the question and store their text as the context.

    Args:
        state: Current state; only ``state["question"]`` is read.

    Returns:
        A partial state holding just ``context``: the ``page_content`` of every
        retrieved document, joined with newlines.
    """
    # Use the module-level alias, matching how llm_answer uses ``pdf_chain``.
    retrieved_docs = pdf_retriever.invoke(state["question"])
    # Separate documents with a newline so the last word of one chunk does not
    # fuse with the first word of the next.
    return GraphState(context="\n".join(doc.page_content for doc in retrieved_docs))

In [9]:
# Answer generation node: produces the answer with the PDF chain
def llm_answer(state: GraphState) -> GraphState:
    """Generate an answer from the current question and context.

    Args:
        state: Current state; ``question`` and ``context`` are read.

    Returns:
        A state holding the chain's output as ``answer`` together with the
        unchanged ``context`` and ``question``.
    """
    question = state["question"]
    context = state["context"]
    generated = pdf_chain.invoke({"question": question, "context": context})
    return GraphState(answer=generated, context=context, question=question)

In [10]:
# Question rewriting node
def rewrite(state):
    """Ask an LLM to rewrite the question and return the state with it replaced.

    Args:
        state: Current ``GraphState``; ``question``, ``context`` and ``answer``
            are all read, so each must already be present.

    Returns:
        A ``GraphState`` whose ``question`` is the rewritten question and whose
        ``context`` and ``answer`` are carried over unchanged.
    """
    question = state["question"]
    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                """
                You are a professional prompt rewriter. 
                Your task is to improve the question. Question must be written in same language. Don't narrate, just reponse an improved question.
                """
            ),
            # The role and its template must form one (role, template) tuple.
            # As two separate list items, the bare string "human" becomes a
            # human message of its own whose content is the word "human".
            (
                "human",
                """
            Look at the input and try to reason about the underlying semantic intent / meaning.
            Here is the initial question: 
            ----------------------{question}----------------------
            Formulate an improved question:
            """,
            ),
        ]
    )
    # Question rewriting model
    model = ChatOpenAI(model="gpt-4o-mini", temperature=0)

    chain = prompt | model | StrOutputParser()
    response = chain.invoke({"question": question})
    return GraphState(
        context=state["context"], question=response, answer=state["answer"]
    )

In [13]:
# Run the relevance check. Possible results: grounded, notGrounded, notSure
def relevance_check(state: GraphState) -> GraphState:
    """Check the answer against the context and record the verdict.

    Args:
        state: Current state; ``context``, ``answer`` and ``question`` are read.

    Returns:
        A state holding the checker's result as ``relevance`` together with the
        unchanged ``context``, ``answer`` and ``question``.
    """
    context, answer = state["context"], state["answer"]
    verdict = upstage_ground_checker.run({"context": context, "answer": answer})
    return GraphState(
        relevance=verdict,
        context=context,
        answer=answer,
        question=state["question"],
    )

def is_relevant(state: GraphState) -> str:
    """Return the relevance verdict stored in the state.

    Args:
        state: Current state; ``state["relevance"]`` must already be set.

    Returns:
        The value of ``state["relevance"]``, unchanged.
        NOTE(review): presumably used as the routing key of a conditional
        edge — confirm against the graph definition.
    """
    return state["relevance"]