### model 임포트




In [None]:
!pip install langchain_google_genai langchain_groq

In [3]:
import os

os.environ["GROQ_API_KEY"]=""
os.environ["TAVILY_API_KEY"]="tvly-dev-"
os.environ["GOOGLE_API_KEY"]=""

In [4]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_groq import ChatGroq

embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001"
)

llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash"
)
llm_groq = ChatGroq(
    model="llama-3.1-8b-instant",
    temperature=0,
    max_retries=2
)



In [None]:
result = llm.invoke("langgraph에 관한 발라드 음악을 작사해주세요")
print(result.content)

In [None]:
result = llm_groq.invoke("langgraph에 관한 발라드 음악을 작사해주세요")
print(result.content)

### Retriever 만들기

In [5]:
urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]

In [None]:
!pip install langchain_community

In [None]:
!pip install chromadb

In [16]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma

#3개의 사이트에서 가져온 문서를 chunck 단위로 나눈것을 list

docs = [WebBaseLoader(url).load() for url in urls]
docs_list =[item for sublist in docs for item in sublist]
# docs_list

text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=250, chunk_overlap=10
)

doc_splits = text_splitter.split_documents(docs_list)

vectorstore = Chroma.from_documents(
    documents=doc_splits,
    embedding=embeddings,
    collection_name="langgraph",
    persist_directory="./chroma_db"
)

retriever = vectorstore.as_retriever()


### LangChain RAG

In [21]:
from langchain import hub
from langchain_core.output_parsers import StrOutputParser

#promt
promt = hub.pull("rlm/rag-prompt")

#Post preprocessing
def format_docs(docs):
  return "\n\n".join(doc.page_content for doc in docs)

#rag chain
rag_chain = promt | llm | StrOutputParser()
rag_chain



ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})])
| ChatGoogleGenerativeAI(model='models/gemini-2.0-flash', google_api_key=SecretStr('**********'), client=<google.ai.generativelanguage_v1beta.services.generative_service.client.GenerativeServiceClient object at 0x7848fcc50a50>, default_metadata=(), model_kwargs={})
| StrOutputParser

### RAG TEST

In [22]:
#run
question = "tell me about agent memory"
generation = rag_chain.invoke({"context":docs, "question":question})
print(generation)

Agent memory is a key component in LLM-powered autonomous agent systems, providing the ability to retain and recall information. It includes short-term memory, utilizing in-context learning, and long-term memory, leveraging an external vector store for extended information retention. This memory enables agents to learn from past actions and improve future results.


### grade document class

In [38]:
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field

class GradeDocuments(BaseModel):
  """
  Binary score for relevance check on retrieved documents.
  """
  binary_score: str = Field(description="Documents are relevant to the question, 'yes' or 'no'")


In [39]:
structured_llm_grader =llm.with_structured_output(GradeDocuments)

# Prompt
system = """You are a grader assessing relevance of a retrieved document to a user question. \n
    If the document contains keyword(s) or semantic meaning related to the question, grade it as relevant. \n
    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question."""

ko_system = """
    검색된 문서와 사용자 질문의 관련성을 평가하는 채점자입니다.\n
    문서에 질문과 관련된 키워드 또는 의미론적 의미가 포함된 경우 관련성이 있는 것으로 평가합니다.\n
    문서가 질문과 관련이 있는지 여부를 나타내기 위해 이진 점수 '예' 또는 '아니오'를 제공합니다.
"""

grade_promt =ChatPromptTemplate.from_messages(
    {
        ("system", system),
        ("human", "Retrieved document: \n\n {document} \n\n User question: {question}"),
    }
)

retriever_grader = grade_promt | structured_llm_grader

In [40]:
question = "tell me about agent memory"
docs = retriever.get_relevant_documents(question)
docs_txt = docs[0].page_content
print(retriever_grader.invoke({"question":question , "document":docs_txt}))

binary_score='yes'


In [41]:
question = "tell me about agent seoul"
docs = retriever.get_relevant_documents(question)
docs_txt = docs[0].page_content
print(retriever_grader.invoke({"question":question , "document":docs_txt}))

binary_score='no'


### question Rewriter

In [42]:
### Question Re-writer
# Prompt
from langchain_core.prompts import ChatPromptTemplate

system = """You a question re-writer that converts an input question to a better version that is optimized \n
     for web search. Look at the input and try to reason about the underlying semantic intent / meaning."""


re_write_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        (
            "human",
            "Here is the initial question: \n\n {question} \n Formulate an improved question.",
        ),
    ]
)

question_rewriter = re_write_prompt | llm | StrOutputParser()

In [43]:
question_rewriter.invoke({"question":question})

'Here are a few options for improved search queries, depending on the specific intent:\n\n*   **Option 1 (General Information):** "Who is Agent Seoul?" (This is a direct and simple rephrase)\n*   **Option 2 (If "Agent Seoul" is likely related to a specific organization or media):** "Agent Seoul [Organization Name/Movie/Game]" (e.g., "Agent Seoul Valorant", "Agent Seoul Netflix")\n*   **Option 3 (If looking for real-world intelligence agents):** "Seoul intelligence agents" or "South Korean intelligence agencies"\n*   **Option 4 (If context suggests a fictional character):** "Agent Seoul character description" or "Agent Seoul fictional character"\n\nThe best option depends on what "Agent Seoul" refers to. Without more context, the first option is the safest.'

In [44]:
### Search

from langchain_community.tools.tavily_search import TavilySearchResults

web_search_tool = TavilySearchResults(k=3)

In [None]:
web_search_tool({"query":"tell me about Taj Mahal"})[1]

### langgrap 실습 및 구현

#### state 정의

In [84]:
from typing import List , Dict , Any ,TypedDict ,Annotated
from langchain.schema import Document

#전달해주는 state 상태정의
class State(TypedDict):
  question:str
  orginal_question:str
  documents:List[Document]
  web_search:str
  generation:str
  web_results:List[Dict[str,Any]]
  relevance_score:str

class GraphState(State):
  question:Annotated[str , "user qeustion" ]
  documents:Annotated[List[Document] , []]
  orginal_question:Annotated[str , "original question" ]
  web_search:Annotated[str , "web search" ]
  generation:Annotated[str , "generation" ]
  web_results:Annotated[List[Dict[str,Any]] , [{}]]
  relevance_score:Annotated[str , "relevance_score" ]

### Node 정의


In [81]:
#retrieve 정의함수
def retrieve(state:GraphState):
  print("---Retrieve---")
  question = state["question"]

  #검색 실행
  documents = retriever.get_relevant_documents(question)
  # return State(documents=[f"jfwpjfpewpfwejpfjpwef{question}"])
  return GraphState(question=question, documents=documents)

In [104]:
documents = retrieve(State(question="tell me about SEOUL"))
# documents

---Retrieve---


In [114]:
#grade 노드
def grade_documents(state:GraphState):
  """
  check document relevance to question
  """
  print("---CHECKING DOCUMENT RELEVANT IS TO QUESTION OR NOT---")
  question = state["question"]
  documents = state["documents"]

  #문서가 없으면 바로 웹 검색하도록
  if not documents:
    return GraphState(
        question=question,
        orginal_question=state.get("original_questrion", question),
        documents=[],
        web_search="yes",
        web_results=[{}],
        relevance_score="no")

  # 문서 평가
  filtered_docs = []
  web_search ="no"
  relevant_count = 0
  for document in documents:
    score= retriever_grader.invoke(
        {
            "question":question,
            "document":document.page_content
        }
    )
    grade = score.binary_score
    if grade == "yes":
      print("---GRADE : DOCUMENT RELEVANT---")
      filtered_docs.append(document)
      relevant_count +=1
    else:
      print("---GRADE : DOCUMENT NOT RELEVANT---")

  #관련성 너무 적으면 웹서치 할 수 있도록 함
  if relevant_count < 2:
    web_search = "yes"
    print(f"---ONLY {relevant_count} RELEVANT DOCUMENTS, WEB SEARCH NEEDED---")
  # return GraphState(
  #     question=question,
  #     orginal_question=state.get("original_questrion", question),
  #     documents=filtered_docs,
  #     web_search=web_search,
  #     web_results=[{}],
  #     relevance_score="yes" if filtered_docs else "no"
  # )

  return GraphState(question=question,web_search=web_search,documents=filtered_docs,)




In [115]:
grade_state = grade_documents(documents)

---CHECKING DOCUMENT RELEVANT IS TO QUESTION OR NOT---
---GRADE : DOCUMENT NOT RELEVANT---
---GRADE : DOCUMENT NOT RELEVANT---
---GRADE : DOCUMENT NOT RELEVANT---
---GRADE : DOCUMENT NOT RELEVANT---
---ONLY 0 RELEVANT DOCUMENTS, WEB SEARCH NEEDED---


In [116]:
### Question Re-writer
# Prompt
from langchain_core.prompts import ChatPromptTemplate

system = """You a question re-writer that converts an input question to a better version that is optimized \n
     for web search. Look at the input and try to reason about the underlying semantic intent / meaning."""


re_write_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        (
            "human",
            "Here is the initial question: \n\n {question} \n Formulate an improved question.",
        ),
    ]
)

question_rewriter = re_write_prompt | llm | StrOutputParser()

In [119]:
def transform_query(state:GraphState):
  """질문 재작성"""
  print("---TRANSFORM QUERY---")

  #처음 질문 저장
  original_question = state.get("original_question", state["question"])
  question = state["question"]
  documents = state["documents"]
  #질문 재작성
  better_question = question_rewriter.invoke({"question":question})
  return GraphState(
      question=better_question ,
      orginal_question=original_question,
      documents=documents
      )

In [120]:
transform_query(grade_state)

---TRANSFORM QUERY---


{'question': 'Here are a few options for re-writing the question, depending on the specific intent:\n\n**Option 1 (General Overview):**\n\n*   **Search Query:** "Seoul, South Korea: history, culture, and attractions"\n\n**Option 2 (Focus on Travel/Tourism):**\n\n*   **Search Query:** "Top tourist attractions and things to do in Seoul"\n\n**Option 3 (Focus on Current Events/News):**\n\n*   **Search Query:** "Recent news and events in Seoul, South Korea"\n\n**Option 4 (Focus on History):**\n\n*   **Search Query:** "History of Seoul, South Korea: from ancient times to present"\n\n**Explanation of Improvements:**\n\n*   **Specificity:** The original question "tell me about SEOUL" is very broad. The re-written options add context (e.g., "South Korea", "history", "tourist attractions") to narrow the search and provide more relevant results.\n*   **Keywords:** Using keywords like "tourist attractions," "history," and "news" helps search engines understand the desired information.\n*   **Phras