# 시작

In [None]:
from google.colab import userdata
import os

# Copy each Colab secret into the process environment under the same name,
# in the same order as listed here.
for _secret_name in (
    'OPENAI_API_KEY',
    'LANGSMITH_TRACING',
    'LANGSMITH_ENDPOINT',
    'LANGSMITH_API_KEY',
    'LANGSMITH_PROJECT',
    'COHERE_API_KEY',
    'TAVILY_API_KEY',
):
    os.environ[_secret_name] = userdata.get(_secret_name)

# LangSmith를 활용한 RAG 애플리케이션 평가

In [None]:
!pip install langchain-core==0.2.30 langchain-openai==0.1.21 langchain-community==0.2.12 GitPython==3.1.43 langchain-chroma==0.1.2 chromadb==0.5.3 ragas==0.1.4 nest-asyncio==1.6.0 pydantic==2.9.2 numpy==1.26.0

In [None]:
from langchain_community.document_loaders import GitLoader

def file_filter(file_path: str) -> bool:
    """Return True when *file_path* ends with the ".md" suffix."""
    markdown_suffix = ".md"
    return file_path.endswith(markdown_suffix)

# Configure a GitLoader for the LangChain repository, passing file_filter as
# the loader's file filter.
loader = GitLoader(
    clone_url="https://github.com/langchain-ai/langchain",
    repo_path="./langchain",
    branch="master",
    file_filter=file_filter,
)

# Load the documents and print how many were loaded.
documents = loader.load()
print(f"로드된 문서 개수: {len(documents)}")

In [None]:
# Copy each document's "source" metadata value into a "filename" key.
# NOTE(review): presumably required by the Ragas test-set generator used
# in the next cell — confirm.
for document in documents:
  document.metadata["filename"] = document.metadata["source"]

In [None]:
import nest_asyncio
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# NOTE(review): presumably needed so that asyncio code can run inside the
# notebook's already-running event loop — confirm.
nest_asyncio.apply()

# Test-set generator that uses gpt-4o as both the generator and critic model.
generator = TestsetGenerator.from_langchain(
    generator_llm=ChatOpenAI(model="gpt-4o"),
    critic_llm=ChatOpenAI(model="gpt-4o"),
    embeddings=OpenAIEmbeddings(),
)

# Request a test set of size 4 from the loaded documents, weighted
# 0.5 simple, 0.25 reasoning and 0.25 multi_context.
testset = generator.generate_with_langchain_docs(
    documents,
    test_size=4,
    distributions={simple: 0.5, reasoning: 0.25, multi_context: 0.25},
)

In [None]:
# Display the generated test set via its pandas conversion.
testset.to_pandas()

In [None]:
from langsmith import Client

# Name of the LangSmith dataset that holds the evaluation examples.
dataset_name = "agent-book"
client = Client()

# Delete any existing dataset with this name so a new one is created below.
if client.has_dataset(dataset_name=dataset_name):
  client.delete_dataset(dataset_name=dataset_name)

dataset = client.create_dataset(dataset_name=dataset_name)

In [None]:
from datasets.utils import metadata
# Split every generated test record into three parallel lists:
# example inputs, reference outputs and metadata.
inputs, outputs, metadatas = [], [], []

for testset_record in testset.test_data:
    inputs.append({"question": testset_record.question})
    outputs.append(
        {
            "contexts": testset_record.contexts,
            "ground_truth": testset_record.ground_truth,
        }
    )
    metadatas.append({"evolution_type": testset_record.evolution_type})

In [None]:
# Upload the prepared inputs, outputs and metadata as examples of the dataset.
client.create_examples(
    dataset_id=dataset.id,
    inputs=inputs,
    outputs=outputs,
    metadatas=metadatas,
)

In [None]:
from typing import Any
from langchain_core.embeddings import Embeddings
from langchain_core.language_models import BaseChatModel
from langsmith.schemas import Example, Run
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.llms import LangchainLLMWrapper
from ragas.metrics.base import Metric, MetricWithEmbeddings, MetricWithLLM

class RagasMetricEvaluator:
    """Adapter that scores a run against a dataset example with a Ragas metric.

    The metric object passed in is configured in place: it receives a wrapped
    LLM and/or wrapped embeddings, depending on which metric base classes it
    derives from.
    """

    def __init__(self, metric: Metric, llm: BaseChatModel, embeddings: Embeddings):
        self.metric = metric

        # Attach the LLM and the embeddings only to metrics of the matching kind.
        if isinstance(metric, MetricWithLLM):
            metric.llm = LangchainLLMWrapper(llm)
        if isinstance(metric, MetricWithEmbeddings):
            metric.embeddings = LangchainEmbeddingsWrapper(embeddings)

    def evaluate(self, example: Example, run: Run) -> dict[str, Any]:
        """Score one run and return the result keyed by the metric's name.

        Args:
            example: Dataset example providing the "question" input and the
                "ground_truth" output.
            run: Run whose outputs hold the "answer" and the "contexts"
                (objects exposing ``page_content``).

        Returns:
            ``{"key": <metric name>, "score": <value returned by metric.score>}``.
        """
        retrieved_texts = [
            document.page_content for document in run.outputs["contexts"]
        ]

        # Row in the shape passed to the metric's score method.
        row = {
            "question": example.inputs["question"],
            "answer": run.outputs["answer"],
            "contexts": retrieved_texts,
            "ground_truth": example.outputs["ground_truth"],
        }
        value = self.metric.score(row)

        return {"key": self.metric.name, "score": value}


In [None]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from ragas.metrics import answer_relevancy, context_precision

# Metrics to evaluate, plus the chat model and embeddings handed to each one.
metrics = [context_precision, answer_relevancy]
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# One bound evaluate method per metric.
evaluators = [
    RagasMetricEvaluator(metric, llm, embeddings).evaluate
    for metric in metrics
]

In [None]:
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

# Build a Chroma vector store from the loaded documents using the
# text-embedding-3-small embedding model.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
db = Chroma.from_documents(documents, embeddings)

In [None]:
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

# NOTE(review): this cell repeats the previous cell verbatim. Running both
# builds the store twice (extra embedding calls, and possibly duplicate
# entries if both calls write to the same collection) — confirm whether this
# cell can be dropped.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
db = Chroma.from_documents(documents, embeddings)

In [None]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_openai import ChatOpenAI

# Prompt template with {context} and {question} placeholders.
prompt = ChatPromptTemplate.from_template('''
다음 문맥만을 고려해 질문에 대답하세요.

문맥: """{context}"""
질문: {question}
''')

model = ChatOpenAI(model="gpt-4o-mini",temperature=0)
retriever = db.as_retriever()

# The chain output is a dict: "question" (the input passed through),
# "context" (the retriever result) and "answer" (prompt -> model -> string),
# so callers can read both the answer and the retrieved context.
chain = RunnableParallel(
    {
        "question": RunnablePassthrough(),
        "context": retriever,
    }
).assign(
    answer=prompt | model | StrOutputParser()
)

In [None]:
def predict(inputs: dict[str, Any]) -> dict[str, Any]:
    """Run the RAG chain for one dataset example.

    Args:
        inputs: Example inputs; the "question" entry is sent to the chain.

    Returns:
        A dict holding the chain's "context" value under "contexts" and its
        "answer" value under "answer".
    """
    result = chain.invoke(inputs["question"])
    contexts, answer = result["context"], result["answer"]
    return {"contexts": contexts, "answer": answer}

In [None]:
from langsmith.evaluation import evaluate

# Run predict over the "agent-book" dataset and score it with the evaluators.
evaluate(
    predict,
    data="agent-book",
    evaluators=evaluators,
)

In [None]:
from uuid import UUID
import ipywidgets as widgets
from IPython.display import display
from langsmith import Client

def display_feedback_buttons(run_id: UUID) -> None:
    """Show Good/Bad buttons that send "thumbs" feedback for *run_id*.

    Clicking Good sends a score of 1 and clicking Bad sends a score of 0 via
    ``Client.create_feedback``; a confirmation message is printed afterwards.

    Args:
        run_id: ID of the run the feedback is attached to.
    """
    # Prepare the Good and Bad buttons.
    good_button = widgets.Button(
        description="Good",
        button_style="success",
        icon="thumbs-up",
    )
    bad_button = widgets.Button(
        description="Bad",
        button_style="danger",
        icon="thumbs-down",
    )

    def _score_for(button: widgets.Button) -> int:
        """Map a clicked button to its feedback score."""
        if button == good_button:
            return 1
        if button == bad_button:
            return 0
        raise ValueError(f"Invalid button: {button}")

    # Handler executed on click.
    def on_button_clicked(button: widgets.Button) -> None:
        score = _score_for(button)
        Client().create_feedback(run_id=run_id, key="thumbs", score=score)
        print("피드백을 전송 했습니다.")

    for feedback_button in (good_button, bad_button):
        feedback_button.on_click(on_button_clicked)

    display(good_button, bad_button)

In [None]:
from langchain_core.tracers.context import collect_runs

# Use collect_runs to obtain the LangSmith trace ID (run ID).
with collect_runs() as runs_cb:
  output = chain.invoke("LangChain의 개요를 알려줘")
  print(output["answer"])
  # Take the ID of the first traced run collected inside this block.
  run_id = runs_cb.traced_runs[0].id

# Show the feedback buttons for that run.
display_feedback_buttons(run_id)

# Advanced RAG

In [None]:
!pip install langchain-core==0.3.0 langchain-openai==0.2.0 langchain-community==0.3.0 GitPython==3.1.43 langchain-chroma==0.1.4 tavily-python==0.5.0

In [None]:
from langchain_community.document_loaders import GitLoader

def file_filter(file_path: str) -> bool:
    """Tell whether *file_path* ends with ".md"."""
    wanted_suffix = ".md"
    return file_path.endswith(wanted_suffix)

# Configure a GitLoader for the LangChain repository, passing file_filter as
# the loader's file filter.
loader = GitLoader(
    clone_url="https://github.com/langchain-ai/langchain",
    repo_path="./langchain",
    branch="master",
    file_filter=file_filter,
)
# Load the documents and print how many were loaded.
documents = loader.load()
print(f"로드된 문서 개수: {len(documents)}")

In [None]:
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

# Build a Chroma vector store from the loaded documents using the
# text-embedding-3-small embedding model.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
db = Chroma.from_documents(documents, embeddings)

In [None]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI

# Prompt template with {context} and {question} placeholders; it is reused
# by the other RAG chains defined later in this notebook.
prompt = ChatPromptTemplate.from_template('''
다음 문맥만을 고려해 질문에 대답하세요.

문맥: """
{context}
"""

질문: {question}
'''
)

model = ChatOpenAI(model="gpt-4o-mini",temperature=0)
retriever = db.as_retriever()

# Baseline chain: the input is passed through as "question" and also given to
# the retriever for "context"; the result is piped through prompt, model and
# a string output parser.
chain = {
    "question": RunnablePassthrough(),
    "context": retriever,
} | prompt | model | StrOutputParser()

chain.invoke("LangChain의 개요를 알려줘")

In [None]:
from pydantic import BaseModel, Field

# Structured-output schema holding the list of generated search queries.
# Documented with "#" comments rather than a docstring so that nothing is
# added to the model class itself.
class QueryGenerationOutput(BaseModel):
    queries: list[str] = Field(..., description="검색 쿼리 목록")

# Prompt asking the model to produce three different search queries for the
# given question.
query_generation_prompt = ChatPromptTemplate.from_template("""
질문에 대해 벡터 데이터베이스에서 관련 문서를 검색하기 위한
3개의 서로 다른 검색 쿼리를 생성하세요.
거리 기반 유사성 검색의 한계를 극복하기 위해
사용자의 질문에 대해 여러 관점을 제공하는 것이 목표입니다.

질문: {question}
""")

# Prompt -> model with QueryGenerationOutput structured output -> the plain
# list of queries.
query_generation_chain = (
    query_generation_prompt
    | model.with_structured_output(QueryGenerationOutput)
    | (lambda x: x.queries)
)

# RAG chain whose "context" comes from running the retriever over the
# generated queries (retriever.map()) instead of over the question itself.
multi_query_rag_chain = {
    "question": RunnablePassthrough(),
    "context": query_generation_chain | retriever.map(),
} | prompt | model | StrOutputParser()

multi_query_rag_chain.invoke("LangChain의 개요를 알려줘")

In [None]:
from langchain_core.documents import Document

def reciprocal_rank_fusion(
    retriever_outputs: list[list[Document]],
    k: int = 60,
) -> list[str]:
    """Fuse several ranked result lists with Reciprocal Rank Fusion (RRF).

    Every document contributes ``1 / (rank + k)`` to the score of its content
    for each list it appears in, where ``rank`` is its zero-based position in
    that list. Contents are returned ordered by total score, highest first;
    ties keep the order in which the contents were first seen.

    Args:
        retriever_outputs: One ranked list of documents per query/retriever.
            Only each document's ``page_content`` is read.
        k: Positive smoothing constant added to the rank; larger values
            flatten the difference between high and low ranks.

    Returns:
        The distinct page contents, sorted by descending fused score.

    Raises:
        ValueError: If ``k`` is not positive (the score would be undefined or
            negative for the top-ranked document).
    """
    if k <= 0:
        raise ValueError(f"k must be a positive integer, got {k}")

    # Maps each document's content (string) to its accumulated score.
    content_score_mapping: dict[str, float] = {}

    # One ranked list per search query.
    for docs in retriever_outputs:
        for rank, doc in enumerate(docs):
            content = doc.page_content
            # Unseen content starts at 0; add the reciprocal-rank share.
            content_score_mapping[content] = (
                content_score_mapping.get(content, 0.0) + 1 / (rank + k)
            )

    # Sort by score, highest first (sorted() is stable, so ties keep
    # first-seen order).
    ranked = sorted(
        content_score_mapping.items(),
        key=lambda item: item[1],
        reverse=True,
    )
    return [content for content, _ in ranked]

# RAG chain whose "context" is the fused result: generated queries ->
# retriever applied to each query -> reciprocal_rank_fusion.
rag_fusion_chain = {
    "question": RunnablePassthrough(),
    "context": query_generation_chain | retriever.map() | reciprocal_rank_fusion,
} | prompt | model | StrOutputParser()

rag_fusion_chain.invoke("Langchain의 개요를 알려줘")


In [None]:
!pip install langchain-cohere==0.3.0

In [None]:
from typing import Any
from langchain_cohere import CohereRerank
from langchain_core.documents import Document

def rerank(inp: dict[str, Any], top_n: int = 3) -> list[Document]:
    """Rerank retrieved documents against the question with Cohere.

    Args:
        inp: Mapping with the "question" string and the candidate "documents".
        top_n: Value passed to the reranker as its ``top_n`` setting.

    Returns:
        The result of ``CohereRerank.compress_documents`` for the candidates.
    """
    question, candidates = inp["question"], inp["documents"]

    reranker = CohereRerank(model="rerank-multilingual-v3.0", top_n=top_n)
    return reranker.compress_documents(documents=candidates, query=question)

# RAG chain that retrieves "documents" for the question, adds a "context" key
# computed by rerank, and then runs prompt -> model -> string parser.
rerank_rag_chain = (
    {
        "question": RunnablePassthrough(),
        "documents": retriever,
    }
    | RunnablePassthrough.assign(context=rerank)
    | prompt | model | StrOutputParser()
)

rerank_rag_chain.invoke("Langchain의 개요를 알려줘")

In [None]:
from langchain_community.retrievers import TavilySearchAPIRetriever

# Two retrievers, each configured with its own run_name: the vector-store
# retriever and a Tavily web-search retriever created with k=3.
# NOTE(review): "retrierver" looks like a misspelling of "retriever"; the
# variable is also referenced by routed_retriever, so rename both together.
langchain_document_retrierver = retriever.with_config({"run_name": "langchain_document_retrierver"})
web_retriever = TavilySearchAPIRetriever(k=3).with_config({"run_name": "web_retriever"})

In [None]:
from enum import Enum

# String-valued enum naming the retrievers a question can be routed to.
class Route(str, Enum):
  langchain_document = "langchain_document"
  web = "web"

# Structured-output schema carrying the selected Route.
class RouteOutput(BaseModel):
  route: Route

# Prompt asking the model to pick a suitable retriever for the question.
route_prompt = ChatPromptTemplate.from_template("""
질문에 답변하기 위해 적절한 Retriever를 선택하세요.

질문: {question}
""")

# Prompt -> model with RouteOutput structured output -> the chosen Route value.
route_chain = (
    route_prompt
    | model.with_structured_output(RouteOutput)
    | (lambda x: x.route)
)

In [None]:
def routed_retriever(inp: dict[str, Any]) -> list[Document]:
    """Fetch documents with the retriever selected by the routing step.

    Args:
        inp: Mapping with the user's "question" and the chosen "route".

    Returns:
        The result of invoking the selected retriever with the question.

    Raises:
        ValueError: If the route matches neither ``Route`` member.
    """
    question, route = inp["question"], inp["route"]

    # Each known route returns immediately; anything else falls through.
    if route == Route.langchain_document:
        return langchain_document_retrierver.invoke(question)
    if route == Route.web:
        return web_retriever.invoke(question)

    raise ValueError(f"Invalid route: {route}")

# RAG chain that first computes "route" for the question, then adds a
# "context" key via routed_retriever, and finally runs
# prompt -> model -> string parser.
route_rag_chain = (
    {
        "question": RunnablePassthrough(),
        "route": route_chain,
    } | RunnablePassthrough.assign(context=routed_retriever)
    | prompt | model | StrOutputParser()
)

In [None]:
# Try the routed chain with a question about LangChain.
route_rag_chain.invoke("Langchain의 개요를 알려줘")

In [None]:
# Try the routed chain with a question about today's weather.
route_rag_chain.invoke("오늘 날씨는 어때?")

In [None]:
!pip install rank-bm25==0.2.2

In [None]:
from langchain_community.retrievers import BM25Retriever

# The existing vector-store retriever and a BM25 retriever built from the
# same documents, each configured with its own run_name.
chroma_retriever = retriever.with_config({"run_name": "chroma_retriever"})
bm25_retriever = BM25Retriever.from_documents(documents).with_config({"run_name": "bm25_retriever"})

In [None]:
from langchain_core.runnables import RunnableParallel

# Run both retrievers on the same input, turn the result dict into a list of
# two result lists (BM25 first, Chroma second) and fuse them with
# reciprocal_rank_fusion.
hybrid_retriever = RunnableParallel(
    {"bm25_documents": bm25_retriever, "chroma_documents": chroma_retriever}
) | (lambda x: [x["bm25_documents"], x["chroma_documents"]]) | reciprocal_rank_fusion

In [None]:
# RAG chain that takes its "context" from hybrid_retriever.
hybrid_rag_chain = (
    {
        "question": RunnablePassthrough(),
        "context": hybrid_retriever,
    }
    | prompt | model | StrOutputParser()
)

In [None]:
# Try the hybrid chain with a question about LangChain.
hybrid_rag_chain.invoke("Langchain의 개요를 알려줘")

# 기초

In [None]:
!pip install langchain_openai

In [None]:
from langchain_openai import OpenAI

# Invoke the gpt-3.5-turbo-instruct model through the OpenAI class with a
# plain string and print the returned output.
model = OpenAI(model="gpt-3.5-turbo-instruct", temperature=0)
output = model.invoke("안녕하세요.")
print(output)

In [None]:
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
from langchain_openai import ChatOpenAI

model = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# Conversation sent to the chat model: a system instruction, a user turn that
# introduces the user's name, the assistant's earlier reply, and a follow-up
# question asking whether the model knows that name.
messages = [
    SystemMessage("You are a helpful assistant."),
    HumanMessage("안녕하세요! 저는 존이라고 합니다."),
    AIMessage(content="안녕하세요, 존님! 어떤 도움이 필요하신가요?"),
    HumanMessage(content="제 이름을 아시나요?"),
]

# Send the whole history and print the text of the returned message.
ai_message = model.invoke(messages)
print(ai_message.content)

In [None]:
from langchain_core.messages import SystemMessage, HumanMessage
from langchain_openai import ChatOpenAI

model = ChatOpenAI(model="gpt-4o-mini", temperature=0)

messages = [
    SystemMessage("You are a helpful assistant."),
    HumanMessage("안녕하세요!"),
]

# Stream the response and print each chunk's content as it arrives,
# without newlines and flushing after every chunk.
for chunk in model.stream(messages):
  print(chunk.content, end="", flush=True)

In [None]:
from langchain_core.prompts import PromptTemplate

# String prompt template with a single {dish} placeholder.
prompt = PromptTemplate.from_template("""다음 요리의 레시피를 생각해 주세요.

요리명: {dish}""")

# Fill in the placeholder and print the resulting text.
prompt_value = prompt.invoke({"dish": "김치볶음밥"})
print(prompt_value.text)

In [None]:
from langchain_core.prompts import ChatPromptTemplate

# Chat prompt template: a fixed system message plus a human message built
# from the {dish} placeholder.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "사용자가 입력한 요리의 레시피를 생각해 주세요."),
        ("human","{dish}"),
    ]
)

# Fill in the placeholder and print the resulting prompt value.
prompt_value = prompt.invoke({"dish":"카레"})
print(prompt_value)

In [None]:
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# Chat prompt template with an optional "chat_history" placeholder inserted
# between the system message and the human {input} message.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "사용자가 입력한 요리의 레시피를 생각해 주세요."),
        MessagesPlaceholder("chat_history",optional=True),
        ("human","{input}"),
    ]
)

# NOTE(review): the sample history (a question about curry, answered with a
# greeting addressed by name) and the input do not form a coherent
# conversation; this looks like mixed-up sample text — confirm the intended
# messages.
prompt_value = prompt.invoke(
    {
        "chat_history": [
            HumanMessage(content="카레는 어떤 음식인가요?"),
            AIMessage("안녕하세요, 존님! 어떻게 도와드릴까요?"),
        ],
        "input":"제 이름은 아시나요?",
    }
)

print(prompt_value)