In [1]:
from dotenv import load_dotenv
import os

load_dotenv()
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANG_SMITH")
os.environ["ANTHROPIC_API_KEY"] = os.getenv("ANTHROPIC_API_KEY")
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
os.environ["TAVILY_API_KEY"] = os.getenv("TAVILY_API_KEY")

In [2]:
from langchain_anthropic import ChatAnthropic

llm = ChatAnthropic(model="claude-3-sonnet-20240229")

In [6]:
# 알아서 크롤링

import bs4
from langchain_community.document_loaders import WebBaseLoader

bs4_strainer = bs4.SoupStrainer(class_=("post-title", "post-header", "post-content"))
loader = WebBaseLoader(
    web_path=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": bs4_strainer}
)
docs = loader.load()

In [12]:
# document 청크하기

from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200, add_start_index = True
)
all_splits = text_splitter.split_documents(docs)

In [24]:
# 지식베이스 저장소 생성

from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

vectorstore = Chroma.from_documents(documents=all_splits, 
                                    embedding=OpenAIEmbeddings())

In [25]:
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k":6})
retriever_docs = retriever.invoke("What are the approaches to Task Decomposition?")

In [28]:
print(retriever_docs[0].page_content)

Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.
Task decomposition can be done (1) by LLM with simple prompting like "Steps for XYZ.\n1.", "What are the subgoals for achieving XYZ?", (2) by using task-specific instructions; e.g. "Write a story outline." for writing a novel, or (3) with human inputs.


In [31]:
# RAG용 프롬프트 템플릿 작성
from langchain import hub

prompt = hub.pull("rlm/rag-prompt")

example_messages = prompt.invoke(
    {"context": "filler context", "question": "filler question"}
).to_messages()

example_messages

[HumanMessage(content="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: filler question \nContext: filler context \nAnswer:")]

In [44]:
# LCEL로 실행 런너블하게 만들기


from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
    {"context":retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    # | StrOutputParser()
)

for chunk in rag_chain.stream("What is Task Decomposition?"):
    print(chunk, end="", flush=True)
    print(chunk)

content='' id='run-9f016ad6-5479-4485-98b1-f499c14eeba5' usage_metadata={'input_tokens': 983, 'output_tokens': 0, 'total_tokens': 983}content='' id='run-9f016ad6-5479-4485-98b1-f499c14eeba5' usage_metadata={'input_tokens': 983, 'output_tokens': 0, 'total_tokens': 983}
content='Task Decomposition' id='run-9f016ad6-5479-4485-98b1-f499c14eeba5'content='Task Decomposition' id='run-9f016ad6-5479-4485-98b1-f499c14eeba5'
content=' is the process of' id='run-9f016ad6-5479-4485-98b1-f499c14eeba5'content=' is the process of' id='run-9f016ad6-5479-4485-98b1-f499c14eeba5'
content=' breaking down a complex' id='run-9f016ad6-5479-4485-98b1-f499c14eeba5'content=' breaking down a complex' id='run-9f016ad6-5479-4485-98b1-f499c14eeba5'
content=' task into smaller,' id='run-9f016ad6-5479-4485-98b1-f499c14eeba5'content=' task into smaller,' id='run-9f016ad6-5479-4485-98b1-f499c14eeba5'
content=' more' id='run-9f016ad6-5479-4485-98b1-f499c14eeba5'content=' more' id='run-9f016ad6-5479-4485-98b1-f499c14eeba5

''