In [5]:
import getpass
import os
from langchain.chat_models import init_chat_model
from langchain_openai import OpenAIEmbeddings
from langchain_core.vectorstores import InMemoryVectorStore
import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict
from dotenv import load_dotenv
load_dotenv()

def _ensure_env_secret(var_name: str, service: str) -> None:
    """Prompt for a secret and export it when ``var_name`` is unset or empty.

    Args:
        var_name: Environment variable that should hold the secret.
        service: Human-readable service name shown in the hidden-input prompt.
    """
    if not os.environ.get(var_name):
        os.environ[var_name] = getpass.getpass(f"Enter API key for {service}: ")


# Switch the LANGSMITH_TRACING flag on for this session.
os.environ["LANGSMITH_TRACING"] = "true"

# Values already present in the environment (e.g. after the load_dotenv() call
# above) are kept; the user is only prompted for keys that are still missing.
_ensure_env_secret("OPENAI_API_KEY", "OpenAI")
_ensure_env_secret("LANGSMITH_API_KEY", "LangSmith")

# Chat model shared by the rest of the notebook.
llm = init_chat_model("gpt-4o-mini", model_provider="openai")

In [6]:
# Embedding model, and the vector store that uses it to index documents.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
vector_store = InMemoryVectorStore(embedding=embeddings)

In [7]:
# Load the web page, restricting parsing to elements with the listed classes.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header"),
        ),
    },
)
docs = loader.load()

# Split the loaded documents into overlapping chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
all_splits = text_splitter.split_documents(docs)

# Index the chunks; the return value is not used in this cell.
_ = vector_store.add_documents(documents=all_splits)

# Prompt used for question answering, pulled from the hub.
prompt = hub.pull("rlm/rag-prompt")

# State schema handed to StateGraph and read/updated by the graph steps
class State(TypedDict):
    """Typed dictionary describing the data shared between application steps."""

    question: str  # read by both `retrieve` and `generate`
    context: List[Document]  # written by `retrieve`, read by `generate`
    answer: str  # written by `generate`

# Application steps
def retrieve(state: State):
    """Search the vector store for documents similar to the question.

    Args:
        state: Current application state; only ``state['question']`` is read.

    Returns:
        A dict with a single ``'context'`` key holding the search results.
    """
    return {'context': vector_store.similarity_search(state['question'])}

def generate(state: State):
    """Answer the question using the retrieved context.

    Args:
        state: Current application state; ``state['question']`` and
            ``state['context']`` are read.

    Returns:
        A dict with a single ``'answer'`` key holding the model response content.
    """
    # Concatenate the text of every context document, separated by blank lines.
    page_texts = [chunk.page_content for chunk in state['context']]
    context_text = '\n\n'.join(page_texts)

    prompt_input = {'question': state['question'], 'context': context_text}
    model_reply = llm.invoke(prompt.invoke(prompt_input))
    return {'answer': model_reply.content}


# Compile the application: retrieve -> generate, entered from START
graph_builder = StateGraph(State)
graph_builder.add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

In [8]:
# Run the compiled graph on a sample question and print the generated answer.
response = graph.invoke(
    {"question": "What is Task Decomposition?"},
)
print(response["answer"])

Task decomposition is the process of breaking down a complex task into smaller, manageable sub-tasks or steps. It can be achieved through techniques like simple prompting, task-specific instructions, or human inputs, and often involves methods such as Chain of Thought and Tree of Thoughts to enhance reasoning. This approach helps in organizing and simplifying the execution of complicated tasks.


In [9]:
# Restrict HTML parsing to elements carrying one of these classes.
bs4_strainer = bs4.SoupStrainer(
    class_=("post-title", "post-header", "post-content"),
)
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(parse_only=bs4_strainer),
)
docs = loader.load()

# Exactly one document is expected for the single URL requested.
assert len(docs) == 1
print(f"Total characters: {len(docs[0].page_content)}")

Total characters: 43047


In [11]:
# Split the loaded documents into overlapping chunks and report how many were produced.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200, add_start_index=True
)
all_splits = text_splitter.split_documents(docs)

print(f"Split blog post into {len(all_splits)} sub-documents")

Split blog post into 63 sub-documents


In [13]:
# Start from an empty store before indexing. An earlier cell already added
# chunks of this same post to `vector_store`; adding them again (or re-running
# this cell) would leave duplicate entries for similarity_search to return.
# `retrieve` looks up the global name at call time, so it uses the new store.
vector_store = InMemoryVectorStore(embeddings)
document_ids = vector_store.add_documents(documents=all_splits)

# Show a few of the ids returned for the indexed chunks.
print(document_ids[:3])

['347ff8a4-2e78-4e92-a44a-9c430e213631', '2ec4b770-65af-44cf-99ba-2230fdce5f25', 'fbdc641c-4573-4428-bc04-ead0c2c7d996']


In [14]:
# Pull the RAG prompt and render it with placeholder values to inspect its text.
prompt = hub.pull("rlm/rag-prompt")

example_messages = prompt.invoke(
    dict(context="(context goes here)", question="(question goes here)")
).to_messages()

# The rendered prompt is expected to consist of a single message.
assert len(example_messages) == 1
print(example_messages[0].content)

You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: (question goes here) 
Context: (context goes here) 
Answer:
