In [118]:
import os

import bs4
from dotenv import load_dotenv
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_anthropic import ChatAnthropic
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
# Load variables from a local .env file into the process environment before
# anything reads API keys from it.
load_dotenv()

# Captured for reference; None when the variable is not set.
langchain_api = os.environ.get('LANGCHAIN_API_KEY')

# Switch on LangChain tracing and point it at the hosted endpoint.
os.environ.update({
    'LANGCHAIN_TRACING_V2': 'true',
    'LANGCHAIN_ENDPOINT': 'https://api.smith.langchain.com',
})

In [60]:
# Restrict HTML parsing to elements carrying the post's title, header and
# body classes; everything else on the page is skipped by the strainer.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header"),
        ),
    },
)

In [61]:
# Fetch the configured page(s) and parse them into document objects.
docs = loader.load()

In [77]:
# Cut the loaded documents into chunks of up to 1000 characters, with a
# 200-character overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(docs)

# Embed every chunk with OpenAIEmbeddings and index the result in Chroma.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=OpenAIEmbeddings(),
)

In [89]:
# Sanity-check what was indexed: fetch the stored texts and metadata through
# the vector store's public `get` accessor instead of reaching into the
# private `_collection` attribute.
out = vectorstore.get(include=["documents", "metadatas"])
print(out.keys())

# Show at most the first five entries; slicing is safe on shorter lists, and
# each text is trimmed to 200 characters to keep the output readable.
preview = zip(out["ids"][:5], out["documents"][:5], out["metadatas"][:5])
for doc_id, text, meta in preview:
    print("ID:", doc_id)
    print("Text:", text[:200])
    print("Meta:", meta)
    print("="*50)

dict_keys(['ids', 'embeddings', 'documents', 'uris', 'included', 'data', 'metadatas'])
ID: 1af38bdd-bed6-4f35-83a1-404056b542cf
Text: LLM Powered Autonomous Agents
    
Date: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng


Building agents with LLM (large language model) as its core controller is a cool con
Meta: {'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}
ID: c571a2ee-dc45-459e-8434-fe4fd265e369
Text: Memory

Short-term memory: I would consider all the in-context learning (See Prompt Engineering) as utilizing short-term memory of the model to learn.
Long-term memory: This provides the agent with th
Meta: {'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}
ID: 4548f622-1661-4f88-9dec-40e1ebc846ea
Text: Component One: Planning#
A complicated task usually involves many steps. An agent needs to know what they are and plan ahead.
Task Decomposition#
Chain of thought (CoT; Wei et al. 2022) has become a s
Meta: {'source': 'https

In [69]:
# Display how many entries the underlying collection currently holds.
# NOTE(review): `_collection` is a private attribute of the vector store
# wrapper, so this line may break on a library upgrade.
vectorstore._collection.count()

63

In [94]:
# Expose the vector store as a retriever with its default search settings.
retriever = vectorstore.as_retriever()

# Raw template text. `{question}` and `{context}` are the two input variables
# the RAG chain supplies.
prompt_text = (
    "you are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, just say that you don't know. "
    "Use three sentences maximum and keep the answer concise."
    "\nQuestion: {question} \nContext: {context} \nAnswer:"
)

# A bare `str` cannot take part in `|` composition (`dict | str` raises
# TypeError), so the text is wrapped in a prompt-template runnable. Without
# this the chain only works when `prompt` was left over from earlier,
# out-of-order execution.
prompt = ChatPromptTemplate.from_template(prompt_text)
print(prompt_text)

you are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: {question} 
Context: {context} 
Answer:


In [127]:
# Chat model used to generate answers; temperature is set to 0.
llm = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0)
def format_docs(docs):
    """Merge documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values joined in order, with one blank line
        between consecutive documents. An empty iterable gives ``''``.
    """
    contents = [item.page_content for item in docs]
    return '\n\n'.join(contents)

# Retrieval branch: look up documents for the question, then flatten them
# into one context string.
context_branch = retriever | format_docs

# The incoming question feeds both keys of the mapping: "context" gets the
# retrieved text and "question" passes the input through unchanged. The
# filled prompt goes to the model and the reply is reduced to a plain string.
# NOTE(review): every stage after the mapping must be a runnable; `prompt` in
# particular has to be a prompt template, not a raw string.
rag_chain = (
    {
        "context": context_branch,
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)

rag_chain.invoke("What is Task Decomposition?")

"Task decomposition is a technique used to break down complex tasks into smaller and simpler steps. It involves transforming big tasks into multiple manageable tasks to enhance model performance. Different methods like Chain of Thought and Tree of Thoughts are used to decompose tasks and improve understanding of the model's thinking process."