In [None]:
!pip install langchain langchain_community langchain_chroma

In [2]:
!pip install -qU langchain-openai

In [None]:
!pip install langchainhub

In [4]:
import getpass
import os

In [5]:
# Tracing is left disabled; uncomment to set LANGCHAIN_TRACING_V2.
#os.environ["LANGCHAIN_TRACING_V2"] = "true"

# Prompt for the key only when it is not already set, so re-running this cell
# (or starting the kernel with the variable exported) neither asks again nor
# overwrites an existing value. The input is read without echo via getpass.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

 ········


In [6]:
# Define USER_AGENT for this process before the web loader is imported and used.
# Presumably read by langchain_community's user_agent utility; background:
# https://github.com/langchain-ai/rag-from-scratch/issues/24
# https://api.python.langchain.com/en/latest/_modules/langchain_community/utils/user_agent.html
os.environ.update(USER_AGENT="DefaultLangchainUserAgent")

In [7]:
from langchain_openai import ChatOpenAI

# Chat model used as the generation step of the RAG chain built below.
CHAT_MODEL_NAME = "gpt-3.5-turbo-0125"
llm = ChatOpenAI(model=CHAT_MODEL_NAME)

In [8]:
import bs4
from langchain import hub
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [9]:
# Load the blog post, parsing only the elements whose CSS class is one of the
# listed post sections (everything else on the page is skipped by the strainer).
BLOG_URL = "https://lilianweng.github.io/posts/2023-06-23-agent/"
POST_SECTION_CLASSES = ("post-content", "post-title", "post-header")

section_filter = bs4.SoupStrainer(class_=POST_SECTION_CLASSES)
loader = WebBaseLoader(
    web_paths=(BLOG_URL,),
    bs_kwargs={"parse_only": section_filter},
)
docs = loader.load()

In [10]:
# print(docs)  # Uncomment to see how the entire webpage has been condensed into one large block of text.

In [11]:
# Split the loaded post into overlapping chunks, then embed the chunks with
# OpenAI embeddings and index them in a Chroma vector store.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
splits = text_splitter.split_documents(docs)

embedding_model = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_model)

In [12]:
# Building blocks for retrieval and generation: a retriever over the indexed
# chunks, and the RAG prompt pulled from the prompt hub.
RAG_PROMPT_REF = "rlm/rag-prompt"

retriever = vectorstore.as_retriever()
prompt = hub.pull(RAG_PROMPT_REF)

In [13]:
def format_docs(docs):
    """Merge documents into a single string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values in input order, separated by a blank
        line; an empty string when ``docs`` is empty.
    """
    separator = "\n\n"
    contents = [document.page_content for document in docs]
    return separator.join(contents)

In [14]:
# Assemble the RAG pipeline stage by stage:
#   1. "context"  -> retrieved documents, flattened to text by format_docs
#      "question" -> the chain input, passed through unchanged
#   2. fill the prompt, 3. call the chat model, 4. parse the reply to a string.
context_stage = retriever | format_docs
chain_inputs = {"context": context_stage, "question": RunnablePassthrough()}
rag_chain = chain_inputs | prompt | llm | StrOutputParser()


In [15]:
# Ask the chain a question; the parsed answer string is the cell's output.
question = "What is Task Decomposition?"
rag_chain.invoke(question)

'Task Decomposition is a technique that breaks down complex tasks into smaller and simpler steps to enhance model performance. It involves transforming big tasks into multiple manageable tasks using methods like Chain of Thought or Tree of Thoughts. Task decomposition can be done through simple prompting, task-specific instructions, or with human inputs.'

In [16]:
# Second query against the same chain; the answer string is the cell's output.
question = "What are types of memory?"
rag_chain.invoke(question)

'The types of memory include Short-Term Memory (STM) or Working Memory, Long-Term Memory (LTM) with subtypes Explicit/Declarative and Implicit/Procedural memory, and Sensory Memory which retains sensory information briefly after stimuli end. Each type of memory serves different functions and durations in the human brain.'

In [17]:
# Informally worded query, sent to the chain exactly as typed.
question = "Whats mips?"
rag_chain.invoke(question)

'MIPS stands for Maximum Inner Product Search, which involves saving embedding representations of information into a vector store database to support fast maximum inner-product search. Commonly used with approximate nearest neighbors algorithms to optimize retrieval speed. It is a tool that can extend the capabilities of models by alleviating the restriction of finite attention span.'

In [22]:
# Query about a specific tool name; the answer string is the cell's output.
question = "what's FAISS"
rag_chain.invoke(question)

'FAISS is a similarity search system developed by Facebook AI. It applies vector quantization to partition the vector space into clusters for efficient search. The search process involves looking for cluster candidates with coarse quantization followed by finer quantization within clusters.'

In [19]:
# Final example query; the answer string is the cell's output.
question = "what is chain of hindsight?"
rag_chain.invoke(question)

'Chain of Hindsight (CoH) is a framework that presents a model with a sequence of past outputs, each annotated with feedback, to improve its own outputs over time. The model is fine-tuned to predict the final output based on a sequence of feedback tuples ranked by reward. CoH aims to train the model to self-reflect and produce better outputs by learning from a history of sequentially improved outputs.'