# RAG From Scratch 

### Environment

(1) Package

In [None]:
!pip install -q langchain_community tiktoken langchain-openai langchainhub chromadb langchain

: 

(2) LangSmith 

### Step 1: Overview

In [36]:
import bs4
from langchain import hub 
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from dotenv import load_dotenv

path = ".env"
load_dotenv(dotenv_path=path)
  
  

True

#### INDEXING 

In [37]:
# Load Document 
loader = WebBaseLoader(
    web_path=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=('post-content', 'post-title','post-header')
        )
    ),
)

doc = loader.load()

# Split 
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap=200)
split = text_splitter.split_documents(doc)


# Embed
vectorstore = Chroma.from_documents(documents=split, embedding= OpenAIEmbeddings())

# Retriever

retriever = vectorstore.as_retriever()

#### RETRIEVER & GENERATION ####

# Prompt

prompt = hub.pull('rlm/rag-prompt')

# LLM
llm = ChatOpenAI(model='gpt-3.5-turbo', temperature=0)

# Post-processing
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

# Chain 
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm 
    | StrOutputParser() 
) 

# Question

rag_chain.invoke("What is Task Decomposition?")  



"Task decomposition is a technique used to break down complex tasks into smaller and simpler steps. It involves transforming big tasks into multiple manageable tasks to enhance model performance. Different methods like Chain of Thought and Tree of Thoughts are used to decompose tasks and improve understanding of the model's thinking process."

### Part 2: Indexing

In [23]:
# Document
question = "What kind of pet do I like?"
document = "My favorite pet is Penguin."

In [24]:
import tiktoken

def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """ Returns the number of tokens in a text string. """
    encoding = tiktoken.get_encoding(encoding_name)
    num_tokens = len(encoding.encode(string))
    return num_tokens


num_tokens_from_string(question, "cl100k_base") 

8

In [25]:
embedding = OpenAIEmbeddings()
query_result = embedding.embed_query(question)
document_result = embedding.embed_query(document)
len(query_result)

1536

In [26]:
print(f'query_result: {type(query_result)} ')

query_result: <class 'list'> 


cosine similarity is recommended for Open AI Embeddings

In [27]:
import numpy as np 

def cosine_similarity(vec1, vec2):
    dot_product = np.dot(vec1,vec2)
    norm_vec1 = np.linalg.norm(vec1)
    norm_vec2 = np.linalg.norm(vec2)
    
    return dot_product/(norm_vec1 * norm_vec2)

similarity = cosine_similarity(query_result, document_result)
print(similarity)    

0.8773972981178011


In [28]:
# Load Document
loader = WebBaseLoader(
    web_path=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=('post-content', 'post-title','post-header')
        )
    ),
)

doc = loader.load()


In [29]:
# Split Document
# Split 
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size = 1000, chunk_overlap=200)
split = text_splitter.split_documents(doc)

In [30]:

# Embed
vectorstore = Chroma.from_documents(documents=split, embedding= OpenAIEmbeddings())

# Retriever

retriever = vectorstore.as_retriever(search_kwargs={"k":1})

### Part 3: Retrieval 

In [31]:
docs = retriever.get_relevant_documents("What is Task Decomposition?")

In [32]:
len(docs)

1

In [33]:
print(docs)

[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Component One: Planning#\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\nTask Decomposition#\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.\nTree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a

### Part 4: Generation

In [38]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate

# Prompt
template = """Answer the question based on the following context.
{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)
prompt

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='Answer the question based on the following context.\n{context}\n\nQuestion: {question}\n'), additional_kwargs={})])

In [39]:
# LLM
llm = ChatOpenAI(model='gpt-3.5-turbo', temperature=0)


In [40]:
# Chain
chain = prompt | llm 

In [None]:
# Run

result = chain.invoke({"context": docs, "question":"What is Task Decomposition"})
print(result)

content='Task decomposition is a technique used by agents to break down complex tasks into smaller and simpler steps. This allows the agent to plan ahead and tackle the task more effectively.' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 33, 'prompt_tokens': 248, 'total_tokens': 281, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'id': 'chatcmpl-BmQrTyEcsoYSVSUiMeZiuwIMUzOpD', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None} id='run--9fd2074f-5d38-4097-9b2b-13625281be2b-0' usage_metadata={'input_tokens': 248, 'output_tokens': 33, 'total_tokens': 281, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}


In [42]:
from langchain import hub  

prompt_hub_rag = hub.pull("rlm/rag-prompt")



In [43]:
prompt_hub_rag

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})])

In [44]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

rag_chain.invoke("What is task decomposition?")

'Task decomposition is the process of breaking down a complex task into smaller and simpler steps to make it more manageable. This can be done through techniques like Chain of Thought (CoT) or Tree of Thoughts, which involve exploring multiple reasoning possibilities at each step. Additionally, task decomposition can also involve using simple prompting, task-specific instructions, or relying on an external classical planner for long-horizon planning.'