In [1]:
# Let's go about building RAG "from scratch"

In [2]:
from dotenv import load_dotenv
load_dotenv() # load environment variables

True

### Part 1: Indexing

In [3]:
# Embed a question (query) and 2 documents using an embedding model

from langchain_ollama import OllamaEmbeddings

question = "What kinds of pets do I like?"
document1 = "My favorite pet is a cat."
document2 = "I went to Lake Tahoe last weekend."

embed_model = OllamaEmbeddings(model="nomic-embed-text")
question_embed = embed_model.embed_query(question)
document1_embed = embed_model.embed_query(document1)
document2_embed = embed_model.embed_query(document2)

print(len(question_embed), len(document1_embed), len(document2_embed))

768 768 768


In [4]:
# Use cosine similarity to test how related each document is to the query.

import numpy as np

def cosine_similarity(vec1, vec2):
    dot_product = np.dot(vec1, vec2)
    norm_vec1 = np.linalg.norm(vec1)
    norm_vec2 = np.linalg.norm(vec2)
    return dot_product / (norm_vec1 * norm_vec2)

similarity1 = cosine_similarity(question_embed, document1_embed)
similarity2 = cosine_similarity(question_embed, document2_embed)

print("Similarity to Document 1:", similarity1)  # ~0.749: high similarity
print("Similarity to Document 2:", similarity2)  # ~0.445: lower similarity

Similarity to Document 1: 0.7487750832631482
Similarity to Document 2: 0.44519811332617293


In [5]:
# Now that we have the general idea down, let's ingest documents from the internet. We will:
#   - fetch a webpage's content (specifically, a blog post about "LLM Powered Autonomous Agents")
#   - split the content into chunks; these will be our documents
#   - embed these document and store them in a vector database (ChromaDB)

import bs4
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma

# Load blog
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
blog_docs = loader.load()

# Split
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=300, chunk_overlap=50) # uses tiktoken before splitting
splits = text_splitter.split_documents(blog_docs)

# Index
vectorstore = Chroma.from_documents(documents=splits, embedding=OllamaEmbeddings(model="nomic-embed-text"))
retriever = vectorstore.as_retriever()

print(len(splits))  # 50 documents in our vector database

50


### Step 2: Retrieval

In [6]:
# Let's retrieve relevant documents based on our query

docs = retriever.invoke("What is Task Decomposition?")
len(docs)

4

In [7]:
# We can use these documents to form a context to pass alongside our query

context = ""
for doc in docs:
    context += doc.page_content + "\n\n"

print(context)

Component One: Planning#
A complicated task usually involves many steps. An agent needs to know what they are and plan ahead.
Task Decomposition#
Chain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.
Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.
Task decomposition can be done (1) by LLM with simple prompt

### Step 3: Generation

In [8]:
# Now, we create a prompt that takes both our retrieved context and user question

from langchain_ollama import OllamaLLM
from langchain_core.prompts import PromptTemplate

prompt_template = PromptTemplate.from_template('''
    You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
    Context: {context}
    Question: {question}
    Answer:
''')
prompt = prompt_template.invoke({"context": context, "question": "What is Task Decomposition?"})

print(prompt)

text='\n    You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don\'t know the answer, just say that you don\'t know. Use three sentences maximum and keep the answer concise.\n    Context: Component One: Planning#\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\nTask Decomposition#\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.\nTree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates m

In [9]:
# Finally, we pass this prompt to the model. Congratulations! That's the basics of RAG.

llm = OllamaLLM(model="llama3.2:1b")
llm.invoke(prompt)

"Task decomposition refers to breaking down complex tasks into smaller and simpler steps for model training or improvement. It involves decomposing hard tasks into manageable parts using techniques such as Chain of Thought (CoT), Tree of Thoughts, Algorithm Distillation (AD), and HuggingGPT's Task Planning stages. The goal is to make the task more interpretable and easier to understand."