# LangChain practice


## SETUP


In [1]:
from langchain.chat_models import ChatOpenAI
from dotenv import load_dotenv, find_dotenv
import os

In [2]:
# load the apikeys
_ = load_dotenv(find_dotenv())

# openai
# Read the key once; later cells pass it explicitly to the OpenAI clients.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# langchain (tracing is optional and currently disabled)
# os.environ["LANGCHAIN_TRACING_V2"] = "true"
# os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
# os.environ only accepts str values, so assigning the result of os.getenv()
# directly raises TypeError when the variable is missing. Re-export the key
# only when it is actually configured.
LANGCHAIN_API_KEY = os.getenv("LANGCHAIN_API_KEY")
if LANGCHAIN_API_KEY is not None:
    os.environ["LANGCHAIN_API_KEY"] = LANGCHAIN_API_KEY

# -- PART 1 - 4 (Basic RAG)


## Part 1: Overview


In [29]:
import bs4
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI

### Indexing the documents


In [30]:
# Load Documents
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
docs = loader.load()

In [31]:
# split the document
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

In [32]:
# embed the splits of documents
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=OpenAIEmbeddings(
        openai_api_key=OPENAI_API_KEY),  # openai embeddings
)

In [33]:
# define the document retriever
retriever = vectorstore.as_retriever()

In [34]:
retriever.get_relevant_documents

<bound method BaseRetriever.get_relevant_documents of VectorStoreRetriever(tags=['Chroma', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x14817b5d0>)>

### Retrieval and Generation


In [35]:
# get prompt: pull the shared RAG prompt template from the LangChain hub
prompt = hub.pull("rlm/rag-prompt")
prompt

# The template has two placeholders:
# question -> query by user
# context -> retrieved documents

ChatPromptTemplate(input_variables=['context', 'question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"))])

In [36]:
# define the llm
llm = ChatOpenAI(
    openai_api_key=OPENAI_API_KEY, model_name="gpt-3.5-turbo", temperature=0
)

In [37]:
# Post-processing
def format_docs(docs):
    """Concatenate the text of every document, separating documents with a blank line."""
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)

In [38]:
# Chain
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Question
rag_chain.invoke("What is Task Decomposition?")

'Task Decomposition is a technique used to break down complex tasks into smaller and simpler steps. This approach helps agents to plan and execute tasks more efficiently by dividing them into manageable components. Task decomposition can be achieved through various methods such as prompting with specific instructions or utilizing human inputs.'

## Part 2: Indexing


In [39]:
# Documents
question = "What kinds of pets do I like?"
document = "My favorite pet is a cat."

In [40]:
# count number of tokens per model
import tiktoken


def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Count how many tokens `string` occupies under the named tiktoken encoding."""
    tokens = tiktoken.get_encoding(encoding_name).encode(string)
    return len(tokens)


num_tokens_from_string(question, "cl100k_base")

8

In [41]:
# openai embedding model
from langchain_openai import OpenAIEmbeddings

embedding = OpenAIEmbeddings(api_key=OPENAI_API_KEY)
query_result = embedding.embed_query(question)  # question from user
document_result = embedding.embed_query(document)  # ref document / split(s)

len(query_result)  # show the embedding size of the query per model

1536

In [42]:
# find how similar the query to a document

import numpy as np


def cosine_similarity(vec1, vec2):
    """Return the cosine of the angle between two vectors.

    Args:
        vec1: First vector (anything accepted by ``np.dot`` / ``np.linalg.norm``).
        vec2: Second vector, same length as ``vec1``.

    Returns:
        A value in [-1, 1]: 1 means same direction, 0 orthogonal, -1 opposite.
        Returns 0.0 when either vector has zero norm, because the cosine is
        undefined there and the plain formula would divide by zero.
    """
    dot_product = np.dot(vec1, vec2)
    norm_product = np.linalg.norm(vec1) * np.linalg.norm(vec2)

    # Guard the degenerate case instead of producing nan plus a RuntimeWarning.
    if norm_product == 0:
        return 0.0

    return dot_product / norm_product


similarity = cosine_similarity(query_result, document_result)
print("Cosine Similarity:", similarity)  # closer to 1 is very similar

Cosine Similarity: 0.8807044730847651


In [43]:
# document loaders
# Load blog
import bs4
from langchain_community.document_loaders import WebBaseLoader

loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
blog_docs = loader.load()

In [44]:
# split document into chunks
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=500, chunk_overlap=50
)

# make splits
splits = text_splitter.split_documents(blog_docs)

In [45]:
# get chunk embeddings and store them into vector store
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma

vectorstore = Chroma.from_documents(
    documents=splits, embedding=OpenAIEmbeddings(api_key=OPENAI_API_KEY)
)

retriever = vectorstore.as_retriever()
retriever

VectorStoreRetriever(tags=['Chroma', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x148162b50>)

## Part 3: Retrieval


In [47]:
# index
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma

vectorstore = Chroma.from_documents(
    documents=splits, embedding=OpenAIEmbeddings(api_key=OPENAI_API_KEY)
)

retriever = vectorstore.as_retriever(search_kwargs={"k": 1})
retriever

VectorStoreRetriever(tags=['Chroma', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x148171a50>, search_kwargs={'k': 1})

In [48]:
docs = retriever.get_relevant_documents("What is Task Decomposition?")
docs

[Document(page_content='Fig. 1. Overview of a LLM-powered autonomous agent system.\nComponent One: Planning#\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\nTask Decomposition#\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'})]

In [49]:
len(docs)

1

## Part 4: Generation


In [50]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate

# define the prompt template
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)
prompt

ChatPromptTemplate(input_variables=['context', 'question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template='Answer the question based only on the following context:\n{context}\n\nQuestion: {question}\n'))])

In [51]:
# define LLM
llm = ChatOpenAI(api_key=OPENAI_API_KEY, temperature=0, model_name="gpt-3.5-turbo")

In [52]:
# chain
chain = prompt | llm

In [53]:
# Run
chain.invoke({"context": docs, "question": "What is Task Decomposition?"})

AIMessage(content='Task Decomposition is a technique that involves breaking down a complex task into smaller and simpler steps in order to make it more manageable for an autonomous agent.')

In [54]:
from langchain import hub

prompt_hub_rag = hub.pull("rlm/rag-prompt")

In [55]:
prompt_hub_rag

ChatPromptTemplate(input_variables=['context', 'question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"))])

In [56]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

rag_chain.invoke("What is Task Decomposition?")

"Task Decomposition is a technique that involves breaking down complex tasks into smaller and simpler steps in order to enhance model performance on those tasks. This technique transforms big tasks into multiple manageable tasks, allowing the model to utilize more test-time computation and shed light on the interpretation of the model's thinking process."

## Part 5: Multi Query


### Indexing - get retrievers


In [4]:
# load blog
import bs4
from langchain_community.document_loaders import WebBaseLoader

loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=(
            bs4.SoupStrainer(class_=("post-content", "post-title", "post-header"))
        )
    ),
)

blog_docs = loader.load()
# blog_docs[:1]

In [6]:
# Split
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=300, chunk_overlap=50
)

In [9]:
# make splits
splits = text_splitter.split_documents(blog_docs)
# splits[:2]

In [21]:
# OPENAI_API_KEY

In [20]:
# index / embeddings into vector store
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma

vectorstore = Chroma.from_documents(
    documents=splits, embedding=OpenAIEmbeddings(api_key=OPENAI_API_KEY)
)

retriever = vectorstore.as_retriever()

### define prompt


In [23]:
from langchain.prompts import ChatPromptTemplate

# Multi Query: Different Perspectives
template = """You are an AI language model assistant. Your task is to generate five different versions of the given user question to retrieve relavant documents from a vector database. By generating multiple perspectives on the user question, your goal is to help the user overcome some of the limitations of the distance-based similarity search. Provide these alternative Questions separated by newlines.

Original Question: {question}"""

prompt_perspectives = ChatPromptTemplate.from_template(template)

prompt_perspectives

ChatPromptTemplate(input_variables=['question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], template='You are an AI language model assistant. Your task is to generate five different versions of the given user question to retrieve relavant documents from a vector database. By generating multiple perspectives on the user question, your goal is to help the user overcome some of the limitations of the distance-based similarity search. Provide these alternative Questions separated by newlines.\n\nOriginal Question: {question}'))])

In [27]:
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

generate_queries = (
    prompt_perspectives
    | ChatOpenAI(temperature=0, api_key=OPENAI_API_KEY)  # llm
    | StrOutputParser()
    | (lambda x: x.split("\n"))
)

generate_queries

ChatPromptTemplate(input_variables=['question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], template='You are an AI language model assistant. Your task is to generate five different versions of the given user question to retrieve relavant documents from a vector database. By generating multiple perspectives on the user question, your goal is to help the user overcome some of the limitations of the distance-based similarity search. Provide these alternative Questions separated by newlines.\n\nOriginal Question: {question}'))])
| ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x28df7d010>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x28df7fe90>, temperature=0.0, openai_api_key=SecretStr('**********'), openai_proxy='')
| StrOutputParser()
| RunnableLambda(...)

In [28]:
from langchain.load import dumps, loads


def get_unique_union(documents: list[list]):
    """Return the de-duplicated union of several retrieval result lists.

    Args:
        documents: A list of lists; each inner list holds the documents
            retrieved for one query.

    Returns:
        A flat list with one entry per distinct document, in order of first
        appearance across the input lists.
    """
    # Serialize every document to a string so it can be used as a dict key.
    serialized_docs = (dumps(doc) for sublist in documents for doc in sublist)
    # dict.fromkeys removes duplicates while keeping insertion order; set()
    # would return the documents in an arbitrary, run-dependent order.
    unique_serialized = dict.fromkeys(serialized_docs)
    return [loads(serialized) for serialized in unique_serialized]


# Retrieve
question = "What is task decomposition for LLM agents?"
retrieval_chain = generate_queries | retriever.map() | get_unique_union
docs = retrieval_chain.invoke({"question": question})

len(docs)

5

In [30]:
from operator import itemgetter
from langchain_openai import ChatOpenAI
from langchain_core.runnables import RunnablePassthrough

In [33]:
# RAG to answer the question
template = """Answer the following question based on this context:
{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)
llm = ChatOpenAI(api_key=OPENAI_API_KEY)

In [36]:
# final rag chain
final_rag_chain = (
    {"context": retrieval_chain, "question": itemgetter("question")}
    | prompt
    | llm
    | StrOutputParser()
)

final_rag_chain

{
  context: ChatPromptTemplate(input_variables=['question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], template='You are an AI language model assistant. Your task is to generate five different versions of the given user question to retrieve relavant documents from a vector database. By generating multiple perspectives on the user question, your goal is to help the user overcome some of the limitations of the distance-based similarity search. Provide these alternative Questions separated by newlines.\n\nOriginal Question: {question}'))])
           | ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x28df7d010>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x28df7fe90>, temperature=0.0, openai_api_key=SecretStr('**********'), openai_proxy='')
           | StrOutputParser()
           | RunnableLambda(...)
           | RunnableEach(bound=VectorStoreRetriever(tags=['Chroma', 'OpenAIEmbe

In [37]:
final_rag_chain.invoke({"question": question})

'Task decomposition for LLM agents involves breaking down large tasks into smaller, manageable subgoals. This allows the agent to efficiently handle complex tasks by dividing them into more manageable steps. The agent can use techniques such as Chain of Thought and Tree of Thoughts to decompose tasks into multiple manageable tasks and explore multiple reasoning possibilities at each step. Task decomposition can be achieved through simple prompting by the LLM, task-specific instructions, or with human inputs.'

In [38]:
question

'What is task decomposition for LLM agents?'

# -- PART 5 - 9 (Query Transformation)


## Part 6: RAG-Fusion


In [39]:
from langchain.prompts import ChatPromptTemplate

# RAG-Fusion: Related
template = """You are a helpful assistant that generates multiple search queries based on a single input query.\n
Generate multiple search queries related to: {question} \n
Output (4 queries):"""

prompt_rag_fusion = ChatPromptTemplate.from_template(template)
prompt_rag_fusion

ChatPromptTemplate(input_variables=['question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], template='You are a helpful assistant that generates multiple search queries based on a single input query.\n \nGenerate multiple search queries related to: {question} \n\nOutput (4 queries):'))])

In [42]:
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

generate_queries = (
    prompt_rag_fusion
    | ChatOpenAI(temperature=0, api_key=OPENAI_API_KEY)
    | StrOutputParser()
    | (lambda x: x.split("\n"))
)  # chain

generate_queries

ChatPromptTemplate(input_variables=['question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], template='You are a helpful assistant that generates multiple search queries based on a single input query.\n \nGenerate multiple search queries related to: {question} \n\nOutput (4 queries):'))])
| ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x28e496890>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x28fafac50>, temperature=0.0, openai_api_key=SecretStr('**********'), openai_proxy='')
| StrOutputParser()
| RunnableLambda(...)

In [43]:
from langchain.load import dumps, loads


def reciprocal_rank_fusion(results: list[list], k=60):
    """Fuse several ranked document lists with Reciprocal Rank Fusion (RRF).

    Args:
        results: A list of lists; the position of a document inside its inner
            list is used as that document's rank for that list.
        k: Constant added to the rank in the RRF denominator.

    Returns:
        A list of ``(document, fused_score)`` tuples sorted by descending
        fused score. Documents with equal scores keep first-seen order.
    """
    # Maps the serialized form of each distinct document to its fused score.
    fused_scores = {}

    for docs in results:
        # enumerate() starts at 0, so the best hit of a list contributes 1 / k.
        for rank, doc in enumerate(docs):
            # Serialize the document so it can serve as a dictionary key.
            doc_str = dumps(doc)
            fused_scores[doc_str] = fused_scores.get(doc_str, 0) + 1 / (rank + k)

    # sorted() is stable, so ties stay in insertion (first-seen) order.
    ranked = sorted(fused_scores.items(), key=lambda item: item[1], reverse=True)

    # Restore the original document objects next to their fused scores.
    return [(loads(doc_str), score) for doc_str, score in ranked]


retrieval_chain_rag_fusion = generate_queries | retriever.map() | reciprocal_rank_fusion
docs = retrieval_chain_rag_fusion.invoke({"question": question})
len(docs)

7

In [46]:
from langchain_core.runnables import RunnablePassthrough

# RAG
template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

final_rag_chain = (
    {"context": retrieval_chain_rag_fusion, "question": itemgetter("question")}
    | prompt
    | llm
    | StrOutputParser()
)

final_rag_chain.invoke({"question": question})

"Task decomposition for LLM agents involves breaking down large tasks into smaller, manageable subgoals. This enables the agent to efficiently handle complex tasks by transforming big tasks into multiple manageable tasks and shedding light on the interpretation of the model's thinking process. Task decomposition can be done using simple prompting techniques, task-specific instructions, or with human inputs."

## Part 7: Decomposition (sub-questions)


In [47]:
from langchain.prompts import ChatPromptTemplate

# Decomposition
template = """You are a helpful assistant that generates multiple sub-questions related to an input question. \n
The goal is to break down the input into a set of sub-problems / sub-questions that can be answers in isolation. \n
Generate multiple search queries related to: {question} \n
Output (3 queries):"""

prompt_decomposition = ChatPromptTemplate.from_template(template)

In [49]:
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser

# LLM
llm = ChatOpenAI(temperature=0, api_key=OPENAI_API_KEY)

In [51]:
# create chain
generate_queries_decomposition = (
    prompt_decomposition | llm | StrOutputParser() | (lambda x: x.split("\n"))
)

In [53]:
# create question
question = "What are the main components of an LLM-powered autonomous agent system?"

# invoke chain
questions = generate_queries_decomposition.invoke({"question": question})

In [54]:
questions

['1. What is LLM technology and how does it work in autonomous agent systems?',
 '2. What are the specific components that make up an LLM-powered autonomous agent system?',
 '3. How do the main components of an LLM-powered autonomous agent system interact with each other to enable autonomous behavior?']

### Answer recursively


In [65]:
# Prompt for answering one sub-question, given the Q+A pairs gathered so far
# and the context retrieved for that sub-question.
# NOTE(review): "answser" in the template below looks like a typo for "answer";
# it is left untouched here because the template text is sent to the model.
template = """Here is the question you need to answser:

\n --- \n {question} \n --- \n

Here is any available background question + answer pairs:

\n --- \n {q_a_pairs} \n --- \n

Here is additional context relevant to the question:

\n --- \n {context} \n --- \n

Use the above context and any background question + answer pairs to answer the question: \n {question}
"""

decomposition_prompt = ChatPromptTemplate.from_template(template)

In [66]:
from operator import itemgetter
from langchain_core.output_parsers import StrOutputParser


def format_qa_pair(question, answer):
    """Render one question/answer pair as text, without surrounding whitespace."""
    return f"Question: {question}\nAnswer: {answer}\n\n".strip()

In [62]:
llm = ChatOpenAI(temperature=0, api_key=OPENAI_API_KEY)
llm

ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x171ce5dd0>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x171ce82d0>, temperature=0.0, openai_api_key=SecretStr('**********'), openai_proxy='')

In [67]:
# The chain is the same for every sub-question, so build it once rather than
# on each loop iteration.
rag_chain = (
    {
        # Retrieve context for the current sub-question.
        "context": itemgetter("question") | retriever,
        "question": itemgetter("question"),
        # Q+A pairs accumulated from the sub-questions answered so far.
        "q_a_pairs": itemgetter("q_a_pairs"),
    }
    | decomposition_prompt
    | llm
    | StrOutputParser()
)

# Answer the sub-questions in order, feeding each answer into the next prompt.
q_a_pairs = ""
for q in questions:
    answer = rag_chain.invoke({"question": q, "q_a_pairs": q_a_pairs})
    q_a_pair = format_qa_pair(q, answer)
    q_a_pairs = q_a_pairs + "\n---\n" + q_a_pair

In [68]:
answer

"The main components of an LLM-powered autonomous agent system interact with each other in a cohesive manner to enable autonomous behavior. \n\n1. Planning: The planning component involves task decomposition, where the agent breaks down complex tasks into smaller, manageable subgoals. This enables efficient handling of tasks and sets the stage for further interactions with other components.\n\n2. External Tools Integration: LLM-powered autonomous agents can interact with external tools like classical planners for long-horizon planning. By translating the problem into natural language, utilizing Planning Domain Definition Language (PDDL) for planning, and translating plans back into natural language, the agent can leverage external tools to enhance its problem-solving capabilities.\n\n3. Self-Reflection: Self-reflection is a crucial aspect that allows autonomous agents to improve iteratively by refining past action decisions and correcting mistakes. Systems like ReAct integrate reasonin

### Answer individually


In [69]:
from langchain import hub
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

In [70]:
# RAG prompt
prompt_rag = hub.pull("rlm/rag-prompt")
prompt_rag

ChatPromptTemplate(input_variables=['context', 'question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"))])

In [76]:
def retrieve_and_rag(question, prompt_rag, sub_question_generator_chain):
    """Decompose a question into sub-questions and answer each one with RAG.

    Relies on the notebook-level ``retriever`` and ``llm`` objects.

    Args:
        question: The original user question.
        prompt_rag: Prompt template taking ``context`` and ``question`` inputs.
        sub_question_generator_chain: Runnable that, invoked with
            ``{"question": question}``, returns the sub-questions as a list
            of strings.

    Returns:
        A tuple ``(rag_results, sub_questions)`` of two equally long lists:
        the answers and the (non-blank) sub-questions they belong to.
    """
    generated = sub_question_generator_chain.invoke({"question": question})

    # Splitting model output on "\n" can yield empty or whitespace-only entries;
    # drop them so they are not sent to the retriever and the LLM as questions.
    sub_questions = [candidate for candidate in generated if candidate.strip()]

    # The answering chain does not depend on the sub-question: build it once.
    answer_chain = prompt_rag | llm | StrOutputParser()

    # One answer per sub-question, in the same order as sub_questions.
    rag_results = []
    for sub_question in sub_questions:
        # Retrieve documents for this sub-question.
        retrieved_docs = retriever.get_relevant_documents(sub_question)

        # Answer the sub-question using only its own retrieved documents.
        answer = answer_chain.invoke(
            {"context": retrieved_docs, "question": sub_question}
        )
        rag_results.append(answer)

    return rag_results, sub_questions

In [77]:
# Run decomposition and per-sub-question RAG with a direct function call
# (nothing is wrapped in a RunnableLambda here).
# NOTE: this rebinds `questions` to the sub-questions returned by the call.
answers, questions = retrieve_and_rag(
    question, prompt_rag, generate_queries_decomposition
)

In [78]:
def format_qa_pairs(questions, answers):
    """Format numbered question/answer pairs as one text block.

    Args:
        questions: Iterable of questions.
        answers: Iterable of answers, paired with ``questions`` by position
            (extra items on the longer side are ignored by ``zip``).

    Returns:
        The pairs numbered from 1 and separated by blank lines, with leading
        and trailing whitespace removed; ``""`` when there are no pairs.
    """
    # Each f-string stays on a single line: a line break inside a replacement
    # field of a single-quoted f-string is a SyntaxError before Python 3.12.
    blocks = [
        f"Question {i}: {question}\nAnswer {i}: {answer}"
        for i, (question, answer) in enumerate(zip(questions, answers), start=1)
    ]
    return "\n\n".join(blocks).strip()


context = format_qa_pairs(questions, answers)

# Prompt
template = """Here is a set of Q+A pairs:

{context}

Use these to synthesize an answer to the question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

final_rag_chain = prompt | llm | StrOutputParser()

final_rag_chain.invoke({"context": context, "question": question})

'The main components of an LLM-powered autonomous agent system include LLM technology as the core controller, planning for task decomposition, subgoal and reflection for iterative improvement, and task-specific actions for interacting with the environment. These components work together to enable autonomous functionality by utilizing planning for breaking down tasks, self-reflection for refining past actions, and integrating reasoning and acting within LLM through tools like ReAct.'

## Part 8: Step Back


In [87]:
# Few Shot examples
from langchain_core.prompts import ChatPromptTemplate, FewShotChatMessagePromptTemplate

In [80]:
examples = [
    {
        "input": "Could the members of the Police perform lawful arrests?",
        "output": "what can the members of the Police do?",
    },
    {
        "input": "Jan Sindel's was born in what counry?",
        "output": "What is Jan Sindel's personal history?",
    },
]
examples

[{'input': 'Could the members of the Police perform lawful arrests?',
  'output': 'what can the members of the Police do?'},
 {'input': "Jan Sindel's was born in what counry?",
  'output': "What is Jan Sindel's personal history?"}]

In [84]:
# we now transform these to examples messages
example_prompt = ChatPromptTemplate.from_messages(
    [("human", "{input}"), ("ai", "{output}")]
)
example_prompt

ChatPromptTemplate(input_variables=['input', 'output'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='{input}')), AIMessagePromptTemplate(prompt=PromptTemplate(input_variables=['output'], template='{output}'))])

In [97]:
few_shot_prompt = FewShotChatMessagePromptTemplate(
    example_prompt=example_prompt, examples=examples
)
few_shot_prompt

FewShotChatMessagePromptTemplate(examples=[{'input': 'Could the members of the Police perform lawful arrests?', 'output': 'what can the members of the Police do?'}, {'input': "Jan Sindel's was born in what counry?", 'output': "What is Jan Sindel's personal history?"}], example_prompt=ChatPromptTemplate(input_variables=['input', 'output'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='{input}')), AIMessagePromptTemplate(prompt=PromptTemplate(input_variables=['output'], template='{output}'))]))

In [100]:
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """You are an expert at world knowledge. Your task is to step back and paraphrase a question to a more generic step-back question, which is easier to answer. Here are a few examples:""",
        ),
        # Few shot examples
        few_shot_prompt,
        # New question
        ("user", "{question}"),
    ]
)

In [101]:
generate_queries_step_back = (
    prompt | ChatOpenAI(temperature=0, api_key=OPENAI_API_KEY) | StrOutputParser()
)

question = "What is task decomposition for LLM agents?"
generate_queries_step_back.invoke({"question": question})

'How can tasks be broken down for LLM agents?'

In [103]:
# response prompt
response_prompt_template = """You are an expert of world knowledge. I am going to ask you a question. Your response should be comprehensive and not contradicted with the following context if they are relevant. Otherwise, ignore them if they are not relevant.

# {normal_context}
# {step_back_context}

# Original Question: {question}
# Answer:"""

response_prompt = ChatPromptTemplate.from_template(response_prompt_template)
response_prompt

ChatPromptTemplate(input_variables=['normal_context', 'question', 'step_back_context'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['normal_context', 'question', 'step_back_context'], template='You are an expert of world knowledge. I am going to ask you a question. Your response should be comprehensive and not contradicted with the following context if they are relevant. Otherwise, ignore them if they are not relevant.\n\n# {normal_context}\n# {step_back_context}\n\n# Original Question: {question}\n# Answer:'))])

In [104]:
chain = (
    {
        # Retrieve context using normal question
        "normal_context": RunnableLambda(lambda x: x["question"]) | retriever,
        # Retrieve context using the step-back question
        "step_back_context": generate_queries_step_back | retriever,
        # pass on the question
        "question": lambda x: x["question"],
    }
    | response_prompt
    | ChatOpenAI(api_key=OPENAI_API_KEY, temperature=0)
    | StrOutputParser()
)

In [105]:
chain.invoke({"question": question})

'Task decomposition for LLM agents refers to the process of breaking down large and complex tasks into smaller, more manageable subgoals. This allows the LLM-powered autonomous agent to efficiently handle intricate tasks by dividing them into smaller steps that can be tackled individually. Task decomposition is a crucial component of the planning phase in an LLM-powered autonomous agent system, where the agent needs to understand the steps involved in a task and plan ahead to achieve the desired outcome.\n\nThere are various techniques and approaches to task decomposition for LLM agents. One common method is the use of prompting techniques such as Chain of Thought (CoT) and Tree of Thoughts. CoT involves instructing the model to "think step by step" to decompose hard tasks into smaller and simpler steps, transforming big tasks into multiple manageable tasks. On the other hand, Tree of Thoughts extends CoT by exploring multiple reasoning possibilities at each step, creating a tree struc

## Part 9: HyDE


In [8]:
from langchain.prompts import ChatPromptTemplate

In [9]:
# HyDE document generation
# The prompt asks the model to write a passage that answers `{question}`;
# that generated passage (not the question itself) is what later cells send
# to the retriever.
template = """Please write a scientific paper passage to answer the question
Question: {question}
Passage:
"""

prompt_hyde = ChatPromptTemplate.from_template(template)
# bare expression: display the prompt object as the cell output
prompt_hyde

ChatPromptTemplate(input_variables=['question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], template='Please write a scientific paper passage to answer the question\nQuestion: {question}\nPassage:\n'))])

In [10]:
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(temperature=0, api_key=OPENAI_API_KEY)
llm

ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x10e746c90>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x10db08250>, temperature=0.0, openai_api_key=SecretStr('**********'), openai_proxy='')

In [11]:
# define the hyde retrieval chain:
# prompt -> chat model -> plain string (the hypothetical passage).
# NOTE: the "retreival" spelling in the name is kept because later cells
# reference this exact identifier.
generate_docs_for_retreival = prompt_hyde | llm | StrOutputParser()

# bare expression: display the composed chain as the cell output
generate_docs_for_retreival

ChatPromptTemplate(input_variables=['question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], template='Please write a scientific paper passage to answer the question\nQuestion: {question}\nPassage:\n'))])
| ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x10e746c90>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x10db08250>, temperature=0.0, openai_api_key=SecretStr('**********'), openai_proxy='')
| StrOutputParser()

In [12]:
# get the hyDE documents for retrieval
question = "What is task decomposition for LLM agents?"

generate_docs_for_retreival.invoke({"question": question})

'Task decomposition is a fundamental concept in the field of machine learning and artificial intelligence, particularly for LLM (Large Language Model) agents. Task decomposition refers to the process of breaking down a complex task into smaller, more manageable sub-tasks that can be solved independently or in parallel. This approach allows LLM agents to efficiently tackle complex problems by dividing them into smaller, more manageable components.\n\nIn the context of LLM agents, task decomposition is crucial for improving the efficiency and effectiveness of the learning process. By breaking down a complex task into smaller sub-tasks, LLM agents can focus on solving each sub-task individually, which can lead to faster and more accurate results. Additionally, task decomposition allows LLM agents to leverage their knowledge and experience from solving one sub-task to inform the solution of another sub-task, leading to improved overall performance.\n\nOverall, task decomposition plays a cr

In [14]:
#### Retriever ####
# Builds the retriever used by the HyDE chain: load the blog post, split it,
# embed the chunks into a Chroma store, and expose the store as a retriever.
# NOTE: `splits`, `vectorstore` and `retriever` are also assigned in Part 1;
# running this cell rebinds those names.

# Load blog
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
import bs4
from langchain_community.document_loaders import WebBaseLoader

# Parse only the HTML elements carrying one of the listed CSS classes.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
blog_docs = loader.load()

# Split
# NOTE(review): with `from_tiktoken_encoder` the sizes below are presumably
# measured in tokens rather than characters (Part 1 used the plain
# constructor with 1000/200) — confirm against the splitter docs.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=300, chunk_overlap=50
)

# Make splits
splits = text_splitter.split_documents(blog_docs)

# Index
vectorstore = Chroma.from_documents(
    documents=splits, embedding=OpenAIEmbeddings(api_key=OPENAI_API_KEY)
)

# Retriever with default search settings.
retriever = vectorstore.as_retriever()

In [17]:
question

'What is task decomposition for LLM agents?'

In [19]:
# create chain to Retrieve the documents matching the HyDE
# The string produced by `generate_docs_for_retreival` (the hypothetical
# passage) is piped into the retriever as its search query.
retrieval_chain = generate_docs_for_retreival | retriever

In [21]:
# perform RAG to retrieve the documents

retrieved_docs = retrieval_chain.invoke({"question": question})

retrieved_docs[:1]

[Document(page_content='Fig. 1. Overview of a LLM-powered autonomous agent system.\nComponent One: Planning#\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\nTask Decomposition#\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.\nTree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via 

In [22]:
# get the question answered by retrieved documents
# NOTE: this rebinds `prompt`, which Part 1 assigned to the hub RAG prompt.
template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

# prompt -> chat model -> plain string answer
final_rag_chain = prompt | llm | StrOutputParser()

# NOTE(review): `retrieved_docs` is passed as the raw list of Document objects,
# so `{context}` is presumably filled with the list's string form (including
# metadata) rather than only the page text — confirm this is intended.
final_rag_chain.invoke(
    # retrieved based on HyDE docs
    {"context": retrieved_docs, "question": question}
)

'Task decomposition for LLM agents involves breaking down large tasks into smaller, manageable subgoals. This enables the agent to efficiently handle complex tasks by dividing them into smaller and simpler steps. Task decomposition can be achieved through techniques like Chain of Thought (CoT) and Tree of Thoughts, as well as simple prompting or task-specific instructions.'

# -- PART 10-11 (Routing)


## Part 10: Logical and Semantic Routing


### Logical (function calling)


In [3]:
from typing import Literal

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_openai import ChatOpenAI

In [4]:
# Data model
# Schema handed to `llm.with_structured_output(...)` in the next cell.
# NOTE: the class docstring and the Field description are not just
# documentation — both appear in the tool definition sent to the model
# (see the `RunnableBinding` repr printed by the next cell), so editing them
# changes what the model is told.
class RouteQuery(BaseModel):
    """Route a user query to the most relevant datasource."""

    # The Literal restricts the model's answer to these three values.
    datasource: Literal["python_docs", "js_docs", "golang_docs"] = Field(
        ...,
        description="Given a user question choose which datasource would be most relevant for answering their question",
    )

In [5]:
# llm w/ function call
llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
structured_llm = llm.with_structured_output(RouteQuery)
structured_llm

  warn_beta(


RunnableBinding(bound=ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x10b0e3560>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x10fce15e0>, model_name='gpt-3.5-turbo-0125', temperature=0.0, openai_api_key=SecretStr('**********'), openai_proxy=''), kwargs={'tools': [{'type': 'function', 'function': {'name': 'RouteQuery', 'description': 'Route a user query to the most relevant datasource.', 'parameters': {'type': 'object', 'properties': {'datasource': {'description': 'Given a user question choose which datasource would be most relevant for answering their question', 'enum': ['python_docs', 'js_docs', 'golang_docs'], 'type': 'string'}}, 'required': ['datasource']}}}], 'tool_choice': {'type': 'function', 'function': {'name': 'RouteQuery', 'description': 'Route a user query to the most relevant datasource.', 'parameters': {'type': 'object', 'properties': {'datasource': {'description': 'Given a user question choose which datasource 

In [6]:
# prompt
system = """You are an expert at routing a user question to the appropriate data source.

Based on the programming language the question is referring to, route it to the relevant data source.
"""

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "{question}"),
    ]
)

In [7]:
# define the router
router = prompt | structured_llm

In [8]:
# ask the question and get the result
question = """Why doesn't the following code work:

from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_messages(["human", "speak in {language}"])
prompt.invoke("french")
"""

result = router.invoke({"question": question})
result

RouteQuery(datasource='python_docs')

In [9]:
result.datasource

'python_docs'

In [11]:
def choose_route(result):
    """Map a routing decision to the placeholder label of the chain that handles it.

    :param result: object exposing a string ``datasource`` attribute (here the
        ``RouteQuery`` instance returned by the router).
    :return: str - ``"chain for <datasource>"`` label of the selected chain.
    """
    # Lower-case once; matching stays case-insensitive and substring-based.
    datasource = result.datasource.lower()

    if "python_docs" in datasource:
        # Logic here
        return "chain for python_docs"
    if "js_docs" in datasource:
        return "chain for js_docs"
    # Everything else falls through to the golang chain. The label now follows
    # the same "chain for ..." convention as the other two branches (it used to
    # be the bare datasource name).
    return "chain for golang_docs"

In [12]:
from langchain_core.runnables import RunnableLambda

full_chain = router | RunnableLambda(choose_route)

In [13]:
question

'Why doesn\'t the following code work:\n\nfrom langchain_core.prompts import ChatPromptTemplate\n\nprompt = ChatPromptTemplate.from_messages(["human", "speak in {language}"])\nprompt.invoke("french")\n'

In [14]:
full_chain.invoke({"question": question})

'chain for python_docs'

### Semantic Routing


- Helps pick the most suitable prompt for a given question.
- Example: should I converse with the user about their question, or should I just return a SQL query? Each option needs its own logic and its own prompt.


In [15]:
from langchain.utils.math import cosine_similarity
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

In [16]:
# Two prompts
physics_template = """You are a very smart physics professor. \
You are great at answering questions about physics in a concise and easy to understand manner. \
When you don't know the answer to a question you admit that you don't know.

Here is a question:
{query}"""

math_template = """You are very good mathematician. You are great at answering math questions. \
Your are so good because you are able to break down hard problems into their component parts, \
answer the component parts, and then put them together to answer the broader question.

Here is a question:
{query}"""

In [19]:
# embed prompts
embeddings = OpenAIEmbeddings()
prompt_templates = [physics_template, math_template]
prompt_embeddings = embeddings.embed_documents(prompt_templates)

In [24]:
# Route question to prompt
def prompt_router(input):
    """Select the prompt template most similar to the incoming query.

    :param input: mapping with a ``"query"`` entry holding the question text.
    :return: ``PromptTemplate`` built from the best-matching template string.
    """
    # Embed the question with the same embedding client used for the templates.
    question_vector = embeddings.embed_query(input["query"])

    # One similarity score per candidate template (first row of the matrix).
    scores = cosine_similarity([question_vector], prompt_embeddings)[0]
    best_template = prompt_templates[scores.argmax()]

    # Report which template won.
    if best_template == math_template:
        label = "Using MATH"
    else:
        label = "Using PHYSICS"
    print(label)

    return PromptTemplate.from_template(best_template)

In [26]:
# Semantic-routing chain:
#   1. wrap the raw input string as {"query": <input>} — the shape
#      `prompt_router` reads;
#   2. `prompt_router` returns the PromptTemplate chosen for this query;
#   3. the resulting prompt goes to the chat model and the reply is parsed
#      into a plain string.
# NOTE: this rebinds `chain`, which earlier cells also assign.
chain = (
    {"query": RunnablePassthrough()}
    | RunnableLambda(prompt_router)
    | ChatOpenAI()
    | StrOutputParser()
)

print(chain.invoke("what is a derivative?"))

Using MATH
A derivative is a mathematical concept that represents the rate of change of a function at a particular point. It essentially tells us how a function is changing at that specific point. It is calculated by finding the slope of the tangent line to the function at that point. Derivatives are used in calculus to solve a variety of problems, including finding maximum and minimum points of a function, determining the speed or acceleration of an object, and analyzing the behavior of functions.


## Part 11: Query Construction


In [29]:
from langchain_community.document_loaders import YoutubeLoader

docs = YoutubeLoader.from_youtube_url(
    "https://www.youtube.com/watch?v=pbAd8O1Lvm4", add_video_info=True
).load()

docs[0].metadata

{'source': 'pbAd8O1Lvm4',
 'title': 'Self-reflective RAG with LangGraph: Self-RAG and CRAG',
 'description': 'Unknown',
 'view_count': 12085,
 'thumbnail_url': 'https://i.ytimg.com/vi/pbAd8O1Lvm4/hq720.jpg',
 'publish_date': '2024-02-07 00:00:00',
 'length': 1058,
 'author': 'LangChain'}

In [30]:
# define schema for output/similar to KOR approach
import datetime
from typing import Literal, Optional, Tuple
from langchain_core.pydantic_v1 import BaseModel, Field


class TutorialSearch(BaseModel):
    """Search over a database of tutorial videos about a software library."""

    # --- required free-text queries ---
    content_search: str = Field(
        ...,
        description="Similarity search query applied to video transcripts.",
    )
    title_search: str = Field(
        ...,
        description=(
            "Alternate version of the content search query to apply to video titles. "
            "Should be succinct and only include key words that could be in a video "
            "title."
        ),
    )

    # --- optional filters; all default to None ---
    min_view_count: Optional[int] = Field(
        None,
        description="Minimum view count filter, inclusive. Only use if explicitly specified.",
    )
    max_view_count: Optional[int] = Field(
        None,
        description="Maximum view count filter, exclusive. Only use if explicitly specified.",
    )
    earliest_publish_date: Optional[datetime.date] = Field(
        None,
        description="Earliest publish date filter, inclusive. Only use if explicitly specified.",
    )
    latest_publish_date: Optional[datetime.date] = Field(
        None,
        description="Latest publish date filter, exclusive. Only use if explicitly specified.",
    )
    min_length_sec: Optional[int] = Field(
        None,
        description="Minimum video length in seconds, inclusive. Only use if explicitly specified.",
    )
    max_length_sec: Optional[int] = Field(
        None,
        description="Maximum video length in seconds, exclusive. Only use if explicitly specified.",
    )

    def pretty_print(self) -> None:
        """Print ``name: value`` for every field that is set and differs from its default."""
        for name, model_field in self.__fields__.items():
            value = getattr(self, name)
            # Unset (None) fields are never shown.
            if value is None:
                continue
            # Fields still holding their declared default are skipped too.
            if value != getattr(model_field, "default", None):
                print(f"{name}: {value}")

In [31]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

system = """You are an expert at converting user questions into database queries. \
You have access to a database of tutorial videos about a software library for building LLM-powered applications. \
Given a question, return a database query optimized to retrieve the most relevant results.

If there are acronyms or words you are not familiar with, do not try to rephrase them."""

prompt = ChatPromptTemplate.from_messages([("system", system), ("human", "{question}")])

In [32]:
# Chat model constrained to answer with a `TutorialSearch` instance.
# NOTE: this rebinds `llm` and `structured_llm` from the routing section.
llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
structured_llm = llm.with_structured_output(TutorialSearch)

# system + human prompt (previous cell) -> structured model output
query_analyzer = prompt | structured_llm

content_search: rag from scratch
title_search: rag from scratch


### Ask questions; the LLM returns results that follow the schema


In [33]:
query_analyzer.invoke({"question": "rag from scratch"}).pretty_print()

content_search: rag from scratch
title_search: rag from scratch


In [34]:
query_analyzer.invoke(
    {"question": "videos on chat langchain published in 2023"}
).pretty_print()

content_search: chat langchain
title_search: chat langchain
earliest_publish_date: 2023-01-01
latest_publish_date: 2024-01-01


In [35]:
query_analyzer.invoke(
    {
        "question": "videos that are focused on the topic of chat langchain that are published before 2024"
    }
).pretty_print()

content_search: chat langchain
title_search: chat langchain
earliest_publish_date: 2024-01-01


In [36]:
query_analyzer.invoke(
    {
        "question": "how to use multi-modal models in an agent, only videos under 5 minutes"
    }
).pretty_print()

content_search: multi-modal models agent
title_search: multi-modal models agent
max_length_sec: 300


# -- PART 12-14 (Indexing)


## Part 12: Multi-representation Indexing


In [37]:
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [44]:
loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")
docs = loader.load()
len(docs)

1

In [45]:
loader = WebBaseLoader(
    "https://lilianweng.github.io/posts/2024-02-05-human-data-quality/"
)
docs.extend(loader.load())
len(docs)

2

In [47]:
# get document summaries
import uuid

from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

# Summarisation chain: Document -> {"doc": page text} -> prompt -> model -> str
chain = (
    {
        # A bare lambda is enough here: callables placed in a dict that is piped
        # into a runnable are wrapped automatically, so an explicit
        # RunnableLambda is not needed.
        "doc": lambda x: x.page_content
    }
    | ChatPromptTemplate.from_template("Summarize the following document:\n\n{doc}")
    # NOTE(review): max_retries=0 presumably disables automatic retries on
    # failed API calls — confirm this is intended.
    | ChatOpenAI(model="gpt-3.5-turbo", max_retries=0)
    | StrOutputParser()
)

# One summary per loaded document, in the same order as `docs`;
# at most 5 documents are processed concurrently.
summaries = chain.batch(docs, {"max_concurrency": 5})

In [48]:
summaries

['The document discusses the concept of building autonomous agents powered by large language models (LLMs). It covers key components of these agents, including planning, memory, and tool use, with examples such as AutoGPT and GPT-Engineer. The challenges of finite context length, long-term planning, and reliability of natural language interfaces are also addressed. The document provides references and citations for further reading.',
 'The document discusses the importance of high-quality human data for training deep learning models. It covers various aspects of collecting human data, including task design, selecting and training annotators, and aggregating data. The document also explores methods for measuring rater agreement, dealing with rater disagreement, and two paradigms for data annotation. Additionally, it discusses techniques for identifying mislabeled data, such as influence functions, tracking prediction changes during training, and noisy cross-validation. The document prov

In [49]:
# use summary to find out which document to use for llm
from langchain.storage import InMemoryByteStore
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.retrievers.multi_vector import MultiVectorRetriever

In [50]:
# the vectorstore to use to index the child chunks/summaries
vectorstore = Chroma(collection_name="summaries", embedding_function=OpenAIEmbeddings())

In [51]:
# the storage layer for the parent documents
store = InMemoryByteStore()
id_key = "doc_id"

In [57]:
# The retriever that has access to multiple individual retrievers
# NOTE: this rebinds `retriever`, which earlier sections assigned to a plain
# vector-store retriever.
retriever = MultiVectorRetriever(
    vectorstore=vectorstore,  # summary retriever
    byte_store=store,  # doc retriever
    id_key=id_key,
)
# One fresh random UUID per parent document; regenerated on every run, so the
# ids shown in the output below differ between executions.
doc_ids = [str(uuid.uuid4()) for _ in docs]
doc_ids

['f4c5087f-1c2f-4e5b-a98b-de324aa4da2a',
 '440c099f-5764-4eda-a1fa-873b8a9ea08c']

In [58]:
# docs linked to summaries
# Each summary becomes its own Document whose metadata stores, under `id_key`,
# the id of the parent document at the same position. This relies on
# `summaries` and `doc_ids` being in the same order as `docs`.
summary_docs = [
    Document(page_content=summary, metadata={id_key: doc_ids[idx]})
    for idx, summary in enumerate(summaries)
]

# bare expression: display the summary documents as the cell output
summary_docs

[Document(page_content='The document discusses the concept of building autonomous agents powered by large language models (LLMs). It covers key components of these agents, including planning, memory, and tool use, with examples such as AutoGPT and GPT-Engineer. The challenges of finite context length, long-term planning, and reliability of natural language interfaces are also addressed. The document provides references and citations for further reading.', metadata={'doc_id': 'f4c5087f-1c2f-4e5b-a98b-de324aa4da2a'}),
 Document(page_content='The document discusses the importance of high-quality human data for training deep learning models. It covers various aspects of collecting human data, including task design, selecting and training annotators, and aggregating data. The document also explores methods for measuring rater agreement, dealing with rater disagreement, and two paradigms for data annotation. Additionally, it discusses techniques for identifying mislabeled data, such as influ

In [61]:
# add docs to the retrievers
# 1) the summaries go into the vector store that is searched;
# 2) the full parent documents go into the doc store, keyed by the same ids
#    that the summaries carry in their metadata.
retriever.vectorstore.add_documents(summary_docs)
retriever.docstore.mset(list(zip(doc_ids, docs)))

In [69]:
# perform query to the retriever
query = "Memory in agents"
sub_docs = vectorstore.similarity_search(query, k=1)

In [70]:
# show the retrieved summary based on similarity search
sub_docs[0]

Document(page_content='The document discusses the concept of building autonomous agents powered by large language models (LLMs). It covers key components of these agents, including planning, memory, and tool use, with examples such as AutoGPT and GPT-Engineer. The challenges of finite context length, long-term planning, and reliability of natural language interfaces are also addressed. The document provides references and citations for further reading.', metadata={'doc_id': 'f4c5087f-1c2f-4e5b-a98b-de324aa4da2a'})

In [73]:
# get the parent doc based on the summary doc id
# `get_relevant_documents` has no `n_results` parameter; the keyword was
# silently ignored and the default number of matches was used. The number of
# summaries searched is controlled through the retriever's `search_kwargs`,
# so limit it to the single best match there.
retriever.search_kwargs = {**retriever.search_kwargs, "k": 1}
retrieved_docs = retriever.get_relevant_documents(query)

In [82]:
retrieved_docs[0].page_content[-1000:]

"st? Sep 13, 2022.\n[12] Li et al. “API-Bank: A Benchmark for Tool-Augmented LLMs” arXiv preprint arXiv:2304.08244 (2023).\n[13] Shen et al. “HuggingGPT: Solving AI Tasks with ChatGPT and its Friends in HuggingFace” arXiv preprint arXiv:2303.17580 (2023).\n[14] Bran et al. “ChemCrow: Augmenting large-language models with chemistry tools.” arXiv preprint arXiv:2304.05376 (2023).\n[15] Boiko et al. “Emergent autonomous scientific research capabilities of large language models.” arXiv preprint arXiv:2304.05332 (2023).\n[16] Joon Sung Park, et al. “Generative Agents: Interactive Simulacra of Human Behavior.” arXiv preprint arXiv:2304.03442 (2023).\n[17] AutoGPT. https://github.com/Significant-Gravitas/Auto-GPT\n[18] GPT-Engineer. https://github.com/AntonOsika/gpt-engineer\n\n\n\nnlp\nlanguage-model\nagent\nsteerability\nprompting\n\n\n\n« \n\nAdversarial Attacks on LLMs\n\n\n »\n\nPrompt Engineering\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n© 2024 Lil'Log

## Part 13: RAPTOR


## PART 14: ColBERT


In [3]:
from ragatouille import RAGPretrainedModel

RAG = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")

[Mar 30, 14:15:07] Loading segmented_maxsim_cpp extension (set COLBERT_LOAD_TORCH_EXTENSION_VERBOSE=True for more info)...




In [6]:
import requests


def get_wikipidia_page(title: str, timeout: float = 10.0):
    """
    Retrieve the full text content of a Wikipedia page.

    :param title: str - Title of the Wikipedia page.
    :param timeout: float - Seconds to wait for the Wikipedia API before giving up.
    :return: str - Full text content of the page as raw string, or None when
        the response contains no page with an extract (e.g. unknown title).
    :raises requests.HTTPError: if the API answers with an HTTP error status.
    """
    # Wikipedia API endpoint
    URL = "https://en.wikipedia.org/w/api.php"

    # params for the API request
    params = {
        "action": "query",
        "format": "json",
        "titles": title,
        "prop": "extracts",
        "explaintext": True,
    }

    # custom user-agent header to comply w/ Wikipedia best practices
    headers = {"User-Agent": "RAGatouille_tutorial/0.0.1 (ben@clavie.edu)"}

    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.get(URL, params=params, headers=headers, timeout=timeout)
    # Fail loudly on HTTP errors instead of trying to parse an error body.
    response.raise_for_status()
    data = response.json()

    # extracting page content; tolerate responses that carry no pages at all
    pages = data.get("query", {}).get("pages", {})
    page = next(iter(pages.values()), None)
    return page.get("extract") if page else None


full_document = get_wikipidia_page("Hayao_Miyazaki")

In [8]:
# Index the Wikipedia article with the ColBERT model loaded above.
# The logged output below shows the single input document being encoded as
# 81 passages and the index being written to
# .ragatouille/colbert/indexes/Miyazaki-123.
RAG.index(
    collection=[full_document],
    index_name="Miyazaki-123",
    # NOTE(review): presumably the maximum passage length in tokens — confirm
    max_document_length=180,
    split_documents=True,
)

This is a behaviour change from RAGatouille 0.8.0 onwards.
This works fine for most users and smallish datasets, but can be considerably slower than FAISS and could cause worse results in some situations.
If you're confident with FAISS working on your machine, pass use_faiss=True to revert to the FAISS-using behaviour.
--------------------


[Mar 30, 14:18:54] #> Creating directory .ragatouille/colbert/indexes/Miyazaki-123 


[Mar 30, 14:18:55] [0] 		 #> Encoding 81 passages..


100%|██████████| 3/3 [00:03<00:00,  1.29s/it]

[Mar 30, 14:18:59] [0] 		 avg_doclen_est = 129.88888549804688 	 len(local_sample) = 81
[Mar 30, 14:18:59] [0] 		 Creating 1,024 partitions.
[Mar 30, 14:18:59] [0] 		 *Estimated* 10,520 embeddings.
[Mar 30, 14:18:59] [0] 		 #> Saving the indexing plan to .ragatouille/colbert/indexes/Miyazaki-123/plan.json ..





used 15 iterations (0.2906s) to cluster 9995 items into 1024 clusters
[0.037, 0.041, 0.038, 0.034, 0.031, 0.037, 0.034, 0.037, 0.033, 0.034, 0.037, 0.038, 0.035, 0.037, 0.034, 0.04, 0.032, 0.032, 0.034, 0.035, 0.038, 0.039, 0.037, 0.035, 0.039, 0.032, 0.038, 0.032, 0.038, 0.035, 0.036, 0.037, 0.039, 0.033, 0.033, 0.034, 0.035, 0.032, 0.034, 0.04, 0.034, 0.038, 0.037, 0.032, 0.04, 0.035, 0.039, 0.035, 0.036, 0.037, 0.036, 0.036, 0.034, 0.039, 0.037, 0.037, 0.04, 0.038, 0.041, 0.031, 0.035, 0.034, 0.034, 0.033, 0.038, 0.038, 0.036, 0.04, 0.032, 0.032, 0.035, 0.036, 0.034, 0.035, 0.035, 0.032, 0.035, 0.038, 0.038, 0.036, 0.037, 0.039, 0.036, 0.041, 0.034, 0.037, 0.038, 0.037, 0.034, 0.04, 0.035, 0.037, 0.034, 0.035, 0.035, 0.037, 0.038, 0.037, 0.035, 0.036, 0.04, 0.039, 0.033, 0.035, 0.037, 0.035, 0.036, 0.034, 0.037, 0.036, 0.034, 0.037, 0.038, 0.032, 0.037, 0.037, 0.035, 0.036, 0.036, 0.039, 0.036, 0.035, 0.034, 0.033, 0.032, 0.037, 0.038, 0.032]


0it [00:00, ?it/s]

[Mar 30, 14:19:00] [0] 		 #> Encoding 81 passages..


100%|██████████| 3/3 [00:03<00:00,  1.07s/it]
1it [00:03,  3.24s/it]
100%|██████████| 1/1 [00:00<00:00, 1517.48it/s]

[Mar 30, 14:19:03] #> Optimizing IVF to store map from centroids to list of pids..
[Mar 30, 14:19:03] #> Building the emb2pid mapping..
[Mar 30, 14:19:03] len(emb2pid) = 10521



100%|██████████| 1024/1024 [00:00<00:00, 108313.80it/s]

[Mar 30, 14:19:03] #> Saved optimized IVF to .ragatouille/colbert/indexes/Miyazaki-123/ivf.pid.pt
Done indexing!





'.ragatouille/colbert/indexes/Miyazaki-123'

In [9]:
results = RAG.search(query="What animation studio did Miyazaki found?", k=3)
results

Loading searcher for index Miyazaki-123 for the first time... This may take a few seconds
[Mar 30, 14:19:46] #> Loading codec...
[Mar 30, 14:19:46] #> Loading IVF...
[Mar 30, 14:19:46] Loading segmented_lookup_cpp extension (set COLBERT_LOAD_TORCH_EXTENSION_VERBOSE=True for more info)...




[Mar 30, 14:19:52] #> Loading doclens...


100%|██████████| 1/1 [00:00<00:00, 5983.32it/s]

[Mar 30, 14:19:52] #> Loading codes and residuals...



100%|██████████| 1/1 [00:00<00:00, 572.37it/s]

[Mar 30, 14:19:52] Loading filter_pids_cpp extension (set COLBERT_LOAD_TORCH_EXTENSION_VERBOSE=True for more info)...





[Mar 30, 14:19:58] Loading decompress_residuals_cpp extension (set COLBERT_LOAD_TORCH_EXTENSION_VERBOSE=True for more info)...
Searcher loaded!

#> QueryTokenizer.tensorize(batch_text[0], batch_background[0], bsize) ==
#> Input: . What animation studio did Miyazaki found?, 		 True, 		 None
#> Output IDs: torch.Size([32]), tensor([  101,     1,  2054,  7284,  2996,  2106,  2771,  3148, 18637,  2179,
         1029,   102,   103,   103,   103,   103,   103,   103,   103,   103,
          103,   103,   103,   103,   103,   103,   103,   103,   103,   103,
          103,   103])
#> Output Mask: torch.Size([32]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0])





[{'content': 'In April 1984, Miyazaki opened his own office in Suginami Ward, naming it Nibariki.\n\n\n=== Studio Ghibli ===\n\n\n==== Early films (1985–1996) ====\nIn June 1985, Miyazaki, Takahata, Tokuma and Suzuki founded the animation production company Studio Ghibli, with funding from Tokuma Shoten. Studio Ghibli\'s first film, Laputa: Castle in the Sky (1986), employed the same production crew of Nausicaä. Miyazaki\'s designs for the film\'s setting were inspired by Greek architecture and "European urbanistic templates".',
  'score': 25.90369987487793,
  'rank': 1,
  'document_id': 'b4bb90cf-75a5-4280-8839-dba4c1a453ce',
  'passage_id': 28},
 {'content': 'Hayao Miyazaki (宮崎 駿 or 宮﨑 駿, Miyazaki Hayao, Japanese: [mijaꜜzaki hajao]; born January 5, 1941) is a Japanese animator, filmmaker, and manga artist. A co-founder of Studio Ghibli, he has attained international acclaim as a masterful storyteller and creator of Japanese animated feature films, and is widely regarded as one of the

# -- PART 15-18 (Retrieval)


## Part 15: Re-ranking


## PART 16:


## PART 17:


## PART 18:


# -- PART 19 (Ranking)
