In [1]:
from langchain.vectorstores import ElasticsearchStore
from langchain.document_loaders import WebBaseLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter

import dotenv
dotenv.load_dotenv()

# Fetch the blog post that serves as the retrieval corpus
loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")
data = loader.load()

# Break the loaded page into chunks (chunk_size=500, no overlap between chunks)
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=500,
)
splits = text_splitter.split_documents(data)

In [2]:
# Vector store: an Elasticsearch index ("test_index") on the local node,
# using OpenAI embeddings
embedding = OpenAIEmbeddings()
vectordb = ElasticsearchStore(
    index_name="test_index",
    es_url="http://localhost:9200",
    embedding=embedding,
)

In [3]:
# Add the chunks produced by the splitter to the vector store and keep the
# return value (presumably the ids of the stored documents — confirm).
# NOTE(review): re-running this cell likely indexes the same chunks again — confirm.
ids = vectordb.add_documents(splits)

In [10]:
# Sanity check: run a plain similarity search against the index and display the hits
vectordb.search(query="Which is the best solution", search_type="similarity")

[Document(page_content='... (Repeated many times)', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'title': "LLM Powered Autonomous Agents | Lil'Log", 'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview In a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:', 'language': 'en'}),
 Document(page_content='answer \\"Nothing more to clarify.\\"."', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'title': "LLM Powered Autonomous Agents | Lil'Log", 'description': 'Building agents with LLM (large language model) as its core controller is a cool

In [42]:
from langchain.chat_models import ChatOpenAI
from langchain.retrievers.multi_query import MultiQueryRetriever

# Chat model used to rewrite the user question (temperature=0)
llm = ChatOpenAI(temperature=0)

# Multi-query retriever built with the library's default prompt; the wrapped
# vector-store retriever is configured for similarity search with k=3
retriever_from_llm = MultiQueryRetriever.from_llm(
    llm=llm,
    retriever=vectordb.as_retriever(
        search_kwargs={'k': 3},
        search_type="similarity",
    ),
)

In [43]:
# Enable INFO-level logging for the multi-query retriever's logger so the
# generated query variants show up in the cell output
import logging

logging.basicConfig()
multi_query_logger = logging.getLogger("langchain.retrievers.multi_query")
multi_query_logger.setLevel(logging.INFO)

In [41]:
# Ask a single question through the multi-query retriever; the logged
# "Generated queries" line shows the alternative phrasings the LLM produced.
question = "What are the approaches to Task Decomposition?"
unique_docs = retriever_from_llm.get_relevant_documents(query=question)
# Display how many documents came back
len(unique_docs)

INFO:langchain.retrievers.multi_query:Generated queries: ['1. How can Task Decomposition be approached?', '2. What are the different methods for Task Decomposition?', '3. What are the various approaches to decomposing tasks?']


4

In [44]:
from typing import List
from pydantic import BaseModel, Field
from langchain.output_parsers import PydanticOutputParser


# Output parser will split the LLM result into a list of queries
class LineList(BaseModel):
    # Container model for parsed LLM output: a single field holding the
    # individual lines of text.
    # "lines" is the key (attribute name) of the parsed output
    lines: List[str] = Field(description="Lines of text")


class LineListOutputParser(PydanticOutputParser):
    """Parse raw LLM text into a ``LineList`` with one entry per non-blank line."""

    def __init__(self) -> None:
        # Bind the parser to the LineList model
        super().__init__(pydantic_object=LineList)

    def parse(self, text: str) -> LineList:
        """Split ``text`` on newlines and wrap the pieces in a ``LineList``.

        Each line is trimmed of surrounding whitespace (including a stray
        carriage return) and blank lines are dropped, so blank separator
        lines in the LLM output never become empty entries.

        Args:
            text: Raw text returned by the LLM.

        Returns:
            A ``LineList`` whose ``lines`` holds the non-empty, trimmed lines
            in their original order (an empty list for blank input).
        """
        trimmed = [line.strip() for line in text.strip().split("\n")]
        # Keep only lines that still have content after trimming
        return LineList(lines=[line for line in trimmed if line])

In [45]:
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

# Model and parser used by the custom query-generation chain
llm = ChatOpenAI(temperature=0)
output_parser = LineListOutputParser()

# Prompt asking for five newline-separated rewrites of the user question
QUERY_PROMPT = PromptTemplate(
    template="""You are an AI language model assistant. Your task is to generate five 
    different versions of the given user question to retrieve relevant documents from a vector 
    database. By generating multiple perspectives on the user question, your goal is to help
    the user overcome some of the limitations of the distance-based similarity search. 
    Provide these alternative questions separated by newlines.
    Original question: {question}""",
    input_variables=["question"],
)

# Chain: prompt -> LLM -> line-list parser
llm_chain = LLMChain(
    prompt=QUERY_PROMPT,
    llm=llm,
    output_parser=output_parser,
)


In [47]:
# Run the chain on its own to inspect the parsed output (a LineList of rewritten questions)
llm_chain.run(question="What does the course say about regression?")

LineList(lines=["1. What is the course's perspective on regression?", '2. Can you provide information on regression as discussed in the course?', '3. How does the course cover the topic of regression?', "4. What are the course's teachings on regression?", '5. In relation to the course, what is mentioned about regression?'])

In [46]:
# Build the multi-query retriever around the custom chain
retriever = MultiQueryRetriever(
    parser_key="lines",  # "lines" is the key (attribute name) of the parsed output
    llm_chain=llm_chain,
    retriever=vectordb.as_retriever(),
)

# Retrieve documents for the question and show how many came back
unique_docs = retriever.get_relevant_documents(
    query="What does the course say about regression?"
)
len(unique_docs)

INFO:langchain.retrievers.multi_query:Generated queries: ["1. What is the course's perspective on regression?", '2. Can you provide information on regression as discussed in the course?', '3. How does the course cover the topic of regression?', "4. What are the course's teachings on regression?", '5. In what way does the course address regression?']


9

20

In [18]:
# Display the retrieved documents (page content plus metadata)
unique_docs

[Document(page_content='}\n]\nChallenges#\nAfter going through key ideas and demos of building LLM-centered agents, I start to see a couple common limitations:', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'title': "LLM Powered Autonomous Agents | Lil'Log", 'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview In a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:', 'language': 'en'}),
 Document(page_content='to start a new trial depending on the self-reflection results.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'title': "LLM P

In [1]:
# Print a transcript snippet; print() renders the embedded "\n" escapes as real
# line breaks. The trailing number on each line is presumably a timestamp in
# seconds — confirm.
print("Speaker: Doesn't really look like anyone's been doing cocaine off that table, does it? (0.00)\nSpeaker: I'll do respect, I'm not sure you know how that works. (5.60)\nSpeaker: Masking, if you do, you testified you've done cocaine. (8.80)\nSpeaker: I have. (12.16)\nSpeaker: Doesn't really look like Mr. Depp or anyone was doing cocaine off that table, does it? (13.28)\nSpeaker: I begged a differ with you on that. (17.84)\nSpeaker: When you snort cocaine typically, it goes into your nose. (20.08)")

Speaker: Doesn't really look like anyone's been doing cocaine off that table, does it? (0.00)
Speaker: I'll do respect, I'm not sure you know how that works. (5.60)
Speaker: Masking, if you do, you testified you've done cocaine. (8.80)
Speaker: I have. (12.16)
Speaker: Doesn't really look like Mr. Depp or anyone was doing cocaine off that table, does it? (13.28)
Speaker: I begged a differ with you on that. (17.84)
Speaker: When you snort cocaine typically, it goes into your nose. (20.08)
