In [1]:
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter


In [2]:
# Read the raw text file into LangChain documents.
loader = TextLoader("state_of_union.txt")
documents = loader.load()

# Split the documents into chunks (chunk_size=1000, no overlap).
text_splitter = CharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)
texts = text_splitter.split_documents(documents)

# Embed every chunk with OpenAI embeddings and index the vectors in FAISS.
embeddings = OpenAIEmbeddings()
db = FAISS.from_documents(documents=texts, embedding=embeddings)


In [3]:
# Expose the FAISS index as a retriever, relying on as_retriever()'s defaults.
retriever = db.as_retriever()


In [4]:
# Run the query through the current retriever and keep the returned documents.
docs = retriever.invoke("what did he say about ketanji brown jackson")

In [10]:
# Rebind `retriever` to one that uses search_type="mmr"
# (presumably maximal marginal relevance — confirm against the vector store docs).
retriever = db.as_retriever(search_type="mmr")

In [11]:
# Same query as before, now answered by the "mmr" retriever.
docs = retriever.invoke("what did he say about ketanji brown jackson")

In [14]:
# Rebind `retriever` to threshold-based search with score_threshold=0.5.
retriever = db.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={
        "score_threshold": 0.5,
    },
)

In [15]:
# Same query again, now answered by the score-threshold retriever.
docs = retriever.invoke("what did he say about ketanji brown jackson")



In [1]:
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import WebBaseLoader
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Fetch the blog post over HTTP
loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")
data = loader.load()

# Break the page into chunks (chunk_size=500, no overlap)
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=0,
)
splits = text_splitter.split_documents(data)

# Embed the chunks and index them in a FAISS vector store
embedding = OpenAIEmbeddings()
vectordb = FAISS.from_documents(splits, embedding)

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [12]:
from langchain_groq import ChatGroq
from langchain_core.output_parsers import BaseOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from typing import List
import os

class LineListOutputParser(BaseOutputParser[List[str]]):
    """Output parser that turns newline-separated LLM text into a list of lines.

    Every line is trimmed of surrounding whitespace, and lines that are empty
    after trimming are discarded, so blank or whitespace-only lines (including
    a stray ``"\\r"`` left by CRLF line endings) are never returned.
    """

    def parse(self, text: str) -> List[str]:
        """Split ``text`` into trimmed, non-blank lines.

        Args:
            text: Raw model output, expected to carry one item per line.

        Returns:
            The non-blank lines in their original order, each stripped of
            leading and trailing whitespace. Empty list when ``text`` holds
            nothing but whitespace.
        """
        # Strip each line individually: filter(None, ...) alone would keep
        # whitespace-only lines because they are truthy strings.
        candidates = (line.strip() for line in text.split("\n"))
        return [line for line in candidates if line]


# Shared parser instance; piped onto the end of the query-generation chain.
output_parser = LineListOutputParser()

In [13]:

# Prompt that asks the model for five rephrasings of {question}, one per line.
# NOTE(review): the template's continuation lines carry leading indentation and
# some trailing spaces, all of which are part of the text sent to the model.
QUERY_PROMPT = PromptTemplate(
    input_variables=["question"],
    template="""You are an AI language model assistant. Your task is to generate five 
    different versions of the given user question to retrieve relevant documents from a vector 
    database. By generating multiple perspectives on the user question, your goal is to help
    the user overcome some of the limitations of the distance-based similarity search. 
    Provide these alternative questions separated by newlines.
    Original question: {question}""",
)

In [14]:
# Groq-hosted chat model; the API key is read from the GROQ_API_KEY environment
# variable (os.getenv yields None when it is unset).
llm = ChatGroq(api_key=os.getenv('GROQ_API_KEY'),model='mixtral-8x7B-32768')

In [15]:
# Compose prompt -> chat model -> line parser into a single chain.
llm_chain = QUERY_PROMPT | llm | output_parser

# Sample question for the multi-query demo.
question = "What are the approaches to Task Decomposition?"



In [19]:
from langchain.retrievers import MultiQueryRetriever
# Build a multi-query retriever on top of the blog-post vector store.
# `llm_chain` ends in LineListOutputParser, which returns a plain list of
# strings, so no key lookup is involved. The deprecated `parser_key` argument
# is ignored by MultiQueryRetriever and is therefore not passed.
retriever = MultiQueryRetriever(
    retriever=vectordb.as_retriever(),
    llm_chain=llm_chain,
)

# Results: reuse the `question` defined alongside `llm_chain`
unique_docs = retriever.invoke(question)
len(unique_docs)

7

In [22]:
def pretty_print_docs(docs):
    """Print each document's ``page_content`` under a numbered heading.

    Documents are numbered from 1 and separated by a line of 100 dashes.

    Args:
        docs: Iterable of objects exposing a string ``page_content`` attribute.
    """
    divider = f"\n{'-' * 100}\n"
    sections = []
    for position, doc in enumerate(docs):
        sections.append(f"Document {position+1}:\n\n" + doc.page_content)
    print(divider.join(sections))

In [26]:
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter

# Load the text file and split it into chunks (chunk_size=1000, no overlap)
documents = TextLoader("state_of_union.txt").load()
text_splitter = CharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)
texts = text_splitter.split_documents(documents)

# Index the chunks in FAISS and expose the index as a plain retriever
retriever = FAISS.from_documents(
    texts,
    OpenAIEmbeddings(),
).as_retriever()

# Baseline retrieval, printed without any compression step
docs = retriever.invoke("what was the content of the annual message")
pretty_print_docs(docs)

Document 1:

Most annual messages outline the President's legislative agenda and national priorities in general or specific terms. James Monroe in 1823 discussed the centerpiece of his foreign policy, now-known as the Monroe Doctrine, which called on European countries to end western colonization. Lincoln famously expressed his desire for slave emancipation in 1862, and Franklin Roosevelt spoke about the now-famous four freedoms during his State of the Union message in 1941.

Whatever the form, content, delivery method or broadcast medium, the President's annual address is a backdrop for national unity. The State of the Union gives the President an opportunity to reflect on the past while presenting his hopes for the future to Congress, the American people and the world.
----------------------------------------------------------------------------------------------------
Document 2:

With the advent of radio and television, the President's annual message has become not only a conversati

In [33]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain_groq import ChatGroq
import os
# Groq chat model used by the extractor; key read from GROQ_API_KEY
llm = ChatGroq(
    api_key=os.getenv("GROQ_API_KEY"),
    model="mixtral-8x7B-32768",
)

# Pair the base retriever with an LLM-backed extractor as its compressor
compressor = LLMChainExtractor.from_llm(llm)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=retriever,
)

compressed_docs = compression_retriever.invoke(
    "what was the content of the annual message in 1941"
)
pretty_print_docs(compressed_docs)

Document 1:

Franklin Roosevelt spoke about the now-famous four freedoms during his State of the Union message in 1941.
----------------------------------------------------------------------------------------------------
Document 2:

NO_OUTPUT

The context does not provide information about the content of the annual message in 1941.
----------------------------------------------------------------------------------------------------
Document 3:

The content of the annual message in 1941 is not provided in the context. However, the context does mention that Woodrow Wilson was the first President to revive Washington's spoken precedent of delivering the annual message in person to both houses of Congress, starting in 1913. Therefore, it can be inferred that the President, Franklin D. Roosevelt, who was in office in 1941, would have delivered the annual message that year in person.

So, the relevant parts of the context are:

* Woodrow Wilson delivered his first annual message in person to

In [28]:
from langchain.retrievers.document_compressors import LLMChainFilter

# Use an LLM-backed filter (built from the current `llm`) as the compressor
_filter = LLMChainFilter.from_llm(llm)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=_filter,
    base_retriever=retriever,
)

compressed_docs = compression_retriever.invoke(
    "what was the content of the annual message"
)
pretty_print_docs(compressed_docs)

Document 1:

Most annual messages outline the President's legislative agenda and national priorities in general or specific terms. James Monroe in 1823 discussed the centerpiece of his foreign policy, now-known as the Monroe Doctrine, which called on European countries to end western colonization. Lincoln famously expressed his desire for slave emancipation in 1862, and Franklin Roosevelt spoke about the now-famous four freedoms during his State of the Union message in 1941.

Whatever the form, content, delivery method or broadcast medium, the President's annual address is a backdrop for national unity. The State of the Union gives the President an opportunity to reflect on the past while presenting his hopes for the future to Congress, the American people and the world.
----------------------------------------------------------------------------------------------------
Document 2:

For example, Thomas Jefferson thought Washington's oral presentation was too kingly for the new republic

In [29]:
from langchain.retrievers.document_compressors import LLMListwiseRerank
from langchain_openai import ChatOpenAI

# Rebind `llm` to an OpenAI chat model with temperature 0
llm = ChatOpenAI(
    model="gpt-3.5-turbo-0125",
    temperature=0,
)

# Listwise reranker configured with top_n=1, used as the compressor
_filter = LLMListwiseRerank.from_llm(llm, top_n=1)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=_filter,
    base_retriever=retriever,
)

compressed_docs = compression_retriever.invoke(
    "what was the content of the annual message"
)
pretty_print_docs(compressed_docs)

Document 1:

Most annual messages outline the President's legislative agenda and national priorities in general or specific terms. James Monroe in 1823 discussed the centerpiece of his foreign policy, now-known as the Monroe Doctrine, which called on European countries to end western colonization. Lincoln famously expressed his desire for slave emancipation in 1862, and Franklin Roosevelt spoke about the now-famous four freedoms during his State of the Union message in 1941.

Whatever the form, content, delivery method or broadcast medium, the President's annual address is a backdrop for national unity. The State of the Union gives the President an opportunity to reflect on the past while presenting his hopes for the future to Congress, the American people and the world.


In [30]:
from langchain.retrievers.document_compressors import EmbeddingsFilter
from langchain_openai import OpenAIEmbeddings

# Embedding-based filter with similarity_threshold=0.76, used as the compressor
embeddings = OpenAIEmbeddings()
embeddings_filter = EmbeddingsFilter(
    embeddings=embeddings,
    similarity_threshold=0.76,
)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=embeddings_filter,
    base_retriever=retriever,
)

compressed_docs = compression_retriever.invoke(
    "what was the content of the annual message in 1941"
)
pretty_print_docs(compressed_docs)

Document 1:

Most annual messages outline the President's legislative agenda and national priorities in general or specific terms. James Monroe in 1823 discussed the centerpiece of his foreign policy, now-known as the Monroe Doctrine, which called on European countries to end western colonization. Lincoln famously expressed his desire for slave emancipation in 1862, and Franklin Roosevelt spoke about the now-famous four freedoms during his State of the Union message in 1941.

Whatever the form, content, delivery method or broadcast medium, the President's annual address is a backdrop for national unity. The State of the Union gives the President an opportunity to reflect on the past while presenting his hopes for the future to Congress, the American people and the world.
----------------------------------------------------------------------------------------------------
Document 2:

With the advent of radio and television, the President's annual message has become not only a conversati

In [31]:
from langchain.retrievers.document_compressors import DocumentCompressorPipeline
from langchain_community.document_transformers import EmbeddingsRedundantFilter
from langchain_text_splitters import CharacterTextSplitter

# Stage 1: re-split documents on ". " with chunk_size=300 and no overlap
splitter = CharacterTextSplitter(
    chunk_size=300,
    chunk_overlap=0,
    separator=". ",
)

# Stage 2: embedding-based redundancy filter
redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings)

# Stage 3: embedding-similarity filter with threshold 0.76
relevant_filter = EmbeddingsFilter(
    embeddings=embeddings,
    similarity_threshold=0.76,
)

# Transformers are listed in the order: splitter, redundancy filter, relevance filter
pipeline_compressor = DocumentCompressorPipeline(
    transformers=[splitter, redundant_filter, relevant_filter],
)

In [32]:
# Plug the three-stage pipeline in as the compressor for the base retriever
compression_retriever = ContextualCompressionRetriever(
    base_compressor=pipeline_compressor,
    base_retriever=retriever,
)

compressed_docs = compression_retriever.invoke(
    "what was the content of the annual message"
)
pretty_print_docs(compressed_docs)

Created a chunk of size 310, which is longer than the specified 300
Created a chunk of size 426, which is longer than the specified 300


Document 1:

Jefferson detailed his priorities in his first annual message in 1801 and sent copies of the written message to each house of Congress. The President's annual message, as it was then called, was not spoken by the President for the next 112 years
----------------------------------------------------------------------------------------------------
Document 2:

With the advent of radio and television, the President's annual message has become not only a conversation between the President and Congress but also an opportunity for the President to communicate with the American people at the same time
----------------------------------------------------------------------------------------------------
Document 3:

Although controversial at the time, Wilson delivered his first annual message in person to both houses of Congress and outlined his legislative priorities.
----------------------------------------------------------------------------------------------------
Document 4:

Wh