In [1]:
# Algorithm to avoid redundancy in the retrieved results while maintaining high relevance to the query (not using Similarity search)
from langchain_core.documents import Document 

documents = [
    Document(page_content="LangChain helps developers build LLM applications easily."),
    Document(page_content="Chroma is a vector database optimized for LLM-based search."),
    Document(page_content="Embeddings convert text into high-dimensional vectors."),
    Document(page_content="OpenAI provides powerful embedding models."),
]

In [5]:
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

embeddings = OpenAIEmbeddings()

# Vector Stores 
vector_store = FAISS.from_documents(
    documents=documents, 
    embedding=embeddings
)

In [12]:
# MMR in the retriever 
retriever = vector_store.as_retriever(
    search_type = "mmr", 
    search_kwargs = {'k': 3, "lambda_mult": 1}
)

In [13]:
query = "What is langchain ?"
result = retriever.invoke(query)

In [18]:
for i, doc in enumerate(result):
    print(f"\n=== Result {i + 1} ===")
    print(doc.page_content)


=== Result 1 ===
LangChain helps developers build LLM applications easily.

=== Result 2 ===
Chroma is a vector database optimized for LLM-based search.

=== Result 3 ===
OpenAI provides powerful embedding models.
