# Vector Store Retriever

In [5]:
from langchain_community.vectorstores import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_core.documents import Document

In [6]:
# Step 1: build a tiny corpus — every sentence below becomes its own Document
documents = [
    Document(page_content=sentence)
    for sentence in (
        "LangChain helps developers build LLM applications easily.",
        "Chroma is a vector database optimized for LLM-based search.",
        "Embeddings convert text into high-dimensional vectors.",
        "OpenAI provides powerful embedding models.",
    )
]

In [7]:
# Initialize the Gemini embedding model used to vectorize the documents
embedding_model = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")

# Create the Chroma vector store (no persist_directory is given, so nothing is
# written to disk by this call).
# Stable, position-based ids are supplied so that re-running this cell in the
# same kernel overwrites the same entries in "my_collection" instead of
# appending a second copy of every document, which would otherwise show up as
# duplicated hits in the top-k results.
vectorstore = Chroma.from_documents(
    documents=documents,
    embedding=embedding_model,
    collection_name="my_collection",
    ids=[f"doc-{index}" for index in range(len(documents))],
)

In [9]:
# Expose the vector store through the retriever interface, asking for the
# 2 best-matching documents per query
retriever = vectorstore.as_retriever(
    search_kwargs=dict(k=2),
)

In [None]:
# Question to look up in the vector store
query = "words converted into embeddings are called?"

# The retriever is a runnable object, so this same call can be used inside a chain
results = retriever.invoke(query)

In [13]:
# Show every retrieved document under a 1-based "Result N" header
for position, hit in enumerate(results, start=1):
    print(f"\n--- Result {position} ---")
    print(hit.page_content)


--- Result 1 ---
Embeddings convert text into high-dimensional vectors.

--- Result 2 ---
OpenAI provides powerful embedding models.


In [14]:
# Alternative: query the vector store directly instead of going through the retriever
results = vectorstore.similarity_search(query, k=2)

In [15]:
# Show every document returned by the direct search under a 1-based header
for position, hit in enumerate(results, start=1):
    print(f"\n--- Result {position} ---")
    print(hit.page_content)


--- Result 1 ---
Embeddings convert text into high-dimensional vectors.

--- Result 2 ---
OpenAI provides powerful embedding models.


# MMR (Maximal Marginal Relevance) Retriever

In [27]:
# Sample documents: several overlap in topic, so plain similarity search
# would tend to return near-duplicates
docs = [
    Document(page_content=sentence)
    for sentence in (
        "LangChain makes it easy to work with LLMs.",
        "LangChain is used to build LLM based applications.",
        "Chroma is used to store and search document embeddings.",
        "Embeddings are vector representations of text.",
        "MMR helps you get diverse results when doing similarity search.",
        "LangChain supports Chroma, FAISS, Pinecone, and more.",
    )
]

In [28]:
from langchain_community.vectorstores import FAISS

# Gemini embedding model that turns each document into a vector
embedding_model = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")

# Build the FAISS vector store by embedding the sample documents
vectorstore = FAISS.from_documents(
    embedding=embedding_model,
    documents=docs,
)

In [29]:
# Build a retriever that uses MMR search instead of the default search type
retriever = vectorstore.as_retriever(
    search_kwargs=dict(
        k=3,              # number of top results to return
        lambda_mult=0.5,  # relevance-diversity balance
    ),
    search_type="mmr",
)

In [30]:
# Question to run through the MMR retriever
query = "What is Langchain?"

# Fetch the documents selected by MMR for this question
results = retriever.invoke(query)

In [31]:
# Show every MMR-selected document under a 1-based "Result N" header
for position, hit in enumerate(results, start=1):
    print(f"\n --- Result {position} ---")
    print(hit.page_content)


 --- Result 1 ---
LangChain is used to build LLM based applications.

 --- Result 2 ---
MMR helps you get diverse results when doing similarity search.

 --- Result 3 ---
LangChain supports Chroma, FAISS, Pinecone, and more.
