In [None]:
from langchain_chroma import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_core.documents import Document

In [2]:
# Small demo corpus: every sentence below becomes its own Document.
corpus_texts = (
    "LangChain helps developers build LLM applications easily.",
    "Chroma is a vector database optimized for LLM-based search.",
    "Embeddings convert text into high-dimensional vectors.",
    "OpenAI provides powerful embedding models.",
)
documents = [Document(page_content=text) for text in corpus_texts]

In [4]:
# Embedding model handed to the vector store for indexing the documents.
embedding_model = GoogleGenerativeAIEmbeddings(model='models/embedding-001')

# Stable, position-based ids. Without explicit ids every run of this cell
# indexes the documents under fresh ids, so re-running it in the same kernel
# can leave duplicate entries in "my_collection" and distort top-k results.
# NOTE(review): relies on the store upserting on matching ids -- confirm for
# the installed langchain_chroma version.
document_ids = [f"doc-{index}" for index in range(len(documents))]

# Build the collection from the demo documents.
vector_store = Chroma.from_documents(
    documents=documents,
    embedding=embedding_model,
    ids=document_ids,
    collection_name="my_collection",
)


In [5]:
# Expose the vector store through the retriever interface, asking for k=2.
retriever_search_kwargs = {'k': 2}
retriever = vector_store.as_retriever(search_kwargs=retriever_search_kwargs)

In [6]:
# Question to look up; the retriever returns the matching documents.
query = 'What is Chroma used for?'
results = retriever.invoke(input=query)

In [8]:
# Show each retrieved document: a 1-based label line, then its text.
for i, doc in enumerate(results):
    print(f"Document {i+1}:", doc.page_content, sep="\n")

Document 1:
Chroma is a vector database optimized for LLM-based search.
Document 2:
Embeddings convert text into high-dimensional vectors.


#### We can already retrieve documents by calling `similarity_search` on the vector store, so what is a retriever really used for? A retriever is used to perform more complex searches, such as MMR (maximal marginal relevance), etc.


In [9]:
# Same lookup, this time asking the vector store directly for the top 2 hits.
results = vector_store.similarity_search(query=query, k=2)

In [10]:
# Show each document from the direct search: a 1-based label, then its text.
for i, doc in enumerate(results):
    print(f"Document {i+1}:", doc.page_content, sep="\n")

Document 1:
Chroma is a vector database optimized for LLM-based search.
Document 2:
Embeddings convert text into high-dimensional vectors.
