In [12]:
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_core.documents import Document

In [13]:
# Step1-: Your source documents
documents = [
    Document(page_content="LangChain helps developers build LLM applications easily."),
    Document(page_content="Chroma is a vector database optimized for LLM-based search."),
    Document(page_content="Embeddings convert text into high-dimensional vectors."),
    Document(page_content="OpenAI provides powerful embedding models."),
]

In [14]:
# Step2- Initialize Embedding model 
embedding_model = HuggingFaceEmbeddings(
    model_name = 'intfloat/e5-small',
    encode_kwargs = {'normalize_embeddings':True}
)

In [15]:
# setp3- Create a vector store
vectorstore = Chroma.from_documents(
    documents=documents,
    embedding=embedding_model,
    collection_name='my_collection'
)

In [16]:
# Step4: Convert vector store into a retriever
retriever = vectorstore.as_retriever(search_kwargs={'k':2})

In [17]:
query = "what is Chroma user for?"
result = retriever.invoke(query)

In [18]:
for i, doc in enumerate(result):
    print(f"\n--- Result {i+1}---")
    print(doc.page_content)


--- Result 1---
Chroma is a vector database optimized for LLM-based search.

--- Result 2---
Chroma is a vector database optimized for LLM-based search.


In [19]:
results = vectorstore.similarity_search(query,k=2)
for i, doc in enumerate(result):
    print(f"\n--- Result {i+1}---")
    print(doc.page_content)


--- Result 1---
Chroma is a vector database optimized for LLM-based search.

--- Result 2---
Chroma is a vector database optimized for LLM-based search.
