In [2]:
from langchain_community.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.documents import Document

In [3]:
# Toy corpus: one short sentence per topic. Each sentence is wrapped in a
# Document (with no metadata) so it can be handed to the vector store.
sample_texts = (
    "Langchain helps developers build applications with LLMs easily",
    "Chroma is a vector store optimized for LLM based search",
    "Embeddings are used to convert text into numerical representations of vectors",
    "HuggingFace provides a wide range of pre-trained models for NLP tasks",
    "Python is a versatile programming language widely used in data science and machine learning",
)
documents = [Document(page_content=text) for text in sample_texts]

In [None]:
# Initialize the embeddings model used to turn each document (and, later,
# each query) into a vector.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

# Deterministic, position-based ids. Without explicit ids the store assigns a
# fresh random id to every document on every call, so re-running this cell in
# the same kernel would append the same documents to "sample_collection" again
# and the retriever could return duplicated hits. With stable ids a re-run
# overwrites the existing entries instead of adding new ones.
document_ids = [f"sample-doc-{position}" for position in range(len(documents))]

# Initialize the Chroma store: embeds `documents` with `embedding_model` and
# indexes them under the named collection.
vector_store = Chroma.from_documents(
    documents=documents,
    embedding=embedding_model,
    ids=document_ids,
    collection_name="sample_collection",
)

In [8]:
# Wrap the vector store in a retriever; k=2 limits each query to two documents.
retriever = vector_store.as_retriever(
    search_kwargs=dict(k=2),
)

In [9]:
# Natural-language question to look up in the indexed documents.
query = "what is python used for?"
# Run the retriever on the question; `results` holds the returned Documents.
results = retriever.invoke(input=query)

In [10]:
# Bare expression: the notebook renders the repr of the retrieved Documents.
results

[Document(metadata={}, page_content='Python is a versatile programming language widely used in data science and machine learning'),
 Document(metadata={}, page_content='Embeddings are used to convert text into numerical representations of vectors')]

In [12]:
# Print each retrieved document's text under a zero-based "Result N" header.
for i, doc in enumerate(results):
    print(
        f'\n---Result {i}---',
        f'Page Content: {doc.page_content}',
        sep='\n',
    )


---Result 0---
Page Content: Python is a versatile programming language widely used in data science and machine learning

---Result 1---
Page Content: Embeddings are used to convert text into numerical representations of vectors
