In [22]:
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document
from langchain_huggingface import HuggingFaceEmbeddings

In [23]:
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [24]:
#source documents
documents =[
    Document(page_content="LangChain makes it easy to work with LLMs."),
    Document(page_content="LangChain is used to build LLM based applications."),
    Document(page_content="Chroma is used to store and search document embeddings."),
    Document(page_content="Embeddings are vector representations of text."),
    Document(page_content="MMR helps you get diverse results when doing similarity search."),
    Document(page_content="LangChain supports Chroma, FAISS, Pinecone, and more."),
]

In [26]:
vector_store = Chroma(
    embedding_function= embeddings,
    persist_directory= 'store_retriever',
    collection_name= 'try_man'
)

In [27]:
vector_store.add_documents(documents)

['6afaa7fc-142c-4dae-b8a5-8aa50799bcc8',
 '3ab3578b-693b-4dcd-925b-063bc856d1e2',
 'fef19651-f309-47ac-9347-e82b5f9259d1',
 'abaab7b7-e702-4863-b4d4-6928d64029f0',
 'ff59e0bc-08a2-4ee5-bd3d-94967afe04f0',
 '59ea22e9-ba5d-4af0-9cbf-e071e3f8e627']

In [28]:
# Convert vectorstore into a retriever

#this is like the similarity_search but in this we get flexibility to use MMR, multi query etc
#also retrievers are runnables

retriever = vector_store.as_retriever(search_kwargs={"k": 3})
# retriever = vector_store.as_retriever(kwargs=2)


In [29]:
query = "What is Chroma used for?"
results = retriever.invoke(query)

In [30]:
# here enumerate gives the position and the value there of that iterable
for i, doc in enumerate(results):
    print(f"\n--- Result {i+1} ---")
    print(doc.page_content)


--- Result 1 ---
Chroma is used to store and search document embeddings.

--- Result 2 ---
LangChain supports Chroma, FAISS, Pinecone, and more.

--- Result 3 ---
LangChain is used to build LLM based applications.


In [31]:
print(results)

[Document(metadata={}, page_content='Chroma is used to store and search document embeddings.'), Document(metadata={}, page_content='LangChain supports Chroma, FAISS, Pinecone, and more.'), Document(metadata={}, page_content='LangChain is used to build LLM based applications.')]


In [32]:
vector_store.get(include=['embeddings'])

{'ids': ['6afaa7fc-142c-4dae-b8a5-8aa50799bcc8',
  '3ab3578b-693b-4dcd-925b-063bc856d1e2',
  'fef19651-f309-47ac-9347-e82b5f9259d1',
  'abaab7b7-e702-4863-b4d4-6928d64029f0',
  'ff59e0bc-08a2-4ee5-bd3d-94967afe04f0',
  '59ea22e9-ba5d-4af0-9cbf-e071e3f8e627'],
 'embeddings': array([[-0.04241481, -0.06280898,  0.04777258, ..., -0.02446854,
          0.02867932,  0.04860828],
        [-0.04207314, -0.02860173, -0.00226641, ..., -0.0127493 ,
          0.09139377,  0.0466856 ],
        [-0.10464291,  0.0380908 , -0.08724858, ...,  0.05971808,
          0.13009915,  0.01820262],
        [-0.01859744, -0.03252062,  0.00397878, ...,  0.09267598,
          0.09615937, -0.01724097],
        [-0.05295337, -0.03759833, -0.01045916, ..., -0.0437077 ,
         -0.03832104,  0.02221226],
        [-0.06332805, -0.06574257, -0.0010553 , ..., -0.03947791,
          0.14585049,  0.01767414]], shape=(6, 384)),
 'documents': None,
 'uris': None,
 'included': ['embeddings'],
 'data': None,
 'metadatas': Non

In [20]:
vector_store.delete(['a0bd8a9c-24ac-4b2b-8724-a6ef535ea1b4',
  'bc627960-fefc-4b3b-8781-546c282cee14',
  'df405f34-6c58-48d7-a1b3-2710a7a32fa3',
  '562d3324-f468-45d8-b696-cecde7cbc96d'])