### ChromaDB

ChromaDB is a vector database designed for machine learning applications, particularly those involving embeddings. It allows for efficient storage, retrieval, and management of high-dimensional vectors, making it suitable for tasks like similarity search, recommendation systems, and more.

In [2]:
pip install chromadb --quiet 

Note: you may need to restart the kernel to use updated packages.


In [3]:
import chromadb

# Create a Chroma client with default arguments. Later cells use it to create,
# list, and delete collections.
# NOTE(review): with no arguments this is presumably an in-memory,
# non-persistent client — confirm against the installed chromadb version.
chroma_client = chromadb.Client()

In [4]:
# Fetch the collection named "documents", creating it first if it does not
# exist yet (as the method name indicates), and keep a handle to it.
collection = chroma_client.get_or_create_collection(name="documents")

In [5]:
# Seed the collection with two short texts. Only ids and raw documents are
# supplied; no embeddings or metadata are passed in this call.
collection.add(
    ids=["doc1", "doc2"],
    documents=[
        "This is a sample document.",
        "Another document for testing.",
    ],
)

In [6]:
from pprint import pprint

# Similarity query by text, restricted to stored documents whose content
# contains a given substring. The result dict is pretty-printed below.
results = collection.query(
    query_texts=["This is a query document about hawaii"],  # Chroma will embed this for you
    n_results=2,  # how many results to return
    where_document={"$contains": "document"},  # keep only results whose text contains "document"
)
pprint(results)

{'data': None,
 'distances': [[1.1787493228912354, 1.5264713764190674]],
 'documents': [['This is a sample document.', 'Another document for testing.']],
 'embeddings': None,
 'ids': [['doc1', 'doc2']],
 'included': ['metadatas', 'documents', 'distances'],
 'metadatas': [[None, None]],
 'uris': None}


In [7]:
# Replace the stored text of the existing record "doc1"; the id is unchanged.
collection.update(
    ids=["doc1"],
    documents=["This is an updated version of the first document."],
)

In [8]:
# Remove the record with id "doc1" from the collection.
collection.delete(ids=["doc1"])

In [10]:
# Add one more record, identified by id and text only.
# NOTE(review): the text says "Custom embedded", but no `embeddings=` argument
# is passed, so this record is presumably embedded by the collection's default
# embedding function like the others — confirm whether custom vectors were
# intended here.
collection.add(
    ids=["doc3"],
    documents=["Custom embedded doc"]
)


In [11]:
# Store a record together with metadata so it can be filtered on afterwards.
collection.add(
    ids=["doc4"],
    documents=["A travel guide to Hawaii."],
    metadatas=[
        {
            "category": "travel",
            "region": "hawaii",
        }
    ],
)

# Similarity query limited to records whose metadata has region == "hawaii".
# n_results acts as an upper bound: fewer rows come back when fewer records
# pass the filter (only "doc4" carries this metadata).
results = collection.query(
    where={"region": "hawaii"},
    query_texts=["beach vacation"],
    n_results=2,
)
pprint(results)


{'data': None,
 'distances': [[1.1021239757537842]],
 'documents': [['A travel guide to Hawaii.']],
 'embeddings': None,
 'ids': [['doc4']],
 'included': ['metadatas', 'documents', 'distances'],
 'metadatas': [[{'category': 'travel', 'region': 'hawaii'}]],
 'uris': None}


In [12]:
# Show the collections currently known to this client.
print(chroma_client.list_collections())

[Collection(name=documents)]


In [13]:
# Clean up: drop the "documents" collection that the earlier cells worked on.
chroma_client.delete_collection(name="documents")