In [29]:
import chromadb
from chromadb.config import Settings

import numpy as np

In [15]:
# Build the Chroma client, handing it an unmodified Settings object explicitly.

chroma_client = chromadb.Client(settings=Settings())

In [16]:
# Fetch the "my_documents" collection, creating it on first use.
# get_or_create_collection is used instead of create_collection so that
# re-running this cell in the same kernel reuses the existing collection
# rather than failing because the name is already taken.

collection = chroma_client.get_or_create_collection(name="my_documents")

In [17]:
# Populate the collection with three short sentences, keyed by id,
# each tagged with the metadata category "text".

doc_texts = {
    "doc1": "This is the first document.",
    "doc2": "Here is the second document.",
    "doc3": "The third document is right here.",
}

collection.add(
    ids=list(doc_texts),
    documents=list(doc_texts.values()),
    metadatas=[{"category": "text"} for _doc_id in doc_texts],
)

In [28]:
# Ask for the three stored documents closest to a free-text query
# and show the raw response dictionary.

query_text = "second document"
results = collection.query(query_texts=[query_text], n_results=3)

print(results)

{'ids': [['doc2', 'doc1', 'doc3']], 'distances': [[0.5167185068130493, 0.6896637082099915, 0.7063825726509094]], 'metadatas': [[{'category': 'text'}, {'category': 'text'}, {'category': 'text'}]], 'embeddings': None, 'documents': [['Here is the second document.', 'This is the first document.', 'The third document is right here.']], 'uris': None, 'data': None, 'included': ['metadatas', 'documents', 'distances']}


In [30]:
# Image-embedding walkthrough: store vectors with metadata, query them with
# and without a metadata filter, then update one record and delete another.

# Initialize client
chroma_client = chromadb.Client(Settings())

# Fetch the "images_embeddings" collection, creating it on first use.
# get_or_create_collection (rather than create_collection) lets this cell be
# re-run in the same kernel without failing because the name already exists.
collection = chroma_client.get_or_create_collection(name="images_embeddings")

# Seeded generator so the stand-in embeddings, and therefore the printed
# distances, are the same on every run.
EMBEDDING_DIM = 128
rng = np.random.default_rng(42)

# Stand-in embeddings for three images, converted to plain lists for
# compatibility with ChromaDB
embeddings = [rng.random(EMBEDDING_DIM).tolist() for _ in range(3)]

# Write the embeddings to the collection.
# upsert (rather than add) so a re-run resets all three records: it restores
# img1's original metadata and re-creates img2, both of which the update and
# delete steps at the end of this cell change.
collection.upsert(
    embeddings=embeddings,
    metadatas=[
        {"name": "image1.jpg", "category": "nature"},
        {"name": "image2.jpg", "category": "city"},
        {"name": "image3.jpg", "category": "nature"}
    ],
    ids=["img1", "img2", "img3"]
)

# Generate a query embedding with the same dimensionality as the stored ones
query_embedding = rng.random(EMBEDDING_DIM).tolist()

# Run the similarity search
results = collection.query(
    query_embeddings=[query_embedding],
    n_results=2
)

print("Results of the unfiltered query:")
print(results)

# Same query, restricted to records whose metadata has category == "nature"
filtered_results = collection.query(
    query_embeddings=[query_embedding],
    n_results=2,
    where={"category": "nature"}
)

print("\nResults of the query with a filter on 'nature' category:")
print(filtered_results)

# Update 'img1' metadata
collection.update(
    ids=["img1"],
    metadatas=[{"name": "image1_modified.jpg", "category": "landscape"}]
)

# Remove 'img2' from the collection
collection.delete(
    ids=["img2"]
)

Results of the unfiltered query:
{'ids': [['img2', 'img1']], 'distances': [[20.872116088867188, 23.36772918701172]], 'metadatas': [[{'category': 'city', 'name': 'image2.jpg'}, {'category': 'nature', 'name': 'image1.jpg'}]], 'embeddings': None, 'documents': [[None, None]], 'uris': None, 'data': None, 'included': ['metadatas', 'documents', 'distances']}

Results of the query with a filter on 'nature' category:
{'ids': [['img1', 'img3']], 'distances': [[23.36772918701172, 23.89348793029785]], 'metadatas': [[{'category': 'nature', 'name': 'image1.jpg'}, {'category': 'nature', 'name': 'image3.jpg'}]], 'embeddings': None, 'documents': [[None, None]], 'uris': None, 'data': None, 'included': ['metadatas', 'documents', 'distances']}
