In [None]:
# Open (or create) an on-disk Chroma database so the data survives kernel restarts.
# Note: ChromaDB 0.4.0+ uses PersistentClient instead of Client(Settings(...))
CHROMA_PATH = "./chroma_db"  # single source of truth for the storage directory
client = chromadb.PersistentClient(path=CHROMA_PATH)

# get_or_create_collection returns the existing collection when one with this
# name is already stored, so this cell can be re-run safely.
collection = client.get_or_create_collection(
    name="my_documents",
    metadata={"description": "Sample document collection"}
)

# Status output (emoji restored from mis-decoded UTF-8 bytes).
print(f"✅ Collection created: {collection.name}")
print(f"Current count: {collection.count()} documents")
print(f"📁 Data persisted to: {CHROMA_PATH}/")

In [None]:
# Sample corpus: every entry pairs the raw text with a flat metadata dict
# (category, date, author, priority).
documents = [
    dict(
        text="Python 3.12 introduces new performance improvements...",
        metadata=dict(
            category="tech",
            date="2024-01-15",
            author="Tech Team",
            priority=1,
        ),
    ),
    # Add 29 more...
]

In [None]:

# Chroma takes three parallel lists: plain-text documents, one metadata dict per
# document, and one unique string ID per document. `documents` holds
# {"text": ..., "metadata": ...} records, so split them apart first.
texts = [doc["text"] for doc in documents]
metadatas = [doc["metadata"] for doc in documents]

# IDs for each document
ids = [f"doc_{i}" for i in range(len(documents))]

# Write to the collection (Chroma handles embedding automatically!).
# upsert rather than add: the store is persistent and the IDs are deterministic,
# so re-running this cell updates the existing entries instead of colliding
# with them.
collection.upsert(
    documents=texts,
    metadatas=metadatas,
    ids=ids
)

print(f"✅ Added {len(documents)} documents to collection")
print(f"Total documents: {collection.count()}")

In [None]:
# Semantic search: ask for the three nearest documents to a single query text.
results = collection.query(query_texts=["What is artificial intelligence?"], n_results=3)

print("Query: What is artificial intelligence?\n")
print("Top 3 results:\n")

# A single query text was sent, so index [0] picks out its hits from each
# result field; zip walks the three fields in lockstep.
hits = zip(results['documents'][0], results['metadatas'][0], results['distances'][0])
for rank, (text, meta, dist) in enumerate(hits, start=1):
    print(f"{rank}. (Distance: {dist:.4f})")
    print(f"   Document: {text}")
    print(f"   Metadata: {meta}")
    print()

In [None]:
# Query with metadata filter: restrict the search to documents whose metadata
# has category == "AI".
# NOTE(review): the sample document uses the lowercase category "tech"; confirm
# that the remaining documents actually carry the value "AI", otherwise this
# query returns no hits.
results = collection.query(
    query_texts=["Tell me about AI"],
    n_results=3,
    where={"category": "AI"}  # Only return AI documents
)

print("Query: Tell me about AI (filtered by category='AI')\n")
print("Results:\n")

# One query text was sent, so index [0] selects its hits.
for i, (doc, metadata) in enumerate(zip(
    results['documents'][0],
    results['metadatas'][0]
), 1):
    # The sample metadata schema has no "topic" key, so fall back to a
    # placeholder instead of raising KeyError.
    topic = metadata.get('topic', 'N/A')
    print(f"{i}. {doc}")
    print(f"   Category: {metadata['category']}, Topic: {topic}")
    print()