In [6]:
from chromadb import Client
from chromadb.config import Settings
from sentence_transformers import SentenceTransformer

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
# Build the Chroma client from a default Settings object
chroma_client = Client(settings=Settings())

In [8]:
# Fetch the collection if it already exists, otherwise create it.
# `create_collection` raises when the name is already taken, which makes this
# cell fail on a second run against the same client; `get_or_create_collection`
# keeps the cell safely re-runnable.
collection = chroma_client.get_or_create_collection(name="my_collection")

In [9]:
# Instantiate the sentence-transformers encoder used for documents and queries
embedding_model = SentenceTransformer(model_name_or_path="all-MiniLM-L6-v2")

In [10]:
# Sample corpus: the documents that will be embedded and indexed
texts = [
    "Machine learning is awesome.",
    "Transformers are revolutionizing NLP.",
    "Chroma is a vector database for LLMs.",
]

In [11]:
# Encode every text, then convert the encoder output to plain Python lists
text_vectors = embedding_model.encode(texts)
embeddings = text_vectors.tolist()

In [12]:
# Store each text with its precomputed vector under ids doc0, doc1, ...
doc_ids = [f"doc{idx}" for idx, _ in enumerate(texts)]
collection.add(ids=doc_ids, documents=texts, embeddings=embeddings)

print("Documents added to Chroma.")

Documents added to Chroma.


In [13]:
# Natural-language question to search the collection with
query_text: str = "What are transformers in AI?"

In [14]:
# Encode the query with the same model used for the documents,
# then convert the result to a plain Python list
query_vector = embedding_model.encode(query_text)
query_embedding = query_vector.tolist()

In [15]:
# Search the collection with the single query vector;
# n_results=2 asks for the two top matches
results = collection.query(query_embeddings=[query_embedding], n_results=2)

In [16]:
# Report each match for the first (only) query alongside its distance
print("Top matches:")
matched_docs = results['documents'][0]
matched_dists = results['distances'][0]
# Stop at the shorter list, mirroring pairwise iteration over both
for rank in range(min(len(matched_docs), len(matched_dists))):
    print(f"- {matched_docs[rank]} (distance: {matched_dists[rank]:.4f})")

Top matches:
- Transformers are revolutionizing NLP. (distance: 0.8475)
- Machine learning is awesome. (distance: 1.3247)
