In [2]:
# Install dependencies
!pip install chromadb sentence-transformers

import chromadb
from sentence_transformers import SentenceTransformer

# Initialize ChromaDB client
# The collection is stored on disk under ./chroma_db, so whatever an earlier
# run of this cell wrote is still there on the next run.
client = chromadb.PersistentClient(path="./chroma_db")  # Persistent storage
collection = client.get_or_create_collection(name="text_store")

# Load embedding model
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Sample texts
documents = [
    "The sky is blue and the sun is shining.",
    "Artificial Intelligence is transforming the world.",
    "Machine learning is a subset of AI."
]

# Generate embeddings (one vector per document, converted to plain lists)
embeddings = model.encode(documents).tolist()

# Store in ChromaDB
# The ids are the fixed positions "0".."N-1" and the store is persistent, so
# re-running this cell hits ids that already exist. upsert() overwrites those
# entries (and inserts new ones), which keeps the cell idempotent and lets
# edits to `documents` reach the store. A single batched call also replaces
# the previous one-call-per-document loop.
collection.upsert(
    ids=[str(position) for position in range(len(documents))],
    embeddings=embeddings,
    # The text is kept in metadata because search() returns results["metadatas"].
    metadatas=[{"text": text} for text in documents],
)

# Query function
def search(query, top_k=2):
    """Look up the stored entries closest to ``query``.

    The query text is embedded with the notebook-level ``model`` and the
    resulting vector is handed to ``collection.query``.

    Args:
        query: Text to search for.
        top_k: Forwarded to the collection query as ``n_results``.

    Returns:
        The ``"metadatas"`` entry of the query response. The caller in this
        notebook reads index 0 of it and expects dicts with a ``"text"`` key.
    """
    # encode() is given a one-element batch, so the query call receives a
    # list containing a single embedding.
    encoded_batch = model.encode([query])
    response = collection.query(
        query_embeddings=encoded_batch.tolist(),
        n_results=top_k,
    )
    return response["metadatas"]

# Example query
# Runs a single search and prints the text of each returned entry.
query_text = "Tell me about AI"
# search() hands back the "metadatas" part of the query response; top_k is
# left at its default of 2.
results = search(query_text)
print("Retrieved Documents:")
# Only one query was sent, so the hits are read from position 0.
for res in results[0]:
    # Each hit is the metadata dict written at ingest time: {"text": <document>}.
    print(res["text"])






Retrieved Documents:
Machine learning is a subset of AI.
Artificial Intelligence is transforming the world.


In [3]:
# Install dependencies
!pip install chromadb sentence-transformers

import chromadb
from sentence_transformers import SentenceTransformer
import os

# Define storage path
DB_PATH = "./my_chroma_db"
# Create the directory up front; exist_ok makes this a no-op on re-runs.
os.makedirs(DB_PATH, exist_ok=True)

# Initialize ChromaDB client
# The collection is stored on disk under DB_PATH, so whatever an earlier run
# of this cell wrote is still there on the next run.
client = chromadb.PersistentClient(path=DB_PATH)  # Persistent storage
collection = client.get_or_create_collection(name="text_store")

# Load embedding model
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Sample texts
documents = [
    "The sky is blue and the sun is shining.",
    "Artificial Intelligence is transforming the world.",
    "Machine learning is a subset of AI."
]

# Generate embeddings (one vector per document, converted to plain lists)
embeddings = model.encode(documents).tolist()

# Store in ChromaDB
# The ids are the fixed positions "0".."N-1" and the store is persistent, so
# re-running this cell hits ids that already exist. upsert() overwrites those
# entries (and inserts new ones), which keeps the cell idempotent and lets
# edits to `documents` reach the store. A single batched call also replaces
# the previous one-call-per-document loop.
collection.upsert(
    ids=[str(position) for position in range(len(documents))],
    embeddings=embeddings,
    # The text is kept in metadata because search() returns results["metadatas"].
    metadatas=[{"text": text} for text in documents],
)

# Query function
def search(query, top_k=2):
    """Look up the stored entries closest to ``query``.

    The query text is embedded with the notebook-level ``model`` and the
    resulting vector is handed to ``collection.query``.

    Args:
        query: Text to search for.
        top_k: Forwarded to the collection query as ``n_results``.

    Returns:
        The ``"metadatas"`` entry of the query response. The caller in this
        notebook reads index 0 of it and expects dicts with a ``"text"`` key.
    """
    # encode() is given a one-element batch, so the query call receives a
    # list containing a single embedding.
    encoded_batch = model.encode([query])
    response = collection.query(
        query_embeddings=encoded_batch.tolist(),
        n_results=top_k,
    )
    return response["metadatas"]

# Example query
# Runs a single search and prints the text of each returned entry.
query_text = "Tell me about AI"
# search() hands back the "metadatas" part of the query response; top_k is
# left at its default of 2.
results = search(query_text)
print("Retrieved Documents:")
# Only one query was sent, so the hits are read from position 0.
for res in results[0]:
    # Each hit is the metadata dict written at ingest time: {"text": <document>}.
    print(res["text"])


Retrieved Documents:
Machine learning is a subset of AI.
Artificial Intelligence is transforming the world.
