<a href="https://colab.research.google.com/github/Simeon-Dhinakaran/GenAI/blob/main/vector-databases/simple_semantic_search_using_chromadb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import chromadb
from chromadb.config import Settings
from sentence_transformers import SentenceTransformer

# Step 1: Initialize ChromaDB Client
# chromadb.Client(Settings(persist_directory=...)) yields an in-memory client
# unless persistence is explicitly enabled, so the directory was never written.
# PersistentClient stores the database on disk under the given path.
client = chromadb.PersistentClient(path="./chromadb_data")

# Step 2: Set up a ChromaDB Collection
collection_name = "semantic_search"
# get_or_create_collection replaces the `name not in client.list_collections()`
# membership test: list_collections() returns names in some chromadb releases
# and Collection objects in others, so comparing against a string is unreliable
# and can lead to create_collection() failing on an already existing collection.
#
# "hnsw:space": "cosine" makes query distances cosine distances, so that
# `1 - distance` is a cosine similarity (the default space is squared L2, for
# which `1 - distance` can go negative). This setting is expected to take
# effect only when the collection is first created.
collection = client.get_or_create_collection(
    name=collection_name,
    metadata={"hnsw:space": "cosine"},
)

# Step 3: Embed and Insert Documents
# The small demo corpus that will be indexed.
documents = [
    "Milvus is a vector database.",
    "Semantic search is a powerful technique.",
    "Machine learning models can create embeddings.",
    "Vector search finds relevant documents."
]

# Pre-trained sentence-embedding model; it is reused later to embed the query.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Encode every document and convert the result to plain nested lists,
# which is the form handed to the collection below.
embeddings = model.encode(documents).tolist()

# One id ("doc_0", "doc_1", ...) and one metadata record
# ("Document 1", "Document 2", ...) per document, in document order.
ids = [f"doc_{position}" for position, _ in enumerate(documents)]
metadata = [
    {"source": f"Document {number}"}
    for number, _ in enumerate(documents, start=1)
]

# Store texts, vectors, metadata and ids together in the collection.
collection.add(
    ids=ids,
    documents=documents,
    embeddings=embeddings,
    metadatas=metadata,
)

# Step 4: Perform Semantic Search
query = "How does vector search work?"

# Encode the query as a one-item batch, then keep its single embedding row
# as a plain list.
query_embedding = model.encode([query])[0].tolist()

# Ask the collection for the 3 stored entries closest to the query embedding.
results = collection.query(
    n_results=3,
    query_embeddings=[query_embedding],
)

# Step 5: Display Results
print("Search Results:")

# Each result field is a list with one entry per query embedding; index 0
# selects the entry for the single query embedding passed to collection.query.
top_documents = results["documents"][0]
top_distances = results["distances"][0]
top_metadatas = results["metadatas"][0]

for rank, (doc, distance, meta) in enumerate(
    zip(top_documents, top_distances, top_metadatas), start=1
):
    # NOTE(review): the printed score is `1 - distance`; that is a similarity
    # only if the collection reports cosine distances - confirm the
    # collection's distance metric.
    print(f"{rank}. Text: {doc} | Score: {1 - distance:.4f} | Source: {meta['source']}")


Search Results:
1. Text: Vector search finds relevant documents. | Score: 0.3185 | Source: Document 4
2. Text: Semantic search is a powerful technique. | Score: -0.0947 | Source: Document 2
3. Text: Milvus is a vector database. | Score: -0.1120 | Source: Document 1


In [None]:
!pip install chromadb sentence-transformers



In [None]:
# Clean-up: remove the demo collection (looked up by its name) from the client.
client.delete_collection(collection_name)