In [11]:
from langchain_huggingface import HuggingFaceEmbeddings
import chromadb
from chromadb.config import Settings

# --- Vector store ------------------------------------------------------------
# Open the Chroma database persisted under ./chromaDB. The settings object is
# built separately so the flag passed to the client is easy to spot.
persist_settings = Settings(allow_reset=False)
client = chromadb.PersistentClient(
    path='./chromaDB',
    settings=persist_settings,
)

# Look up the already-existing collection; the name must match the one that was
# used when the collection was created.
collection = client.get_collection("sentence_embeddings")

# --- Embedding model ---------------------------------------------------------
# encode_kwargs is handed to the embedding wrapper as-is; here it requests
# normalized embeddings.
encode_options = {'normalize_embeddings': True}
embeddings = HuggingFaceEmbeddings(
    model_name="BAAI/bge-large-en",
    encode_kwargs=encode_options,
)


In [2]:
# Semantic search demo: embed a free-text query and retrieve the closest
# stored sentences from the Chroma collection.
query = "Where does bear live?"
top_k = 3  # how many nearest sentences to fetch; also used in the heading below

# Step 1: Convert the query text into an embedding vector
query_embedding = embeddings.embed_query(query)

# Step 2: Search the collection. `query_embeddings` takes a list of vectors,
# so the single query vector is wrapped in a one-element list.
results = collection.query(
    query_embeddings=[query_embedding],
    n_results=top_k,
)

# Step 3: Display the results. The result lists are indexed per query, so
# position 0 holds the hits for our one query.
matched_documents = results["documents"][0]
matched_metadatas = results["metadatas"][0]

print("Query:", query)
# Heading is derived from top_k so it can never disagree with n_results.
print(f"\nTop {top_k} most similar sentences:")
for doc, meta in zip(matched_documents, matched_metadatas):
    print(f"Document: {doc}")
    print(f"Metadata: {meta}")

Query: Where does bear live?

Top 3 most similar sentences:
Document: There is a bear in the town square.
Metadata: {'id': 5, 'source': 'quora'}
Document: The sky is clear and the sun is shining.
Metadata: {'id': 2, 'source': 'wiki'}
Document: Artificial intelligence is transforming the world.
Metadata: {'id': 1, 'source': 'quora'}
