Vector DB

In [None]:
# Install required libraries
!pip install faiss-cpu numpy sentence-transformers

from sentence_transformers import SentenceTransformer
import numpy as np
import faiss

# -------------------------------
# 1. Sample Sentences
# -------------------------------
# Small demo corpus; a sentence's position in this list is the id that the
# vector index hands back for it at query time.
sentences = [
    "Natural language processing is a fascinating field.",
    "Word embeddings capture semantic meanings.",
    "NLP is used in chatbots and virtual assistants.",
    "Word2Vec is a powerful tool for creating word embeddings.",
    "Deep learning improves many NLP applications.",
]

# -------------------------------
# 2. Generate Sentence Embeddings
# -------------------------------
# Load the pretrained sentence-embedding model by name.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Encode the whole corpus in one call, then cast to float32 because that is
# the dtype FAISS requires for the vectors it stores and searches.
sentence_vectors = model.encode(sentences)
embeddings = sentence_vectors.astype('float32')

# -------------------------------
# 3. Create Vector Database (FAISS)
# -------------------------------
# `embeddings` is a (sentence count, vector size) matrix; the index only
# needs to be told the vector size up front.
_num_vectors, dimension = embeddings.shape

# Flat index compared with L2 (Euclidean) distance.
index = faiss.IndexFlatL2(dimension)

# Store every sentence vector; ids are assigned in insertion order, so id i
# corresponds to sentences[i].
index.add(embeddings)

print(f"Total sentences in DB: {index.ntotal}")

# -------------------------------
# 4. Query the Vector Database
# -------------------------------
query_sentence = "How can chatbots use NLP effectively?"
# The query is wrapped in a list so it is encoded as a batch of one, which is
# the 2-D input that index.search expects; float32 matches the stored vectors.
query_embedding = model.encode([query_sentence]).astype('float32')

# Search top 3 most similar sentences
k = 3
distances, indices = index.search(query_embedding, k)

print("\nQuery Sentence:", query_sentence)
print("\nTop similar sentences:")
# Row 0 holds the results for the single query. Ids and distances are walked
# together so the two arrays cannot drift out of step.
# NOTE: IndexFlatL2 reports *squared* L2 distances (smaller = more similar).
for rank, (idx, dist) in enumerate(zip(indices[0], distances[0]), start=1):
    if idx < 0:
        # FAISS pads with id -1 when the index holds fewer than k vectors;
        # without this guard sentences[-1] would be printed as a bogus hit.
        continue
    print(f"{rank}. {sentences[idx]} (Distance: {dist:.4f})")
