In [1]:
!pip install chromadb sentence-transformers --quiet
!pip install ipywidgets



In [2]:
import json
from chromadb import PersistentClient
from sentence_transformers import SentenceTransformer
# Load the "all-MiniLM-L6-v2" SentenceTransformer once at module level;
# embed() below calls model.encode() on this shared instance.
model = SentenceTransformer("all-MiniLM-L6-v2")

def embed(text):
    """Encode ``text`` with the shared model and return plain Python lists.

    Args:
        text: Input handed straight through to ``model.encode``.

    Returns:
        The encoder output converted with ``.tolist()``.
    """
    vector = model.encode(text)
    # Chroma requires Python lists, so convert before handing the vector back.
    return vector.tolist()

In [3]:
# Behavioural / HR interview prompts paired with the reference answer.
hr_data = [
    {"question": prompt, "ideal_answer": reference}
    for prompt, reference in (
        (
            "Tell me about yourself",
            "A structured response focusing on background, experience, and relevance to the role.",
        ),
        (
            "Describe a challenge you overcame",
            "Use STAR method: Situation, Task, Action, Result. Highlight learning and impact.",
        ),
    )
]

# Technical interview prompts paired with the reference answer.
technical_data = [
    {"question": prompt, "ideal_answer": reference}
    for prompt, reference in (
        (
            "Explain OOP concepts",
            "OOP includes Encapsulation, Inheritance, Polymorphism, Abstraction. Provide examples.",
        ),
        (
            "What are Python decorators?",
            "Decorators wrap a function to extend behavior without modifying the function.",
        ),
    )
]

# Combined knowledge base: HR entries first, then technical entries.
all_data = [*hr_data, *technical_data]


# ======================================================
# 5. Initialize the persistent Chroma DB client and collection
# ======================================================
# Open the Chroma client backed by the "vector_db" path.
db_path = "vector_db"
client = PersistentClient(path=db_path)

# Fetch the collection if it already exists, otherwise create it.
# "hnsw:space" selects the similarity metric (cosine here).
collection_metadata = {"hnsw:space": "cosine"}
collection = client.get_or_create_collection(
    name="interview_knowledge",
    metadata=collection_metadata,
)

print("Vector DB ready!")


# ======================================================
# 6. Ingest Data into Chroma DB
# ======================================================
# Build the parallel payload lists Chroma expects: position k in each list
# describes the k-th entry of all_data.
ids = [f"doc_{i}" for i in range(len(all_data))]
documents = [item["ideal_answer"] for item in all_data]
# The ideal answer (not the question) is what gets embedded and searched.
embeddings = [embed(doc) for doc in documents]
metadatas = [{"question": item["question"]} for item in all_data]

# upsert rather than add: the ids are deterministic and the collection is
# persisted and reused via get_or_create_collection, so re-running this cell
# would otherwise try to add ids that already exist. upsert inserts new ids
# and overwrites existing ones, which keeps the cell idempotent and lets
# edited answers replace their stale copies.
collection.upsert(
    ids=ids,
    embeddings=embeddings,
    documents=documents,
    metadatas=metadatas,
)

print("Data ingestion complete!")


# ======================================================
# 7. Search Function
# ======================================================
def search_similar(text, top_k=3):
    """Query the collection for the stored entries nearest to ``text``.

    Args:
        text: Free text to embed with ``embed`` and use as the query vector.
        top_k: Number of results requested from the collection (default 3).

    Returns:
        Whatever ``collection.query`` returns for the single query vector.
    """
    # collection.query takes a batch of vectors, so wrap the single
    # embedding in a list.
    return collection.query(
        query_embeddings=[embed(text)],
        n_results=top_k,
    )


# ======================================================
# 8. Test Search
# ======================================================
query = "Polymorphism allows methods to behave differently."
results = search_similar(query)

# Each result field holds one inner list per query vector; only one query
# was sent, so take index 0 of each.
hit_documents = results["documents"][0]
hit_distances = results["distances"][0]
hit_metadatas = results["metadatas"][0]

print("\n===== SEARCH RESULTS =====")
for rank, (answer, distance, meta) in enumerate(
    zip(hit_documents, hit_distances, hit_metadatas), start=1
):
    print(f"\nResult {rank}:")
    # The value printed as "Score" is the distance reported by the collection
    # (configured for cosine space), so smaller means a closer match.
    print("Score:", distance)
    print("Question:", meta["question"])
    print("Ideal Answer:", answer)

Vector DB ready!
Data ingestion complete!

===== SEARCH RESULTS =====

Result 1:
Score: 0.4635941982269287
Question: Explain OOP concepts
Ideal Answer: OOP includes Encapsulation, Inheritance, Polymorphism, Abstraction. Provide examples.

Result 2:
Score: 0.6978434920310974
Question: What are Python decorators?
Ideal Answer: Decorators wrap a function to extend behavior without modifying the function.

Result 3:
Score: 0.8762738704681396
Question: Describe a challenge you overcame
Ideal Answer: Use STAR method: Situation, Task, Action, Result. Highlight learning and impact.
