In [1]:
# runtime/retriever.py
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import SentenceTransformerEmbeddings
import os

def load_vectorstore(persist_directory: str, collection_name: str, model_name: str = None):
    """
    Load an existing Chroma vectorstore from disk.

    Args:
        persist_directory: Directory the Chroma store was persisted to.
        collection_name: Name of the collection to open inside the store.
        model_name: Embedding model name passed to SentenceTransformerEmbeddings.
            When omitted (or empty), falls back to the EMBED_MODEL environment
            variable and then to "all-MiniLM-L6-v2".

    Returns:
        A Chroma instance, or None when ``persist_directory`` does not exist,
        is not a directory, or contains no entries.
    """
    # Validate the location first so the embedding model is never constructed
    # when there is nothing on disk to load.
    if not os.path.isdir(persist_directory):
        return None

    # scandir is used as a context manager so the directory handle is closed
    # deterministically even though only the first entry is inspected.
    with os.scandir(persist_directory) as entries:
        if next(entries, None) is None:
            return None

    model_name = model_name or os.getenv("EMBED_MODEL", "all-MiniLM-L6-v2")
    emb = SentenceTransformerEmbeddings(model_name=model_name)
    return Chroma(persist_directory=persist_directory, embedding_function=emb, collection_name=collection_name)

def retrieve(query: str, vectordb, k: int = 3):
    """
    Run a semantic similarity search for ``query`` against ``vectordb``.

    Args:
        query: Text to search for.
        vectordb: Object exposing ``similarity_search(query, k=...)``, or None.
        k: Number of results requested from the store (default 3).

    Returns:
        Whatever ``vectordb.similarity_search`` returns (a list of langchain
        Document objects), or an empty list when ``vectordb`` is None.
    """
    # No store available means nothing can match.
    return [] if vectordb is None else vectordb.similarity_search(query, k=k)



In [None]:
if __name__ == "__main__":
    # Location of the persisted store; overridable via PERSIST_DIRECTORY.
    persist_dir = os.getenv("PERSIST_DIRECTORY", "vectorstore")
    collection_name = "my_collection"

    # Open the on-disk vectorstore for that collection.
    loaded_vectordb = load_vectorstore(persist_dir, collection_name)

    # Smoke-test retrieval with a fixed sample query, asking for five hits.
    query = "Sample query text"
    results = retrieve(query, loaded_vectordb, 5)
    print(f"Retrieved {len(results)} documents for query: '{query}'")

  emb = SentenceTransformerEmbeddings(model_name=model_name)
  from .autonotebook import tqdm as notebook_tqdm
  return Chroma(persist_directory=persist_directory, embedding_function=emb, collection_name=collection_name)


Retrieved 2 documents for query: 'Sample query text'


In [3]:
# Echo `results` (bound by the most recent retrieve(...) call) so the notebook
# renders the repr of the returned Document list.
results

[Document(metadata={'page': 0, 'producer': 'Microsoft® Word for Microsoft 365', 'page_label': '1', 'total_pages': 2, 'source': '/Users/I325907/Desktop/AIML/GenAI/genai-rag-lab/data/Rajesh_B_Resume-AI-ML.pdf', 'creationdate': '2025-04-18T07:25:31+00:00', 'creator': 'Microsoft® Word for Microsoft 365', 'moddate': '2025-04-18T07:25:31+00:00'}, page_content='commitment to continuous learning, ethical AI development, and impactful \ninnovation. \n CAREER TIMELINE \n \n \n WORK EXPERIENCE  \nVantiva India Pvt Ltd. | Staff Engineer | 01/2024 – Present  \n \nKey Result Areas:  \n− AI-Driven Code Generation Tool: Designed and implemented an internal solution to \nautomate code creation, reducing development time. \n− Test Case Generation Tool: Built an AI-powered tool for precise and efficient test case \ncreation, improving software testing and productivity. \n− Bug Similarity Detection: Developed a Generative AI tool that identifies duplicate bugs, \nreducing test execution time by 30% and st

In [4]:
    # Re-run the sample search against the vectorstore loaded in an earlier cell,
    # requesting five documents.
    query = "Sample query text"
    results = retrieve(query, loaded_vectordb, 5)
    print(f"Retrieved {len(results)} documents for query: '{query}'")

Retrieved 5 documents for query: 'Sample query text'


In [5]:
# Echo `results` again after the re-run above so the refreshed Document list
# is rendered as this cell's output.
results

[Document(metadata={'producer': 'Microsoft® Word for Microsoft 365', 'creator': 'Microsoft® Word for Microsoft 365', 'moddate': '2025-04-18T07:25:31+00:00', 'creationdate': '2025-04-18T07:25:31+00:00', 'total_pages': 2, 'page': 0, 'source': '/Users/I325907/Desktop/AIML/GenAI/genai-rag-lab/data/Rajesh_B_Resume-AI-ML.pdf', 'page_label': '1'}, page_content='commitment to continuous learning, ethical AI development, and impactful \ninnovation. \n CAREER TIMELINE \n \n \n WORK EXPERIENCE  \nVantiva India Pvt Ltd. | Staff Engineer | 01/2024 – Present  \n \nKey Result Areas:  \n− AI-Driven Code Generation Tool: Designed and implemented an internal solution to \nautomate code creation, reducing development time. \n− Test Case Generation Tool: Built an AI-powered tool for precise and efficient test case \ncreation, improving software testing and productivity. \n− Bug Similarity Detection: Developed a Generative AI tool that identifies duplicate bugs, \nreducing test execution time by 30% and st