In [None]:
"""
Build Vector Database (FAISS) for Offline RAG
---------------------------------------------

This script:
1. Loads all documents from /data
2. Splits them into chunks
3. Creates the FAISS vector database using local embeddings (MiniLM)
4. Saves vector DB to faiss_db/

Run this before starting Streamlit:
    python build_vector_db.py
"""

from rag_pipeline import load_documents, split_documents, create_vector_db


def main() -> None:
    """Build the FAISS vector database from the documents on disk.

    Runs the three pipeline stages imported from ``rag_pipeline`` in order:
    ``load_documents``, then ``split_documents``, then ``create_vector_db``.
    A progress line is printed before and after each stage.

    If ``load_documents`` returns an empty result, a hint is printed and the
    function returns early without splitting or building anything.

    Returns:
        None. Progress and results are reported on standard output only.
    """
    # The status prefixes are emoji; they are stored as real Unicode
    # characters (not mis-decoded UTF-8 bytes) so they render correctly.
    print("📥 Loading documents from /data...")
    docs = load_documents()
    print(f"Loaded docs: {len(docs)}")

    # Nothing to index: tell the user where files are expected and stop.
    if not docs:
        print("⚠️ No documents found in the data/ folder.")
        print("Make sure you have placed PDF/TXT files inside: data/")
        return

    print("\n✂️ Splitting into chunks...")
    chunks = split_documents(docs)
    print(f"Chunks created: {len(chunks)}")

    print("\n🧠 Creating FAISS vector database...")
    # NOTE(review): the output location is decided inside create_vector_db;
    # the message below assumes it is faiss_db/ -- confirm against rag_pipeline.
    create_vector_db(chunks)

    print("\n✅ FAISS DB created successfully in faiss_db/")


# Run the build only when this file is executed as a script; importing it as
# a module defines main() without running it.
if __name__ == "__main__":
    main()


📥 Loading documents from /data...
Loaded docs: 47
✂️ Splitting into chunks...
Chunks: 174
🧠 Creating FAISS vector database...
[INFO] FAISS DB saved at: D:\Resume_parser\Simple RAG\faiss_db
✅ FAISS DB created successfully in faiss_db/
